1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234
|
From 7a834bb35ed751bee34c510cc6999cd239842f0d Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 1 Dec 2021 17:41:09 +0100
Subject: [PATCH 12/68] softirq: Use a dedicated thread for timer wakeups.
Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/6.1/older/patches-6.1.141-rt52.tar.xz
A timer/hrtimer softirq is raised in-IRQ context. With threaded
interrupts enabled or on PREEMPT_RT this leads to waking the ksoftirqd
for the processing of the softirq.
Once the ksoftirqd is marked as pending (or is running) it will collect
all raised softirqs. This in turn means that a softirq which would have
been processed at the end of the threaded interrupt, which runs at an
elevated priority, is now moved to ksoftirqd which runs at SCHED_OTHER
priority and competes with every regular task for CPU resources.
This introduces long delays on heavy loaded systems and is not desired
especially if the system is not overloaded by the softirqs.
Split the TIMER_SOFTIRQ and HRTIMER_SOFTIRQ processing into a dedicated
timers thread and let it run at the lowest SCHED_FIFO priority.
RT tasks are are woken up from hardirq context so only timer_list timers
and hrtimers for "regular" tasks are processed here. The higher priority
ensures that wakeups are performed before scheduling SCHED_OTHER tasks.
Using a dedicated variable to store the pending softirq bits values
ensure that the timer are not accidentally picked up by ksoftirqd and
other threaded interrupts.
It shouldn't be picked up by ksoftirqd since it runs at lower priority.
However if the timer bits are ORed while a threaded interrupt is
running, then the timer softirq would be performed at higher priority.
The new timer thread will block on the softirq lock before it starts
softirq work. This "race window" isn't closed because while timer thread
is performing the softirq it can get PI-boosted via the softirq lock by
a random force-threaded thread.
The timer thread can pick up pending softirqs from ksoftirqd but only
if the softirq load is high. It is not be desired that the picked up
softirqs are processed at SCHED_FIFO priority under high softirq load
but this can already happen by a PI-boost by a force-threaded interrupt.
Reported-by: kernel test robot <lkp@intel.com> [ static timer_threads ]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
include/linux/interrupt.h | 16 +++++++
kernel/softirq.c | 92 +++++++++++++++++++++++++++++++++++++--
kernel/time/hrtimer.c | 4 +-
kernel/time/timer.c | 2 +-
4 files changed, 108 insertions(+), 6 deletions(-)
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 2610a7d156da..b0da2976f899 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -609,6 +609,22 @@ extern void __raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq_irqoff(unsigned int nr);
extern void raise_softirq(unsigned int nr);
+#ifdef CONFIG_PREEMPT_RT
+extern void raise_timer_softirq(void);
+extern void raise_hrtimer_softirq(void);
+
+#else
+static inline void raise_timer_softirq(void)
+{
+ raise_softirq(TIMER_SOFTIRQ);
+}
+
+static inline void raise_hrtimer_softirq(void)
+{
+ raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+}
+#endif
+
DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
static inline struct task_struct *this_cpu_ksoftirqd(void)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 9ab5ca399a99..92cc1de67205 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -666,6 +666,29 @@ static inline void tick_irq_exit(void)
#endif
}
+#ifdef CONFIG_PREEMPT_RT
+static DEFINE_PER_CPU(struct task_struct *, timersd);
+static DEFINE_PER_CPU(unsigned long, pending_timer_softirq);
+
+static unsigned int local_pending_timers(void)
+{
+ return __this_cpu_read(pending_timer_softirq);
+}
+
+static void wake_timersd(void)
+{
+ struct task_struct *tsk = __this_cpu_read(timersd);
+
+ if (tsk)
+ wake_up_process(tsk);
+}
+
+#else
+
+static inline void wake_timersd(void) { }
+
+#endif
+
static inline void __irq_exit_rcu(void)
{
#ifndef __ARCH_IRQ_EXIT_IRQS_DISABLED
@@ -675,8 +698,13 @@ static inline void __irq_exit_rcu(void)
#endif
account_hardirq_exit(current);
preempt_count_sub(HARDIRQ_OFFSET);
- if (!in_interrupt() && local_softirq_pending())
- invoke_softirq();
+ if (!in_interrupt()) {
+ if (local_softirq_pending())
+ invoke_softirq();
+
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && local_pending_timers())
+ wake_timersd();
+ }
tick_irq_exit();
}
@@ -1005,12 +1033,70 @@ static struct smp_hotplug_thread softirq_threads = {
.thread_comm = "ksoftirqd/%u",
};
+#ifdef CONFIG_PREEMPT_RT
+static void timersd_setup(unsigned int cpu)
+{
+ sched_set_fifo_low(current);
+}
+
+static int timersd_should_run(unsigned int cpu)
+{
+ return local_pending_timers();
+}
+
+static void run_timersd(unsigned int cpu)
+{
+ unsigned int timer_si;
+
+ ksoftirqd_run_begin();
+
+ timer_si = local_pending_timers();
+ __this_cpu_write(pending_timer_softirq, 0);
+ or_softirq_pending(timer_si);
+
+ __do_softirq();
+
+ ksoftirqd_run_end();
+}
+
+static void raise_ktimers_thread(unsigned int nr)
+{
+ trace_softirq_raise(nr);
+ __this_cpu_or(pending_timer_softirq, 1 << nr);
+}
+
+void raise_hrtimer_softirq(void)
+{
+ raise_ktimers_thread(HRTIMER_SOFTIRQ);
+}
+
+void raise_timer_softirq(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ raise_ktimers_thread(TIMER_SOFTIRQ);
+ wake_timersd();
+ local_irq_restore(flags);
+}
+
+static struct smp_hotplug_thread timer_threads = {
+ .store = &timersd,
+ .setup = timersd_setup,
+ .thread_should_run = timersd_should_run,
+ .thread_fn = run_timersd,
+ .thread_comm = "ktimers/%u",
+};
+#endif
+
static __init int spawn_ksoftirqd(void)
{
cpuhp_setup_state_nocalls(CPUHP_SOFTIRQ_DEAD, "softirq:dead", NULL,
takeover_tasklets);
BUG_ON(smpboot_register_percpu_thread(&softirq_threads));
-
+#ifdef CONFIG_PREEMPT_RT
+ BUG_ON(smpboot_register_percpu_thread(&timer_threads));
+#endif
return 0;
}
early_initcall(spawn_ksoftirqd);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index b3860ec12450..bf167c424095 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1872,7 +1872,7 @@ void hrtimer_interrupt(struct clock_event_device *dev)
if (!ktime_before(now, cpu_base->softirq_expires_next)) {
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->softirq_activated = 1;
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ raise_hrtimer_softirq();
}
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
@@ -1985,7 +1985,7 @@ void hrtimer_run_queues(void)
if (!ktime_before(now, cpu_base->softirq_expires_next)) {
cpu_base->softirq_expires_next = KTIME_MAX;
cpu_base->softirq_activated = 1;
- raise_softirq_irqoff(HRTIMER_SOFTIRQ);
+ raise_hrtimer_softirq();
}
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index e09852be4e63..591cac6f03ec 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1852,7 +1852,7 @@ static void run_local_timers(void)
if (time_before(jiffies, base->next_expiry))
return;
}
- raise_softirq(TIMER_SOFTIRQ);
+ raise_timer_softirq();
}
/*
--
2.49.0
|