/*
 * Common SMP CPU bringup/teardown functions
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/kthread.h>
#include <linux/smpboot.h>

#include "smpboot.h"

#ifdef CONFIG_SMP

#ifdef CONFIG_GENERIC_SMP_IDLE_THREAD
/*
 * For the hotplug case we keep the task structs around and reuse
 * them.
 */
static DEFINE_PER_CPU(struct task_struct *, idle_threads);

struct task_struct *idle_thread_get(unsigned int cpu)
{
	struct task_struct *tsk = per_cpu(idle_threads, cpu);

	if (!tsk)
		return ERR_PTR(-ENOMEM);
	init_idle(tsk, cpu);
	return tsk;
}
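
/*
 * Usage sketch (editor's illustration, not part of this file): the
 * generic hotplug path in kernel/cpu.c fetches the cached idle task
 * roughly like this before handing it to the architecture's
 * __cpu_up():
 *
 *	struct task_struct *idle = idle_thread_get(cpu);
 *
 *	if (IS_ERR(idle))
 *		return PTR_ERR(idle);
 *	ret = __cpu_up(cpu, idle);
 *
 * The exact error handling differs; this only illustrates why
 * idle_thread_get() returns ERR_PTR(-ENOMEM) rather than NULL.
 */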

void __init idle_thread_set_boot_cpu(void)
{
	per_cpu(idle_threads, smp_processor_id()) = current;
}

/**
 * idle_init - Initialize the idle thread for a cpu
 * @cpu: The cpu for which the idle thread should be initialized
 *
 * Creates the thread if it does not exist.
 */
static inline void idle_init(unsigned int cpu)
{
	struct task_struct *tsk = per_cpu(idle_threads, cpu);

	if (!tsk) {
		tsk = fork_idle(cpu);
		if (IS_ERR(tsk))
			pr_err("SMP: fork_idle() failed for CPU %u\n", cpu);
		else
			per_cpu(idle_threads, cpu) = tsk;
	}
}

/**
 * idle_threads_init - Initialize idle threads for all cpus
 */
void __init idle_threads_init(void)
{
	unsigned int cpu, boot_cpu;

	boot_cpu = smp_processor_id();

	for_each_possible_cpu(cpu) {
		if (cpu != boot_cpu)
			idle_init(cpu);
	}
}
#endif

#endif /* #ifdef CONFIG_SMP */

static LIST_HEAD(hotplug_threads);
static DEFINE_MUTEX(smpboot_threads_lock);

struct smpboot_thread_data {
	unsigned int cpu;
	unsigned int status;
	struct smp_hotplug_thread *ht;
};

enum {
	HP_THREAD_NONE = 0,
	HP_THREAD_ACTIVE,
	HP_THREAD_PARKED,
};

/**
 * smpboot_thread_fn - percpu hotplug thread loop function
 * @data: thread data pointer
 *
 * Checks for thread stop and park conditions. Calls the necessary
 * setup, cleanup, park and unpark functions for the registered
 * thread.
 *
 * Returns 0 when the thread should exit (kthread_stop() was called);
 * otherwise the loop never returns.
 */
static int smpboot_thread_fn(void *data)
{
	struct smpboot_thread_data *td = data;
	struct smp_hotplug_thread *ht = td->ht;

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		preempt_disable();
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			/* cleanup must mirror setup */
			if (ht->cleanup && td->status != HP_THREAD_NONE)
				ht->cleanup(td->cpu, cpu_online(td->cpu));
			kfree(td);
			return 0;
		}

		if (kthread_should_park()) {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->park && td->status == HP_THREAD_ACTIVE) {
				BUG_ON(td->cpu != smp_processor_id());
				ht->park(td->cpu);
				td->status = HP_THREAD_PARKED;
			}
			kthread_parkme();
			/* We might have been woken for stop */
			continue;
		}

		BUG_ON(td->cpu != smp_processor_id());

		/* Check for state change setup */
		switch (td->status) {
		case HP_THREAD_NONE:
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->setup)
				ht->setup(td->cpu);
			td->status = HP_THREAD_ACTIVE;
			continue;

		case HP_THREAD_PARKED:
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->unpark)
				ht->unpark(td->cpu);
			td->status = HP_THREAD_ACTIVE;
			continue;
		}

		if (!ht->thread_should_run(td->cpu)) {
			preempt_enable_no_resched();
			schedule();
		} else {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			ht->thread_fn(td->cpu);
		}
	}
}
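
/*
 * Callback contract implemented by the loop above (editor's summary,
 * not upstream documentation), for a struct smp_hotplug_thread client:
 *
 *	setup(cpu)              first time the thread runs on @cpu
 *	park(cpu)/unpark(cpu)   around parking/unparking the thread on @cpu
 *	cleanup(cpu, online)    when the thread is finally stopped
 *	thread_should_run(cpu)  polled with preemption disabled; return
 *	                        false to sleep, true to have thread_fn(cpu)
 *	                        invoked with preemption enabled
 *
 * See the registration sketch further below for a concrete client.
 */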

static int
__smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
{
	struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);
	struct smpboot_thread_data *td;

	if (tsk)
		return 0;

	td = kzalloc_node(sizeof(*td), GFP_KERNEL, cpu_to_node(cpu));
	if (!td)
		return -ENOMEM;
	td->cpu = cpu;
	td->ht = ht;

	tsk = kthread_create_on_cpu(smpboot_thread_fn, td, cpu,
				    ht->thread_comm);
	if (IS_ERR(tsk)) {
		kfree(td);
		return PTR_ERR(tsk);
	}
	/*
	 * Park the thread so that it could start right on the CPU
	 * when it is available.
	 */
	kthread_park(tsk);
	get_task_struct(tsk);
	*per_cpu_ptr(ht->store, cpu) = tsk;
	if (ht->create) {
		/*
		 * Make sure that the task has actually scheduled out
		 * into park position, before calling the create
		 * callback. At least the migration thread callback
		 * requires that the task is off the runqueue.
		 */
		if (!wait_task_inactive(tsk, TASK_PARKED))
			WARN_ON(1);
		else
			ht->create(cpu);
	}
	return 0;
}

int smpboot_create_threads(unsigned int cpu)
{
	struct smp_hotplug_thread *cur;
	int ret = 0;

	mutex_lock(&smpboot_threads_lock);
	list_for_each_entry(cur, &hotplug_threads, list) {
		ret = __smpboot_create_thread(cur, cpu);
		if (ret)
			break;
	}
	mutex_unlock(&smpboot_threads_lock);
	return ret;
}

static void smpboot_unpark_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
{
	struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);

	if (!ht->selfparking)
		kthread_unpark(tsk);
}

int smpboot_unpark_threads(unsigned int cpu)
{
	struct smp_hotplug_thread *cur;

	mutex_lock(&smpboot_threads_lock);
	list_for_each_entry(cur, &hotplug_threads, list)
		if (cpumask_test_cpu(cpu, cur->cpumask))
			smpboot_unpark_thread(cur, cpu);
	mutex_unlock(&smpboot_threads_lock);
	return 0;
}

static void smpboot_park_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
{
	struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);

	if (tsk && !ht->selfparking)
		kthread_park(tsk);
}

int smpboot_park_threads(unsigned int cpu)
{
	struct smp_hotplug_thread *cur;

	mutex_lock(&smpboot_threads_lock);
	list_for_each_entry_reverse(cur, &hotplug_threads, list)
		smpboot_park_thread(cur, cpu);
	mutex_unlock(&smpboot_threads_lock);
	return 0;
}

static void smpboot_destroy_threads(struct smp_hotplug_thread *ht)
{
	unsigned int cpu;

	/* We also need to destroy the parked threads of offline cpus */
	for_each_possible_cpu(cpu) {
		struct task_struct *tsk = *per_cpu_ptr(ht->store, cpu);

		if (tsk) {
			kthread_stop(tsk);
			put_task_struct(tsk);
			*per_cpu_ptr(ht->store, cpu) = NULL;
		}
	}
}

/**
 * smpboot_register_percpu_thread_cpumask - Register a per_cpu thread related
 *					    to hotplug
 * @plug_thread: Hotplug thread descriptor
 * @cpumask: The cpumask where threads run
 *
 * Creates and starts the threads on all online cpus.
 */
int smpboot_register_percpu_thread_cpumask(struct smp_hotplug_thread *plug_thread,
					   const struct cpumask *cpumask)
{
	unsigned int cpu;
	int ret = 0;

	if (!alloc_cpumask_var(&plug_thread->cpumask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_copy(plug_thread->cpumask, cpumask);

	get_online_cpus();
	mutex_lock(&smpboot_threads_lock);
	for_each_online_cpu(cpu) {
		ret = __smpboot_create_thread(plug_thread, cpu);
		if (ret) {
			smpboot_destroy_threads(plug_thread);
			free_cpumask_var(plug_thread->cpumask);
			goto out;
		}
		if (cpumask_test_cpu(cpu, cpumask))
			smpboot_unpark_thread(plug_thread, cpu);
	}
	list_add(&plug_thread->list, &hotplug_threads);
out:
	mutex_unlock(&smpboot_threads_lock);
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread_cpumask);
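
/*
 * Registration sketch (editor's illustration, not part of this file).
 * A client supplies per-CPU task storage plus its callbacks and then
 * registers once; "foo" and its callbacks below are made-up names:
 *
 *	static DEFINE_PER_CPU(struct task_struct *, foo_task);
 *
 *	static struct smp_hotplug_thread foo_threads = {
 *		.store			= &foo_task,
 *		.thread_should_run	= foo_should_run,
 *		.thread_fn		= foo_thread_fn,
 *		.thread_comm		= "foo/%u",
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return smpboot_register_percpu_thread(&foo_threads);
 *	}
 *
 * smpboot_register_percpu_thread() is the <linux/smpboot.h> wrapper
 * that passes cpu_possible_mask to the _cpumask variant above.
 * In-tree users such as ksoftirqd follow this pattern.
 */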

/**
 * smpboot_unregister_percpu_thread - Unregister a per_cpu thread related to hotplug
 * @plug_thread: Hotplug thread descriptor
 *
 * Stops all threads on all possible cpus.
 */
void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
{
	get_online_cpus();
	mutex_lock(&smpboot_threads_lock);
	list_del(&plug_thread->list);
	smpboot_destroy_threads(plug_thread);
	mutex_unlock(&smpboot_threads_lock);
	put_online_cpus();
	free_cpumask_var(plug_thread->cpumask);
}
EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);

/**
 * smpboot_update_cpumask_percpu_thread - Adjust which per_cpu hotplug threads stay parked
 * @plug_thread: Hotplug thread descriptor
 * @new: Revised mask to use
 *
 * The cpumask field in the smp_hotplug_thread must not be updated directly
 * by the client, but only by calling this function.
 * This function can only be called on a registered smp_hotplug_thread.
 */
int smpboot_update_cpumask_percpu_thread(struct smp_hotplug_thread *plug_thread,
					 const struct cpumask *new)
{
	struct cpumask *old = plug_thread->cpumask;
	cpumask_var_t tmp;
	unsigned int cpu;

	if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
		return -ENOMEM;

	get_online_cpus();
	mutex_lock(&smpboot_threads_lock);

	/* Park threads that were exclusively enabled on the old mask. */
	cpumask_andnot(tmp, old, new);
	for_each_cpu_and(cpu, tmp, cpu_online_mask)
		smpboot_park_thread(plug_thread, cpu);

	/* Unpark threads that are exclusively enabled on the new mask. */
	cpumask_andnot(tmp, new, old);
	for_each_cpu_and(cpu, tmp, cpu_online_mask)
		smpboot_unpark_thread(plug_thread, cpu);

	cpumask_copy(old, new);

	mutex_unlock(&smpboot_threads_lock);
	put_online_cpus();

	free_cpumask_var(tmp);

	return 0;
}
EXPORT_SYMBOL_GPL(smpboot_update_cpumask_percpu_thread);
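
/*
 * Usage sketch (editor's illustration): a client that wants its
 * per-CPU threads to run on only a subset of CPUs parks the rest by
 * handing a revised mask to the function above; "foo_threads" and
 * "foo_allowed_mask" are made-up names:
 *
 *	err = smpboot_update_cpumask_percpu_thread(&foo_threads,
 *						   foo_allowed_mask);
 *
 * The lockup watchdog's cpumask sysctl is implemented on top of this
 * interface.
 */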

static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);

/*
 * Called to poll specified CPU's state, for example, when waiting for
 * a CPU to come online.
 */
int cpu_report_state(int cpu)
{
	return atomic_read(&per_cpu(cpu_hotplug_state, cpu));
}

/*
 * If CPU has died properly, set its state to CPU_UP_PREPARE and
 * return success. Otherwise, return -EBUSY if the CPU died after
 * cpu_wait_death() timed out. And yet otherwise again, return -EAGAIN
 * if cpu_wait_death() timed out and the CPU still hasn't gotten around
 * to dying. In the latter two cases, the CPU might not be set up
 * properly, but it is up to the arch-specific code to decide.
 * Finally, -EIO indicates an unanticipated problem.
 *
 * Note that it is permissible to omit this call entirely, as is
 * done in architectures that do no CPU-hotplug error checking.
 */
int cpu_check_up_prepare(int cpu)
{
	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
		return 0;
	}

	switch (atomic_read(&per_cpu(cpu_hotplug_state, cpu))) {

	case CPU_POST_DEAD:

		/* The CPU died properly, so just start it up again. */
		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
		return 0;

	case CPU_DEAD_FROZEN:

		/*
		 * Timeout during CPU death, so let caller know.
		 * The outgoing CPU completed its processing, but after
		 * cpu_wait_death() timed out and reported the error. The
		 * caller is free to proceed, in which case the state
		 * will be reset properly by cpu_set_state_online().
		 * Proceeding despite this -EBUSY return makes sense
		 * for systems where the outgoing CPUs take themselves
		 * offline, with no post-death manipulation required from
		 * a surviving CPU.
		 */
		return -EBUSY;

	case CPU_BROKEN:

		/*
		 * The most likely reason we got here is that there was
		 * a timeout during CPU death, and the outgoing CPU never
		 * did complete its processing. This could happen on
		 * a virtualized system if the outgoing VCPU gets preempted
		 * for more than five seconds, and the user attempts to
		 * immediately online that same CPU. Trying again later
		 * might return -EBUSY above, hence -EAGAIN.
		 */
		return -EAGAIN;

	default:

		/* Should not happen. Famous last words. */
		return -EIO;
	}
}

/*
 * Mark the specified CPU online.
 *
 * Note that it is permissible to omit this call entirely, as is
 * done in architectures that do no CPU-hotplug error checking.
 */
void cpu_set_state_online(int cpu)
{
	(void)atomic_xchg(&per_cpu(cpu_hotplug_state, cpu), CPU_ONLINE);
}
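
/*
 * Pairing sketch (editor's illustration): an architecture that opts
 * in to this error checking calls cpu_check_up_prepare() from its
 * bringup path and cpu_set_state_online() from the freshly booted CPU
 * once it is up; "arch_cpu_up" and "arch_boot_secondary" are made-up
 * hook names:
 *
 *	int arch_cpu_up(unsigned int cpu, struct task_struct *idle)
 *	{
 *		int ret = cpu_check_up_prepare(cpu);
 *
 *		// Per the CPU_DEAD_FROZEN comment above, -EBUSY may be
 *		// tolerated on systems whose CPUs park themselves.
 *		if (ret && ret != -EBUSY)
 *			return ret;
 *		return arch_boot_secondary(cpu, idle);
 *	}
 *
 * The incoming CPU then calls cpu_set_state_online(smp_processor_id())
 * once it is fully functional.
 */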

#ifdef CONFIG_HOTPLUG_CPU

/*
 * Wait for the specified CPU to exit the idle loop and die.
 */
bool cpu_wait_death(unsigned int cpu, int seconds)
{
	int jf_left = seconds * HZ;
	int oldstate;
	bool ret = true;
	int sleep_jf = 1;

	might_sleep();

	/* The outgoing CPU will normally get done quite quickly. */
	if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD)
		goto update_state;
	udelay(5);

	/* But if the outgoing CPU dawdles, wait increasingly long times. */
	while (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) != CPU_DEAD) {
		schedule_timeout_uninterruptible(sleep_jf);
		jf_left -= sleep_jf;
		if (jf_left <= 0)
			break;
		sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10);
	}
update_state:
	oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
	if (oldstate == CPU_DEAD) {
		/* Outgoing CPU died normally, update state. */
		smp_mb(); /* atomic_read() before update. */
		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD);
	} else {
		/* Outgoing CPU still hasn't died, set state accordingly. */
		if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
				   oldstate, CPU_BROKEN) != oldstate)
			goto update_state;
		ret = false;
	}
	return ret;
}

/*
 * Called by the outgoing CPU to report its successful death. Return
 * false if this report follows the surviving CPU's timing out.
 *
 * A separate "CPU_DEAD_FROZEN" is used when the surviving CPU
 * timed out. This approach allows architectures to omit calls to
 * cpu_check_up_prepare() and cpu_set_state_online() without defeating
 * the next cpu_wait_death()'s polling loop.
 */
bool cpu_report_death(void)
{
	int oldstate;
	int newstate;
	int cpu = smp_processor_id();

	do {
		oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
		if (oldstate != CPU_BROKEN)
			newstate = CPU_DEAD;
		else
			newstate = CPU_DEAD_FROZEN;
	} while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
				oldstate, newstate) != oldstate);
	return newstate == CPU_DEAD;
}
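
/*
 * Teardown pairing sketch (editor's illustration): the surviving CPU
 * waits for the victim while the victim acknowledges its own death;
 * "arch_cpu_die" and "arch_play_dead" are made-up names standing in
 * for the architecture hooks:
 *
 *	void arch_cpu_die(unsigned int cpu)		// surviving CPU
 *	{
 *		if (cpu_wait_death(cpu, 5))
 *			pr_info("CPU %u is now offline\n", cpu);
 *		else
 *			pr_err("CPU %u did not die\n", cpu);
 *	}
 *
 *	void arch_play_dead(void)			// outgoing CPU
 *	{
 *		idle_task_exit();
 *		(void)cpu_report_death();
 *		// ... halt or power down this CPU ...
 *	}
 */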
532 | |
533 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
534 |