/*
 *  linux/kernel/panic.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * This function is used throughout the kernel (including mm and fs)
 * to indicate a major problem.
 */
#include <linux/debug_locks.h>
#include <linux/interrupt.h>
#include <linux/kmsg_dump.h>
#include <linux/kallsyms.h>
#include <linux/notifier.h>
#include <linux/vt_kern.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/ftrace.h>
#include <linux/reboot.h>
#include <linux/delay.h>
#include <linux/kexec.h>
#include <linux/sched.h>
#include <linux/sysrq.h>
#include <linux/init.h>
#include <linux/nmi.h>
#include <linux/console.h>
#include <linux/bug.h>
#ifdef CONFIG_AMLOGIC_RAMDUMP
#include <linux/amlogic/ramdump.h>
#endif

#define PANIC_TIMER_STEP 100
#define PANIC_BLINK_SPD 18

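/*
 * Panic/oops behaviour tunables; most are also exposed as kernel boot
 * parameters (see the core_param() calls at the bottom of this file).
 */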
int panic_on_oops = CONFIG_PANIC_ON_OOPS_VALUE;
static unsigned long tainted_mask;
static int pause_on_oops;
static int pause_on_oops_flag;
static DEFINE_SPINLOCK(pause_on_oops_lock);
bool crash_kexec_post_notifiers;
int panic_on_warn __read_mostly;

int panic_timeout = CONFIG_PANIC_TIMEOUT;
EXPORT_SYMBOL_GPL(panic_timeout);

ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
EXPORT_SYMBOL(panic_notifier_list);

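/* Default panic_blink handler: blink nothing and report a 0 ms wait. */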
static long no_blink(int state)
{
	return 0;
}

/* Returns how long it waited in ms */
long (*panic_blink)(int state);
EXPORT_SYMBOL(panic_blink);

/*
 * Stop ourselves in panic -- architecture code may override this
 */
void __weak panic_smp_self_stop(void)
{
	while (1)
		cpu_relax();
}

/*
 * Stop ourselves in NMI context if another CPU has already panicked. Arch code
 * may override this to prepare for crash dumping, e.g. save regs info.
 */
void __weak nmi_panic_self_stop(struct pt_regs *regs)
{
	panic_smp_self_stop();
}

/*
 * Stop other CPUs in panic. Architecture dependent code may override this
 * with a more suitable version. For example, if the architecture supports
 * crash dump, it should save the registers of each stopped CPU and disable
 * per-CPU features such as virtualization extensions.
 */
void __weak crash_smp_send_stop(void)
{
	static int cpus_stopped;

	/*
	 * This function can be called twice on the panic path, but it must
	 * only execute once.
	 */
	if (cpus_stopped)
		return;

	/*
	 * Note smp_send_stop is the usual smp shutdown function, which
	 * unfortunately means it may not be hardened to work in a panic
	 * situation.
	 */
	smp_send_stop();
	cpus_stopped = 1;
}

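/*
 * ID of the CPU that first entered panic(), or PANIC_CPU_INVALID if no
 * CPU has panicked yet; used to serialize concurrent panic() calls.
 */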
atomic_t panic_cpu = ATOMIC_INIT(PANIC_CPU_INVALID);

/*
 * A variant of panic() called from NMI context. We return if we've already
 * panicked on this CPU. If another CPU already panicked, loop in
 * nmi_panic_self_stop() which can provide architecture dependent code such
 * as saving register state for crash dump.
 */
void nmi_panic(struct pt_regs *regs, const char *msg)
{
	int old_cpu, cpu;

	cpu = raw_smp_processor_id();
	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, cpu);

	if (old_cpu == PANIC_CPU_INVALID)
		panic("%s", msg);
	else if (old_cpu != cpu)
		nmi_panic_self_stop(regs);
}
EXPORT_SYMBOL(nmi_panic);

/**
 * panic - halt the system
 * @fmt: The text string to print
 *
 * Display a message, then perform cleanups.
 *
 * This function never returns.
 */
void panic(const char *fmt, ...)
{
	static char buf[1024];
	va_list args;
	long i, i_next = 0;
	int state = 0;
	int old_cpu, this_cpu;
	bool _crash_kexec_post_notifiers = crash_kexec_post_notifiers;

	/*
	 * Disable local interrupts. This will prevent panic_smp_self_stop
	 * from deadlocking the first cpu that invokes the panic, since
	 * there is nothing to prevent an interrupt handler (that runs
	 * after setting panic_cpu) from invoking panic() again.
	 */
	local_irq_disable();

	/*
	 * It's possible to come here directly from a panic-assertion and
	 * not have preempt disabled. Some functions called from here want
	 * preempt to be disabled. No point enabling it later though...
	 *
	 * Only one CPU is allowed to execute the panic code from here. For
	 * multiple parallel invocations of panic, all other CPUs either
	 * stop themselves or will wait until they are stopped by the 1st CPU
	 * with smp_send_stop().
	 *
	 * `old_cpu == PANIC_CPU_INVALID' means this is the 1st CPU which
	 * comes here, so go ahead.
	 * `old_cpu == this_cpu' means we came from nmi_panic() which sets
	 * panic_cpu to this CPU. In this case, this is also the 1st CPU.
	 */
	this_cpu = raw_smp_processor_id();
	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);

	if (old_cpu != PANIC_CPU_INVALID && old_cpu != this_cpu)
		panic_smp_self_stop();

	console_verbose();
	bust_spinlocks(1);
	va_start(args, fmt);
	vsnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);
	pr_emerg("Kernel panic - not syncing: %s\n", buf);
#ifdef CONFIG_DEBUG_BUGVERBOSE
	/*
	 * Avoid nested stack-dumping if a panic occurs during oops processing
	 */
	if (!test_taint(TAINT_DIE) && oops_in_progress <= 1)
		dump_stack();
#endif

#ifdef CONFIG_AMLOGIC_RAMDUMP
	ramdump_sync_data();
#endif
	/*
	 * If we have crashed and we have a crash kernel loaded let it handle
	 * everything else.
	 * If we want to run this after calling panic_notifiers, pass
	 * the "crash_kexec_post_notifiers" option to the kernel.
	 *
	 * Bypass the panic_cpu check and call __crash_kexec directly.
	 */
	if (!_crash_kexec_post_notifiers) {
		printk_nmi_flush_on_panic();
		__crash_kexec(NULL);

		/*
		 * Note smp_send_stop is the usual smp shutdown function, which
		 * unfortunately means it may not be hardened to work in a
		 * panic situation.
		 */
		smp_send_stop();
	} else {
		/*
		 * If we want to do a crash dump after the notifier calls and
		 * kmsg_dump, we will need architecture dependent extra work
		 * in addition to stopping other CPUs.
		 */
		crash_smp_send_stop();
	}

	/*
	 * Run any panic handlers, including those that might need to
	 * add information to the kmsg dump output.
	 */
	atomic_notifier_call_chain(&panic_notifier_list, 0, buf);

	/* Call flush even a second time; it tries harder with a single online CPU */
	printk_nmi_flush_on_panic();
	kmsg_dump(KMSG_DUMP_PANIC);

	/*
	 * If you doubt that kdump always works correctly in any situation,
	 * "crash_kexec_post_notifiers" offers you a chance to run the
	 * panic_notifiers and dump the kmsg before kdump.
	 * Note: since some panic_notifiers can make the crashed kernel
	 * even more unstable, this also increases the risk that kdump fails.
	 *
	 * Bypass the panic_cpu check and call __crash_kexec directly.
	 */
	if (_crash_kexec_post_notifiers)
		__crash_kexec(NULL);

#ifdef CONFIG_VT
	unblank_screen();
#endif
	console_unblank();

	/*
	 * We may have ended up stopping the CPU holding the lock (in
	 * smp_send_stop()) while still having some valuable data in the console
	 * buffer. Try to acquire the lock then release it regardless of the
	 * result. The release will also print the buffers out. Lock debugging
	 * should be disabled to avoid reporting bad unlock balance when
	 * panic() is not being called from OOPS.
	 */
	debug_locks_off();
	console_flush_on_panic();

	if (!panic_blink)
		panic_blink = no_blink;

	if (panic_timeout > 0) {
		/*
		 * Delay timeout seconds before rebooting the machine.
		 * We can't use the "normal" timers since we just panicked.
		 */
		pr_emerg("Rebooting in %d seconds..\n", panic_timeout);

		for (i = 0; i < panic_timeout * 1000; i += PANIC_TIMER_STEP) {
			touch_nmi_watchdog();
			if (i >= i_next) {
				i += panic_blink(state ^= 1);
				i_next = i + 3600 / PANIC_BLINK_SPD;
			}
			mdelay(PANIC_TIMER_STEP);
		}
	}
	if (panic_timeout != 0) {
		/*
		 * This will not be a clean reboot, with everything
		 * shutting down. But if there is a chance of
		 * rebooting the system it will be rebooted.
		 */
		emergency_restart();
	}
#ifdef __sparc__
	{
		extern int stop_a_enabled;
		/* Make sure the user can actually press Stop-A (L1-A) */
		stop_a_enabled = 1;
		pr_emerg("Press Stop-A (L1-A) to return to the boot prom\n");
	}
#endif
#if defined(CONFIG_S390)
	{
		unsigned long caller;

		caller = (unsigned long)__builtin_return_address(0);
		disabled_wait(caller);
	}
#endif
	pr_emerg("---[ end Kernel panic - not syncing: %s\n", buf);
	local_irq_enable();
	for (i = 0; ; i += PANIC_TIMER_STEP) {
		touch_softlockup_watchdog();
		if (i >= i_next) {
			i += panic_blink(state ^= 1);
			i_next = i + 3600 / PANIC_BLINK_SPD;
		}
		mdelay(PANIC_TIMER_STEP);
	}
}
EXPORT_SYMBOL(panic);

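/* Maps a TAINT_* bit to the characters printed when it is set or clear. */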
struct tnt {
	u8	bit;
	char	true;
	char	false;
};

static const struct tnt tnts[] = {
	{ TAINT_PROPRIETARY_MODULE,	'P', 'G' },
	{ TAINT_FORCED_MODULE,		'F', ' ' },
	{ TAINT_CPU_OUT_OF_SPEC,	'S', ' ' },
	{ TAINT_FORCED_RMMOD,		'R', ' ' },
	{ TAINT_MACHINE_CHECK,		'M', ' ' },
	{ TAINT_BAD_PAGE,		'B', ' ' },
	{ TAINT_USER,			'U', ' ' },
	{ TAINT_DIE,			'D', ' ' },
	{ TAINT_OVERRIDDEN_ACPI_TABLE,	'A', ' ' },
	{ TAINT_WARN,			'W', ' ' },
	{ TAINT_CRAP,			'C', ' ' },
	{ TAINT_FIRMWARE_WORKAROUND,	'I', ' ' },
	{ TAINT_OOT_MODULE,		'O', ' ' },
	{ TAINT_UNSIGNED_MODULE,	'E', ' ' },
	{ TAINT_SOFTLOCKUP,		'L', ' ' },
	{ TAINT_LIVEPATCH,		'K', ' ' },
};

/**
 * print_tainted - return a string to represent the kernel taint state.
 *
 *  'P' - Proprietary module has been loaded.
 *  'F' - Module has been forcibly loaded.
 *  'S' - SMP with CPUs not designed for SMP.
 *  'R' - User forced a module unload.
 *  'M' - System experienced a machine check exception.
 *  'B' - System has hit bad_page.
 *  'U' - Userspace-defined naughtiness.
 *  'D' - Kernel has oopsed before.
 *  'A' - ACPI table overridden.
 *  'W' - Taint on warning.
 *  'C' - Modules from drivers/staging are loaded.
 *  'I' - Working around severe firmware bug.
 *  'O' - Out-of-tree module has been loaded.
 *  'E' - Unsigned module has been loaded.
 *  'L' - A soft lockup has previously occurred.
 *  'K' - Kernel has been live patched.
 *
 * The string is overwritten by the next call to print_tainted().
 */
const char *print_tainted(void)
{
	static char buf[ARRAY_SIZE(tnts) + sizeof("Tainted: ")];

	if (tainted_mask) {
		char *s;
		int i;

		s = buf + sprintf(buf, "Tainted: ");
		for (i = 0; i < ARRAY_SIZE(tnts); i++) {
			const struct tnt *t = &tnts[i];
			*s++ = test_bit(t->bit, &tainted_mask) ?
					t->true : t->false;
		}
		*s = 0;
	} else
		snprintf(buf, sizeof(buf), "Not tainted");

	return buf;
}

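/* Return non-zero if the given TAINT_* flag is currently set. */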
int test_taint(unsigned flag)
{
	return test_bit(flag, &tainted_mask);
}
EXPORT_SYMBOL(test_taint);

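/* Return the raw taint bitmask. */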
unsigned long get_taint(void)
{
	return tainted_mask;
}

/**
 * add_taint: add a taint flag if not already set.
 * @flag: one of the TAINT_* constants.
 * @lockdep_ok: whether lock debugging is still OK.
 *
 * If something bad has gone wrong, you'll want @lockdep_ok = false, but for
 * some noteworthy-but-not-corrupting cases, it can be set to true.
 */
void add_taint(unsigned flag, enum lockdep_ok lockdep_ok)
{
	if (lockdep_ok == LOCKDEP_NOW_UNRELIABLE && __debug_locks_off())
		pr_warn("Disabling lock debugging due to kernel taint\n");

	set_bit(flag, &tainted_mask);
}
EXPORT_SYMBOL(add_taint);

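/* Busy-wait for @msecs milliseconds, touching the NMI watchdog every 1 ms. */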
static void spin_msec(int msecs)
{
	int i;

	for (i = 0; i < msecs; i++) {
		touch_nmi_watchdog();
		mdelay(1);
	}
}

/*
 * It just happens that oops_enter() and oops_exit() are identically
 * implemented...
 */
static void do_oops_enter_exit(void)
{
	unsigned long flags;
	static int spin_counter;

	if (!pause_on_oops)
		return;

	spin_lock_irqsave(&pause_on_oops_lock, flags);
	if (pause_on_oops_flag == 0) {
		/* This CPU may now print the oops message */
		pause_on_oops_flag = 1;
	} else {
		/* We need to stall this CPU */
		if (!spin_counter) {
			/* This CPU gets to do the counting */
			spin_counter = pause_on_oops;
			do {
				spin_unlock(&pause_on_oops_lock);
				spin_msec(MSEC_PER_SEC);
				spin_lock(&pause_on_oops_lock);
			} while (--spin_counter);
			pause_on_oops_flag = 0;
		} else {
			/* This CPU waits for a different one */
			while (spin_counter) {
				spin_unlock(&pause_on_oops_lock);
				spin_msec(1);
				spin_lock(&pause_on_oops_lock);
			}
		}
	}
	spin_unlock_irqrestore(&pause_on_oops_lock, flags);
}

/*
 * Return true if the calling CPU is allowed to print oops-related info.
 * This is a bit racy...
 */
int oops_may_print(void)
{
	return pause_on_oops_flag == 0;
}

/*
 * Called when the architecture enters its oops handler, before it prints
 * anything. If this is the first CPU to oops, and it's oopsing the first
 * time then let it proceed.
 *
 * This is all enabled by the pause_on_oops kernel boot option. We do all
 * this to ensure that oopses don't scroll off the screen. It has the
 * side-effect of preventing later-oopsing CPUs from mucking up the display,
 * too.
 *
 * It turns out that the CPU which is allowed to print ends up pausing for
 * the right duration, whereas all the other CPUs pause for twice as long:
 * once in oops_enter(), once in oops_exit().
 */
void oops_enter(void)
{
	tracing_off();
	/* can't trust the integrity of the kernel anymore: */
	debug_locks_off();
	do_oops_enter_exit();
}

/*
 * 64-bit random ID for oopses:
 */
static u64 oops_id;

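/*
 * Initialize oops_id with random bytes the first time through, then just
 * increment it so each printed end marker is distinct.
 */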
static int init_oops_id(void)
{
	if (!oops_id)
		get_random_bytes(&oops_id, sizeof(oops_id));
	else
		oops_id++;

	return 0;
}
late_initcall(init_oops_id);

void print_oops_end_marker(void)
{
	init_oops_id();
	pr_warn("---[ end trace %016llx ]---\n", (unsigned long long)oops_id);
}

/*
 * Called when the architecture exits its oops handler, after printing
 * everything.
 */
void oops_exit(void)
{
	do_oops_enter_exit();
	print_oops_end_marker();
	kmsg_dump(KMSG_DUMP_OOPS);
}

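/* Carries a printf-style format string and its arguments into __warn(). */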
struct warn_args {
	const char *fmt;
	va_list args;
};

void __warn(const char *file, int line, void *caller, unsigned taint,
	    struct pt_regs *regs, struct warn_args *args)
{
	disable_trace_on_warning();

	pr_warn("------------[ cut here ]------------\n");

	if (file)
		pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
			raw_smp_processor_id(), current->pid, file, line,
			caller);
	else
		pr_warn("WARNING: CPU: %d PID: %d at %pS\n",
			raw_smp_processor_id(), current->pid, caller);

	if (args)
		vprintk(args->fmt, args->args);

	if (panic_on_warn) {
		/*
		 * This thread may hit another WARN() in the panic path.
		 * Resetting this prevents additional WARN() from panicking the
		 * system on this thread. Other threads are serialized by the
		 * panic_cpu check in panic().
		 */
		panic_on_warn = 0;
		panic("panic_on_warn set ...\n");
	}

	print_modules();

	if (regs)
		show_regs(regs);
	else
		dump_stack();

	print_oops_end_marker();

	/* Just a warning, don't kill lockdep. */
	add_taint(taint, LOCKDEP_STILL_OK);
}

#ifdef WANT_WARN_ON_SLOWPATH
void warn_slowpath_fmt(const char *file, int line, const char *fmt, ...)
{
	struct warn_args args;

	args.fmt = fmt;
	va_start(args.args, fmt);
	__warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL,
	       &args);
	va_end(args.args);
}
EXPORT_SYMBOL(warn_slowpath_fmt);

void warn_slowpath_fmt_taint(const char *file, int line,
			     unsigned taint, const char *fmt, ...)
{
	struct warn_args args;

	args.fmt = fmt;
	va_start(args.args, fmt);
	__warn(file, line, __builtin_return_address(0), taint, NULL, &args);
	va_end(args.args);
}
EXPORT_SYMBOL(warn_slowpath_fmt_taint);

void warn_slowpath_null(const char *file, int line)
{
	__warn(file, line, __builtin_return_address(0), TAINT_WARN, NULL, NULL);
}
EXPORT_SYMBOL(warn_slowpath_null);
#endif

#ifdef CONFIG_CC_STACKPROTECTOR

/*
 * Called when gcc's -fstack-protector feature is used, and
 * gcc detects corruption of the on-stack canary value
 */
__visible void __stack_chk_fail(void)
{
	panic("stack-protector: Kernel stack is corrupted in: %pB\n",
	      __builtin_return_address(0));
}
EXPORT_SYMBOL(__stack_chk_fail);

#endif

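/* Core kernel parameters; e.g. booting with "panic=30" sets panic_timeout. */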
core_param(panic, panic_timeout, int, 0644);
core_param(pause_on_oops, pause_on_oops, int, 0644);
core_param(panic_on_warn, panic_on_warn, int, 0644);
core_param(crash_kexec_post_notifiers, crash_kexec_post_notifiers, bool, 0644);

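/* Handle the "oops=panic" boot argument: make any oops trigger a panic. */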
static int __init oops_setup(char *s)
{
	if (!s)
		return -EINVAL;
	if (!strcmp(s, "panic"))
		panic_on_oops = 1;
	return 0;
}
early_param("oops", oops_setup);
629 |