From: Sergey Senozhatsky sergey.senozhatsky@gmail.com
commit ab6f762f0f53162d41497708b33c9a3236d3609e upstream.
printk_deferred(), similarly to printk_safe/printk_nmi, does not immediately attempt to print a new message on the consoles, avoiding calls into non-reentrant kernel paths, e.g. scheduler or timekeeping, which potentially can deadlock the system.
Those printk() flavors, instead, rely on per-CPU flush irq_work to print messages from safer contexts. For same reasons (recursive scheduler or timekeeping calls) printk() uses per-CPU irq_work in order to wake up user space syslog/kmsg readers.
However, only printk_safe/printk_nmi do make sure that per-CPU areas have been initialised and that it's safe to modify per-CPU irq_work. This means that, for instance, should printk_deferred() be invoked "too early", that is before per-CPU areas are initialised, printk_deferred() will perform illegal per-CPU access.
Lech Perczak [0] reports that after commit 1b710b1b10ef ("char/random: silence a lockdep splat with printk()") user-space syslog/kmsg readers are not able to read new kernel messages.
The reason is printk_deferred() being called too early (as was pointed out by Petr and John).
Fix printk_deferred() and do not queue per-CPU irq_work before per-CPU areas are initialized.
Link: https://lore.kernel.org/lkml/aa0732c6-5c4e-8a8b-a1c1-75ebe3dca05b@camlintech... Reported-by: Lech Perczak l.perczak@camlintechnologies.com Signed-off-by: Sergey Senozhatsky sergey.senozhatsky@gmail.com Tested-by: Jann Horn jannh@google.com Reviewed-by: Petr Mladek pmladek@suse.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Theodore Ts'o tytso@mit.edu Cc: John Ogness john.ogness@linutronix.de Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Conflicts: kernel/printk/internal.h include/linux/printk.h [yyl: adjust context] --- include/linux/printk.h | 5 ----- init/main.c | 1 - kernel/printk/internal.h | 5 +++++ kernel/printk/printk.c | 34 ++++++++++++++++++++++++++++++++++ kernel/printk/printk_safe.c | 11 +---------- 5 files changed, 40 insertions(+), 16 deletions(-)
diff --git a/include/linux/printk.h b/include/linux/printk.h index fe3386d9de5c..895a46da8827 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -246,7 +246,6 @@ __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack(void) __cold; -extern void printk_safe_init(void); extern void printk_safe_flush(void); extern void printk_safe_flush_on_panic(void); extern void zap_locks(void); @@ -314,10 +313,6 @@ static inline asmlinkage void dump_stack(void) { }
-static inline void printk_safe_init(void) -{ -} - static inline void printk_safe_flush(void) { } diff --git a/init/main.c b/init/main.c index 33b2fbbe8224..e37a6f93f5d1 100644 --- a/init/main.c +++ b/init/main.c @@ -642,7 +642,6 @@ asmlinkage __visible void __init start_kernel(void) softirq_init(); timekeeping_init(); time_init(); - printk_safe_init(); perf_event_init(); profile_init(); call_function_init(); diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 03c10f361a15..5c1693e82ac9 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -33,10 +33,15 @@ __printf(1, 0) int vprintk_default(const char *fmt, va_list args); __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); __printf(1, 0) int vprintk_func(const char *fmt, va_list args);
+void printk_safe_init(void); +bool printk_percpu_data_ready(void); + void defer_console_output(void);
#else
__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; }
+static inline void printk_safe_init(void) { } +static inline bool printk_percpu_data_ready(void) { return false; } #endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index f67b6a1c8f74..3c11510663d2 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -444,6 +444,18 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN;
+/* + * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before + * per_cpu_areas are initialised. This variable is set to true when + * it's safe to access per-CPU data. + */ +static bool __printk_percpu_data_ready __read_mostly; + +bool printk_percpu_data_ready(void) +{ + return __printk_percpu_data_ready; +} + /* Return log buffer address */ char *log_buf_addr_get(void) { @@ -1102,12 +1114,28 @@ static void __init log_buf_add_cpu(void) static inline void log_buf_add_cpu(void) {} #endif /* CONFIG_SMP */
+static void __init set_percpu_data_ready(void) +{ + printk_safe_init(); + /* Make sure we set this flag only after printk_safe() init is done */ + barrier(); + __printk_percpu_data_ready = true; +} + void __init setup_log_buf(int early) { unsigned long flags; char *new_log_buf; unsigned int free;
+ /* + * Some archs call setup_log_buf() multiple times - first is very + * early, e.g. from setup_arch(), and second - when percpu_areas + * are initialised. + */ + if (!early) + set_percpu_data_ready(); + if (log_buf != __log_buf) return;
@@ -2934,6 +2962,9 @@ static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
void wake_up_klogd(void) { + if (!printk_percpu_data_ready()) + return; + preempt_disable(); if (waitqueue_active(&log_wait)) { this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); @@ -2944,6 +2975,9 @@ void wake_up_klogd(void)
void defer_console_output(void) { + if (!printk_percpu_data_ready()) + return; + preempt_disable(); __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 367fcb2c39f0..af5f4e42ca54 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -39,7 +39,6 @@ * There are situations when we want to make sure that all buffers * were handled or when IRQs are blocked. */ -static int printk_safe_irq_ready __read_mostly;
#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ sizeof(atomic_t) - \ @@ -63,7 +62,7 @@ static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); /* Get flushed in a more safe context. */ static void queue_flush_work(struct printk_safe_seq_buf *s) { - if (printk_safe_irq_ready) + if (printk_percpu_data_ready()) irq_work_queue(&s->work); }
@@ -416,14 +415,6 @@ void __init printk_safe_init(void) #endif }
- /* - * In the highly unlikely event that a NMI were to trigger at - * this moment. Make sure IRQ work is set up before this - * variable is set. - */ - barrier(); - printk_safe_irq_ready = 1; - /* Flush pending messages that did not have scheduled IRQ works. */ printk_safe_flush(); }