From: Liao Chang liaochang1@huawei.com
mainline inclusion from mainline-5.7 commit c16816acd08697b02a53f56f8936497a9f6f6e7a category: bugfix bugzilla: NA CVE: NA
-------------------------------------------------
In general calling generic_handle_irq() with interrupts disabled from non interrupt context is harmless. For some interrupt controllers like the x86 trainwrecks this is outright dangerous as it might corrupt state if an interrupt affinity change is pending.
Add infrastructure which allows to mark interrupts as unsafe and catch such usage in generic_handle_irq().
Reported-by: sathyanarayanan.kuppuswamy@linux.intel.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Acked-by: Marc Zyngier maz@kernel.org Link: https://lkml.kernel.org/r/20200306130623.590923677@linutronix.de Signed-off-by: Liao Chang liaochang1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/irq.h | 13 +++++++++++++ kernel/irq/internals.h | 8 ++++++++ kernel/irq/irqdesc.c | 6 ++++++ kernel/irq/resend.c | 5 +++-- 4 files changed, 30 insertions(+), 2 deletions(-)
diff --git a/include/linux/irq.h b/include/linux/irq.h index 014d5804c1b59..ca367d98a991e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -210,6 +210,8 @@ struct irq_data { * IRQD_CAN_RESERVE - Can use reservation mode * IRQD_MSI_NOMASK_QUIRK - Non-maskable MSI quirk for affinity change * required + * IRQD_HANDLE_ENFORCE_IRQCTX - Enforce that handle_irq_*() is only invoked + * from actual interrupt context. * IRQD_AFFINITY_ON_ACTIVATE - Affinity is set on activation. Don't call * irq_chip::irq_set_affinity() when deactivated. */ @@ -235,6 +237,7 @@ enum { IRQD_DEFAULT_TRIGGER_SET = (1 << 25), IRQD_CAN_RESERVE = (1 << 26), IRQD_MSI_NOMASK_QUIRK = (1 << 27), + IRQD_HANDLE_ENFORCE_IRQCTX = (1 << 28), IRQD_AFFINITY_ON_ACTIVATE = (1 << 29), };
@@ -305,6 +308,16 @@ static inline bool irqd_is_single_target(struct irq_data *d) return __irqd_to_state(d) & IRQD_SINGLE_TARGET; }
+static inline void irqd_set_handle_enforce_irqctx(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_HANDLE_ENFORCE_IRQCTX; +} + +static inline bool irqd_is_handle_enforce_irqctx(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_HANDLE_ENFORCE_IRQCTX; +} + static inline bool irqd_is_wakeup_set(struct irq_data *d) { return __irqd_to_state(d) & IRQD_WAKEUP_STATE; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index ed4843df6220c..207bac6ad4c30 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -422,6 +422,10 @@ static inline struct cpumask *irq_desc_get_pending_mask(struct irq_desc *desc) { return desc->pending_mask; } +static inline bool handle_enforce_irqctx(struct irq_data *data) +{ + return irqd_is_handle_enforce_irqctx(data); +} bool irq_fixup_move_pending(struct irq_desc *desc, bool force_clear); #else /* CONFIG_GENERIC_PENDING_IRQ */ static inline bool irq_can_move_pcntxt(struct irq_data *data) @@ -448,6 +452,10 @@ static inline bool irq_fixup_move_pending(struct irq_desc *desc, bool fclear) { return false; } +static inline bool handle_enforce_irqctx(struct irq_data *data) +{ + return false; +} #endif /* !CONFIG_GENERIC_PENDING_IRQ */
#if !defined(CONFIG_IRQ_DOMAIN) || !defined(CONFIG_IRQ_DOMAIN_HIERARCHY) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index dc4549fc71f95..ffdf02b01d816 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -633,9 +633,15 @@ void irq_init_desc(unsigned int irq) int generic_handle_irq(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); + struct irq_data *data;
if (!desc) return -EINVAL; + + data = irq_desc_get_irq_data(desc); + if (WARN_ON_ONCE(!in_irq() && handle_enforce_irqctx(data))) + return -EPERM; + generic_handle_irq_desc(desc); return 0; } diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 98c04ca5fa43d..5064b13b80d60 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -72,8 +72,9 @@ void check_irq_resend(struct irq_desc *desc) desc->istate &= ~IRQS_PENDING; desc->istate |= IRQS_REPLAY;
- if (!desc->irq_data.chip->irq_retrigger || - !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) { + if ((!desc->irq_data.chip->irq_retrigger || + !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) && + !handle_enforce_irqctx(&desc->irq_data)) { #ifdef CONFIG_HARDIRQS_SW_RESEND unsigned int irq = irq_desc_get_irq(desc);
From: Liao Chang liaochang1@huawei.com
mainline inclusion from mainline-5.10 commit cd1752d34ef33d68d82ef9dcc699b4eaa17c07fc category: bugfix bugzilla: NA CVE: NA
-------------------------------------------------
On resending an interrupt, we only check the outermost irqchip for a irq_retrigger callback. However, this callback could be implemented at an inner level. Use irq_chip_retrigger_hierarchy() in this case.
Reviewed-by: Valentin Schneider valentin.schneider@arm.com Signed-off-by: Marc Zyngier maz@kernel.org Signed-off-by: Liao Chang liaochang1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/irq/resend.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 5064b13b80d60..34f79ddfef182 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -49,6 +49,18 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0);
#endif
+static int try_retrigger(struct irq_desc *desc) +{ + if (desc->irq_data.chip->irq_retrigger) + return desc->irq_data.chip->irq_retrigger(&desc->irq_data); + +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + return irq_chip_retrigger_hierarchy(&desc->irq_data); +#else + return 0; +#endif +} + /* * IRQ resend * @@ -72,9 +84,7 @@ void check_irq_resend(struct irq_desc *desc) desc->istate &= ~IRQS_PENDING; desc->istate |= IRQS_REPLAY;
- if ((!desc->irq_data.chip->irq_retrigger || - !desc->irq_data.chip->irq_retrigger(&desc->irq_data)) && - !handle_enforce_irqctx(&desc->irq_data)) { + if (!try_retrigger(desc)) { #ifdef CONFIG_HARDIRQS_SW_RESEND unsigned int irq = irq_desc_get_irq(desc);
From: Liao Chang liaochang1@huawei.com
mainline inclusion from mainline-5.10 commit 17f644e949ffb14e9c8870d99bc574066d8b685c category: bugfix bugzilla: NA CVE: NA
-------------------------------------------------
While digging around IRQCHIP_EOI_IF_HANDLED and irq/resend.c, it has come to my attention that the IRQ resend situation seems a bit precarious for the GIC(s).
When marking an IRQ with IRQS_PENDING, handle_fasteoi_irq() will bail out and issue an irq_eoi(). Should the IRQ in question be re-enabled, check_irq_resend() will trigger a SW resend, which will go through the flow handler again and issue *another* irq_eoi() on the *same* IRQ activation. This is something the GIC spec clearly describes as a bad idea: any EOI must match a previous ACK.
Implement irq_chip.irq_retrigger() for the GIC chips by setting the GIC pending bit of the relevant IRQ. After being called by check_irq_resend(), this will eventually trigger a *new* interrupt which we will handle as usual.
Signed-off-by: Valentin Schneider valentin.schneider@arm.com Signed-off-by: Marc Zyngier maz@kernel.org Link: https://lore.kernel.org/r/20200730170321.31228-2-valentin.schneider@arm.com Signed-off-by: Liao Chang liaochang1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/irqchip/irq-gic-v3.c | 7 +++++++ drivers/irqchip/irq-gic.c | 6 ++++++ 2 files changed, 13 insertions(+)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index acf0ae4ef612a..9dc7ab1da2d02 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1248,6 +1248,11 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, #define gic_smp_init() do { } while(0) #endif
+static int gic_retrigger(struct irq_data *data) +{ + return !gic_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING, true); +} + #ifdef CONFIG_CPU_PM static int gic_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) @@ -1283,6 +1288,7 @@ static struct irq_chip gic_chip = { .irq_eoi = gic_eoi_irq, .irq_set_type = gic_set_type, .irq_set_affinity = gic_set_affinity, + .irq_retrigger = gic_retrigger, .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, .irq_nmi_setup = gic_irq_nmi_setup, @@ -1299,6 +1305,7 @@ static struct irq_chip gic_eoimode1_chip = { .irq_eoi = gic_eoimode1_eoi_irq, .irq_set_type = gic_set_type, .irq_set_affinity = gic_set_affinity, + .irq_retrigger = gic_retrigger, .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index ced10c44b68a8..0ffadbdf98a48 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -350,6 +350,11 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, } #endif
+static int gic_retrigger(struct irq_data *data) +{ + return !gic_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING, true); +} + static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) { u32 irqstat, irqnr; @@ -420,6 +425,7 @@ static const struct irq_chip gic_chip = { .irq_unmask = gic_unmask_irq, .irq_eoi = gic_eoi_irq, .irq_set_type = gic_set_type, + .irq_retrigger = gic_retrigger, .irq_get_irqchip_state = gic_irq_get_irqchip_state, .irq_set_irqchip_state = gic_irq_set_irqchip_state, .flags = IRQCHIP_SET_TYPE_MASKED |
From: Liao Chang liaochang1@huawei.com
mainline inclusion from mainline-5.10 commit 5f774f5e12512b850a611aa99b4601d7eac50edb category: bugfix bugzilla: NA CVE: NA
-------------------------------------------------
It is pretty easy to provide a retrigger callback for the ITS, as it we already have the required support in terms of irq_set_irqchip_state().
Note that this only works for device-generated LPIs, and not the GICv4 doorbells, which should never have to be retriggered anyway.
Reviewed-by: Valentin Schneider valentin.schneider@arm.com Signed-off-by: Marc Zyngier maz@kernel.org Signed-off-by: Liao Chang liaochang1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/irqchip/irq-gic-v3-its.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 03b720aaa0546..489e2e85ce3a3 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1467,6 +1467,11 @@ static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) } }
+static int its_irq_retrigger(struct irq_data *d) +{ + return !its_irq_set_irqchip_state(d, IRQCHIP_STATE_PENDING, true); +} + static struct irq_chip its_irq_chip = { .name = "ITS", .irq_mask = its_mask_irq, @@ -1475,6 +1480,7 @@ static struct irq_chip its_irq_chip = { .irq_set_affinity = its_set_affinity, .irq_compose_msi_msg = its_irq_compose_msi_msg, .irq_set_irqchip_state = its_irq_set_irqchip_state, + .irq_retrigger = its_irq_retrigger, .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity, };
From: Liao Chang liaochang1@huawei.com
mainline inclusion from mainline-5.10 commit 1b57d91b969cda1d2c3530f2e829ca366a9c7df7 category: bugfix bugzilla: NA CVE: NA
-------------------------------------------------
The GIC irqchips can now use a HW resend when a retrigger is invoked by check_irq_resend(). However, should the HW resend fail, check_irq_resend() will still attempt to trigger a SW resend, which is still a bad idea for the GICs.
Prevent this from happening by setting IRQD_HANDLE_ENFORCE_IRQCTX on all GIC IRQs. Technically per-cpu IRQs do not need this, as their flow handlers never set IRQS_PENDING, but this aligns all IRQs wrt context enforcement: this also forces all GIC IRQ handling to happen in IRQ context (as defined by in_irq()).
Signed-off-by: Valentin Schneider valentin.schneider@arm.com Signed-off-by: Marc Zyngier maz@kernel.org Link: https://lore.kernel.org/r/20200730170321.31228-3-valentin.schneider@arm.com Signed-off-by: Liao Chang liaochang1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/irqchip/irq-gic-v3.c | 5 ++++- drivers/irqchip/irq-gic.c | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 9dc7ab1da2d02..8d25588735610 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1322,6 +1322,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { struct irq_chip *chip = &gic_chip; + struct irq_data *irqd = irq_desc_get_irq_data(irq_to_desc(irq));
if (static_branch_likely(&supports_deactivate_key)) chip = &gic_eoimode1_chip; @@ -1348,7 +1349,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_domain_set_info(d, irq, hw, chip, d->host_data, handle_fasteoi_irq, NULL, NULL); irq_set_probe(irq); - irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq))); + irqd_set_single_target(irqd); } /* LPIs */ if (hw >= 8192 && hw < GIC_ID_NR) { @@ -1358,6 +1359,8 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, handle_fasteoi_irq, NULL, NULL); }
+ /* Prevents SW retriggers which mess up the ACK/EOI ordering */ + irqd_set_handle_enforce_irqctx(irqd); return 0; }
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 0ffadbdf98a48..76e61392c3df5 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -978,6 +978,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { struct gic_chip_data *gic = d->host_data; + struct irq_data *irqd = irq_desc_get_irq_data(irq_to_desc(irq));
if (hw < 32) { irq_set_percpu_devid(irq); @@ -988,8 +989,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, irq_domain_set_info(d, irq, hw, &gic->chip, d->host_data, handle_fasteoi_irq, NULL, NULL); irq_set_probe(irq); - irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq))); + irqd_set_single_target(irqd); } + + /* Prevents SW retriggers which mess up the ACK/EOI ordering */ + irqd_set_handle_enforce_irqctx(irqd); return 0; }