Guilherme G. Piccoli (2): x86/PCI: Export find_cap() to be used in early PCI code x86/quirks: Add parameter to clear MSIs early on boot
.../admin-guide/kernel-parameters.txt | 6 ++++ arch/x86/include/asm/pci-direct.h | 2 ++ arch/x86/kernel/aperture_64.c | 30 ++--------------- arch/x86/kernel/early-quirks.c | 32 +++++++++++++++++++ arch/x86/pci/common.c | 4 +++ arch/x86/pci/early.c | 25 +++++++++++++++ 6 files changed, 71 insertions(+), 28 deletions(-)
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/6998 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/6998 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...
From: "Guilherme G. Piccoli" gpiccoli@canonical.com
maillist inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8X2RA CVE: NA
Reference: https://lore.kernel.org/linux-pci/20181018183721.27467-2-gpiccoli@canonical....
-------------------------------
This patch exports (and renames) the function find_cap() to be used in the early PCI quirk code, by the next patch.
This is being moved out from AGP code to generic early-PCI code since it's not AGP-specific and can be used for any PCI device. No functional changes intended.
Signed-off-by: Guilherme G. Piccoli gpiccoli@canonical.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com --- arch/x86/include/asm/pci-direct.h | 1 + arch/x86/kernel/aperture_64.c | 30 ++---------------------------- arch/x86/pci/early.c | 25 +++++++++++++++++++++++++ 3 files changed, 28 insertions(+), 28 deletions(-)
diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h index 94597a3cf3d0..813996305bf5 100644 --- a/arch/x86/include/asm/pci-direct.h +++ b/arch/x86/include/asm/pci-direct.h @@ -10,6 +10,7 @@ extern u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset); extern u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset); extern u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset); +extern u32 pci_early_find_cap(int bus, int slot, int func, int cap); extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 4feaa670d578..dd7c987f21a7 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -136,32 +136,6 @@ static u32 __init allocate_aperture(void) }
-/* Find a PCI capability */ -static u32 __init find_cap(int bus, int slot, int func, int cap) -{ - int bytes; - u8 pos; - - if (!(read_pci_config_16(bus, slot, func, PCI_STATUS) & - PCI_STATUS_CAP_LIST)) - return 0; - - pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST); - for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { - u8 id; - - pos &= ~3; - id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID); - if (id == 0xff) - break; - if (id == cap) - return pos; - pos = read_pci_config_byte(bus, slot, func, - pos+PCI_CAP_LIST_NEXT); - } - return 0; -} - /* Read a standard AGPv3 bridge header */ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order) { @@ -250,8 +224,8 @@ static u32 __init search_agp_bridge(u32 *order, int *valid_agp) case PCI_CLASS_BRIDGE_HOST: case PCI_CLASS_BRIDGE_OTHER: /* needed? */ /* AGP bridge? */ - cap = find_cap(bus, slot, func, - PCI_CAP_ID_AGP); + cap = pci_early_find_cap(bus, slot, + func, PCI_CAP_ID_AGP); if (!cap) break; *valid_agp = 1; diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index f5fc953e5848..f1ba9d781b52 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -51,6 +51,31 @@ void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) outw(val, 0xcfc + (offset&2)); }
+u32 pci_early_find_cap(int bus, int slot, int func, int cap) +{ + int bytes; + u8 pos; + + if (!(read_pci_config_16(bus, slot, func, PCI_STATUS) & + PCI_STATUS_CAP_LIST)) + return 0; + + pos = read_pci_config_byte(bus, slot, func, PCI_CAPABILITY_LIST); + for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) { + u8 id; + + pos &= ~3; + id = read_pci_config_byte(bus, slot, func, pos+PCI_CAP_LIST_ID); + if (id == 0xff) + break; + if (id == cap) + return pos; + pos = read_pci_config_byte(bus, slot, func, + pos+PCI_CAP_LIST_NEXT); + } + return 0; +} + int early_pci_allowed(void) { return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) ==
From: "Guilherme G. Piccoli" gpiccoli@canonical.com
maillist inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8X2RA CVE: NA
Reference: https://lore.kernel.org/linux-pci/20181018183721.27467-3-gpiccoli@canonical....
-------------------------------
We observed a kdump failure in x86 that was narrowed down to MSI irq storm coming from a PCI network device. The bug manifests as a lack of progress in the boot process of kdump kernel, and a flood of kernel messages like:
[...] [ 342.265294] do_IRQ: 0.155 No irq handler for vector [ 342.266916] do_IRQ: 0.155 No irq handler for vector [ 347.258422] do_IRQ: 14053260 callbacks suppressed [...]
The root cause of the issue is that kexec process of the kdump kernel doesn't ensure PCI devices are reset or MSI capabilities are disabled, so a PCI adapter could produce a huge amount of irqs which would steal all the processing time for the CPU (specially since we usually restrict kdump kernel to use a single CPU only).
This patch implements the kernel parameter "pci=clearmsi" to clear the MSI/MSI-X enable bits in the Message Control register for all PCI devices during early boot time, thus preventing potential issues in the kexec'ed kernel. PCI spec also supports/enforces this need (see PCI Local Bus spec sections 6.8.1.3 and 6.8.2.3).
Suggested-by: Dan Streetman ddstreet@canonical.com Suggested-by: Gavin Shan shan.gavin@linux.alibaba.com Signed-off-by: Guilherme G. Piccoli gpiccoli@canonical.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com --- .../admin-guide/kernel-parameters.txt | 6 ++++ arch/x86/include/asm/pci-direct.h | 1 + arch/x86/kernel/early-quirks.c | 32 +++++++++++++++++++ arch/x86/pci/common.c | 4 +++ 4 files changed, 43 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8900211484ed..2895df71f103 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4405,6 +4405,12 @@ nomsi [MSI] If the PCI_MSI kernel config parameter is enabled, this kernel boot option can be used to disable the use of MSI interrupts system-wide. + clearmsi [X86] Clears MSI/MSI-X enable bits early in boot + time in order to avoid issues like adapters + screaming irqs and preventing boot progress. + Also, it enforces the PCI Local Bus spec + rule that those bits should be 0 in system reset + events (useful for kexec/kdump cases). noioapicquirk [APIC] Disable all boot interrupt quirks. Safety option to keep boot IRQs enabled. This should never be necessary. diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h index 813996305bf5..ebb3db2eee41 100644 --- a/arch/x86/include/asm/pci-direct.h +++ b/arch/x86/include/asm/pci-direct.h @@ -15,5 +15,6 @@ extern void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val); extern void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val); extern void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val);
+extern unsigned int pci_early_clear_msi; extern int early_pci_allowed(void); #endif /* _ASM_X86_PCI_DIRECT_H */ diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a6c1867fc7aa..11bd85bcbaf8 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -29,6 +29,37 @@ #include <asm/irq_remapping.h> #include <asm/early_ioremap.h>
+static void __init early_pci_clear_msi(int bus, int slot, int func) +{ + int pos; + u16 ctrl; + + if (likely(!pci_early_clear_msi)) + return; + + pr_info_once("Clearing MSI/MSI-X enable bits early in boot (quirk)\n"); + + pos = pci_early_find_cap(bus, slot, func, PCI_CAP_ID_MSI); + if (pos) { + ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSI_FLAGS); + ctrl &= ~PCI_MSI_FLAGS_ENABLE; + write_pci_config_16(bus, slot, func, pos + PCI_MSI_FLAGS, ctrl); + + /* Read again to flush previous write */ + ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSI_FLAGS); + } + + pos = pci_early_find_cap(bus, slot, func, PCI_CAP_ID_MSIX); + if (pos) { + ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSIX_FLAGS); + ctrl &= ~PCI_MSIX_FLAGS_ENABLE; + write_pci_config_16(bus, slot, func, pos + PCI_MSIX_FLAGS, ctrl); + + /* Read again to flush previous write */ + ctrl = read_pci_config_16(bus, slot, func, pos + PCI_MSIX_FLAGS); + } +} + static void __init fix_hypertransport_config(int num, int slot, int func) { u32 htcfg; @@ -728,6 +759,7 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, + { PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, early_pci_clear_msi}, {} };
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index ddb798603201..618b01d60af2 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -34,6 +34,7 @@ int noioapicreroute = 1; #endif int pcibios_last_bus = -1; unsigned long pirq_table_addr; +unsigned int pci_early_clear_msi; const struct pci_raw_ops *__read_mostly raw_pci_ops; const struct pci_raw_ops *__read_mostly raw_pci_ext_ops;
@@ -614,6 +615,9 @@ char *__init pcibios_setup(char *str) } else if (!strcmp(str, "skip_isa_align")) { pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; return NULL; + } else if (!strcmp(str, "clearmsi")) { + pci_early_clear_msi = 1; + return NULL; } else if (!strcmp(str, "noioapicquirk")) { noioapicquirk = 1; return NULL;