From: Eric Auger eric.auger@redhat.com
mainline inclusion from mainline-5.3 commit b9a7f9816483b193 category: bugfix bugzilla: 17401 CVE: NA
-------------------------------------------------
Several call sites are about to check whether a device belongs to the PCI sub-hierarchy of a candidate PCI-PCI bridge. Introduce an helper to perform that check.
Signed-off-by: Eric Auger eric.auger@redhat.com Reviewed-by: Lu Baolu baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/iommu/intel-iommu.c | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f6d7955..2f52ea8 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -826,12 +826,39 @@ static int iommu_dummy(struct device *dev) return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; }
+/** + * is_downstream_to_pci_bridge - test if a device belongs to the PCI + * sub-hierarchy of a candidate PCI-PCI bridge + * @dev: candidate PCI device belonging to @bridge PCI sub-hierarchy + * @bridge: the candidate PCI-PCI bridge + * + * Return: true if @dev belongs to @bridge PCI sub-hierarchy, else false. + */ +static bool +is_downstream_to_pci_bridge(struct device *dev, struct device *bridge) +{ + struct pci_dev *pdev, *pbridge; + + if (!dev_is_pci(dev) || !dev_is_pci(bridge)) + return false; + + pdev = to_pci_dev(dev); + pbridge = to_pci_dev(bridge); + + if (pbridge->subordinate && + pbridge->subordinate->number <= pdev->bus->number && + pbridge->subordinate->busn_res.end >= pdev->bus->number) + return true; + + return false; +} + static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) { struct dmar_drhd_unit *drhd = NULL; struct intel_iommu *iommu; struct device *tmp; - struct pci_dev *ptmp, *pdev = NULL; + struct pci_dev *pdev = NULL; u16 segment = 0; int i;
@@ -877,13 +904,7 @@ static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devf goto out; }
- if (!pdev || !dev_is_pci(tmp)) - continue; - - ptmp = to_pci_dev(tmp); - if (ptmp->subordinate && - ptmp->subordinate->number <= pdev->bus->number && - ptmp->subordinate->busn_res.end >= pdev->bus->number) + if (is_downstream_to_pci_bridge(dev, tmp)) goto got_pdev; }
From: Eric Auger eric.auger@redhat.com
mainline inclusion from mainline-5.3 commit e143fd4598dd category: bugfix bugzilla: 17401 CVE: NA
-------------------------------------------------
When reading the vtd specification and especially the Reserved Memory Region Reporting Structure chapter, it is not obvious a device scope element cannot be a PCI-PCI bridge, in which case all downstream ports are likely to access the reserved memory region. Let's handle this case in device_has_rmrr.
Fixes: ea2447f700ca ("intel-iommu: Prevent devices with RMRRs from being placed into SI Domain")
Signed-off-by: Eric Auger eric.auger@redhat.com Reviewed-by: Lu Baolu baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2f52ea8..927b870 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2888,7 +2888,8 @@ static bool device_has_rmrr(struct device *dev) */ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, tmp) - if (tmp == dev) { + if (tmp == dev || + is_downstream_to_pci_bridge(dev, tmp)) { rcu_read_unlock(); return true; }
From: Eric Auger eric.auger@redhat.com
mainline inclusion from mainline-5.3 commit 3855ba2d834d category: bugfix bugzilla: 17414 CVE: NA
-------------------------------------------------
In the case the RMRR device scope is a PCI-PCI bridge, let's check the device belongs to the PCI sub-hierarchy.
Fixes: 0659b8dc45a6 ("iommu/vt-d: Implement reserved region get/put callbacks")
Signed-off-by: Eric Auger eric.auger@redhat.com Reviewed-by: Lu Baolu baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 927b870..f97f0e9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5241,7 +5241,8 @@ static void intel_iommu_get_resv_regions(struct device *device, struct iommu_resv_region *resv; size_t length;
- if (i_dev != device) + if (i_dev != device && + !is_downstream_to_pci_bridge(device, i_dev)) continue;
length = rmrr->end_address - rmrr->base_address + 1;
From: Hanjun Guo guohanjun@huawei.com
mainline inclusion from mainline-5.6-rc1 commit c01a4a1 category: bugfix bugzilla: 30365 CVE: NA
-------------------------------------------------
I2C clock frequency of Designware ip for Hisilicon Hip08 Lite is 125M, use a new ACPI HID to enable it.
Tested-by: Sheng Feng fengsheng5@huawei.com Signed-off-by: Hanjun Guo guohanjun@huawei.com Reviewed-by: Jarkko Nikula jarkko.nikula@linux.intel.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com
Conflicts: drivers/acpi/acpi_apd.c
Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/acpi/acpi_apd.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index ddf598a..7291499 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -162,6 +162,12 @@ static int st_misc_setup(struct apd_private_data *pdata) .setup = acpi_apd_setup, .fixed_clk_rate = 250000000, }; + +static const struct apd_device_desc hip08_lite_i2c_desc = { + .setup = acpi_apd_setup, + .fixed_clk_rate = 125000000, +}; + static const struct apd_device_desc thunderx2_i2c_desc = { .setup = acpi_apd_setup, .fixed_clk_rate = 125000000, @@ -239,6 +245,7 @@ static int acpi_apd_create_device(struct acpi_device *adev, { "CAV9007", APD_ADDR(thunderx2_i2c_desc) }, { "HISI02A1", APD_ADDR(hip07_i2c_desc) }, { "HISI02A2", APD_ADDR(hip08_i2c_desc) }, + { "HISI02A3", APD_ADDR(hip08_lite_i2c_desc) }, { "HISI0173", APD_ADDR(hip08_spi_desc) }, #endif { }
From: Hanjun Guo guohanjun@huawei.com
mainline inclusion from mainline-5.6-rc1 commit dec0a81 category: bugfix bugzilla: 30365 CVE: NA
-------------------------------------------------
Add ACPI HID HISI02A3 for Hisilicon Hip08 Lite, which has different clock frequency from Hip08 for I2C controller.
Tested-by: Sheng Feng fengsheng5@huawei.com Signed-off-by: Hanjun Guo guohanjun@huawei.com Acked-by: Nikula jarkko.nikula@linux.intel.com Reviewed-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/i2c/busses/i2c-designware-platdrv.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c index b5750fd..48b9f09 100644 --- a/drivers/i2c/busses/i2c-designware-platdrv.c +++ b/drivers/i2c/busses/i2c-designware-platdrv.c @@ -151,6 +151,7 @@ static int dw_i2c_acpi_configure(struct platform_device *pdev) { "APMC0D0F", 0 }, { "HISI02A1", 0 }, { "HISI02A2", 0 }, + { "HISI02A3", 0 }, { } }; MODULE_DEVICE_TABLE(acpi, dw_i2c_acpi_match);
From: Daniel Drake drake@endlessm.com
mainline inclusion from mainline-v5.3-rc1 commit 52ae346bd26c7a8b17ea82e9a09671e98c5402b7 category: bugfix bugzilla: 17251 CVE: NA
-------------------------------------------------
This variable is a period unit (number of clock cycles per jiffy), not a frequency (which is number of cycles per second).
Give it a more appropriate name.
Suggested-by: Ingo Molnar mingo@kernel.org Signed-off-by: Daniel Drake drake@endlessm.com Reviewed-by: Thomas Gleixner tglx@linutronix.de Cc: Andy Lutomirski luto@kernel.org Cc: Borislav Petkov bp@alien8.de Cc: H. Peter Anvin hpa@zytor.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: len.brown@intel.com Cc: linux@endlessm.com Cc: rafael.j.wysocki@intel.com Link: http://lkml.kernel.org/r/20190509055417.13152-2-drake@endlessm.com Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/include/asm/apic.h | 2 +- arch/x86/kernel/apic/apic.c | 20 ++++++++++---------- arch/x86/kernel/cpu/mshyperv.c | 4 ++-- arch/x86/kernel/cpu/vmware.c | 2 +- arch/x86/kernel/jailhouse.c | 2 +- arch/x86/kernel/tsc_msr.c | 4 ++-- 6 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3c1e51e..ada38e8 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -52,7 +52,7 @@ static inline void generic_apic_probe(void) extern int local_apic_timer_c2_ok;
extern int disable_apic; -extern unsigned int lapic_timer_frequency; +extern unsigned int lapic_timer_period;
extern enum apic_intr_mode_id apic_intr_mode; enum apic_intr_mode_id { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8fdda70..c101137 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -193,7 +193,7 @@ static __init int setup_apicpmtimer(char *s) .flags = IORESOURCE_MEM | IORESOURCE_BUSY, };
-unsigned int lapic_timer_frequency = 0; +unsigned int lapic_timer_period = 0;
static void apic_pm_activate(void);
@@ -494,7 +494,7 @@ static int lapic_timer_shutdown(struct clock_event_device *evt) if (evt->features & CLOCK_EVT_FEAT_DUMMY) return 0;
- __setup_APIC_LVTT(lapic_timer_frequency, oneshot, 1); + __setup_APIC_LVTT(lapic_timer_period, oneshot, 1); return 0; }
@@ -798,11 +798,11 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
static int __init lapic_init_clockevent(void) { - if (!lapic_timer_frequency) + if (!lapic_timer_period) return -1;
/* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR, + lapic_clockevent.mult = div_sc(lapic_timer_period/APIC_DIVISOR, TICK_NSEC, lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent); @@ -833,7 +833,7 @@ static int __init calibrate_APIC_clock(void) */ if (!lapic_init_clockevent()) { apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n", - lapic_timer_frequency); + lapic_timer_period); /* * Direct calibration methods must have an always running * local APIC timer, no need for broadcast timer. @@ -914,13 +914,13 @@ static int __init calibrate_APIC_clock(void) pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, &delta, &deltatsc);
- lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; + lapic_timer_period = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; lapic_init_clockevent();
apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", - lapic_timer_frequency); + lapic_timer_period);
if (boot_cpu_has(X86_FEATURE_TSC)) { apic_printk(APIC_VERBOSE, "..... CPU clock speed is " @@ -931,13 +931,13 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "..... host bus clock speed is " "%u.%04u MHz.\n", - lapic_timer_frequency / (1000000 / HZ), - lapic_timer_frequency % (1000000 / HZ)); + lapic_timer_period / (1000000 / HZ), + lapic_timer_period % (1000000 / HZ));
/* * Do a sanity check on the APIC calibration result */ - if (lapic_timer_frequency < (1000000 / HZ)) { + if (lapic_timer_period < (1000000 / HZ)) { local_irq_enable(); pr_warning("APIC frequency too slow, disabling apic timer\n"); return -1; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 852e74e..9266382 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -260,9 +260,9 @@ static void __init ms_hyperv_init_platform(void)
rdmsrl(HV_X64_MSR_APIC_FREQUENCY, hv_lapic_frequency); hv_lapic_frequency = div_u64(hv_lapic_frequency, HZ); - lapic_timer_frequency = hv_lapic_frequency; + lapic_timer_period = hv_lapic_frequency; pr_info("Hyper-V: LAPIC Timer Frequency: %#x\n", - lapic_timer_frequency); + lapic_timer_period); }
register_nmi_handler(NMI_UNKNOWN, hv_nmi_unknown, NMI_FLAG_FIRST, diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d805202c..9780568 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -157,7 +157,7 @@ static void __init vmware_platform_setup(void)
#ifdef CONFIG_X86_LOCAL_APIC /* Skip lapic calibration since we know the bus frequency. */ - lapic_timer_frequency = ecx / HZ; + lapic_timer_period = ecx / HZ; pr_info("Host bus clock speed read from hypervisor : %u Hz\n", ecx); #endif diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 108c48d..3f133b4d 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -44,7 +44,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now)
static void __init jailhouse_timer_init(void) { - lapic_timer_frequency = setup_data.apic_khz * (1000 / HZ); + lapic_timer_period = setup_data.apic_khz * (1000 / HZ); }
static unsigned long jailhouse_get_tsc(void) diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 3d0e9aee..067858f 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -71,7 +71,7 @@ struct freq_desc { /* * MSR-based CPU/TSC frequency discovery for certain CPUs. * - * Set global "lapic_timer_frequency" to bus_clock_cycles/jiffy + * Set global "lapic_timer_period" to bus_clock_cycles/jiffy * Return processor base frequency in KHz, or 0 on failure. */ unsigned long cpu_khz_from_msr(void) @@ -104,7 +104,7 @@ unsigned long cpu_khz_from_msr(void) res = freq * ratio;
#ifdef CONFIG_X86_LOCAL_APIC - lapic_timer_frequency = (freq * 1000) / HZ; + lapic_timer_period = (freq * 1000) / HZ; #endif
/*
From: Thomas Gleixner tglx@linutronix.de
mainline inclusion from mainline-v5.3-rc1 commit c8c4076723da category: bugfix bugzilla: 17251 CVE: NA
-------------------------------------------------
Recent Intel chipsets including Skylake and ApolloLake have a special ITSSPRC register which allows the 8254 PIT to be gated. When gated, the 8254 registers can still be programmed as normal, but there are no IRQ0 timer interrupts.
Some products such as the Connex L1430 and exone go Rugged E11 use this register to ship with the PIT gated by default. This causes Linux to fail to boot:
Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug and send a report.
The panic happens before the framebuffer is initialized, so to the user, it appears as an early boot hang on a black screen.
Affected products typically have a BIOS option that can be used to enable the 8254 and make Linux work (Chipset -> South Cluster Configuration -> Miscellaneous Configuration -> 8254 Clock Gating), however it would be best to make Linux support the no-8254 case.
Modern sytems allow to discover the TSC and local APIC timer frequencies, so the calibration against the PIT is not required. These systems have always running timers and the local APIC timer works also in deep power states.
So the setup of the PIT including the IO-APIC timer interrupt delivery checks are a pointless exercise.
Skip the PIT setup and the IO-APIC timer interrupt checks on these systems, which avoids the panic caused by non ticking PITs and also speeds up the boot process.
Thanks to Daniel for providing the changelog, initial analysis of the problem and testing against a variety of machines.
Reported-by: Daniel Drake drake@endlessm.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Tested-by: Daniel Drake drake@endlessm.com Cc: bp@alien8.de Cc: hpa@zytor.com Cc: linux@endlessm.com Cc: rafael.j.wysocki@intel.com Cc: hdegoede@redhat.com Link: https://lkml.kernel.org/r/20190628072307.24678-1-drake@endlessm.com Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/include/asm/apic.h | 2 ++ arch/x86/include/asm/time.h | 1 + arch/x86/kernel/apic/apic.c | 27 +++++++++++++++++++++++++++ arch/x86/kernel/apic/io_apic.c | 4 ++++ arch/x86/kernel/i8253.c | 25 ++++++++++++++++++++++++- arch/x86/kernel/time.c | 7 +++++-- 6 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index ada38e8..b18fdea 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -174,6 +174,7 @@ static inline int apic_is_clustered_box(void) extern void lapic_assign_legacy_vector(unsigned int isairq, bool replace); extern void lapic_online(void); extern void lapic_offline(void); +extern bool apic_needs_pit(void);
#else /* !CONFIG_X86_LOCAL_APIC */ static inline void lapic_shutdown(void) { } @@ -187,6 +188,7 @@ static inline void init_bsp_APIC(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } +static inline bool apic_needs_pit(void) { return true; } #endif /* !CONFIG_X86_LOCAL_APIC */
#ifdef CONFIG_X86_X2APIC diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index cef818b..8ac563a 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -7,6 +7,7 @@
extern void hpet_time_init(void); extern void time_init(void); +extern bool pit_timer_init(void);
extern struct clock_event_device *global_clock_event;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index c101137..7bdc5ce 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -814,6 +814,33 @@ static int __init lapic_init_clockevent(void) return 0; }
+bool __init apic_needs_pit(void) +{ + /* + * If the frequencies are not known, PIT is required for both TSC + * and apic timer calibration. + */ + if (!tsc_khz || !cpu_khz) + return true; + + /* Is there an APIC at all? */ + if (!boot_cpu_has(X86_FEATURE_APIC)) + return true; + + /* Deadline timer is based on TSC so no further PIT action required */ + if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) + return false; + + /* APIC timer disabled? */ + if (disable_apic_timer) + return true; + /* + * The APIC timer frequency is known already, no PIT calibration + * required. If unknown, let the PIT be initialized. + */ + return lapic_timer_period == 0; +} + static int __init calibrate_APIC_clock(void) { struct clock_event_device *levt = this_cpu_ptr(&lapic_events); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index fa3b85b..0784aeb 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -58,6 +58,7 @@ #include <asm/acpi.h> #include <asm/dma.h> #include <asm/timer.h> +#include <asm/time.h> #include <asm/i8259.h> #include <asm/setup.h> #include <asm/irq_remapping.h> @@ -2130,6 +2131,9 @@ static inline void __init check_timer(void) unsigned long flags; int no_pin1 = 0;
+ if (!global_clock_event) + return; + local_irq_save(flags);
/* diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 0d307a6..2b7999a 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -8,6 +8,7 @@ #include <linux/timex.h> #include <linux/i8253.h>
+#include <asm/apic.h> #include <asm/hpet.h> #include <asm/time.h> #include <asm/smp.h> @@ -18,10 +19,32 @@ */ struct clock_event_device *global_clock_event;
-void __init setup_pit_timer(void) +/* + * Modern chipsets can disable the PIT clock which makes it unusable. It + * would be possible to enable the clock but the registers are chipset + * specific and not discoverable. Avoid the whack a mole game. + * + * These platforms have discoverable TSC/CPU frequencies but this also + * requires to know the local APIC timer frequency as it normally is + * calibrated against the PIT interrupt. + */ +static bool __init use_pit(void) +{ + if (!IS_ENABLED(CONFIG_X86_TSC) || !boot_cpu_has(X86_FEATURE_TSC)) + return true; + + /* This also returns true when APIC is disabled */ + return apic_needs_pit(); +} + +bool __init pit_timer_init(void) { + if (!use_pit()) + return false; + clockevent_i8253_init(true); global_clock_event = &i8253_clockevent; + return true; }
#ifndef CONFIG_X86_64 diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index fddaefc..548dc9a 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -81,8 +81,11 @@ static void __init setup_default_timer_irq(void) /* Default timer init function */ void __init hpet_time_init(void) { - if (!hpet_enable()) - setup_pit_timer(); + if (!hpet_enable()) { + if (!pit_timer_init()) + return; + } + setup_default_timer_irq(); }
From: Thomas Gleixner tglx@linutronix.de
mainline inclusion from mainline-v5.6-rc1 commit 979923871f69 category: bugfix bugzilla: 17251 CVE: NA
-------------------------------------------------
Tony reported a boot regression caused by the recent workaround for systems which have a disabled (clock gate off) PIT.
On his machine the kernel fails to initialize the PIT because apic_needs_pit() does not take into account whether the local APIC interrupt delivery mode will actually allow to setup and use the local APIC timer. This should be easy to reproduce with acpi=off on the command line which also disables HPET.
Due to the way the PIT/HPET and APIC setup ordering works (APIC setup can require working PIT/HPET) the information is not available at the point where apic_needs_pit() makes this decision.
To address this, split out the interrupt mode selection from apic_intr_mode_init(), invoke the selection before making the decision whether PIT is required or not, and add the missing checks into apic_needs_pit().
Fixes: c8c4076723da ("x86/timer: Skip PIT initialization on modern chipsets") Reported-by: Anthony Buckley tony.buckley000@gmail.com Tested-by: Anthony Buckley tony.buckley000@gmail.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Ingo Molnar mingo@kernel.org Cc: Daniel Drake drake@endlessm.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=206125 Link: https://lore.kernel.org/r/87sgk6tmk2.fsf@nanos.tec.linutronix.de
Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/include/asm/apic.h | 2 ++ arch/x86/include/asm/x86_init.h | 2 ++ arch/x86/kernel/apic/apic.c | 23 ++++++++++++++++++----- arch/x86/kernel/time.c | 12 ++++++++++-- arch/x86/kernel/x86_init.c | 1 + arch/x86/xen/enlighten_pv.c | 1 + 6 files changed, 34 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b18fdea..b7bd996 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -138,6 +138,7 @@ static inline bool apic_is_x2apic_enabled(void) extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); +extern void apic_intr_mode_select(void); extern void apic_intr_mode_init(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); @@ -185,6 +186,7 @@ static inline void disable_local_APIC(void) { } # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } static inline void init_bsp_APIC(void) { } +static inline void apic_intr_mode_select(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b85a7c5..227b18b 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -51,12 +51,14 @@ struct x86_init_resources { * are set up. * @intr_init: interrupt init code * @trap_init: platform specific trap setup + * @intr_mode_select: interrupt delivery mode selection * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); void (*trap_init)(void); + void (*intr_mode_select)(void); void (*intr_mode_init)(void); };
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7bdc5ce..8fe688e 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -823,8 +823,17 @@ bool __init apic_needs_pit(void) if (!tsc_khz || !cpu_khz) return true;
- /* Is there an APIC at all? */ - if (!boot_cpu_has(X86_FEATURE_APIC)) + /* Is there an APIC at all or is it disabled? */ + if (!boot_cpu_has(X86_FEATURE_APIC) || disable_apic) + return true; + + /* + * If interrupt delivery mode is legacy PIC or virtual wire without + * configuration, the local APIC timer wont be set up. Make sure + * that the PIT is initialized. + */ + if (apic_intr_mode == APIC_PIC || + apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG) return true;
/* Deadline timer is based on TSC so no further PIT action required */ @@ -1298,7 +1307,7 @@ void __init sync_Arb_IDs(void)
enum apic_intr_mode_id apic_intr_mode;
-static int __init apic_intr_mode_select(void) +static int __init __apic_intr_mode_select(void) { /* Check kernel option */ if (disable_apic) { @@ -1360,6 +1369,12 @@ static int __init apic_intr_mode_select(void) return APIC_SYMMETRIC_IO; }
+/* Select the interrupt delivery mode for the BSP */ +void __init apic_intr_mode_select(void) +{ + apic_intr_mode = __apic_intr_mode_select(); +} + /* * An initial setup of the virtual wire mode. */ @@ -1414,8 +1429,6 @@ void __init apic_intr_mode_init(void) { bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
- apic_intr_mode = apic_intr_mode_select(); - switch (apic_intr_mode) { case APIC_PIC: pr_info("APIC: Keep in PIC mode(8259)\n"); diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 548dc9a..8a2ac1c 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -91,10 +91,18 @@ void __init hpet_time_init(void)
static __init void x86_late_time_init(void) { + /* + * Before PIT/HPET init, select the interrupt mode. This is required + * to make the decision whether PIT should be initialized correct. + */ + x86_init.irqs.intr_mode_select(); + + /* Setup the legacy timers */ x86_init.timers.timer_init(); + /* - * After PIT/HPET timers init, select and setup - * the final interrupt mode for delivering IRQs. + * After PIT/HPET timers init, set up the final interrupt mode for + * delivering IRQs. */ x86_init.irqs.intr_mode_init(); tsc_init(); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 2792b55..915a305 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -59,6 +59,7 @@ struct x86_init_ops x86_init __initdata = { .pre_vector_init = init_ISA_irqs, .intr_init = native_init_IRQ, .trap_init = x86_init_noop, + .intr_mode_select = apic_intr_mode_select, .intr_mode_init = apic_intr_mode_init },
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 1730a26..3efafe2 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1219,6 +1219,7 @@ asmlinkage __visible void __init xen_start_kernel(void) x86_platform.get_nmi_reason = xen_get_nmi_reason;
x86_init.resources.memory_setup = xen_memory_setup; + x86_init.irqs.intr_mode_select = x86_init_noop; x86_init.irqs.intr_mode_init = x86_init_noop; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner;
From: Jan Stancek jstancek@redhat.com
mainline inclusion from mainline-v5.3 commit afa8b475c1ae category: bugfix bugzilla: 17251 CVE: NA
-------------------------------------------------
KVM guests with commit c8c4076723da ("x86/timer: Skip PIT initialization on modern chipsets") applied to guest kernel have been observed to have unusually higher CPU usage with symptoms of increase in vm exits for HLT and MSW_WRITE (MSR_IA32_TSCDEADLINE).
This is caused by older QEMUs lacking support for X86_FEATURE_ARAT. lapic clock retains CLOCK_EVT_FEAT_C3STOP and nohz stays inactive. There's no usable broadcast device either.
Do the PIT initialization if guest CPU lacks X86_FEATURE_ARAT. On real hardware it shouldn't matter as ARAT and DEADLINE come together.
Fixes: c8c4076723da ("x86/timer: Skip PIT initialization on modern chipsets") Signed-off-by: Jan Stancek jstancek@redhat.com Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/kernel/apic/apic.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8fe688e..14c957b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -836,6 +836,10 @@ bool __init apic_needs_pit(void) apic_intr_mode == APIC_VIRTUAL_WIRE_NO_CONFIG) return true;
+ /* Virt guests may lack ARAT, but still have DEADLINE */ + if (!boot_cpu_has(X86_FEATURE_ARAT)) + return true; + /* Deadline timer is based on TSC so no further PIT action required */ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) return false;
From: Ard Biesheuvel ard.biesheuvel@linaro.org
mainline inclusion from mainline-5.2-rc2 commit 1cf24a2cc3fd category: bugfix bugzilla: 16697 CVE: NA
-------------------------------------------------
The R_AARCH64_PREL16 and R_AARCH64_PREL32 relocations are documented as permitting a range of [-2^15 .. 2^16), resp. [-2^31 .. 2^32). It is also documented that this means we cannot detect overflow in some cases, which is bad.
Since we always interpret the targets of these relocations as signed quantities (e.g., in the ksymtab handling code), let's tighten the overflow checks so that targets that are out of range for our signed interpretation of the relocated quantity get flagged.
Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/module.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 8998049..0fbd569 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -100,15 +100,27 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) { s64 sval = do_reloc(op, place, val);
+ /* + * The ELF psABI for AArch64 documents the 16-bit and 32-bit place + * relative relocations as having a range of [-2^15, 2^16) or + * [-2^31, 2^32), respectively. However, in order to be able to detect + * overflows reliably, we have to choose whether we interpret such + * quantities as signed or as unsigned, and stick with it. + * The way we organize our address space requires a signed + * interpretation of 32-bit relative references, so let's use that + * for all R_AARCH64_PRELxx relocations. This means our upper + * bound for overflow detection should be Sxx_MAX rather than Uxx_MAX. + */ + switch (len) { case 16: *(s16 *)place = sval; - if (sval < S16_MIN || sval > U16_MAX) + if (sval < S16_MIN || sval > S16_MAX) return -ERANGE; break; case 32: *(s32 *)place = sval; - if (sval < S32_MIN || sval > U32_MAX) + if (sval < S32_MIN || sval > S32_MAX) return -ERANGE; break; case 64:
From: Ard Biesheuvel ard.biesheuvel@linaro.org
mainline inclusion from mainline-5.2-rc5 commit 3fd00beb14a5 category: bugfix bugzilla: 16697 CVE: NA
-------------------------------------------------
Commit 1cf24a2cc3fd
("arm64/module: deal with ambiguity in PRELxx relocation ranges")
updated the overflow checking logic in the relocation handling code to ensure that PREL16/32 relocations don't overflow signed quantities.
However, the same code path is used for absolute relocations, where the interpretation is the opposite: the only current use case for absolute relocations operating on non-native word size quantities is the CRC32 handling in the CONFIG_MODVERSIONS code, and these CRCs are unsigned 32-bit quantities, which are now being rejected by the module loader if bit 31 happens to be set.
So let's use different ranges for quanties subject to absolute vs. relative relocations: - ABS16/32 relocations should be in the range [0, Uxx_MAX) - PREL16/32 relocations should be in the range [Sxx_MIN, Sxx_MAX) - otherwise, print an error since no other 16 or 32 bit wide data relocations are currently supported.
Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Signed-off-by: Will Deacon will.deacon@arm.com Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/module.c | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 0fbd569..834ecf1 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -102,10 +102,10 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len)
/* * The ELF psABI for AArch64 documents the 16-bit and 32-bit place - * relative relocations as having a range of [-2^15, 2^16) or - * [-2^31, 2^32), respectively. However, in order to be able to detect - * overflows reliably, we have to choose whether we interpret such - * quantities as signed or as unsigned, and stick with it. + * relative and absolute relocations as having a range of [-2^15, 2^16) + * or [-2^31, 2^32), respectively. However, in order to be able to + * detect overflows reliably, we have to choose whether we interpret + * such quantities as signed or as unsigned, and stick with it. * The way we organize our address space requires a signed * interpretation of 32-bit relative references, so let's use that * for all R_AARCH64_PRELxx relocations. This means our upper @@ -115,13 +115,35 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) switch (len) { case 16: *(s16 *)place = sval; - if (sval < S16_MIN || sval > S16_MAX) - return -ERANGE; + switch (op) { + case RELOC_OP_ABS: + if (sval < 0 || sval > U16_MAX) + return -ERANGE; + break; + case RELOC_OP_PREL: + if (sval < S16_MIN || sval > S16_MAX) + return -ERANGE; + break; + default: + pr_err("Invalid 16-bit data relocation (%d)\n", op); + return 0; + } break; case 32: *(s32 *)place = sval; - if (sval < S32_MIN || sval > S32_MAX) - return -ERANGE; + switch (op) { + case RELOC_OP_ABS: + if (sval < 0 || sval > U32_MAX) + return -ERANGE; + break; + case RELOC_OP_PREL: + if (sval < S32_MIN || sval > S32_MAX) + return -ERANGE; + break; + default: + pr_err("Invalid 32-bit data relocation (%d)\n", op); + return 0; + } break; case 64: *(s64 *)place = sval;
From: Masahiro Yamada yamada.masahiro@socionext.com
mainline inclusion from mainline-5.2-rc1 commit 02166b88d376 category: bugfix bugzilla: NA CVE: NA
-------------------------------------------------
This prepares to move CONFIG_OPTIMIZE_INLINING from x86 to a common place. We need to eliminate potential issues beforehand.
If it is enabled for arm64, the following errors are reported:
In file included from include/linux/compiler_types.h:68, from <command-line>: arch/arm64/include/asm/jump_label.h: In function 'cpus_have_const_cap': include/linux/compiler-gcc.h:120:38: warning: asm operand 0 probably doesn't match constraints #define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) ^~~ arch/arm64/include/asm/jump_label.h:32:2: note: in expansion of macro 'asm_volatile_goto' asm_volatile_goto( ^~~~~~~~~~~~~~~~~ include/linux/compiler-gcc.h:120:38: error: impossible constraint in 'asm' #define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) ^~~ arch/arm64/include/asm/jump_label.h:32:2: note: in expansion of macro 'asm_volatile_goto' asm_volatile_goto( ^~~~~~~~~~~~~~~~~
Link: http://lkml.kernel.org/r/20190423034959.13525-3-yamada.masahiro@socionext.co... Signed-off-by: Masahiro Yamada yamada.masahiro@socionext.com Tested-by: Mark Rutland mark.rutland@arm.com Cc: Arnd Bergmann arnd@arndb.de Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Boris Brezillon bbrezillon@kernel.org Cc: Borislav Petkov bp@suse.de Cc: Brian Norris computersforpeace@gmail.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: David Woodhouse dwmw2@infradead.org Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Marek Vasut marek.vasut@gmail.com Cc: Mathieu Malaterre malat@debian.org Cc: Miquel Raynal miquel.raynal@bootlin.com Cc: Paul Mackerras paulus@samba.org Cc: Ralf Baechle ralf@linux-mips.org Cc: Richard Weinberger richard@nod.at Cc: Russell King rmk+kernel@arm.linux.org.uk Cc: Stefan Agner stefan@agner.ch Cc: Thomas Gleixner tglx@linutronix.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/include/asm/cpufeature.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 103ce1d..b4779d9 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -370,7 +370,7 @@ static inline bool cpu_have_feature(unsigned int num) }
/* System capability check for constant caps */ -static inline bool __cpus_have_const_cap(int num) +static __always_inline bool __cpus_have_const_cap(int num) { if (num >= ARM64_NCAPS) return false; @@ -384,7 +384,7 @@ static inline bool cpus_have_cap(unsigned int num) return test_bit(num, cpu_hwcaps); }
-static inline bool cpus_have_const_cap(int num) +static __always_inline bool cpus_have_const_cap(int num) { if (static_branch_likely(&arm64_const_caps_ready)) return __cpus_have_const_cap(num);
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
mainline inclusion from mainline-v5.3-rc1 commit 21ba23792622 category: bugfix bugzilla: NA CVE: NA
-------------------------------------------------
If the power state of a device with ACPI PM is changed from D3hot to D3cold, it merely is a matter of dropping references to additional power resources (specifically, those in the list returned by _PR3), and the _PS3 method should not be invoked for the device then (as it has already been evaluated during the previous transition to D3hot).
Fixes: 20dacb71ad28 (ACPI / PM: Rework device power management to follow ACPI 6) Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Reviewed-by: Mika Westerberg mika.westerberg@linux.intel.com Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/acpi/device_pm.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 54b6547..11ba5d2 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -209,9 +209,15 @@ int acpi_device_set_power(struct acpi_device *device, int state) return -ENODEV; }
- result = acpi_dev_pm_explicit_set(device, state); - if (result) - goto end; + /* + * If the device goes from D3hot to D3cold, _PS3 has been + * evaluated for it already, so skip it in that case. + */ + if (device->power.state < ACPI_STATE_D3_HOT) { + result = acpi_dev_pm_explicit_set(device, state); + if (result) + goto end; + }
if (device->power.flags.power_resources) result = acpi_power_transition(device, target_state);
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
mainline inclusion from mainline-v5.3-rc1 commit f850a48a0799 category: bugfix bugzilla: 17645 CVE: NA
-------------------------------------------------
If a device with ACPI PM is left in D0 during a system-wide transition to the S3 (suspend-to-RAM) or S4 (hibernation) sleep state, the actual state of the device need not be D0 during resume from it, although its power.state value will still reflect D0 (that is, the power state from before the system-wide transition).
In that case, the acpi_device_set_power() call made to ensure that the power state of the device will be D0 going forward has no effect, because the new state (D0) is equal to the one reflected by the device's power.state value. That does not affect power resources, which are taken care of by acpi_resume_power_resources() called from acpi_pm_finish() during resume from system-wide sleep states, but it still may be necessary to invoke _PS0 for the device on top of that in order to finalize its transition to D0.
For this reason, modify acpi_device_set_power() to allow transitions to D0 to occur even if D0 is the current power state of the device according to its power.state value.
That will not affect power resources, which are assumed to be in the right configuration already (as reflected by the current values of their reference counters), but it may cause _PS0 to be evaluated for the device. However, evaluating _PS0 for a device already in D0 may lead to confusion in general, so invoke _PSC (if present) to check the device's current power state upfront and only evaluate _PS0 for it if _PSC has returned a power state different from D0. [If _PSC is not present or the evaluation of it fails, the power state of the device is assumed to be D0 at this point.]
Fixes: 20dacb71ad28 (ACPI / PM: Rework device power management to follow ACPI 6) Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Reviewed-by: Mika Westerberg mika.westerberg@linux.intel.com Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/acpi/device_pm.c | 53 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 45 insertions(+), 8 deletions(-)
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 11ba5d2..6687729 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -53,6 +53,19 @@ const char *acpi_power_state_string(int state) } }
+static int acpi_dev_pm_explicit_get(struct acpi_device *device, int *state) +{ + unsigned long long psc; + acpi_status status; + + status = acpi_evaluate_integer(device->handle, "_PSC", NULL, &psc); + if (ACPI_FAILURE(status)) + return -ENODEV; + + *state = psc; + return 0; +} + /** * acpi_device_get_power - Get power state of an ACPI device. * @device: Device to get the power state of. @@ -65,6 +78,7 @@ const char *acpi_power_state_string(int state) int acpi_device_get_power(struct acpi_device *device, int *state) { int result = ACPI_STATE_UNKNOWN; + int error;
if (!device || !state) return -EINVAL; @@ -81,18 +95,16 @@ int acpi_device_get_power(struct acpi_device *device, int *state) * if available. */ if (device->power.flags.power_resources) { - int error = acpi_power_get_inferred_state(device, &result); + error = acpi_power_get_inferred_state(device, &result); if (error) return error; } if (device->power.flags.explicit_get) { - acpi_handle handle = device->handle; - unsigned long long psc; - acpi_status status; + int psc;
- status = acpi_evaluate_integer(handle, "_PSC", NULL, &psc); - if (ACPI_FAILURE(status)) - return -ENODEV; + error = acpi_dev_pm_explicit_get(device, &psc); + if (error) + return error;
/* * The power resources settings may indicate a power state @@ -159,7 +171,8 @@ int acpi_device_set_power(struct acpi_device *device, int state)
/* Make sure this is a valid target state */
- if (state == device->power.state) { + /* There is a special case for D0 addressed below. */ + if (state > ACPI_STATE_D0 && state == device->power.state) { ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] already in %s\n", device->pnp.bus_id, acpi_power_state_string(state))); @@ -227,6 +240,30 @@ int acpi_device_set_power(struct acpi_device *device, int state) if (result) goto end; } + + if (device->power.state == ACPI_STATE_D0) { + int psc; + + /* Nothing to do here if _PSC is not present. */ + if (!device->power.flags.explicit_get) + return 0; + + /* + * The power state of the device was set to D0 last + * time, but that might have happened before a + * system-wide transition involving the platform + * firmware, so it may be necessary to evaluate _PS0 + * for the device here. However, use extra care here + * and evaluate _PSC to check the device's current power + * state, and only invoke _PS0 if the evaluation of _PSC + * is successful and it returns a power state different + * from D0. + */ + result = acpi_dev_pm_explicit_get(device, &psc); + if (result || psc == ACPI_STATE_D0) + return 0; + } + result = acpi_dev_pm_explicit_set(device, ACPI_STATE_D0); }
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
mainline inclusion from mainline-v5.3-rc3 commit 42787ed79638 category: bugfix bugzilla: 17645 CVE: NA
-------------------------------------------------
Commit f850a48a0799 ("ACPI: PM: Allow transitions to D0 to occur in special cases") overlooked the fact that acpi_power_transition() may change the power.state value for the target device and if that happens, it may confuse acpi_device_set_power() and cause it to omit the _PS0 evaluation which on some systems is necessary to change power states of devices from low-power to D0.
Fix that by saving the current value of power.state for the target device before passing it to acpi_power_transition() and using the saved value in a subsequent check.
Fixes: f850a48a0799 ("ACPI: PM: Allow transitions to D0 to occur in special cases") Reported-by: Kai-Heng Feng kai.heng.feng@canonical.com Reported-by: Mario Limonciello mario.limonciello@dell.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Tested-by: Kai-Heng Feng kai.heng.feng@canonical.com Tested-by: Mario Limonciello mario.limonciello@dell.com Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/acpi/device_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 6687729..19a96b4 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -235,13 +235,15 @@ int acpi_device_set_power(struct acpi_device *device, int state) if (device->power.flags.power_resources) result = acpi_power_transition(device, target_state); } else { + int cur_state = device->power.state; + if (device->power.flags.power_resources) { result = acpi_power_transition(device, ACPI_STATE_D0); if (result) goto end; }
- if (device->power.state == ACPI_STATE_D0) { + if (cur_state == ACPI_STATE_D0) { int psc;
/* Nothing to do here if _PSC is not present. */
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
mainline inclusion from mainline-v5.3-rc1 commit b51033e06c2e category: bugfix bugzilla: 17644 CVE: NA
-------------------------------------------------
In pci_pm_complete() there are checks to decide whether or not to resume devices that were left in runtime-suspend during the preceding system-wide transition into a sleep state. They involve checking the current power state of the device and comparing it with the power state of it set before the preceding system-wide transition, but the platform component of the device's power state is not handled correctly in there.
Namely, on platforms with ACPI, the device power state information needs to be updated with care, so that the reference counters of power resources used by the device (if any) are set to ensure that the refreshed power state of it will be maintained going forward.
To that end, introduce a new ->refresh_state() platform PM callback for PCI devices, for asking the platform to refresh the device power state data and ensure that the corresponding power state will be maintained going forward, make it invoke acpi_device_update_power() (for devices with ACPI PM) on platforms with ACPI and make pci_pm_complete() use it, through a new pci_refresh_power_state() wrapper function.
Fixes: a0d2a959d3da (PCI: Avoid unnecessary resume after direct-complete) Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Reviewed-by: Mika Westerberg mika.westerberg@linux.intel.com
Conflicts: drivers/pci/pci.c [wangxiongfeng: fix a little conflicts]
Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/pci/pci-acpi.c | 9 +++++++++ drivers/pci/pci-driver.c | 9 ++++++++- drivers/pci/pci.c | 21 +++++++++++++++++++++ drivers/pci/pci.h | 4 ++++ 4 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index f7218c1..e442ab3 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -583,6 +583,14 @@ static pci_power_t acpi_pci_get_power_state(struct pci_dev *dev) return state_conv[state]; }
+static void acpi_pci_refresh_power_state(struct pci_dev *dev) +{ + struct acpi_device *adev = ACPI_COMPANION(&dev->dev); + + if (adev && acpi_device_power_manageable(adev)) + acpi_device_update_power(adev, NULL); +} + static int acpi_pci_propagate_wakeup(struct pci_bus *bus, bool enable) { while (bus->parent) { @@ -639,6 +647,7 @@ static bool acpi_pci_need_resume(struct pci_dev *dev) .is_manageable = acpi_pci_power_manageable, .set_state = acpi_pci_set_power_state, .get_state = acpi_pci_get_power_state, + .refresh_state = acpi_pci_refresh_power_state, .choose_state = acpi_pci_choose_state, .set_wakeup = acpi_pci_wakeup, .need_resume = acpi_pci_need_resume, diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index ccb455c..45c3a78 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -703,7 +703,14 @@ static void pci_pm_complete(struct device *dev) if (pm_runtime_suspended(dev) && pm_resume_via_firmware()) { pci_power_t pre_sleep_state = pci_dev->current_state;
- pci_update_current_state(pci_dev, pci_dev->current_state); + pci_refresh_power_state(pci_dev); + /* + * On platforms with ACPI this check may also trigger for + * devices sharing power resources if one of those power + * resources has been activated as a result of a change of the + * power state of another device sharing it. However, in that + * case it is also better to resume the device, in general. + */ if (pci_dev->current_state < pre_sleep_state) pm_request_resume(dev); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index a0a00ec..cea3c77 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -776,6 +776,12 @@ static inline pci_power_t platform_pci_get_power_state(struct pci_dev *dev) return pci_platform_pm ? pci_platform_pm->get_state(dev) : PCI_UNKNOWN; }
+static inline void platform_pci_refresh_power_state(struct pci_dev *dev) +{ + if (pci_platform_pm && pci_platform_pm->refresh_state) + pci_platform_pm->refresh_state(dev); +} + static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev) { return pci_platform_pm ? @@ -928,6 +934,21 @@ void pci_update_current_state(struct pci_dev *dev, pci_power_t state) }
/** + * pci_refresh_power_state - Refresh the given device's power state data + * @dev: Target PCI device. + * + * Ask the platform to refresh the devices power state information and invoke + * pci_update_current_state() to update its current PCI power state. + */ +void pci_refresh_power_state(struct pci_dev *dev) +{ + if (platform_pci_power_manageable(dev)) + platform_pci_refresh_power_state(dev); + + pci_update_current_state(dev, dev->current_state); +} + +/** * pci_platform_power_transition - Use platform to change device power state * @dev: PCI device to handle. * @state: State to put the device into. diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 6f82c38..76b34ef 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -49,6 +49,8 @@ int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai, * * @get_state: queries the platform firmware for a device's current power state * + * @refresh_state: asks the platform to refresh the device's power state data + * * @choose_state: returns PCI power state of given device preferred by the * platform; to be used during system-wide transitions from a * sleeping state to the working state and vice versa @@ -66,6 +68,7 @@ struct pci_platform_pm_ops { bool (*is_manageable)(struct pci_dev *dev); int (*set_state)(struct pci_dev *dev, pci_power_t state); pci_power_t (*get_state)(struct pci_dev *dev); + void (*refresh_state)(struct pci_dev *dev); pci_power_t (*choose_state)(struct pci_dev *dev); int (*set_wakeup)(struct pci_dev *dev, bool enable); bool (*need_resume)(struct pci_dev *dev); @@ -73,6 +76,7 @@ struct pci_platform_pm_ops {
int pci_set_platform_pm(const struct pci_platform_pm_ops *ops); void pci_update_current_state(struct pci_dev *dev, pci_power_t state); +void pci_refresh_power_state(struct pci_dev *dev); void pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); int pci_finish_runtime_suspend(struct pci_dev *dev);
From: Arvind Sankar nivedita@alum.mit.edu
mainline inclusion from v5.5-rc1 commit dacc9092336b category: bugfix bugzilla: 28820 CVE: NA ---------------------------
When checking whether the reported lfb_size makes sense, the height * stride result is page-aligned before seeing whether it exceeds the reported size.
This doesn't work if height * stride is not an exact number of pages. For example, as reported in the kernel bugzilla below, an 800x600x32 EFI framebuffer gets skipped because of this.
Move the PAGE_ALIGN to after the check vs size.
Reported-by: Christopher Head chead@chead.ca Tested-by: Christopher Head chead@chead.ca Signed-off-by: Arvind Sankar nivedita@alum.mit.edu Signed-off-by: Borislav Petkov bp@suse.de Link: https://bugzilla.kernel.org/show_bug.cgi?id=206051 Link: https://lkml.kernel.org/r/20200107230410.2291947-1-nivedita@alum.mit.edu Signed-off-by: Hongbo Yao yaohongbo@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/kernel/sysfb_simplefb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 85195d4..f321534 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -94,11 +94,11 @@ __init int create_simplefb(const struct screen_info *si, if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) size <<= 16; length = mode->height * mode->stride; - length = PAGE_ALIGN(length); if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } + length = PAGE_ALIGN(length);
/* setup IORESOURCE_MEM as framebuffer memory */ memset(&res, 0, sizeof(res));
From: James Morse james.morse@arm.com
mainline inclusion from v5.3-rc1 commit 40ca0ce56d4b category: bugfix bugzilla: 19923 CVE: NA ---------------------------
Comparing the arm-arm's pseudocode for AArch64.PCAlignmentFault() with AArch64.SPAlignmentFault() shows that SP faults don't copy the faulty-SP to FAR_EL1, but this is where we read from, and the address we provide to user-space with the BUS_ADRALN signal.
For user-space this value will be UNKNOWN due to the previous ERET to user-space. If the last value is preserved, on systems with KASLR or KPTI this will be the user-space link-register left in FAR_EL1 by tramp_exit(). Fix this to retrieve the original sp_el0 value, and pass this to do_sp_pc_fault().
SP alignment faults from EL1 will cause us to take the fault again when trying to store the pt_regs. This eventually takes us to the overflow stack. Remove the ESR_ELx_EC_SP_ALIGN check as we will never make it this far.
Fixes: 60ffc30d5652 ("arm64: Exception handling") Signed-off-by: James Morse james.morse@arm.com [will: change label name and fleshed out comment] Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Hongbo Yao yaohongbo@huawei.com Reviewed-by: Xuefeng Wang wxf.wang@hisilicon.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/entry.S | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e577599..f5c863b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -597,10 +597,8 @@ el1_sync: b.eq el1_ia cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el1_undef - cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception - b.eq el1_sp_pc cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el1_sp_pc + b.eq el1_pc cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1 b.eq el1_undef cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 @@ -622,9 +620,11 @@ el1_da: bl do_mem_abort
kernel_exit 1 -el1_sp_pc: +el1_pc: /* - * Stack or PC alignment exception handling + * PC alignment exception handling. We don't handle SP alignment faults, + * since we will have hit a recursive exception when trying to push the + * initial pt_regs. */ mrs x0, far_el1 inherit_daif pstate=x23, tmp=x2 @@ -753,9 +753,9 @@ el0_sync: cmp x24, #ESR_ELx_EC_SYS64 // configurable trap b.eq el0_sys cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception - b.eq el0_sp_pc + b.eq el0_sp cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_sp_pc + b.eq el0_pc cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 @@ -779,7 +779,7 @@ el0_sync_compat: cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception b.eq el0_fpsimd_exc cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_sp_pc + b.eq el0_pc cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap @@ -869,11 +869,15 @@ el0_fpsimd_exc: mov x1, sp bl do_fpsimd_exc b ret_to_user +el0_sp: + ldr x26, [sp, #S_SP] + b el0_sp_pc +el0_pc: + mrs x26, far_el1 el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x26, far_el1 gic_prio_kentry_setup tmp=x0 enable_da_f #ifdef CONFIG_TRACE_IRQFLAGS