From: Zheng Chongzhen zhengchongzhen@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I56OSP
--------------------------------
When porting the ZX200 chipset driver to the SW64 platform, we hit an IOMMU exception, shown as follows:
iommu_interrupt, iommu_status = 0xc8014000dffe0000, devid 0xa00, dva 0xdffe0000, 1Unable to handle kernel paging request at virtual address 0000000000000040 CPU 0 swapper/0(0): Oops 0 pc = [<ffffffff81424b80>] ra = [<ffffffff81424b24>] ps = 0001 Not tainted pc is at iommu_interrupt+0x140/0x3e0 ra is at iommu_interrupt+0xe4/0x3e0 v0 = 0000000000000051 t0 = c8014000dffe0000 t1 = 0000000000000000 t2 = 0000000000000000 t3 = 0000000000000000 t4 = 0000000000000001 t5 = 0000000000000001 t6 = 0000000000000000 t7 = ffffffff82948000 s0 = fff00003ffff0400 s1 = 0000000000000001 s2 = 0000000000000a00 s3 = 0000000000000a00 s4 = 00000000dffe0000 s5 = fff0000100680e80 s6 = ffffffff8294ba70 a0 = 0000000000000001 a1 = 0000000000000001 a2 = ffffffff8294b790 a3 = ffffffff8294b7a8 a4 = 0000000000000000 a5 = ffffffff82c5fb7a t8 = 0000000000000001 t9 = fffffffffffcac48 t10 = 0000000000000000 t11= 0000000000000000 pv = ffffffff809f4f10 at = ffffffff82bff6c0 gp = ffffffff82c1f510 sp = (____ptrval____)
The root cause is that the device which raises the IOMMU exception is not in the device list, so dereferencing the resulting NULL sdev causes a page fault. To work around this problem, this patch just clears IOMMUEXCPT_STATUS and returns when no matching device is found.
BTW, why the device that raises the IOMMU exception does not have a valid device ID is still a puzzling problem.
Signed-off-by: Zheng Chongzhen zhengchongzhen@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- drivers/iommu/sw64/sunway_iommu.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/sw64/sunway_iommu.c b/drivers/iommu/sw64/sunway_iommu.c index 5797adf8fbc5..b6c8f1272d28 100644 --- a/drivers/iommu/sw64/sunway_iommu.c +++ b/drivers/iommu/sw64/sunway_iommu.c @@ -648,10 +648,21 @@ irqreturn_t iommu_interrupt(int irq, void *dev) type = (iommu_status >> 59) & 0x7; devid = (iommu_status >> 37) & 0xffff; dva = iommu_status & 0xffffffff; - sdev = search_dev_data(devid); - sdomain = sdev->domain; pr_info("%s, iommu_status = %#lx, devid %#lx, dva %#lx, ", __func__, iommu_status, devid, dva); + + sdev = search_dev_data(devid); + if (sdev == NULL) { + pr_info("no such dev!!!\n"); + + iommu_status &= ~(1UL << 62); + write_piu_ior0(hose->node, hose->index, + IOMMUEXCPT_STATUS, iommu_status); + + return IRQ_HANDLED; + } + + sdomain = sdev->domain; switch (type) { case DTE_LEVEL1: pr_info("invalid level1 dte, addr:%#lx, val:%#lx\n", @@ -674,7 +685,6 @@ irqreturn_t iommu_interrupt(int irq, void *dev) fetch_pte(sdomain, dva, PTE_LEVEL2_VAL));
iommu_status &= ~(1UL << 62); - iommu_status = iommu_status | (1UL << 63); write_piu_ior0(hose->node, hose->index, IOMMUEXCPT_STATUS, iommu_status); break;
From: Min Fanlei minfanlei@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I56WV8
--------------------------------
The last page_ref_count() call will cause kernel panic if kvm memory pool is at the end of DRAM. This patch reorders the checks to avoid illegal atomic_read operation.
Signed-off-by: Min Fanlei minfanlei@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/kernel/setup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index 4d39db5fb1ef..e20e215dd08a 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -1026,8 +1026,7 @@ static int __init sw64_kvm_pool_init(void) end_page = pfn_to_page((kvm_mem_base + kvm_mem_size - 1) >> PAGE_SHIFT);
p = base_page; - while (page_ref_count(p) == 0 && - (unsigned long)p <= (unsigned long)end_page) { + while (p <= end_page && page_ref_count(p) == 0) { set_page_count(p, 1); page_mapcount_reset(p); SetPageReserved(p);
From: Yang Qiang yangqiang@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GE5X
--------------------------------
Due to the sw64 ABI specification, a large kernel image may overwrite the data structures of the UEFI BIOS.
To solve this problem, we have expanded UEFI BIOS to 1GB to make sure that runtime service code and data structures reside in the high address below 1GB which may be beyond ktext map. So fix it.
Signed-off-by: Yang Qiang yangqiang@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/kernel/setup.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index e20e215dd08a..85d172c32239 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -560,22 +560,20 @@ static void __init setup_machine_fdt(void) #ifdef CONFIG_USE_OF void *dt_virt; const char *name; - unsigned long phys_addr;
/* Give a chance to select kernel builtin DTB firstly */ if (IS_ENABLED(CONFIG_SW64_BUILTIN_DTB)) dt_virt = (void *)__dtb_start; else { dt_virt = (void *)sunway_boot_params->dtb_start; - if (dt_virt < (void *)__bss_stop) { + if (virt_to_phys(dt_virt) < virt_to_phys(__bss_stop)) { pr_emerg("BUG: DTB has been corrupted by kernel image!\n"); while (true) cpu_relax(); } }
- phys_addr = __phys_addr((unsigned long)dt_virt); - if (!phys_addr_valid(phys_addr) || + if (!phys_addr_valid(virt_to_phys(dt_virt)) || !early_init_dt_scan(dt_virt)) { pr_crit("\n" "Error: invalid device tree blob at virtual address %px\n"
From: He Chuyue hechuyue@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I56X48
--------------------------------
According to CORE3B core software interface manual, the performance counter PC_1 on sw64 supports event index up to 0x3d. Now fix it, and remove some unused macros from wrperfmon.h.
Signed-off-by: He Chuyue hechuyue@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/include/asm/wrperfmon.h | 4 +--- arch/sw_64/kernel/perf_event.c | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/arch/sw_64/include/asm/wrperfmon.h b/arch/sw_64/include/asm/wrperfmon.h index 098702573bfc..15f7f6beb07c 100644 --- a/arch/sw_64/include/asm/wrperfmon.h +++ b/arch/sw_64/include/asm/wrperfmon.h @@ -33,10 +33,8 @@
#define PC0_RAW_BASE 0x0 #define PC1_RAW_BASE 0x100 -#define PC0_MIN 0x0 #define PC0_MAX 0xF -#define PC1_MIN 0x0 -#define PC1_MAX 0x37 +#define PC1_MAX 0x3D
#define SW64_PERFCTRL_KM 2 #define SW64_PERFCTRL_UM 3 diff --git a/arch/sw_64/kernel/perf_event.c b/arch/sw_64/kernel/perf_event.c index 70f1f2781016..e9aae53a56f6 100644 --- a/arch/sw_64/kernel/perf_event.c +++ b/arch/sw_64/kernel/perf_event.c @@ -244,7 +244,7 @@ static const struct sw64_perf_event *core3_map_cache_event(u64 config)
/* * r0xx for counter0, r1yy for counter1. - * According to the datasheet, 00 <= xx <= 0F, 00 <= yy <= 37 + * According to the datasheet, 00 <= xx <= 0F, 00 <= yy <= 3D */ static bool core3_raw_event_valid(u64 config) {
From: Zhu Donghong zhudonghong@wxiat.com
Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5GDHC
--------------------------------
Add a driver for the SW64 implementation of the CPU interrupt controller (INTC). In fact, the INTC is just a software simulated module to connect global interrupt sources to the local interrupt controller on each core.
In addition, we rename irq-lpc.c to irq-sw64-lpc-intc.c and the SW64_CHIP3_LPC config to SW64_LPC_INTC.
Signed-off-by: Zhu Donghong zhudonghong@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/Kconfig | 8 +- arch/sw_64/chip/chip3/Makefile | 1 - drivers/irqchip/Kconfig | 33 +++--- drivers/irqchip/Makefile | 4 +- drivers/irqchip/irq-intc-v1.c | 104 ------------------ .../irqchip/irq-sw64-intc-v2.c | 64 ++++++++++- .../{irq-lpc.c => irq-sw64-lpc-intc.c} | 0 7 files changed, 84 insertions(+), 130 deletions(-) delete mode 100644 drivers/irqchip/irq-intc-v1.c rename arch/sw_64/chip/chip3/irq_chip.c => drivers/irqchip/irq-sw64-intc-v2.c (59%) rename drivers/irqchip/{irq-lpc.c => irq-sw64-lpc-intc.c} (100%)
diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index 0e32fc7e1f9a..ec6e583a5d9a 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -234,6 +234,7 @@ config PLATFORM_XUELANG depends on SW64_CHIP3 select SPARSE_IRQ select SYS_HAS_EARLY_PRINTK + select SW64_INTC_V2 help Sunway chip3 board chipset
@@ -736,15 +737,10 @@ endmenu
menu "Boot options"
-config SW64_IRQ_CHIP - bool - config USE_OF bool "Flattened Device Tree support" - select GENERIC_IRQ_CHIP - select IRQ_DOMAIN - select SW64_IRQ_CHIP select OF + select IRQ_DOMAIN help Include support for flattened device tree machine descriptions.
diff --git a/arch/sw_64/chip/chip3/Makefile b/arch/sw_64/chip/chip3/Makefile index 2b7b5790003f..ba0ab3f67f98 100644 --- a/arch/sw_64/chip/chip3/Makefile +++ b/arch/sw_64/chip/chip3/Makefile @@ -4,5 +4,4 @@ obj-y := chip.o i2c-lib.o
obj-$(CONFIG_PCI) += pci-quirks.o obj-$(CONFIG_PCI_MSI) += msi.o vt_msi.o -obj-$(CONFIG_SW64_IRQ_CHIP) += irq_chip.o obj-$(CONFIG_CPUFREQ_DEBUGFS) += cpufreq_debugfs.o diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 5bf6cf60999b..dd35895c92f3 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -1,21 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menu "IRQ chip support"
-config SW64_INTC - bool "SW64 Platform-Level Interrupt Controller" - depends on ACPI && SW64 - help - This enables support for the INTC chip found in SW systems. - The INTC controls devices interrupts and connects them to each - core's local interrupt controller. - -config SW64_CHIP3_LPC - bool "SW64 Chip3 Buildin LPC Interrupt Controller" - depends on SW64_CHIP3 - help - This enables support for the LPC interrupt controller bultin in - on chip3 series. - config IRQCHIP def_bool y depends on OF_IRQ @@ -26,6 +11,24 @@ config ARM_GIC select GENERIC_IRQ_MULTI_HANDLER select GENERIC_IRQ_EFFECTIVE_AFF_MASK
+config SW64_INTC_V2 + bool "SW64 Interrupt Controller V2" + depends on SW64_CHIP3 + default y + select GENERIC_IRQ_CHIP + select IRQ_DOMAIN + help + This enables support for the INTC chip found in SW CHIP3 systems. + The INTC controls devices interrupts and connects them to each + core's local interrupt controller. + +config SW64_LPC_INTC + bool "SW64 cpu builtin LPC Interrupt Controller" + depends on SW64_INTC_V2 + help + Say yes here to add support for the SW64 cpu builtin LPC + IRQ controller. + config ARM_GIC_PM bool depends on PM diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 78eb12ab4d4c..14a022c074ce 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -27,8 +27,8 @@ obj-$(CONFIG_ARCH_SUNXI) += irq-sun4i.o obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi-nmi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o -obj-$(CONFIG_SW64_INTC) += irq-intc-v1.o -obj-$(CONFIG_SW64_CHIP3_LPC) += irq-lpc.o +obj-$(CONFIG_SW64_INTC_V2) += irq-sw64-intc-v2.o +obj-$(CONFIG_SW64_LPC_INTC) += irq-sw64-lpc-intc.o obj-$(CONFIG_ARM_GIC_PM) += irq-gic-pm.o obj-$(CONFIG_ARCH_REALVIEW) += irq-gic-realview.o obj-$(CONFIG_ARM_GIC_V2M) += irq-gic-v2m.o diff --git a/drivers/irqchip/irq-intc-v1.c b/drivers/irqchip/irq-intc-v1.c deleted file mode 100644 index 4519e96526fb..000000000000 --- a/drivers/irqchip/irq-intc-v1.c +++ /dev/null @@ -1,104 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/acpi_iort.h> -#include <linux/msi.h> -#include <linux/acpi.h> -#include <linux/irqdomain.h> -#include <linux/interrupt.h> -#include <linux/cpumask.h> -#include <linux/io.h> -#include <linux/percpu.h> -#include <linux/slab.h> -#include <linux/irqchip.h> -#include <asm/sw64io.h> -static void fake_irq_mask(struct irq_data *data) -{ -} - -static void fake_irq_unmask(struct irq_data *data) -{ -} - -static struct irq_chip onchip_intc = { - .name = "SW fake Intc", - .irq_mask = fake_irq_mask, - .irq_unmask = 
fake_irq_unmask, -}; - -static int sw_intc_domain_map(struct irq_domain *d, unsigned int irq, - irq_hw_number_t hw) -{ - irq_set_chip_and_handler(irq, &onchip_intc, handle_level_irq); - irq_set_status_flags(irq, IRQ_LEVEL); - return 0; -} - -static const struct irq_domain_ops intc_irq_domain_ops = { - .xlate = irq_domain_xlate_onecell, - .map = sw_intc_domain_map, -}; - -#ifdef CONFIG_ACPI - -static int __init -intc_parse_madt(union acpi_subtable_headers *header, - const unsigned long end) -{ - struct acpi_madt_io_sapic *its_entry; - static struct irq_domain *root_domain; - int intc_irqs = 8, irq_base = NR_IRQS_LEGACY; - irq_hw_number_t hwirq_base = 0; - int irq_start = -1; - - its_entry = (struct acpi_madt_io_sapic *)header; - - intc_irqs -= hwirq_base; /* calculate # of irqs to allocate */ - - irq_base = irq_alloc_descs(irq_start, 16, intc_irqs, - numa_node_id()); - if (irq_base < 0) { - WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", - irq_start); - irq_base = irq_start; - } - - root_domain = irq_domain_add_legacy(NULL, intc_irqs, irq_base, - hwirq_base, &intc_irq_domain_ops, NULL); - - if (!root_domain) - pr_err("Failed to create irqdomain"); - - irq_set_default_host(root_domain); - - sw64_io_write(0, MCU_DVC_INT_EN, 0xff); - - return 0; -} - -static int __init acpi_intc_init(void) -{ - int count = 0; - - count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_SAPIC, - intc_parse_madt, 0); - - if (count <= 0) { - pr_err("No valid intc entries exist\n"); - return -EINVAL; - } - return 0; -} -#else -static int __init acpi_intc_init(void) -{ - return 0; -} -#endif - -static int __init intc_init(void) -{ - acpi_intc_init(); - - return 0; -} -subsys_initcall(intc_init); diff --git a/arch/sw_64/chip/chip3/irq_chip.c b/drivers/irqchip/irq-sw64-intc-v2.c similarity index 59% rename from arch/sw_64/chip/chip3/irq_chip.c rename to drivers/irqchip/irq-sw64-intc-v2.c index 24dfa1e1a898..8640c4aa9506 100644 --- a/arch/sw_64/chip/chip3/irq_chip.c +++ 
b/drivers/irqchip/irq-sw64-intc-v2.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/irqdomain.h> #include <linux/irqchip.h> - -#include <asm/irq_impl.h> +#include <linux/acpi.h> +#include <linux/acpi_iort.h> +#include <linux/of_irq.h> +#include <asm/sw64io.h>
static void fake_irq_mask(struct irq_data *data) { @@ -32,6 +34,64 @@ static const struct irq_domain_ops sw64_intc_domain_ops = { .map = sw64_intc_domain_map, };
+static int __init +intc_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_io_sapic *its_entry; + static struct irq_domain *root_domain; + int intc_irqs = 8, irq_base = NR_IRQS_LEGACY; + irq_hw_number_t hwirq_base = 0; + int irq_start = -1; + + its_entry = (struct acpi_madt_io_sapic *)header; + + intc_irqs -= hwirq_base; /* calculate # of irqs to allocate */ + + irq_base = irq_alloc_descs(irq_start, 16, intc_irqs, + numa_node_id()); + if (irq_base < 0) { + WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", + irq_start); + irq_base = irq_start; + } + + root_domain = irq_domain_add_legacy(NULL, intc_irqs, irq_base, + hwirq_base, &sw64_intc_domain_ops, NULL); + + if (!root_domain) + pr_err("Failed to create irqdomain"); + + irq_set_default_host(root_domain); + + sw64_io_write(0, MCU_DVC_INT_EN, 0xff); + + return 0; +} + +static int __init acpi_intc_init(void) +{ + int count = 0; + + count = acpi_table_parse_madt(ACPI_MADT_TYPE_IO_SAPIC, + intc_parse_madt, 0); + + if (count <= 0) { + pr_err("No valid intc entries exist\n"); + return -EINVAL; + } + return 0; +} + +static int __init intc_init(void) +{ + acpi_intc_init(); + + return 0; +} + +subsys_initcall(intc_init); + static struct irq_domain *root_domain;
static int __init diff --git a/drivers/irqchip/irq-lpc.c b/drivers/irqchip/irq-sw64-lpc-intc.c similarity index 100% rename from drivers/irqchip/irq-lpc.c rename to drivers/irqchip/irq-sw64-lpc-intc.c
From: Mao Minkai maominkai@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I56W9F
--------------------------------
When destination is not 8-byte aligned, csum_partial_cfu_dest_unaligned() should be used to avoid kernel unaligned exception.
Fixes: 2fcadd2861b4 ("sw64: optimize ip checksum calculation") Signed-off-by: Mao Minkai maominkai@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/lib/csum_partial_copy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/sw_64/lib/csum_partial_copy.c b/arch/sw_64/lib/csum_partial_copy.c index 441ae5575de5..5e5274e82b2b 100644 --- a/arch/sw_64/lib/csum_partial_copy.c +++ b/arch/sw_64/lib/csum_partial_copy.c @@ -128,9 +128,9 @@ static __wsum __csum_and_copy(const void __user *src, void *dst, int len) (const unsigned long __user *) src, (unsigned long *) dst, len-8); } else { - checksum = csum_partial_cfu_dest_aligned( + checksum = csum_partial_cfu_dest_unaligned( (const unsigned long __user *) src, - (unsigned long *) dst, len-8); + (unsigned long *) dst, doff, len-8); } return (__force __wsum)from64to16(checksum); }
From: Xiong Aifei xiongaifei@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GDKC
--------------------------------
Signed-off-by: Xiong Aifei xiongaifei@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++-- drivers/gpu/drm/radeon/radeon_vce.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 851d64e83166..28c4e1fe5cd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1355,7 +1355,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) }
#if IS_ENABLED(CONFIG_SW64) - _memset_c_io(hpd, 0, mec_hpd_size); + memset_io(hpd, 0, mec_hpd_size); #else memset(hpd, 0, mec_hpd_size); #endif @@ -4654,7 +4654,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) mutex_unlock(&adev->srbm_mutex); } else { #if IS_ENABLED(CONFIG_SW64) - _memset_c_io((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); + memset_io((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); #else memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); #endif diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 68cc5a347d3b..b9680d38d924 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -240,7 +240,7 @@ int radeon_vce_resume(struct radeon_device *rdev) }
#ifdef __sw_64__ - _memset_c_io(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); + memset_io(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); #else memset(cpu_addr, 0, radeon_bo_size(rdev->vce.vcpu_bo)); #endif
From: He Chuyue hechuyue@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I56X48
--------------------------------
To read raw event count, perf stat calls sw64_pmu_start() to start the event and then calls sw64_perf_event_set_period() to set a new period to sample over.
It used to initialize hwc.prev_count and PMC with different values, which results in erroneous sample values. To fix this problem, initialize both of them consistently with the same value (pmc_max_period - left).
Signed-off-by: He Chuyue hechuyue@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/kernel/perf_event.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/arch/sw_64/kernel/perf_event.c b/arch/sw_64/kernel/perf_event.c index e9aae53a56f6..52ec34e33269 100644 --- a/arch/sw_64/kernel/perf_event.c +++ b/arch/sw_64/kernel/perf_event.c @@ -297,31 +297,33 @@ static int sw64_perf_event_set_period(struct perf_event *event, { long left = local64_read(&hwc->period_left); long period = hwc->sample_period; - int ret = 0; + int overflow = 0; + unsigned long value;
if (unlikely(left <= -period)) { left = period; local64_set(&hwc->period_left, left); hwc->last_period = period; - ret = 1; + overflow = 1; }
if (unlikely(left <= 0)) { left += period; local64_set(&hwc->period_left, left); hwc->last_period = period; - ret = 1; + overflow = 1; }
if (left > (long)sw64_pmu->pmc_max_period) left = sw64_pmu->pmc_max_period;
- local64_set(&hwc->prev_count, (unsigned long)(-left)); - sw64_write_pmc(idx, (unsigned long)(sw64_pmu->pmc_max_period - left)); + value = sw64_pmu->pmc_max_period - left; + local64_set(&hwc->prev_count, value); + sw64_write_pmc(idx, value);
perf_event_update_userpage(event);
- return ret; + return overflow; }
/*
From: Lu Feifei lufeifei@wxiat.com
Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56WV8
--------------------------------
This patch introduces memory-hotplug support for the guest OS. CONFIG_KVM_MEMHOTPLUG=y must be set on the host to enable this feature.
Currently, only 1GB memory-hotplug granularity is supported, and multiple granularity support will be implemented in the future.
Signed-off-by: Lu Feifei lufeifei@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/chip/chip3/chip.c | 5 + arch/sw_64/include/asm/hcall.h | 1 + arch/sw_64/include/asm/irq_impl.h | 1 + arch/sw_64/include/asm/kvm_asm.h | 3 + arch/sw_64/include/asm/kvm_host.h | 11 +- arch/sw_64/include/asm/memory.h | 1 + arch/sw_64/kvm/Kconfig | 7 + arch/sw_64/kvm/handle_exit.c | 5 + arch/sw_64/kvm/kvm-sw64.c | 113 ++++++++++++- arch/sw_64/mm/init.c | 9 ++ drivers/misc/Kconfig | 8 + drivers/misc/Makefile | 1 + drivers/misc/sunway-ged.c | 253 ++++++++++++++++++++++++++++++ 13 files changed, 410 insertions(+), 8 deletions(-) create mode 100644 drivers/misc/sunway-ged.c
diff --git a/arch/sw_64/chip/chip3/chip.c b/arch/sw_64/chip/chip3/chip.c index 2103d93a53a2..02b369b2b37b 100644 --- a/arch/sw_64/chip/chip3/chip.c +++ b/arch/sw_64/chip/chip3/chip.c @@ -701,6 +701,11 @@ void handle_chip_irq(unsigned long type, unsigned long vector, handle_irq(type); set_irq_regs(old_regs); return; + case INT_VT_HOTPLUG: + old_regs = set_irq_regs(regs); + handle_irq(type); + set_irq_regs(old_regs); + return; case INT_PC0: perf_irq(PERFMON_PC0, regs); return; diff --git a/arch/sw_64/include/asm/hcall.h b/arch/sw_64/include/asm/hcall.h index 8117752b657e..b5438b477c87 100644 --- a/arch/sw_64/include/asm/hcall.h +++ b/arch/sw_64/include/asm/hcall.h @@ -18,6 +18,7 @@ enum HCALL_TYPE { HCALL_SWNET = 20, /* guest request swnet service */ HCALL_SWNET_IRQ = 21, /* guest request swnet intr */ HCALL_FATAL_ERROR = 22, /* guest fatal error, issued by hmcode */ + HCALL_MEMHOTPLUG = 23, /* guest memory hotplug event */ NR_HCALL };
diff --git a/arch/sw_64/include/asm/irq_impl.h b/arch/sw_64/include/asm/irq_impl.h index 3679793d8b65..b568efef6994 100644 --- a/arch/sw_64/include/asm/irq_impl.h +++ b/arch/sw_64/include/asm/irq_impl.h @@ -32,6 +32,7 @@ enum sw64_irq_type { INT_RTC = 9, INT_FAULT = 10, INT_VT_SERIAL = 12, + INT_VT_HOTPLUG = 13, INT_DEV = 17, INT_NMI = 18, INT_LEGACY = 31, diff --git a/arch/sw_64/include/asm/kvm_asm.h b/arch/sw_64/include/asm/kvm_asm.h index 4b851682188c..7e2c92ed4574 100644 --- a/arch/sw_64/include/asm/kvm_asm.h +++ b/arch/sw_64/include/asm/kvm_asm.h @@ -11,4 +11,7 @@ #define SW64_KVM_EXIT_RESTART 17 #define SW64_KVM_EXIT_FATAL_ERROR 22
+#ifdef CONFIG_KVM_MEMHOTPLUG +#define SW64_KVM_EXIT_MEMHOTPLUG 23 +#endif #endif /* _ASM_SW64_KVM_ASM_H */ diff --git a/arch/sw_64/include/asm/kvm_host.h b/arch/sw_64/include/asm/kvm_host.h index e4ebb993153c..6d292c086347 100644 --- a/arch/sw_64/include/asm/kvm_host.h +++ b/arch/sw_64/include/asm/kvm_host.h @@ -29,7 +29,7 @@ #include <asm/kvm_mmio.h>
#define KVM_MAX_VCPUS 64 -#define KVM_USER_MEM_SLOTS 512 +#define KVM_USER_MEM_SLOTS 64
#define KVM_HALT_POLL_NS_DEFAULT 0 #define KVM_IRQCHIP_NUM_PINS 256 @@ -42,12 +42,16 @@ #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE)
struct kvm_arch_memory_slot { - + unsigned long host_phys_addr; + bool valid; };
struct kvm_arch { unsigned long host_phys_addr; unsigned long size; + + /* segment table */ + unsigned long *seg_pgd; };
@@ -100,6 +104,9 @@ struct kvm_vcpu_stat { u64 halt_poll_invalid; };
+#ifdef CONFIG_KVM_MEMHOTPLUG +void vcpu_mem_hotplug(struct kvm_vcpu *vcpu, unsigned long start_addr); +#endif int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index, struct hcall_args *hargs); void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid); diff --git a/arch/sw_64/include/asm/memory.h b/arch/sw_64/include/asm/memory.h index d3191165c7b5..b2b7492ae477 100644 --- a/arch/sw_64/include/asm/memory.h +++ b/arch/sw_64/include/asm/memory.h @@ -6,6 +6,7 @@ #include <linux/numa.h> #endif
+#define MIN_MEMORY_BLOCK_SIZE_VM_MEMHP (1UL << 30) #define NODE0_START (_TEXT_START - __START_KERNEL_map)
#define MAX_PHYSMEM_BITS 48 diff --git a/arch/sw_64/kvm/Kconfig b/arch/sw_64/kvm/Kconfig index 230ac526911c..85323b48f564 100644 --- a/arch/sw_64/kvm/Kconfig +++ b/arch/sw_64/kvm/Kconfig @@ -42,6 +42,13 @@ config KVM_SW64_HOST Provides host support for SW64 processors. To compile this as a module, choose M here.
+config KVM_MEMHOTPLUG + bool "Memory hotplug support for guest" + depends on KVM + help + Provides memory hotplug support for SW64 guest. + + source "drivers/vhost/Kconfig"
endif # VIRTUALIZATION diff --git a/arch/sw_64/kvm/handle_exit.c b/arch/sw_64/kvm/handle_exit.c index 0d6806051fc7..5016bc0eddc2 100644 --- a/arch/sw_64/kvm/handle_exit.c +++ b/arch/sw_64/kvm/handle_exit.c @@ -34,6 +34,11 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, case SW64_KVM_EXIT_IPI: vcpu_send_ipi(vcpu, hargs->arg0); return 1; +#ifdef CONFIG_KVM_MEMHOTPLUG + case SW64_KVM_EXIT_MEMHOTPLUG: + vcpu_mem_hotplug(vcpu, hargs->arg0); + return 1; +#endif case SW64_KVM_EXIT_FATAL_ERROR: printk("Guest fatal error: Reason=[%lx], EXC_PC=[%lx], DVA=[%lx]", hargs->arg0, hargs->arg1, hargs->arg2); vcpu->run->exit_reason = KVM_EXIT_UNKNOWN; diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c index 839ee83d57d5..af29d0ca8e7f 100644 --- a/arch/sw_64/kvm/kvm-sw64.c +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -56,10 +56,18 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq
extern int __sw64_vcpu_run(struct vcpucb *vcb, struct kvm_regs *regs, struct hcall_args *args);
-static unsigned long get_vpcr(unsigned long machine_mem_offset, unsigned long memory_size, unsigned long vpn) +#ifdef CONFIG_KVM_MEMHOTPLUG +static u64 get_vpcr_memhp(u64 seg_base, u64 vpn) { - return (machine_mem_offset >> 23) | ((memory_size >> 23) << 16) | ((vpn & HARDWARE_VPN_MASK) << 44); + return seg_base | ((vpn & HARDWARE_VPN_MASK) << 44); } +#else +static u64 get_vpcr(u64 hpa_base, u64 mem_size, u64 vpn) +{ + return (hpa_base >> 23) | ((mem_size >> 23) << 16) + | ((vpn & HARDWARE_VPN_MASK) << 44); +} +#endif
static unsigned long __get_new_vpn_context(struct kvm_vcpu *vcpu, long cpu) { @@ -212,12 +220,38 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { +#ifdef CONFIG_KVM_MEMHOTPLUG + unsigned long *seg_pgd; + + if (kvm->arch.seg_pgd != NULL) { + kvm_err("kvm_arch already initialized?\n"); + return -EINVAL; + } + + seg_pgd = alloc_pages_exact(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); + if (!seg_pgd) + return -ENOMEM; + + kvm->arch.seg_pgd = seg_pgd; +#endif + return 0; }
void kvm_arch_destroy_vm(struct kvm *kvm) { int i; +#ifdef CONFIG_KVM_MEMHOTPLUG + void *seg_pgd = NULL; + + if (kvm->arch.seg_pgd) { + seg_pgd = READ_ONCE(kvm->arch.seg_pgd); + kvm->arch.seg_pgd = NULL; + } + + if (seg_pgd) + free_pages_exact(seg_pgd, PAGE_SIZE); +#endif
for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { @@ -227,7 +261,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm) }
atomic_set(&kvm->online_vcpus, 0); - }
long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -241,6 +274,22 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return 0; }
+#ifdef CONFIG_KVM_MEMHOTPLUG +static void setup_segment_table(struct kvm *kvm, + struct kvm_memory_slot *memslot, unsigned long addr, size_t size) +{ + unsigned long *seg_pgd = kvm->arch.seg_pgd; + unsigned int num_of_entry = size >> 30; + unsigned long base_hpa = addr >> 30; + int i; + + for (i = 0; i < num_of_entry; i++) { + *seg_pgd = base_hpa + i; + seg_pgd++; + } +} +#endif + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, const struct kvm_userspace_memory_region *mem, @@ -253,8 +302,15 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, unsigned long ret; size_t size;
- if (change == KVM_MR_FLAGS_ONLY) + if (change == KVM_MR_FLAGS_ONLY || change == KVM_MR_DELETE) + return 0; + +#ifndef CONFIG_KVM_MEMHOTPLUG + if (mem->guest_phys_addr) { + pr_info("%s, No KVM MEMHOTPLUG support!\n", __func__); return 0; + } +#endif
if (test_bit(IO_MARK_BIT, &(mem->guest_phys_addr))) return 0; @@ -276,7 +332,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vm_file) { info = kzalloc(sizeof(struct vmem_info), GFP_KERNEL);
- size = round_up(mem->memory_size, 8<<20); + size = round_up(mem->memory_size, 8 << 20); addr = gen_pool_alloc(sw64_kvm_pool, size); if (!addr) return -ENOMEM; @@ -291,6 +347,18 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vma) return -ENOMEM;
+#ifdef CONFIG_KVM_MEMHOTPLUG + if (memslot->base_gfn == 0x0UL) { + setup_segment_table(kvm, memslot, addr, size); + kvm->arch.host_phys_addr = (u64)addr; + memslot->arch.host_phys_addr = addr; + } else { + /* used for memory hotplug */ + memslot->arch.host_phys_addr = addr; + memslot->arch.valid = false; + } +#endif + info->start = addr; info->size = size; vma->vm_private_data = (void *) info; @@ -308,8 +376,11 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
pr_info("guest phys addr = %#lx, size = %#lx\n", addr, vma->vm_end - vma->vm_start); + +#ifndef CONFIG_KVM_MEMHOTPLUG kvm->arch.host_phys_addr = (u64)addr; - kvm->arch.size = round_up(mem->memory_size, 8<<20); + kvm->arch.size = round_up(mem->memory_size, 8 << 20); +#endif
memset(__va(addr), 0, 0x2000000);
@@ -463,8 +534,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) /* Set guest vcb */ /* vpn will update later when vcpu is running */ if (vcpu->arch.vcb.vpcr == 0) { +#ifndef CONFIG_KVM_MEMHOTPLUG vcpu->arch.vcb.vpcr = get_vpcr(vcpu->kvm->arch.host_phys_addr, vcpu->kvm->arch.size, 0); +#else + unsigned long seg_base = virt_to_phys(vcpu->kvm->arch.seg_pgd); + + vcpu->arch.vcb.vpcr = get_vpcr_memhp(seg_base, 0); +#endif vcpu->arch.vcb.upcr = 0x7; }
@@ -640,6 +717,30 @@ int kvm_dev_ioctl_check_extension(long ext) return r; }
+#ifdef CONFIG_KVM_MEMHOTPLUG +void vcpu_mem_hotplug(struct kvm_vcpu *vcpu, unsigned long start_addr) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_memory_slot *slot; + unsigned long start_pfn = start_addr >> PAGE_SHIFT; + + kvm_for_each_memslot(slot, kvm_memslots(kvm)) { + if (start_pfn == slot->base_gfn) { + unsigned long *seg_pgd; + unsigned long num_of_entry = slot->npages >> 17; + unsigned long base_hpa = slot->arch.host_phys_addr; + int i; + + seg_pgd = kvm->arch.seg_pgd + (start_pfn >> 17); + for (i = 0; i < num_of_entry; i++) { + *seg_pgd = (base_hpa >> 30) + i; + seg_pgd++; + } + } + } +} +#endif + void vcpu_send_ipi(struct kvm_vcpu *vcpu, int target_vcpuid) { struct kvm_vcpu *target_vcpu = kvm_get_vcpu(vcpu->kvm, target_vcpuid); diff --git a/arch/sw_64/mm/init.c b/arch/sw_64/mm/init.c index 16d3da7beebe..82f2414ef7f7 100644 --- a/arch/sw_64/mm/init.c +++ b/arch/sw_64/mm/init.c @@ -10,6 +10,7 @@ #include <linux/memblock.h> #include <linux/swiotlb.h> #include <linux/acpi.h> +#include <linux/memory.h>
#include <asm/mmu_context.h>
@@ -33,6 +34,14 @@ static pud_t vmalloc_pud[1024] __attribute__((__aligned__(PAGE_SIZE))); static phys_addr_t mem_start; static phys_addr_t mem_size_limit;
+unsigned long memory_block_size_bytes(void) +{ + if (is_in_guest()) + return MIN_MEMORY_BLOCK_SIZE_VM_MEMHP; + else + return MIN_MEMORY_BLOCK_SIZE; +} + static int __init setup_mem_size(char *p) { char *oldp; diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index fafa8b0d8099..140716083ab8 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -351,6 +351,14 @@ config HMC6352 This driver provides support for the Honeywell HMC6352 compass, providing configuration and heading data via sysfs.
+config SUNWAY_GED + tristate "sunway generic device driver for memhotplug" + depends on SW64 + depends on MEMORY_HOTPLUG + help + This driver provides support for sunway generic device driver for + memhotplug, providing configuration and heading data via sysfs. + config DS1682 tristate "Dallas DS1682 Total Elapsed Time Recorder with Alarm" depends on I2C diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index d23231e73330..3615763234a6 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_SENSORS_TSL2550) += tsl2550.o obj-$(CONFIG_DS1682) += ds1682.o obj-$(CONFIG_C2PORT) += c2port/ obj-$(CONFIG_HMC6352) += hmc6352.o +obj-$(CONFIG_SUNWAY_GED) += sunway-ged.o obj-y += eeprom/ obj-y += cb710/ obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o diff --git a/drivers/misc/sunway-ged.c b/drivers/misc/sunway-ged.c new file mode 100644 index 000000000000..b4e4ca315852 --- /dev/null +++ b/drivers/misc/sunway-ged.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Generic Event Device for ACPI. 
*/ + +#include <linux/err.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/platform_device.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> + +#define OFFSET_START_ADDR 0 +#define OFFSET_LENGTH 8 +#define OFFSET_STATUS 16 +#define OFFSET_SLOT 24 + +/* Memory hotplug event */ +#define SUNWAY_MEMHOTPLUG_ADD 0x1 +#define SUNWAY_MEMHOTPLUG_REMOVE 0x2 + +struct sunway_memory_device { + struct sunway_ged_device *device; + unsigned int state; /* State of the memory device */ + struct list_head list; + + u64 start_addr; /* Memory Range start physical addr */ + u64 length; /* Memory Range length */ + u64 slot; /* Memory Range slot */ + unsigned int enabled:1; +}; + +struct sunway_ged_device { + struct device *dev; + void __iomem *membase; + void *driver_data; + spinlock_t lock; + struct list_head dev_list; +}; + +static int sunway_memory_enable_device(struct sunway_memory_device *mem_device) +{ + int num_enabled = 0; + int result = 0; + + if (mem_device->enabled) { /* just sanity check...*/ + num_enabled++; + goto out; + } + + /* + * If the memory block size is zero, please ignore it. + * Don't try to do the following memory hotplug flowchart. + */ + if (!mem_device->length) + goto out; + + lock_device_hotplug(); + /* suppose node = 0, fix me! */ + result = __add_memory(0, mem_device->start_addr, mem_device->length); + unlock_device_hotplug(); + /* + * If the memory block has been used by the kernel, add_memory() + * returns -EEXIST. If add_memory() returns the other error, it + * means that this memory block is not used by the kernel. + */ + if (result && result != -EEXIST) + goto out; + + mem_device->enabled = 1; + + /* + * Add num_enable even if add_memory() returns -EEXIST, so the + * device is bound to this driver. 
+ */ + num_enabled++; +out: + if (!num_enabled) { + dev_err(mem_device->device->dev, "add_memory failed\n"); + return -EINVAL; + } + + return 0; +} + +static int sunway_memory_get_meminfo(struct sunway_memory_device *mem_device) +{ + struct sunway_ged_device *geddev; + + if (!mem_device) + return -EINVAL; + + if (mem_device->enabled) + return 0; + + geddev = mem_device->device; + + mem_device->start_addr = readq(geddev->membase + OFFSET_START_ADDR); + mem_device->length = readq(geddev->membase + OFFSET_LENGTH); + + return 0; +} + +static void sunway_memory_device_remove(struct sunway_ged_device *device) +{ + struct sunway_memory_device *mem_dev, *n; + unsigned long start_addr, length, slot; + + if (!device) + return; + + start_addr = readq(device->membase + OFFSET_START_ADDR); + length = readq(device->membase + OFFSET_LENGTH); + slot = readq(device->membase + OFFSET_SLOT); + + list_for_each_entry_safe(mem_dev, n, &device->dev_list, list) { + if (!mem_dev->enabled) + continue; + + if ((start_addr == mem_dev->start_addr) && + (length == mem_dev->length)) { + /* suppose node = 0, fix me! 
*/ + remove_memory(0, start_addr, length); + list_del(&mem_dev->list); + kfree(mem_dev); + } + } + + writeq(slot, device->membase + OFFSET_SLOT); +} + +static int sunway_memory_device_add(struct sunway_ged_device *device) +{ + struct sunway_memory_device *mem_device; + int result; + + if (!device) + return -EINVAL; + + mem_device = kzalloc(sizeof(struct sunway_memory_device), GFP_KERNEL); + if (!mem_device) + return -ENOMEM; + + INIT_LIST_HEAD(&mem_device->list); + mem_device->device = device; + + /* Get the range from the IO */ + mem_device->start_addr = readq(device->membase + OFFSET_START_ADDR); + mem_device->length = readq(device->membase + OFFSET_LENGTH); + mem_device->slot = readq(device->membase + OFFSET_SLOT); + + result = sunway_memory_enable_device(mem_device); + if (result) { + dev_err(device->dev, "sunway_memory_enable_device() error\n"); + sunway_memory_device_remove(device); + + return result; + } + + list_add_tail(&mem_device->list, &device->dev_list); + dev_dbg(device->dev, "Memory device configured\n"); + + hcall(HCALL_MEMHOTPLUG, mem_device->start_addr, 0, 0); + + return 1; +} + +static irqreturn_t sunwayged_ist(int irq, void *data) +{ + struct sunway_ged_device *sunwayged_dev = data; + unsigned int status; + + status = readl(sunwayged_dev->membase + OFFSET_STATUS); + + /* through IO status to add or remove memory device */ + if (status & SUNWAY_MEMHOTPLUG_ADD) + sunway_memory_device_add(sunwayged_dev); + + if (status & SUNWAY_MEMHOTPLUG_REMOVE) + sunway_memory_device_remove(sunwayged_dev); + + return IRQ_HANDLED; +} + +static irqreturn_t sunwayged_irq_handler(int irq, void *data) +{ + return IRQ_WAKE_THREAD; +} + +static int sunwayged_probe(struct platform_device *pdev) +{ + struct resource *regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); + int irq = platform_get_irq(pdev, 0); + struct sunway_ged_device *geddev; + struct device *dev; + int irqflags; + + if (!regs) { + dev_err(dev, "no registers defined\n"); + return -EINVAL; + } + + geddev 
= devm_kzalloc(&pdev->dev, sizeof(*geddev), GFP_KERNEL); + if (!geddev) + return -ENOMEM; + + spin_lock_init(&geddev->lock); + geddev->membase = devm_ioremap(&pdev->dev, + regs->start, resource_size(regs)); + if (!geddev->membase) + return -ENOMEM; + + INIT_LIST_HEAD(&geddev->dev_list); + geddev->dev = &pdev->dev; + irqflags = IRQF_SHARED; + + if (request_threaded_irq(irq, sunwayged_irq_handler, sunwayged_ist, + irqflags, "SUNWAY:Ged", geddev)) { + dev_err(dev, "failed to setup event handler for irq %u\n", irq); + + return -EINVAL; + } + + platform_set_drvdata(pdev, geddev); + + return 0; +} + +static int sunwayged_remove(struct platform_device *pdev) +{ + return 0; +} + +static const struct of_device_id sunwayged_of_match[] = { + {.compatible = "sw6,sunway-ged", }, + { } +}; +MODULE_DEVICE_TABLE(of, sunwayged_of_match); + +static struct platform_driver sunwayged_platform_driver = { + .driver = { + .name = "sunway-ged", + .of_match_table = sunwayged_of_match, + }, + .probe = sunwayged_probe, + .remove = sunwayged_remove, +}; +module_platform_driver(sunwayged_platform_driver); + +MODULE_AUTHOR("Lu Feifei"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Sunway ged driver");
From: Lu Feifei lufeifei@wxiat.com
Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56WV8
--------------------------------
This is an emulated device used to communicate with the memory-hotplug device on the QEMU side, so the device is added to the device tree for the guest OS.
Signed-off-by: Lu Feifei lufeifei@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/boot/dts/chip_vt.dts | 12 ++++++++++++ 1 file changed, 12 insertions(+)
diff --git a/arch/sw_64/boot/dts/chip_vt.dts b/arch/sw_64/boot/dts/chip_vt.dts index f0bcf1db1d08..abad29dee97e 100644 --- a/arch/sw_64/boot/dts/chip_vt.dts +++ b/arch/sw_64/boot/dts/chip_vt.dts @@ -34,5 +34,17 @@ clock-frequency = <24000000>; status = "okay"; }; + misc: misc0@8036 { + #address-cells = <2>; + #size-cells = <2>; + compatible = "sw6,sunway-ged"; + reg = <0x8036 0x0 0x0 0x20>; + interrupt-parent=<&intc>; + interrupts = <13>; + reg-shift = <0>; + reg-io-width = <8>; + clock-frequency = <24000000>; + status = "okay"; + }; }; };
From: Yang Qiang yangqiang@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GDR1
--------------------------------
When PCI devices enable the SR-IOV function, the previous policy, which uses pci_find_bus() to locate the PCI bus from a given domain and bus number, may return a wrong PCI bus number, so this patch fixes this problem.
Signed-off-by: Yang Qiang yangqiang@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/chip/chip3/chip.c | 13 +++++++------ arch/sw_64/kernel/pci.c | 18 +++++++++--------- 2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/arch/sw_64/chip/chip3/chip.c b/arch/sw_64/chip/chip3/chip.c index 02b369b2b37b..84ca7ffcb2ef 100644 --- a/arch/sw_64/chip/chip3/chip.c +++ b/arch/sw_64/chip/chip3/chip.c @@ -178,18 +178,20 @@ int chip_pcie_configure(struct pci_controller *hose) struct pci_bus *bus, *top; struct list_head *next; unsigned int max_read_size, smallest_max_payload; - int max_payloadsize, iov_bus = 0; + int max_payloadsize; unsigned long rc_index, node; unsigned long piuconfig0, value; unsigned int pcie_caps_offset; unsigned int rc_conf_value; u16 devctl, new_values; bool rc_ari_disabled = false, found = false; + unsigned char bus_max_num;
node = hose->node; rc_index = hose->index; smallest_max_payload = read_rc_conf(node, rc_index, RC_EXP_DEVCAP); smallest_max_payload &= PCI_EXP_DEVCAP_PAYLOAD; + bus_max_num = hose->busn_space->start;
top = hose->bus; bus = top; @@ -200,6 +202,7 @@ int chip_pcie_configure(struct pci_controller *hose) /* end of this bus, go up or finish */ if (bus == top) break; + next = bus->self->bus_list.next; bus = bus->self->bus; continue; @@ -224,10 +227,8 @@ int chip_pcie_configure(struct pci_controller *hose) } }
-#ifdef CONFIG_PCI_IOV - if (dev->is_physfn) - iov_bus += dev->sriov->max_VF_buses - dev->bus->number; -#endif + if (bus->busn_res.end > bus_max_num) + bus_max_num = bus->busn_res.end;
/* Query device PCIe capability register */ pcie_caps_offset = dev->pcie_cap; @@ -306,7 +307,7 @@ int chip_pcie_configure(struct pci_controller *hose) pci_write_config_word(dev, pcie_caps_offset + PCI_EXP_DEVCTL, devctl); }
- return iov_bus; + return bus_max_num; }
static int chip3_check_pci_vt_linkup(unsigned long node, unsigned long index) diff --git a/arch/sw_64/kernel/pci.c b/arch/sw_64/kernel/pci.c index 7cc8b7d2d43b..fcc6e0f02a93 100644 --- a/arch/sw_64/kernel/pci.c +++ b/arch/sw_64/kernel/pci.c @@ -221,7 +221,7 @@ void __init common_init_pci(void) struct pci_bus *bus; unsigned int init_busnr; int need_domain_info = 0; - int ret, iov_bus; + int ret; unsigned long offset;
/* Scan all of the recorded PCI controllers. */ @@ -257,20 +257,20 @@ void __init common_init_pci(void)
bus = hose->bus = bridge->bus; hose->need_domain_info = need_domain_info; - while (pci_find_bus(pci_domain_nr(bus), last_bus)) - last_bus++;
if (is_in_host()) - iov_bus = chip_pcie_configure(hose); - last_bus += iov_bus; + last_bus = chip_pcie_configure(hose); + else + while (pci_find_bus(pci_domain_nr(bus), last_bus)) + last_bus++;
- hose->last_busno = hose->busn_space->end = last_bus - 1; + hose->last_busno = hose->busn_space->end = last_bus; init_busnr = read_rc_conf(hose->node, hose->index, RC_PRIMARY_BUS); init_busnr &= ~(0xff << 16); - init_busnr |= (last_bus - 1) << 16; + init_busnr |= last_bus << 16; write_rc_conf(hose->node, hose->index, RC_PRIMARY_BUS, init_busnr); - pci_bus_update_busn_res_end(bus, last_bus - 1); - + pci_bus_update_busn_res_end(bus, last_bus); + last_bus++; }
pcibios_claim_console_setup();
From: Hang Xiaoqian hangxiaoqian@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GFNY
--------------------------------
In the case of highly concurrent unaligned access handling, updating the unaligned count may cause severe cache thrashing. However, the unaligned count is not required, so remove it to reduce cache thrashing.
Signed-off-by: Hang Xiaoqian hangxiaoqian@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/kernel/Makefile | 2 +- arch/sw_64/kernel/traps.c | 13 -------- arch/sw_64/kernel/unaligned.c | 56 ----------------------------------- 3 files changed, 1 insertion(+), 70 deletions(-) delete mode 100644 arch/sw_64/kernel/unaligned.c
diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index c1e461e0ac56..94b63d6a286b 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_HIBERNATION) += hibernate_asm.o hibernate.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI) += pci_common.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_DEBUG_FS) += unaligned.o segvdbg.o +obj-$(CONFIG_DEBUG_FS) += segvdbg.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o
ifndef CONFIG_PCI diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index a61c851967a9..d656eca5f961 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -320,11 +320,6 @@ do_entIF(unsigned long inst_type, struct pt_regs *regs) force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0); }
-struct unaligned_stat { - unsigned long count, va, pc; -} unaligned[2]; - - asmlinkage void do_entUna(void *va, unsigned long opcode, unsigned long reg, struct pt_regs *regs) @@ -334,10 +329,6 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg, unsigned long pc = regs->pc - 4; const struct exception_table_entry *fixup;
- unaligned[0].count++; - unaligned[0].va = (unsigned long) va; - unaligned[0].pc = pc; - /* * We don't want to use the generic get/put unaligned macros as * we want to trap exceptions. Only if we actually get an @@ -666,10 +657,6 @@ do_entUnaUser(void __user *va, unsigned long opcode, if ((unsigned long)va >= TASK_SIZE) goto give_sigsegv;
- ++unaligned[1].count; - unaligned[1].va = (unsigned long)va; - unaligned[1].pc = regs->pc - 4; - if ((1L << opcode) & OP_INT_MASK) { /* it's an integer load/store */ if (reg < 30) { diff --git a/arch/sw_64/kernel/unaligned.c b/arch/sw_64/kernel/unaligned.c deleted file mode 100644 index a1bbdab4a266..000000000000 --- a/arch/sw_64/kernel/unaligned.c +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* - * Copyright (C) 2020 Mao Minkai - * Author: Mao Minkai - * - * This code is taken from arch/mips/kernel/segment.c - * Copyright (C) 2013 Imagination Technologies Ltd. - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include <asm/unaligned.h> -#include <asm/debug.h> - -static int show_unaligned(struct seq_file *sf, void *v) -{ - extern struct unaligned_stat { - unsigned long count, va, pc; - } unaligned[2]; - - seq_printf(sf, "kernel unaligned acc\t: %ld (pc=%lx, va=%lx)\n", unaligned[0].count, unaligned[0].pc, unaligned[0].va); - seq_printf(sf, "user unaligned acc\t: %ld (pc=%lx, va=%lx)\n", unaligned[1].count, unaligned[1].pc, unaligned[1].va); - - return 0; -} - -static int unaligned_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_unaligned, NULL); -} - -static const struct file_operations unaligned_fops = { - .open = unaligned_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int __init unaligned_info(void) -{ - struct dentry *unaligned; - - if (!sw64_debugfs_dir) - return -ENODEV; - - unaligned = debugfs_create_file("unaligned", S_IRUGO, - sw64_debugfs_dir, NULL, - &unaligned_fops); - if (!unaligned) - return -ENOMEM; - return 0; -} -device_initcall(unaligned_info);
From: Mao Minkai maominkai@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GFOO
--------------------------------
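In commit 20b900c4b7fb ("sw64: optimize simd version of memcpy and memset"), _nc instructions are used to improve performance, but the position of the memb instruction in memset is wrong: it is placed right after the unconditional branch back to the loop head, so it is never executed. Fix it by moving memb to a new label that the _nc loop branches to on exit.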
Fixes: 20b900c4b7fb ("sw64: optimize simd version of memcpy and memset") Signed-off-by: Mao Minkai maominkai@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/lib/deep-memset.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/arch/sw_64/lib/deep-memset.S b/arch/sw_64/lib/deep-memset.S index ed2171c56d4d..7fbd529c72a8 100644 --- a/arch/sw_64/lib/deep-memset.S +++ b/arch/sw_64/lib/deep-memset.S @@ -99,12 +99,11 @@ $mod32_aligned: .align 5 $mod32_loop_nc: subl $18, 64, $18 - blt $18, $mod32_tail + blt $18, $mod32_tail_memb vstd_nc $f10, 0($16) vstd_nc $f10, 32($16) addl $16, 64, $16 br $31, $mod32_loop_nc - memb # required for _nc store instructions
.align 5 $mod32_loop: @@ -115,6 +114,8 @@ $mod32_loop: addl $16, 64, $16 br $31, $mod32_loop
+$mod32_tail_memb: + memb # required for _nc store instructions $mod32_tail: vldd $f10, 0($4) addl $sp, 64, $sp
From: Zhu Donghong zhudonghong@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GFPA
--------------------------------
On the SW64 platform, a JMicron 585 SATA card directly connected to the Root Complex may raise DMA failures on reboot, so we force a hot reset of the Root Complex to fix the problem that the JMicron 585 SATA card cannot be accessed.
Signed-off-by: Zhu Donghong zhudonghong@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/platform/platform_xuelang.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/arch/sw_64/platform/platform_xuelang.c b/arch/sw_64/platform/platform_xuelang.c index 63a4b163e43e..ae8179b53b4c 100644 --- a/arch/sw_64/platform/platform_xuelang.c +++ b/arch/sw_64/platform/platform_xuelang.c @@ -26,9 +26,25 @@ extern void cpld_write(uint8_t slave_addr, uint8_t reg, uint8_t data);
static void xuelang_kill_arch(int mode) { + struct pci_dev *pdev; + struct pci_controller *hose; + int val; + if (is_in_host()) { switch (mode) { case LINUX_REBOOT_CMD_RESTART: + pdev = pci_get_device(PCI_VENDOR_ID_JMICRON, + 0x0585, NULL); + if (pdev) { + hose = (struct pci_controller *)pdev->sysdata; + val = read_rc_conf(hose->node, hose->index, + RC_PORT_LINK_CTL); + write_rc_conf(hose->node, hose->index, + RC_PORT_LINK_CTL, val | 0x8); + write_rc_conf(hose->node, hose->index, + RC_PORT_LINK_CTL, val); + } + cpld_write(0x64, 0x00, 0xc3); mb(); break;
From: He Sheng hesheng@wxiat.com
Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I56OLG
--------------------------------
Signed-off-by: He Sheng hesheng@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/sw_64/kernel/setup.c b/arch/sw_64/kernel/setup.c index 85d172c32239..0e93643539d3 100644 --- a/arch/sw_64/kernel/setup.c +++ b/arch/sw_64/kernel/setup.c @@ -976,7 +976,7 @@ static int __init debugfs_sw64(void) { struct dentry *d;
- d = debugfs_create_dir("sw_64", NULL); + d = debugfs_create_dir("sw64", NULL); if (!d) return -ENOMEM; sw64_debugfs_dir = d;
From: Wu Liliu wuliliu@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GFQ0
--------------------------------
show_stack() used to show stack information based on SP only, which means the output had nothing to do with the given task, which is unexpected. In order to show the stack of a task, we improve the implementation of show_stack().
However, the original task struct does not have sp, so we add it to thread_struct and save and restore it in __switch_to.
Meanwhile, walk_stackframe() can be reused, so refactor the related code for better support.
Signed-off-by: Wu Liliu wuliliu@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/include/asm/processor.h | 1 + arch/sw_64/include/asm/stacktrace.h | 4 +- arch/sw_64/kernel/asm-offsets.c | 1 + arch/sw_64/kernel/entry.S | 6 +++ arch/sw_64/kernel/perf_event.c | 12 ++--- arch/sw_64/kernel/process.c | 1 + arch/sw_64/kernel/stacktrace.c | 78 +++++++++++++++++++++++------ arch/sw_64/kernel/traps.c | 58 ++++----------------- 8 files changed, 85 insertions(+), 76 deletions(-)
diff --git a/arch/sw_64/include/asm/processor.h b/arch/sw_64/include/asm/processor.h index 08e8cc8e5428..886f28635dd4 100644 --- a/arch/sw_64/include/asm/processor.h +++ b/arch/sw_64/include/asm/processor.h @@ -45,6 +45,7 @@ struct thread_struct { struct user_fpsimd_state fpstate; /* Callee-saved registers */ unsigned long ra; + unsigned long sp; unsigned long s[7]; /* s0 ~ s6 */ }; #define INIT_THREAD { } diff --git a/arch/sw_64/include/asm/stacktrace.h b/arch/sw_64/include/asm/stacktrace.h index 813aa5e7a91d..ed691a72573b 100644 --- a/arch/sw_64/include/asm/stacktrace.h +++ b/arch/sw_64/include/asm/stacktrace.h @@ -32,8 +32,8 @@ struct stack_frame { };
extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); -extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data); +extern void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs, + int (*fn)(unsigned long, void *), void *data);
static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp, struct stack_info *info) diff --git a/arch/sw_64/kernel/asm-offsets.c b/arch/sw_64/kernel/asm-offsets.c index 0c7e1e26eb05..9e6c338a5edd 100644 --- a/arch/sw_64/kernel/asm-offsets.c +++ b/arch/sw_64/kernel/asm-offsets.c @@ -213,6 +213,7 @@ void foo(void) OFFSET(TASK_THREAD_FPCR, task_struct, thread.fpstate.fpcr); BLANK(); OFFSET(TASK_THREAD_RA, task_struct, thread.ra); + OFFSET(TASK_THREAD_SP, task_struct, thread.sp); OFFSET(TASK_THREAD_S0, task_struct, thread.s[0]); OFFSET(TASK_THREAD_S1, task_struct, thread.s[1]); OFFSET(TASK_THREAD_S2, task_struct, thread.s[2]); diff --git a/arch/sw_64/kernel/entry.S b/arch/sw_64/kernel/entry.S index 977c774ad799..f79c9a6ddf36 100644 --- a/arch/sw_64/kernel/entry.S +++ b/arch/sw_64/kernel/entry.S @@ -398,6 +398,7 @@ __switch_to: .prologue 0 /* Save context into prev->thread */ stl $26, TASK_THREAD_RA($17) + stl $30, TASK_THREAD_SP($17) stl $9, TASK_THREAD_S0($17) stl $10, TASK_THREAD_S1($17) stl $11, TASK_THREAD_S2($17) @@ -415,6 +416,11 @@ __switch_to: ldl $14, TASK_THREAD_S5($18) ldl $15, TASK_THREAD_S6($18) sys_call HMC_swpctx + /* + * SP has been saved and restored by HMC_swpctx, + * and restore it again here for future expansion. + */ + ldl $30, TASK_THREAD_SP($18) ldi $8, 0x3fff bic $sp, $8, $8 mov $17, $0 diff --git a/arch/sw_64/kernel/perf_event.c b/arch/sw_64/kernel/perf_event.c index 52ec34e33269..6e344239917b 100644 --- a/arch/sw_64/kernel/perf_event.c +++ b/arch/sw_64/kernel/perf_event.c @@ -761,24 +761,18 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, * whist unwinding the stackframe and is like a subroutine return so we use * the PC. */ -static int callchain_trace(struct stackframe *frame, void *data) +static int callchain_trace(unsigned long pc, void *data) { struct perf_callchain_entry_ctx *entry = data;
- perf_callchain_store(entry, frame->pc); - + perf_callchain_store(entry, pc); return 0; }
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - struct stackframe frame; - - frame.fp = regs->r15; - frame.pc = regs->pc; - - walk_stackframe(current, &frame, callchain_trace, entry); + walk_stackframe(NULL, regs, callchain_trace, entry); }
/* diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c index 7a7578d530c6..da721d4266ee 100644 --- a/arch/sw_64/kernel/process.c +++ b/arch/sw_64/kernel/process.c @@ -169,6 +169,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
childti->pcb.ksp = (unsigned long) childregs; childti->pcb.flags = 7; /* set FEN, clear everything else */ + p->thread.sp = (unsigned long) childregs;
if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ diff --git a/arch/sw_64/kernel/stacktrace.c b/arch/sw_64/kernel/stacktrace.c index 2671331717ba..4e9acf99aaab 100644 --- a/arch/sw_64/kernel/stacktrace.c +++ b/arch/sw_64/kernel/stacktrace.c @@ -10,6 +10,8 @@ #include <linux/sched/debug.h> #include <linux/ftrace.h> #include <linux/perf_event.h> +#include <linux/kallsyms.h> + #include <asm/stacktrace.h>
/* @@ -59,40 +61,84 @@ int unwind_frame(struct task_struct *tsk, struct stackframe *frame) } EXPORT_SYMBOL_GPL(unwind_frame);
-void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data) +void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs, + int (*fn)(unsigned long, void *), void *data) { + unsigned long pc, fp; + + struct stackframe frame; + + if (regs) { + pc = regs->pc; + fp = regs->r15; + } else if (tsk == current || tsk == NULL) { + fp = (unsigned long)__builtin_frame_address(0); + pc = (unsigned long)walk_stackframe; + } else { + fp = tsk->thread.s[6]; + pc = tsk->thread.ra; + } + + if (!__kernel_text_address(pc) || fn(pc, data)) + return; + + frame.pc = pc; + frame.fp = fp; while (1) { int ret; - - if (fn(frame, data)) - break; - ret = unwind_frame(tsk, frame); + ret = unwind_frame(tsk, &frame); if (ret < 0) break; + + if (fn(frame.pc, data)) + break; } } EXPORT_SYMBOL_GPL(walk_stackframe);
#else /* !CONFIG_FRAME_POINTER */ -void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, - int (*fn)(struct stackframe *, void *), void *data) +void walk_stackframe(struct task_struct *tsk, struct pt_regs *regs, + int (*fn)(unsigned long, void *), void *data) { - unsigned long *sp = (unsigned long *)current_thread_info()->pcb.ksp; - unsigned long addr; - struct perf_callchain_entry_ctx *entry = data; + unsigned long *ksp; + unsigned long sp, pc; + + if (regs) { + sp = (unsigned long)(regs+1); + pc = regs->pc; + } else if (tsk == current || tsk == NULL) { + register unsigned long current_sp __asm__ ("$30"); + sp = current_sp; + pc = (unsigned long)walk_stackframe; + } else { + sp = tsk->thread.sp; + pc = tsk->thread.ra; + }
- perf_callchain_store(entry, frame->pc); - while (!kstack_end(sp) && entry->nr < entry->max_stack) { - addr = *sp++; - if (__kernel_text_address(addr)) - perf_callchain_store(entry, addr); + ksp = (unsigned long *)sp; + + while (!kstack_end(ksp)) { + if (__kernel_text_address(pc) && fn(pc, data)) + break; + pc = (*ksp++) - 0x4; } } EXPORT_SYMBOL_GPL(walk_stackframe);
#endif/* CONFIG_FRAME_POINTER */
+static int print_address_trace(unsigned long pc, void *data) +{ + print_ip_sym((const char *)data, pc); + return 0; +} + +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) +{ + pr_info("Trace:\n"); + walk_stackframe(task, NULL, print_address_trace, (void *)loglvl); +} + /* * Save stack-backtrace addresses into a stack_trace buffer. */ diff --git a/arch/sw_64/kernel/traps.c b/arch/sw_64/kernel/traps.c index d656eca5f961..4e95cab13daa 100644 --- a/arch/sw_64/kernel/traps.c +++ b/arch/sw_64/kernel/traps.c @@ -12,13 +12,20 @@ #include <linux/extable.h> #include <linux/perf_event.h> #include <linux/kdebug.h> +#include <linux/sched.h> #include <linux/kexec.h> +#include <linux/kallsyms.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/debug.h>
#include <asm/gentrap.h> #include <asm/mmu_context.h> #include <asm/fpu.h> #include <asm/kprobes.h> #include <asm/uprobes.h> +#include <asm/stacktrace.h> +#include <asm/processor.h> +#include <asm/ptrace.h>
#include "proto.h"
@@ -68,53 +75,6 @@ dik_show_code(unsigned int *pc) printk("\n"); }
-static void -dik_show_trace(unsigned long *sp, const char *loglvl) -{ - long i = 0; - unsigned long tmp; - - printk("%sTrace:\n", loglvl); - while (0x1ff8 & (unsigned long)sp) { - tmp = *sp; - sp++; - if (!__kernel_text_address(tmp)) - continue; - printk("%s[<%lx>] %pSR\n", loglvl, tmp, (void *)tmp); - if (i > 40) { - printk("%s ...", loglvl); - break; - } - } - printk("\n"); -} - -static int kstack_depth_to_print = 24; - -void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) -{ - unsigned long *stack; - int i; - - /* - * debugging aid: "show_stack(NULL, NULL, KERN_EMERG);" prints the - * back trace for this cpu. - */ - if (sp == NULL) - sp = (unsigned long *)&sp; - - stack = sp; - for (i = 0; i < kstack_depth_to_print; i++) { - if (((long) stack & (THREAD_SIZE-1)) == 0) - break; - if (i && ((i % 4) == 0)) - printk("%s ", loglvl); - printk("%016lx ", *stack++); - } - printk("\n"); - dik_show_trace(sp, loglvl); -} - void die_if_kernel(char *str, struct pt_regs *regs, long err) { if (regs->ps & 8) @@ -125,7 +85,7 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) printk("%s(%d): %s %ld\n", current->comm, task_pid_nr(current), str, err); dik_show_regs(regs); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); - dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT); + show_stack(current, NULL, KERN_EMERG); dik_show_code((unsigned int *)regs->pc);
if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { @@ -535,7 +495,7 @@ do_entUna(void *va, unsigned long opcode, unsigned long reg,
dik_show_regs(regs); dik_show_code((unsigned int *)pc); - dik_show_trace((unsigned long *)(regs+1), KERN_DEFAULT); + show_stack(current, NULL, KERN_EMERG);
if (test_and_set_thread_flag(TIF_DIE_IF_KERNEL)) { printk("die_if_kernel recursion detected.\n");
From: Wu Liliu wuliliu@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GFUD
--------------------------------
get_wchan() always returns 0 because the `fp > sp` check is always false. This patch reimplements it entirely and fixes this error.
Signed-off-by: Wu Liliu wuliliu@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/kernel/process.c | 51 ---------------------------------- arch/sw_64/kernel/stacktrace.c | 23 +++++++++++++++ 2 files changed, 23 insertions(+), 51 deletions(-)
diff --git a/arch/sw_64/kernel/process.c b/arch/sw_64/kernel/process.c index da721d4266ee..a75ae20205f3 100644 --- a/arch/sw_64/kernel/process.c +++ b/arch/sw_64/kernel/process.c @@ -226,57 +226,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) } EXPORT_SYMBOL(dump_fpu);
-/* - * Under heavy swap load I've seen this lose in an ugly way. So do - * some extra sanity checking on the ranges we expect these pointers - * to be in so that we can fail gracefully. This is just for ps after - * all. -- r~ - */ - -unsigned long -thread_saved_pc(struct task_struct *t) -{ - unsigned long top, fp, sp; - - top = (unsigned long)task_stack_page(t) + 2 * PAGE_SIZE; - sp = task_thread_info(t)->pcb.ksp; - fp = t->thread.s[6]; - - if (fp > sp && fp < top) - return *(unsigned long *)fp; - - return 0; -} - -unsigned long -get_wchan(struct task_struct *p) -{ - unsigned long schedule_frame; - unsigned long pc, top, sp; - - if (!p || p == current || p->state == TASK_RUNNING) - return 0; - /* - * This one depends on the frame size of schedule(). Do a - * "disass schedule" in gdb to find the frame size. Also, the - * code assumes that sleep_on() follows immediately after - * interruptible_sleep_on() and that add_timer() follows - * immediately after interruptible_sleep(). Ugly, isn't it? - * Maybe adding a wchan field to task_struct would be better, - * after all... 
- */ - - pc = thread_saved_pc(p); - if (in_sched_functions(pc)) { - top = (unsigned long)task_stack_page(p) + 2 * PAGE_SIZE; - sp = task_thread_info(p)->pcb.ksp; - schedule_frame = p->thread.s[6]; - if (schedule_frame > sp && schedule_frame < top) - return ((unsigned long *)schedule_frame)[12]; - } - return pc; -} - unsigned long arch_randomize_brk(struct mm_struct *mm) { return randomize_page(mm->brk, 0x02000000); diff --git a/arch/sw_64/kernel/stacktrace.c b/arch/sw_64/kernel/stacktrace.c index 4e9acf99aaab..9de609bb4c9a 100644 --- a/arch/sw_64/kernel/stacktrace.c +++ b/arch/sw_64/kernel/stacktrace.c @@ -172,3 +172,26 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } EXPORT_SYMBOL_GPL(save_stack_trace_tsk); + +static int save_pc(unsigned long pc, void *data) +{ + unsigned long *p = data; + *p = 0; + + if (!in_sched_functions(pc)) + *p = pc; + + return *p; +} + +unsigned long get_wchan(struct task_struct *tsk) +{ + unsigned long pc; + + if (!tsk || tsk == current || tsk->state == TASK_RUNNING) + return 0; + walk_stackframe(tsk, NULL, save_pc, &pc); + + return pc; +} +
From: Wu Liliu wuliliu@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GFQQ
--------------------------------
Stack-backtrace addresses used to be saved by scanning the stack upwards from SP, which is inaccurate. To implement this function accurately, we provide two ways to support it.
Signed-off-by: Wu Liliu wuliliu@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/kernel/stacktrace.c | 62 ++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 22 deletions(-)
diff --git a/arch/sw_64/kernel/stacktrace.c b/arch/sw_64/kernel/stacktrace.c index 9de609bb4c9a..f99298686bda 100644 --- a/arch/sw_64/kernel/stacktrace.c +++ b/arch/sw_64/kernel/stacktrace.c @@ -139,40 +139,58 @@ void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) walk_stackframe(task, NULL, print_address_trace, (void *)loglvl); }
+#ifdef CONFIG_STACKTRACE /* * Save stack-backtrace addresses into a stack_trace buffer. */ -void save_stack_trace(struct stack_trace *trace) +struct stack_trace_data { + struct stack_trace *trace; + unsigned int nosched; +}; + +int save_trace(unsigned long pc, void *d) { - save_stack_trace_tsk(current, trace); -} -EXPORT_SYMBOL_GPL(save_stack_trace); + struct stack_trace_data *data = d; + struct stack_trace *trace = data->trace; + + if (data->nosched && in_sched_functions(pc)) + return 0; + if (trace->skip > 0) { + trace->skip--; + return 0; + }
+ trace->entries[trace->nr_entries++] = pc; + return (trace->nr_entries >= trace->max_entries); +}
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +static void __save_stack_trace(struct task_struct *tsk, + struct stack_trace *trace, unsigned int nosched) { - unsigned long *sp = (unsigned long *)task_thread_info(tsk)->pcb.ksp; - unsigned long addr; - - WARN_ON(trace->nr_entries || !trace->max_entries); - - while (!kstack_end(sp)) { - addr = *sp++; - if (__kernel_text_address(addr) && - !in_sched_functions(addr)) { - if (trace->skip > 0) - trace->skip--; - else - trace->entries[trace->nr_entries++] = addr; - if (trace->nr_entries >= trace->max_entries) - break; - } - } + struct stack_trace_data data; + + data.trace = trace; + data.nosched = nosched; + + walk_stackframe(tsk, NULL, save_trace, &data); + if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + __save_stack_trace(tsk, trace, 1); +} EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+void save_stack_trace(struct stack_trace *trace) +{ + __save_stack_trace(current, trace, 0); +} +EXPORT_SYMBOL_GPL(save_stack_trace); +#endif + static int save_pc(unsigned long pc, void *data) { unsigned long *p = data;
From: Wang Yuanheng wangyuanheng@wxiat.com
Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5GFR5
--------------------------------
Add a bool debugfs file /sys/kernel/debug/sw_64/bind_vcpu. Echo 1/Y to it to enable vcpu binding, or echo 0/N to disable it. When enabled, determine which node to bind the vcpu to according to the physical address assigned to the guest.
Signed-off-by: Wang Yuanheng wangyuanheng@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/kernel/Makefile | 2 +- arch/sw_64/kernel/bindvcpu.c | 29 +++++++++++++++++++++++++++++ arch/sw_64/kvm/kvm-sw64.c | 13 ++++++++++++- 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 arch/sw_64/kernel/bindvcpu.c
diff --git a/arch/sw_64/kernel/Makefile b/arch/sw_64/kernel/Makefile index 94b63d6a286b..d4dc9e175d67 100644 --- a/arch/sw_64/kernel/Makefile +++ b/arch/sw_64/kernel/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_HIBERNATION) += hibernate_asm.o hibernate.o obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_PCI) += pci_common.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_DEBUG_FS) += segvdbg.o +obj-$(CONFIG_DEBUG_FS) += segvdbg.o bindvcpu.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o
ifndef CONFIG_PCI diff --git a/arch/sw_64/kernel/bindvcpu.c b/arch/sw_64/kernel/bindvcpu.c new file mode 100644 index 000000000000..611c395c144b --- /dev/null +++ b/arch/sw_64/kernel/bindvcpu.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Wang Yuanheng + * Author: Wang Yuanheng + * + */ + +#include <linux/kernel.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include <asm/debug.h> + +extern bool bind_vcpu_enabled; + +static int __init bind_vcpu_init(void) +{ + struct dentry *bindvcpu; + + if (!sw64_debugfs_dir) + return -ENODEV; + + bindvcpu = debugfs_create_bool("bind_vcpu", 0644, + sw64_debugfs_dir, &bind_vcpu_enabled); + if (!bindvcpu) + return -ENOMEM; + return 0; +} +late_initcall(bind_vcpu_init); diff --git a/arch/sw_64/kvm/kvm-sw64.c b/arch/sw_64/kvm/kvm-sw64.c index af29d0ca8e7f..de81f7efe01a 100644 --- a/arch/sw_64/kvm/kvm-sw64.c +++ b/arch/sw_64/kvm/kvm-sw64.c @@ -12,7 +12,7 @@ #include <linux/sched/signal.h> #include <linux/kvm.h> #include <linux/uaccess.h> - +#include <linux/sched.h> #include <asm/kvm_timer.h> #include <asm/kvm_emulate.h>
@@ -21,6 +21,7 @@
bool set_msi_flag; unsigned long sw64_kvm_last_vpn[NR_CPUS]; +__read_mostly bool bind_vcpu_enabled; #define cpu_last_vpn(cpuid) sw64_kvm_last_vpn[cpuid]
#ifdef CONFIG_SUBARCH_C3B @@ -537,6 +538,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) #ifndef CONFIG_KVM_MEMHOTPLUG vcpu->arch.vcb.vpcr = get_vpcr(vcpu->kvm->arch.host_phys_addr, vcpu->kvm->arch.size, 0); + + if (unlikely(bind_vcpu_enabled)) { + int nid; + unsigned long end; + + end = vcpu->kvm->arch.host_phys_addr + vcpu->kvm->arch.size; + nid = pfn_to_nid(PHYS_PFN(vcpu->kvm->arch.host_phys_addr)); + if (pfn_to_nid(PHYS_PFN(end)) == nid) + set_cpus_allowed_ptr(vcpu->arch.tsk, node_to_cpumask_map[nid]); + } #else unsigned long seg_base = virt_to_phys(vcpu->kvm->arch.seg_pgd);
Sunway inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5GFRP
--------------------------------
This patch is a backport of commit 67a929e097b7 ("mm: rename CONFIG_HAVE_GENERIC_GUP to CONFIG_HAVE_FAST_GUP"). In order to use the generic GUP, let's do the same.
Signed-off-by: Gu Zitao guzitao@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/sw_64/Kconfig b/arch/sw_64/Kconfig index ec6e583a5d9a..cf2f6f00708c 100644 --- a/arch/sw_64/Kconfig +++ b/arch/sw_64/Kconfig @@ -7,7 +7,7 @@ config SW64 select HAVE_OPROFILE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS - select HAVE_GENERIC_GUP + select HAVE_FAST_GUP select GENERIC_CLOCKEVENTS select GENERIC_IRQ_PROBE select GENERIC_IRQ_LEGACY
From: Zhou Xuemei zhouxuemei@wxiat.com
Sunway inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5GFS3
--------------------------------
When csum_partial_copy_from_user() is called in an interrupt, __copy_from_user() modifies floating-point registers f10-f15 without restoring their values. This corrupts the values of the userspace registers. Use memcpy() instead when called from kernel space.
Signed-off-by: Zhou Xuemei zhouxuemei@wxiat.com
Signed-off-by: Gu Zitao guzitao@wxiat.com --- arch/sw_64/lib/csum_partial_copy.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/arch/sw_64/lib/csum_partial_copy.c b/arch/sw_64/lib/csum_partial_copy.c index 5e5274e82b2b..742dd63cdb70 100644 --- a/arch/sw_64/lib/csum_partial_copy.c +++ b/arch/sw_64/lib/csum_partial_copy.c @@ -61,7 +61,11 @@ csum_partial_cfu_dest_aligned(const unsigned long __user *src, unsigned long checksum = ~0U; int err = 0;
- err = __copy_from_user(dst, src, len+8); + if (likely(!uaccess_kernel())) + err = __copy_from_user(dst, src, len + 8); + else + memcpy(dst, src, len + 8); + while (len > 0) { word = *dst; checksum += word; @@ -89,7 +93,10 @@ csum_partial_cfu_dest_unaligned(const unsigned long __user *src, unsigned long checksum = ~0U; int err = 0;
- err = __copy_from_user(dst, src, len+8); + if (likely(!uaccess_kernel())) + err = __copy_from_user(dst, src, len + 8); + else + memcpy(dst, src, len + 8);
dst = (unsigned long *)((unsigned long)dst & (~7UL)); word = *dst;