Jian Zhang (1):
  memcontrol: Add oom recover for kmemcg when release buddy hugepage

Navid Emamdoost (1):
  nbd_genl_status: null check for nla_nest_start

Weilong Chen (2):
  ACPI / APEI: Notify all ras err to driver
  arm64/ascend: Add new enable_oom_killer interface for oom control

Xu Qiang (2):
  irq-gic-v3: Add support to init ts core GICR
  irq-gic-v3-its: It can't be initialized when the GICR had been cut

Yuan Can (1):
  ascend: export interfaces required by ascend drivers
 arch/arm64/configs/openeuler_defconfig |   1 +
 drivers/acpi/apei/Kconfig              |   7 +
 drivers/acpi/apei/ghes.c               |   8 +-
 drivers/block/nbd.c                    |   6 +
 drivers/irqchip/Kconfig                |  10 +
 drivers/irqchip/irq-gic-v3-its.c       | 254 ++++++++++++++++++++++++-
 drivers/irqchip/irq-gic-v3.c           | 101 +++++++++-
 include/linux/irqchip/arm-gic-v3.h     |   5 +
 include/linux/oom.h                    |  26 +++
 kernel/power/autosleep.c               |   1 +
 kernel/workqueue.c                     |   3 +
 mm/Kconfig                             |  10 +
 mm/memcontrol.c                        |  14 ++
 mm/oom_kill.c                          |  58 ++++++
 mm/util.c                              |   2 +
 mm/vmalloc.c                           |   2 +
 16 files changed, 499 insertions(+), 9 deletions(-)
From: Navid Emamdoost <navid.emamdoost@gmail.com>
maillist inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E
CVE: CVE-2019-16089
Reference: https://lore.kernel.org/lkml/20190911164013.27364-1-navid.emamdoost@gmail.co...
---------------------------
nla_nest_start may fail and return NULL. Insert the missing check; the
errno is selected based on other call sites within the same source file.

Update: removed extra newline.
v3 Update: added release of reply, thanks to Michal Kubecek for pointing
it out.
Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
---
 drivers/block/nbd.c | 6 ++++++
 1 file changed, 6 insertions(+)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 855fdf5c3b4e..b99c169890d5 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -2408,6 +2408,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
+	if (!dev_list) {
+		nlmsg_free(reply);
+		ret = -EMSGSIZE;
+		goto out;
+	}
+
 	if (index == -1) {
 		ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
 		if (ret) {
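For context, the rule the fix applies is general to netlink message
construction: any nest opened with the nla_nest_start() family can return
NULL when the skb runs out of tailroom, and the half-built reply must then
be freed. A minimal sketch of the pattern (the function is illustrative,
not the nbd handler itself):

static int demo_fill_status(struct sk_buff *reply)
{
	struct nlattr *dev_list;

	/* May return NULL if the skb has no room left for the attribute */
	dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
	if (!dev_list) {
		nlmsg_free(reply);	/* drop the half-built message */
		return -EMSGSIZE;
	}

	/* ... add per-device attributes here ... */

	nla_nest_end(reply, dev_list);	/* fix up the nest length */
	return 0;
}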
From: Xu Qiang <xuqiang36@huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E
CVE: NA
------------
On the Ascend platform, the GICRs that are not managed by the OS also need
to be initialized by the OS.
Signed-off-by: Xu Qiang <xuqiang36@huawei.com>
---
 drivers/irqchip/Kconfig            |  10 ++
 drivers/irqchip/irq-gic-v3-its.c   | 227 ++++++++++++++++++++++++++++-
 drivers/irqchip/irq-gic-v3.c       | 101 ++++++++++++-
 include/linux/irqchip/arm-gic-v3.h |   5 +
 4 files changed, 335 insertions(+), 8 deletions(-)
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index f7149d0f3d45..3ad905633b8c 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -159,6 +159,16 @@ config HISILICON_IRQ_MBIGEN
 	select ARM_GIC_V3
 	select ARM_GIC_V3_ITS
 
+if ASCEND_FEATURES
+
+config ASCEND_INIT_ALL_GICR
+	bool "Enable init all GICR for Ascend"
+	depends on ARM_GIC_V3
+	depends on ARM_GIC_V3_ITS
+	default n
+
+endif
+
 config IMGPDC_IRQ
 	bool
 	select GENERIC_IRQ_CHIP
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index a8c89df1a997..b7c5bbd209f3 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -193,6 +193,14 @@ static DEFINE_RAW_SPINLOCK(vmovp_lock);
 
 static DEFINE_IDA(its_vpeid_ida);
 
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR
+static bool init_all_gicr;
+static int nr_gicr;
+#else
+#define init_all_gicr		false
+#define nr_gicr			0
+#endif
+
 #define gic_data_rdist()		(raw_cpu_ptr(gic_rdists->rdist))
 #define gic_data_rdist_cpu(cpu)		(per_cpu_ptr(gic_rdists->rdist, cpu))
 #define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
@@ -1558,6 +1566,11 @@ static __maybe_unused u32 its_read_lpi_count(struct irq_data *d, int cpu)
 
 static void its_inc_lpi_count(struct irq_data *d, int cpu)
 {
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR
+	if (cpu >= nr_cpu_ids)
+		return;
+#endif
+
 	if (irqd_affinity_is_managed(d))
 		atomic_inc(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
 	else
@@ -1566,6 +1579,11 @@ static void its_inc_lpi_count(struct irq_data *d, int cpu)
 
 static void its_dec_lpi_count(struct irq_data *d, int cpu)
 {
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR
+	if (cpu >= nr_cpu_ids)
+		return;
+#endif
+
 	if (irqd_affinity_is_managed(d))
 		atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed);
 	else
@@ -1665,6 +1683,26 @@ static int its_select_cpu(struct irq_data *d,
 	return cpu;
 }
 
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR
+static int its_select_cpu_other(const struct cpumask *mask_val)
+{
+	int cpu;
+
+	if (!init_all_gicr)
+		return -EINVAL;
+
+	cpu = find_first_bit(cpumask_bits(mask_val), NR_CPUS);
+	if (cpu >= nr_gicr)
+		cpu = -EINVAL;
+	return cpu;
+}
+#else
+static int its_select_cpu_other(const struct cpumask *mask_val)
+{
+	return -EINVAL;
+}
+#endif
+
 static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 			    bool force)
 {
@@ -1686,6 +1724,9 @@ static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
 		cpu = cpumask_pick_least_loaded(d, mask_val);
 
 	if (cpu < 0 || cpu >= nr_cpu_ids)
+		cpu = its_select_cpu_other(mask_val);
+
+	if (cpu < 0)
 		goto err;
 
 	/* don't set the affinity when the target cpu is same as current one */
@@ -2951,8 +2992,12 @@ static int allocate_vpe_l1_table(void)
 static int its_alloc_collections(struct its_node *its)
 {
 	int i;
+	int cpu_nr = nr_cpu_ids;
+
+	if (init_all_gicr)
+		cpu_nr = CONFIG_NR_CPUS;
 
-	its->collections = kcalloc(nr_cpu_ids, sizeof(*its->collections),
+	its->collections = kcalloc(cpu_nr, sizeof(*its->collections),
 				   GFP_KERNEL);
 	if (!its->collections)
 		return -ENOMEM;
@@ -3263,6 +3308,186 @@ static void its_cpu_init_collections(void)
 	raw_spin_unlock(&its_lock);
 }
 
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR
+void its_set_gicr_nr(int nr)
+{
+	nr_gicr = nr;
+}
+
+static int __init its_enable_init_all_gicr(char *str)
+{
+	init_all_gicr = true;
+	return 1;
+}
+
+__setup("init_all_gicr", its_enable_init_all_gicr);
+
+bool its_init_all_gicr(void)
+{
+	return init_all_gicr;
+}
+
+static void its_cpu_init_lpis_others(void __iomem *rbase, int cpu)
+{
+	struct page *pend_page;
+	phys_addr_t paddr;
+	u64 val, tmp;
+
+	if (!init_all_gicr)
+		return;
+
+	val = readl_relaxed(rbase + GICR_CTLR);
+	if ((gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED) &&
+	    (val & GICR_CTLR_ENABLE_LPIS)) {
+		/*
+		 * Check that we get the same property table on all
+		 * RDs. If we don't, this is hopeless.
+		 */
+		paddr = gicr_read_propbaser(rbase + GICR_PROPBASER);
+		paddr &= GENMASK_ULL(51, 12);
+		if (WARN_ON(gic_rdists->prop_table_pa != paddr))
+			add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
+
+		paddr = gicr_read_pendbaser(rbase + GICR_PENDBASER);
+		paddr &= GENMASK_ULL(51, 16);
+
+		WARN_ON(!gic_check_reserved_range(paddr, LPI_PENDBASE_SZ));
+
+		goto out;
+	}
+
+	/* If we didn't allocate the pending table yet, do it now */
+	pend_page = its_allocate_pending_table(GFP_NOWAIT);
+	if (!pend_page) {
+		pr_err("Failed to allocate PENDBASE for GICR:%p\n", rbase);
+		return;
+	}
+
+	paddr = page_to_phys(pend_page);
+	pr_info("GICR:%p using LPI pending table @%pa\n",
+		rbase, &paddr);
+
+	WARN_ON(gic_reserve_range(paddr, LPI_PENDBASE_SZ));
+
+	/* Disable LPIs */
+	val = readl_relaxed(rbase + GICR_CTLR);
+	val &= ~GICR_CTLR_ENABLE_LPIS;
+	writel_relaxed(val, rbase + GICR_CTLR);
+
+	/*
+	 * Make sure any change to the table is observable by the GIC.
+	 */
+	dsb(sy);
+
+	/* set PROPBASE */
+	val = (gic_rdists->prop_table_pa |
+	       GICR_PROPBASER_InnerShareable |
+	       GICR_PROPBASER_RaWaWb |
+	       ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK));
+
+	gicr_write_propbaser(val, rbase + GICR_PROPBASER);
+	tmp = gicr_read_propbaser(rbase + GICR_PROPBASER);
+
+	if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
+		if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) {
+			/*
+			 * The HW reports non-shareable, we must
+			 * remove the cacheability attributes as
+			 * well.
+			 */
+			val &= ~(GICR_PROPBASER_SHAREABILITY_MASK |
+				 GICR_PROPBASER_CACHEABILITY_MASK);
+			val |= GICR_PROPBASER_nC;
+			gicr_write_propbaser(val, rbase + GICR_PROPBASER);
+		}
+		pr_info_once("GIC: using cache flushing for LPI property table\n");
+		gic_rdists->flags |= RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING;
+	}
+
+	/* set PENDBASE */
+	val = (page_to_phys(pend_page) |
+	       GICR_PENDBASER_InnerShareable |
+	       GICR_PENDBASER_RaWaWb);
+
+	gicr_write_pendbaser(val, rbase + GICR_PENDBASER);
+	tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER);
+
+	if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) {
+		/*
+		 * The HW reports non-shareable, we must remove the
+		 * cacheability attributes as well.
+		 */
+		val &= ~(GICR_PENDBASER_SHAREABILITY_MASK |
+			 GICR_PENDBASER_CACHEABILITY_MASK);
+		val |= GICR_PENDBASER_nC;
+		gicr_write_pendbaser(val, rbase + GICR_PENDBASER);
+	}
+
+	/* Enable LPIs */
+	val = readl_relaxed(rbase + GICR_CTLR);
+	val |= GICR_CTLR_ENABLE_LPIS;
+	writel_relaxed(val, rbase + GICR_CTLR);
+
+	/* Make sure the GIC has seen the above */
+	dsb(sy);
+out:
+	pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n",
+		cpu, pend_page ? "allocated" : "reserved", &paddr);
+}
+
+static void its_cpu_init_collection_others(void __iomem *rbase,
+					   phys_addr_t phys_base, int cpu)
+{
+	struct its_node *its;
+
+	if (!init_all_gicr)
+		return;
+
+	raw_spin_lock(&its_lock);
+
+	list_for_each_entry(its, &its_nodes, entry) {
+		u64 target;
+
+		/*
+		 * We now have to bind each collection to its target
+		 * redistributor.
+		 */
+		if (gic_read_typer(its->base + GITS_TYPER) & GITS_TYPER_PTA) {
+			/*
+			 * This ITS wants the physical address of the
+			 * redistributor.
+			 */
+			target = phys_base;
+		} else {
+			/*
+			 * This ITS wants a linear CPU number.
+			 */
+			target = gic_read_typer(rbase + GICR_TYPER);
+			target = GICR_TYPER_CPU_NUMBER(target) << 16;
+		}
+
+		/* Perform collection mapping */
+		its->collections[cpu].target_address = target;
+		its->collections[cpu].col_id = cpu;
+
+		its_send_mapc(its, &its->collections[cpu], 1);
+		its_send_invall(its, &its->collections[cpu]);
+	}
+
+	raw_spin_unlock(&its_lock);
+}
+
+int its_cpu_init_others(void __iomem *base, phys_addr_t phys_base, int cpu)
+{
+	if (!list_empty(&its_nodes)) {
+		its_cpu_init_lpis_others(base, cpu);
+		its_cpu_init_collection_others(base, phys_base, cpu);
+	}
+
+	return 0;
+}
+#endif
+
 static struct its_device *its_find_device(struct its_node *its, u32 dev_id)
 {
 	struct its_device *its_dev = NULL, *tmp;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index f59ac9586b7b..003043bb0d68 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -281,17 +281,11 @@ static u64 __maybe_unused gic_read_iar(void)
 }
 #endif
 
-static void gic_enable_redist(bool enable)
+static void __gic_enable_redist(void __iomem *rbase, bool enable)
 {
-	void __iomem *rbase;
 	u32 count = 1000000;	/* 1s! */
 	u32 val;
 
-	if (gic_data.flags & FLAGS_WORKAROUND_GICR_WAKER_MSM8996)
-		return;
-
-	rbase = gic_data_rdist_rd_base();
-
 	val = readl_relaxed(rbase + GICR_WAKER);
 	if (enable)
 		/* Wake up this CPU redistributor */
@@ -318,6 +312,14 @@ static void gic_enable_redist(bool enable)
 		enable ? "wakeup" : "sleep");
 }
 
+static void gic_enable_redist(bool enable)
+{
+	if (gic_data.flags & FLAGS_WORKAROUND_GICR_WAKER_MSM8996)
+		return;
+
+	__gic_enable_redist(gic_data_rdist_rd_base(), enable);
+}
+
 /*
  * Routines to disable, enable, EOI and route interrupts
  */
@@ -1288,6 +1290,89 @@ static void gic_cpu_init(void)
 	gic_cpu_sys_reg_init();
 }
 
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR
+static int __gic_compute_nr_gicr(struct redist_region *region, void __iomem *ptr)
+{
+	static int gicr_nr = 0;
+
+	its_set_gicr_nr(++gicr_nr);
+
+	return 1;
+}
+
+static void gic_compute_nr_gicr(void)
+{
+	gic_iterate_rdists(__gic_compute_nr_gicr);
+}
+
+static int gic_rdist_cpu(void __iomem *ptr, unsigned int cpu)
+{
+	unsigned long mpidr = cpu_logical_map(cpu);
+	u64 typer;
+	u32 aff;
+
+	/*
+	 * Convert affinity to a 32bit value that can be matched to
+	 * GICR_TYPER bits [63:32].
+	 */
+	aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 |
+	       MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 |
+	       MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 |
+	       MPIDR_AFFINITY_LEVEL(mpidr, 0));
+
+	typer = gic_read_typer(ptr + GICR_TYPER);
+	if ((typer >> 32) == aff)
+		return 0;
+
+	return 1;
+}
+
+static int gic_rdist_cpus(void __iomem *ptr)
+{
+	unsigned int i;
+
+	for (i = 0; i < nr_cpu_ids; i++) {
+		if (gic_rdist_cpu(ptr, i) == 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+static int gic_cpu_init_other(struct redist_region *region, void __iomem *ptr)
+{
+	u64 offset;
+	phys_addr_t phys_base;
+	static int cpu = 0;
+
+	if (cpu == 0)
+		cpu = nr_cpu_ids;
+
+	if (gic_rdist_cpus(ptr) == 1) {
+		offset = ptr - region->redist_base;
+		phys_base = region->phys_base + offset;
+		__gic_enable_redist(ptr, true);
+		if (gic_dist_supports_lpis())
+			its_cpu_init_others(ptr, phys_base, cpu);
+		cpu++;
+	}
+
+	return 1;
+}
+
+static void gic_cpu_init_others(void)
+{
+	if (!its_init_all_gicr())
+		return;
+
+	gic_iterate_rdists(gic_cpu_init_other);
+}
+#else
+static inline void gic_compute_nr_gicr(void) {}
+
+static inline void gic_cpu_init_others(void) {}
+#endif
+
 #ifdef CONFIG_SMP
 
 #define MPIDR_TO_SGI_RS(mpidr)	(MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT)
@@ -2052,6 +2137,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
 		gic_data.rdists.has_direct_lpi = true;
 		gic_data.rdists.has_vpend_valid_dirty = true;
 	}
+	gic_compute_nr_gicr();
 
 	if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) {
 		err = -ENOMEM;
@@ -2087,6 +2173,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base,
 	}
 
 	gic_enable_nmi_support();
+	gic_cpu_init_others();
 
 	return 0;
 
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 728691365464..33e098c70952 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -637,6 +637,11 @@ struct irq_domain;
 struct fwnode_handle;
 int __init its_lpi_memreserve_init(void);
 int its_cpu_init(void);
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR
+void its_set_gicr_nr(int nr);
+bool its_init_all_gicr(void);
+int its_cpu_init_others(void __iomem *base, phys_addr_t phys_base, int idx);
+#endif
 int its_init(struct fwnode_handle *handle, struct rdists *rdists,
 	     struct irq_domain *domain);
 int mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent);
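Note that even with CONFIG_ASCEND_INIT_ALL_GICR=y the new path stays
dormant until the init_all_gicr parameter is passed on the kernel command
line, via the __setup() hook in the diff above. For reference, a minimal
sketch of that mechanism, using a hypothetical parameter name:

#include <linux/init.h>

static bool my_feature_enabled;

/* Runs early during boot when "my_feature" appears on the command line;
 * returning 1 marks the parameter as handled. */
static int __init my_feature_setup(char *str)
{
	my_feature_enabled = true;
	return 1;
}
__setup("my_feature", my_feature_setup);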
From: Xu Qiang <xuqiang36@huawei.com>
ascend inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E
CVE: NA
------------
On FPGA, we need to check whether the GICR has been cut; if it has, it
cannot be initialized.
Signed-off-by: Xu Qiang <xuqiang36@huawei.com>
---
 drivers/irqchip/irq-gic-v3-its.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index b7c5bbd209f3..c38a214daf24 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -3438,6 +3438,7 @@ static void its_cpu_init_lpis_others(void __iomem *rbase, int cpu)
 static void its_cpu_init_collection_others(void __iomem *rbase,
 					   phys_addr_t phys_base, int cpu)
 {
+	u32 count;
 	struct its_node *its;
 
 	if (!init_all_gicr)
@@ -3466,6 +3467,32 @@ static void its_cpu_init_collection_others(void __iomem *rbase,
 			target = GICR_TYPER_CPU_NUMBER(target) << 16;
 		}
 
+		dsb(sy);
+
+		/* In FPGA, We need to check if the gicr has been cut,
+		 * and if it is, it can't be initialized
+		 */
+		count = 2000;
+		while (1) {
+			if (readl_relaxed(rbase + GICR_SYNCR) == 0)
+				break;
+
+			count--;
+			if (!count) {
+				pr_err("this gicr does not exist, or it's abnormal:%pK\n",
+				       &phys_base);
+				break;
+			}
+			cpu_relax();
+			udelay(1);
+		}
+
+		if (count == 0)
+			break;
+
+		pr_info("its init other collection table, ITS:%pK, GICR:%pK, coreId:%u\n",
+			&its->phys_base, &phys_base, cpu);
+
 		/* Perform collection mapping */
 		its->collections[cpu].target_address = target;
 		its->collections[cpu].col_id = cpu;
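The open-coded loop bounds the wait at roughly 2000 microseconds. For
comparison, the same bounded poll can be written with the kernel's generic
iopoll helper; a sketch, assuming the 2ms budget carries over and using
the atomic variant since its_lock is held here:

#include <linux/iopoll.h>

static int gicr_wait_sync(void __iomem *rbase)
{
	u32 val;

	/* Poll GICR_SYNCR every 1us until it reads 0, for at most 2000us */
	return readl_poll_timeout_atomic(rbase + GICR_SYNCR, val,
					 val == 0, 1, 2000);
}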
From: Weilong Chen <chenweilong@huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E
CVE: NA
-------------------------------------------------
Customization: deliver all types of errors to the driver, as the driver
needs to process the errors in process context.
Signed-off-by: Weilong Chen <chenweilong@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig | 1 +
 drivers/acpi/apei/Kconfig              | 7 +++++++
 drivers/acpi/apei/ghes.c               | 8 +++++++-
 3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index ce5a7aeaeeca..736d715d6d1f 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -653,6 +653,7 @@ CONFIG_ACPI_HMAT=y
 CONFIG_HAVE_ACPI_APEI=y
 CONFIG_ACPI_APEI=y
 CONFIG_ACPI_APEI_GHES=y
+CONFIG_ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR=y
 CONFIG_ACPI_APEI_PCIEAER=y
 CONFIG_ACPI_APEI_SEA=y
 CONFIG_ACPI_APEI_MEMORY_FAILURE=y
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 6b18f8bc7be3..1dce3ad7c9bd 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -33,6 +33,13 @@ config ACPI_APEI_GHES
 	  by firmware to produce more valuable hardware error
 	  information for Linux.
 
+config ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR
+	bool "Notify all ras err to driver"
+	depends on ARM64 && ACPI_APEI_GHES
+	default n
+	help
+	  Deliver all types of error to driver.
+
 config ACPI_APEI_PCIEAER
 	bool "APEI PCIe AER logging/recovering support"
 	depends on ACPI_APEI && PCIEAER
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 63ad0541db38..bf1b9252a8da 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -692,12 +692,18 @@ static bool ghes_do_proc(struct ghes *ghes,
 			queued = ghes_handle_arm_hw_error(gdata, sev);
 		} else {
 			void *err = acpi_hest_get_payload(gdata);
-
+#ifndef CONFIG_ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR
 			ghes_defer_non_standard_event(gdata, sev);
+#endif
 			log_non_standard_event(sec_type, fru_id, fru_text,
 					       sec_sev, err,
 					       gdata->error_data_length);
 		}
+
+#ifdef CONFIG_ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR
+		/* Customization deliver all types error to driver. */
+		ghes_defer_non_standard_event(gdata, sev);
+#endif
 	}
 
 	return queued;
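In mainline, ghes_defer_non_standard_event() queues the record onto a
vendor-record notifier chain whose callbacks run in process context, which
is what lets a driver do the processing this patch describes. A sketch of
such a consumer, assuming the mainline convention that the notifier payload
is the struct acpi_hest_generic_data entry (the handler and its name are
hypothetical):

#include <acpi/ghes.h>
#include <linux/notifier.h>

static int my_ras_notify(struct notifier_block *nb, unsigned long event,
			 void *data)
{
	struct acpi_hest_generic_data *gdata = data;

	/* Runs in process context; inspect section type and payload here */
	pr_debug("RAS event, payload at %p\n", acpi_hest_get_payload(gdata));
	return NOTIFY_OK;
}

static struct notifier_block my_ras_nb = {
	.notifier_call = my_ras_notify,
};

/* at driver init: ghes_register_vendor_record_notifier(&my_ras_nb); */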
From: Weilong Chen <chenweilong@huawei.com>
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E
CVE: NA
-------------------------------------------------
Support disabling the oom-killer, and report oom events to bbox:

vm.enable_oom_killer:
  0: disable oom killer
  1: enable oom killer (default, compatible with mainline)
Signed-off-by: Weilong Chen <chenweilong@huawei.com>
---
 include/linux/oom.h | 26 +++++++++++++++++++++
 mm/Kconfig          | 10 ++++++++
 mm/memcontrol.c     |  4 ++++
 mm/oom_kill.c       | 57 +++++++++++++++++++++++++++++++++++++++++++++
 mm/util.c           |  2 ++
 5 files changed, 99 insertions(+)
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 7d0c9c48a0c5..f88f34d2b4dd 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -112,4 +112,30 @@ extern void oom_killer_enable(void);
 
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
+extern int sysctl_enable_oom_killer;
+
+#define OOM_TYPE_NOMEM		0
+#define OOM_TYPE_OVERCOMMIT	1
+#define OOM_TYPE_CGROUP		2
+
+#ifdef CONFIG_ASCEND_OOM
+extern int register_hisi_oom_notifier(struct notifier_block *nb);
+extern int unregister_hisi_oom_notifier(struct notifier_block *nb);
+int oom_type_notifier_call(unsigned int type, struct oom_control *oc);
+#else
+static inline int register_hisi_oom_notifier(struct notifier_block *nb)
+{
+	return -EINVAL;
+}
+
+static inline int unregister_hisi_oom_notifier(struct notifier_block *nb)
+{
+	return -EINVAL;
+}
+
+static inline int oom_type_notifier_call(unsigned int type, struct oom_control *oc)
+{
+	return -EINVAL;
+}
+#endif
 #endif /* _INCLUDE_LINUX_OOM_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 0f68e5bbeb89..cdef59dc373c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1302,6 +1302,16 @@ config SHARE_POOL
 	  in kernel and user level, which is only enabled for ascend platform.
 	  To enable this feature, enable_ascend_share_pool bootarg is needed.
 
+config ASCEND_OOM
+	bool "Enable support for disable oom killer"
+	default y
+	help
+	  In some cases we hope that the oom will not kill the process when it
+	  occurs, be able to notify the black box to report the event, and be
+	  able to trigger the panic to locate the problem.
+	  vm.enable_oom_killer:
+	  0: disable oom killer
+	  1: enable oom killer (default, compatible with mainline)
+
 source "mm/damon/Kconfig"
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8a881ab21f6c..08af3c8df6f3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1955,6 +1955,7 @@ static bool mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 		current->memcg_in_oom = memcg;
 		current->memcg_oom_gfp_mask = mask;
 		current->memcg_oom_order = order;
+		oom_type_notifier_call(OOM_TYPE_CGROUP, NULL);
 	}
 	return false;
 }
@@ -2019,6 +2020,9 @@ bool mem_cgroup_oom_synchronize(bool handle)
 	if (locked)
 		mem_cgroup_oom_notify(memcg);
 
+	if (!sysctl_enable_oom_killer)
+		oom_type_notifier_call(OOM_TYPE_CGROUP, NULL);
+
 	schedule();
 	mem_cgroup_unmark_under_oom(memcg);
 	finish_wait(&memcg_oom_waitq, &owait.wait);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 44bde56ecd02..fb9dc88ff17b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -55,6 +55,7 @@
 static int sysctl_panic_on_oom;
 static int sysctl_oom_kill_allocating_task;
 static int sysctl_oom_dump_tasks = 1;
+int sysctl_enable_oom_killer = 1;
 
 /*
  * Serializes oom killer invocations (out_of_memory()) from all contexts to
@@ -724,6 +725,17 @@ static struct ctl_table vm_oom_kill_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+#ifdef CONFIG_ASCEND_OOM
+	{
+		.procname	= "enable_oom_killer",
+		.data		= &sysctl_enable_oom_killer,
+		.maxlen		= sizeof(sysctl_enable_oom_killer),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
 	{}
 };
 #endif
@@ -1073,6 +1085,7 @@ static void check_panic_on_oom(struct oom_control *oc)
 	if (is_sysrq_oom(oc))
 		return;
 	dump_header(oc, NULL);
+	oom_type_notifier_call(0, oc);
 	panic("Out of memory: %s panic_on_oom is enabled\n",
 		sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
 }
@@ -1091,6 +1104,45 @@ int unregister_oom_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(unregister_oom_notifier);
 
+#ifdef CONFIG_ASCEND_OOM
+static BLOCKING_NOTIFIER_HEAD(oom_type_notify_list);
+
+int register_hisi_oom_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&oom_type_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(register_hisi_oom_notifier);
+
+int unregister_hisi_oom_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&oom_type_notify_list, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_hisi_oom_notifier);
+
+int oom_type_notifier_call(unsigned int type, struct oom_control *oc)
+{
+	struct oom_control oc_tmp = { 0 };
+	static unsigned long caller_jiffies;
+
+	if (sysctl_enable_oom_killer)
+		return -EINVAL;
+
+	if (oc)
+		type = is_memcg_oom(oc) ? OOM_TYPE_CGROUP : OOM_TYPE_NOMEM;
+	else
+		oc = &oc_tmp;
+
+	if (printk_timed_ratelimit(&caller_jiffies, 10000)) {
+		pr_err("OOM_NOTIFIER: oom type %u\n", type);
+		dump_stack();
+		show_mem();
+		dump_tasks(oc);
+	}
+
+	return blocking_notifier_call_chain(&oom_type_notify_list, type, NULL);
+}
+#endif
+
 /**
  * out_of_memory - kill the "best" process when we run out of memory
  * @oc: pointer to struct oom_control
@@ -1107,6 +1159,11 @@ bool out_of_memory(struct oom_control *oc)
 	if (oom_killer_disabled)
 		return false;
 
+	if (!sysctl_enable_oom_killer) {
+		oom_type_notifier_call(0, oc);
+		return false;
+	}
+
 	if (!is_memcg_oom(oc)) {
 		blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
 		if (freed > 0 && !is_sysrq_oom(oc))
diff --git a/mm/util.c b/mm/util.c
index 90250cbc82fe..e41ac8a58eb5 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -26,6 +26,7 @@
 #include <linux/share_pool.h>
 
 #include <linux/uaccess.h>
+#include <linux/oom.h>
 
 #include "internal.h"
 #include "swap.h"
@@ -981,6 +982,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 error:
 	pr_warn_ratelimited("%s: pid: %d, comm: %s, not enough memory for the allocation\n",
 			    __func__, current->pid, current->comm);
+	oom_type_notifier_call(OOM_TYPE_OVERCOMMIT, NULL);
 	vm_unacct_memory(pages);
 
 	return -ENOMEM;
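A bbox-style consumer of the new chain registers through
register_hisi_oom_notifier(); the OOM type arrives as the notifier event
and the data pointer is NULL. A sketch (the reporting logic is
hypothetical):

#include <linux/notifier.h>
#include <linux/oom.h>

static int bbox_oom_event(struct notifier_block *nb, unsigned long type,
			  void *unused)
{
	/* type is OOM_TYPE_NOMEM / OOM_TYPE_OVERCOMMIT / OOM_TYPE_CGROUP */
	pr_warn("bbox: reporting oom event, type=%lu\n", type);
	return NOTIFY_OK;
}

static struct notifier_block bbox_oom_nb = {
	.notifier_call = bbox_oom_event,
};

static int __init bbox_oom_init(void)
{
	return register_hisi_oom_notifier(&bbox_oom_nb);
}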
On 2023/12/11 17:12, Yuan Can wrote:
> From: Weilong Chen <chenweilong@huawei.com>
>
> ascend inclusion
> category: feature
> bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E
> CVE: NA
>
> -------------------------------------------------
>
> Support disabling the oom-killer, and report oom events to bbox:
>
> vm.enable_oom_killer:
>   0: disable oom killer
>   1: enable oom killer (default, compatible with mainline)
>
> Signed-off-by: Weilong Chen <chenweilong@huawei.com>

[...]

> +extern int sysctl_enable_oom_killer;
> +
> +#define OOM_TYPE_NOMEM		0
> +#define OOM_TYPE_OVERCOMMIT	1
> +#define OOM_TYPE_CGROUP		2

These types should have no users outside this header; move them inside
the #ifdef CONFIG_ASCEND_OOM block.

[...]

> @@ -2019,6 +2020,9 @@ bool mem_cgroup_oom_synchronize(bool handle)
>  	if (locked)
>  		mem_cgroup_oom_notify(memcg);
>
> +	if (!sysctl_enable_oom_killer)
> +		oom_type_notifier_call(OOM_TYPE_CGROUP, NULL);

oom_type_notifier_call() already checks sysctl_enable_oom_killer
internally; there is no need to check it again here.

[...]

>  static int sysctl_panic_on_oom;
>  static int sysctl_oom_kill_allocating_task;
>  static int sysctl_oom_dump_tasks = 1;
> +int sysctl_enable_oom_killer = 1;

With the above fixed, it looks like this can be made static.
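Taken together, the comments suggest a header layout along these lines
(a sketch of the requested rearrangement, not code from the posted
series):

/* include/linux/oom.h */
#ifdef CONFIG_ASCEND_OOM
/* no users outside this guard, per review */
#define OOM_TYPE_NOMEM		0
#define OOM_TYPE_OVERCOMMIT	1
#define OOM_TYPE_CGROUP		2

extern int register_hisi_oom_notifier(struct notifier_block *nb);
extern int unregister_hisi_oom_notifier(struct notifier_block *nb);
int oom_type_notifier_call(unsigned int type, struct oom_control *oc);
#else
static inline int register_hisi_oom_notifier(struct notifier_block *nb)
{
	return -EINVAL;
}

static inline int unregister_hisi_oom_notifier(struct notifier_block *nb)
{
	return -EINVAL;
}

static inline int oom_type_notifier_call(unsigned int type,
					 struct oom_control *oc)
{
	return -EINVAL;
}
#endif

And once the extra check in mem_cgroup_oom_synchronize() is dropped, the
extern declaration of sysctl_enable_oom_killer could go away too, letting
the variable become static in mm/oom_kill.c as the review suggests.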
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E
CVE: NA
--------------------------------
Export oom_type_notifier_call and __get_vm_area_caller. Export pm_autosleep_set_state and __vmalloc_node_range. Export alloc_workqueue_attrs, free_workqueue_attrs and apply_workqueue_attrs.
Signed-off-by: Yuan Can <yuancan@huawei.com>
---
 kernel/power/autosleep.c | 1 +
 kernel/workqueue.c       | 3 +++
 mm/oom_kill.c            | 1 +
 mm/vmalloc.c             | 2 ++
 4 files changed, 7 insertions(+)
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c
index b29c8aca7486..80ba474daa40 100644
--- a/kernel/power/autosleep.c
+++ b/kernel/power/autosleep.c
@@ -113,6 +113,7 @@ int pm_autosleep_set_state(suspend_state_t state)
 	mutex_unlock(&autosleep_lock);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(pm_autosleep_set_state);
 
 int __init pm_autosleep_init(void)
 {
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0f682da96e1c..3eabf97c4e9a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3713,6 +3713,7 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs)
 		kfree(attrs);
 	}
 }
+EXPORT_SYMBOL_GPL(free_workqueue_attrs);
 
 /**
  * alloc_workqueue_attrs - allocate a workqueue_attrs
@@ -3741,6 +3742,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(void)
 	free_workqueue_attrs(attrs);
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(alloc_workqueue_attrs);
 
 static void copy_workqueue_attrs(struct workqueue_attrs *to,
 				 const struct workqueue_attrs *from)
@@ -4482,6 +4484,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
 
 /**
  * wq_update_pod - update pod affinity of a wq for CPU hot[un]plug
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index fb9dc88ff17b..62601b3134bb 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1141,6 +1141,7 @@ int oom_type_notifier_call(unsigned int type, struct oom_control *oc)
 
 	return blocking_notifier_call_chain(&oom_type_notify_list, type, NULL);
 }
+EXPORT_SYMBOL_GPL(oom_type_notifier_call);
 #endif
 
 /**
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 30665fb33589..719539b32488 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2622,6 +2622,7 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags,
 	return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end,
 				  NUMA_NO_NODE, GFP_KERNEL, caller);
 }
+EXPORT_SYMBOL(__get_vm_area_caller);
 
 /**
  * get_vm_area - reserve a contiguous kernel virtual area
@@ -3362,6 +3363,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(__vmalloc_node_range);
 
 /**
  * __vmalloc_node - allocate virtually contiguous memory
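With these symbols exported, a loadable module can, for instance, retarget
an unbound workqueue through the attrs API. A sketch, assuming the target
queue was created with WQ_UNBOUND (the function and the cpumask choice are
illustrative):

#include <linux/workqueue.h>

static int pin_unbound_wq_to_cpu0(struct workqueue_struct *wq)
{
	struct workqueue_attrs *attrs;
	int ret;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;

	/* Restrict the workqueue's workers to CPU 0 */
	cpumask_clear(attrs->cpumask);
	cpumask_set_cpu(0, attrs->cpumask);

	ret = apply_workqueue_attrs(wq, attrs);	/* wq must be WQ_UNBOUND */
	free_workqueue_attrs(attrs);
	return ret;
}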
From: Jian Zhang <zhangjian210@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E
-------------------------------
On Ascend, we use tmp hugepages and disable the OOM-killer. When an OOM
occurs and enough memory later becomes available for the process, the
process does not return to normal operation. In this case, we must use
oom recover to let the process run again.
Signed-off-by: Jian Zhang <zhangjian210@huawei.com>
---
 mm/memcontrol.c | 10 ++++++++++
 1 file changed, 10 insertions(+)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 08af3c8df6f3..e1e79339dbd2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3154,12 +3154,22 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
 	struct folio *folio = page_folio(page);
 	struct obj_cgroup *objcg;
 	unsigned int nr_pages = 1 << order;
+#ifdef CONFIG_ASCEND_OOM
+	struct mem_cgroup *memcg;
+#endif
 
 	if (!folio_memcg_kmem(folio))
 		return;
 
 	objcg = __folio_objcg(folio);
 	obj_cgroup_uncharge_pages(objcg, nr_pages);
+#ifdef CONFIG_ASCEND_OOM
+	memcg = get_mem_cgroup_from_objcg(objcg);
+	if (!mem_cgroup_is_root(memcg))
+		memcg_oom_recover(memcg);
+	css_put(&memcg->css);
+#endif
+
 	folio->memcg_data = 0;
 	obj_cgroup_put(objcg);
 }
On 2023/12/11 17:12, Yuan Can wrote:
> From: Jian Zhang <zhangjian210@huawei.com>
>
> hulk inclusion
> category: bugfix
> bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E
>
> On Ascend, we use tmp hugepages and disable the OOM-killer. When an OOM
> occurs and enough memory later becomes available for the process, the
> process does not return to normal operation. In this case, we must use
> oom recover to let the process run again.
>
> Signed-off-by: Jian Zhang <zhangjian210@huawei.com>
>
>  mm/memcontrol.c | 10 ++++++++++
>  1 file changed, 10 insertions(+)

1. This code is not properly isolated: memcg_oom_recover() still gets
   called in every configuration.
2. Write a separate function instead:

   #ifdef CONFIG_ASCEND_OOM
   static func() { /* some isolation is needed here so this code does
                      not run when the feature is off */ }
   #else
   static func() {}
   #endif

> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 08af3c8df6f3..e1e79339dbd2 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3154,12 +3154,22 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
>  	struct folio *folio = page_folio(page);
>  	struct obj_cgroup *objcg;
>  	unsigned int nr_pages = 1 << order;
> +#ifdef CONFIG_ASCEND_OOM
> +	struct mem_cgroup *memcg;
> +#endif
>
>  	if (!folio_memcg_kmem(folio))
>  		return;
>
>  	objcg = __folio_objcg(folio);
>  	obj_cgroup_uncharge_pages(objcg, nr_pages);
> +#ifdef CONFIG_ASCEND_OOM
> +	memcg = get_mem_cgroup_from_objcg(objcg);
> +	if (!mem_cgroup_is_root(memcg))
> +		memcg_oom_recover(memcg);
> +	css_put(&memcg->css);
> +#endif
> +
>  	folio->memcg_data = 0;
>  	obj_cgroup_put(objcg);
>  }
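Concretely, the isolation the reviewer asks for would look something like
this guarded helper in mm/memcontrol.c (a sketch; the helper name is
hypothetical):

#ifdef CONFIG_ASCEND_OOM
static void ascend_kmem_oom_recover(struct obj_cgroup *objcg)
{
	struct mem_cgroup *memcg;

	memcg = get_mem_cgroup_from_objcg(objcg);
	if (!mem_cgroup_is_root(memcg))
		memcg_oom_recover(memcg);
	css_put(&memcg->css);
}
#else
static inline void ascend_kmem_oom_recover(struct obj_cgroup *objcg) { }
#endif

__memcg_kmem_uncharge_page() would then call it unconditionally, keeping
the #ifdef out of the function body.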
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3344 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/T...