Navid Emamdoost (1): nbd_genl_status: null check for nla_nest_start
Weilong Chen (2): ACPI / APEI: Notify all ras err to driver arm64/ascend: Add new enable_oom_killer interface for oom contrl
Xu Qiang (2): irq-gic-v3: Add support to init ts core GICR irq-gic-v3-its: It can't be initialized when the GICR had been cut
Yuan Can (1): ascend: export interfaces required by ascend drivers
arch/arm64/configs/openeuler_defconfig | 1 + drivers/acpi/apei/Kconfig | 7 + drivers/acpi/apei/ghes.c | 8 +- drivers/block/nbd.c | 6 + drivers/irqchip/Kconfig | 7 + drivers/irqchip/irq-gic-v3-its.c | 254 ++++++++++++++++++++++++- drivers/irqchip/irq-gic-v3.c | 101 +++++++++- include/linux/irqchip/arm-gic-v3.h | 5 + include/linux/oom.h | 24 +++ kernel/power/autosleep.c | 1 + kernel/workqueue.c | 3 + mm/Kconfig | 10 + mm/memcontrol.c | 20 ++ mm/oom_kill.c | 58 ++++++ mm/util.c | 2 + mm/vmalloc.c | 2 + 16 files changed, 500 insertions(+), 9 deletions(-)
From: Navid Emamdoost navid.emamdoost@gmail.com
maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E CVE: CVE-2019-16089
Reference: https://lore.kernel.org/lkml/20190911164013.27364-1-navid.emamdoost@gmail.co...
---------------------------
nla_nest_start may fail and return NULL. The check is inserted, and errno is selected based on other call sites within the same source code. Update: removed extra new line. v3 Update: added release reply, thanks to Michal Kubecek for pointing out.
Signed-off-by: Navid Emamdoost navid.emamdoost@gmail.com Reviewed-by: Michal Kubecek mkubecek@suse.cz --- drivers/block/nbd.c | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 855fdf5c3b4e..b99c169890d5 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2408,6 +2408,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info) }
dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST); + if (!dev_list) { + nlmsg_free(reply); + ret = -EMSGSIZE; + goto out; + } + if (index == -1) { ret = idr_for_each(&nbd_index_idr, &status_cb, reply); if (ret) {
From: Xu Qiang xuqiang36@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E CVE: NA
------------
On the Ascend platform, the GICRs that are not managed by the OS also need to be initialized by the OS.
Signed-off-by: Xu Qiang xuqiang36@huawei.com --- drivers/irqchip/Kconfig | 7 + drivers/irqchip/irq-gic-v3-its.c | 227 ++++++++++++++++++++++++++++- drivers/irqchip/irq-gic-v3.c | 101 ++++++++++++- include/linux/irqchip/arm-gic-v3.h | 5 + 4 files changed, 332 insertions(+), 8 deletions(-)
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index f7149d0f3d45..3ba9ed9e1be1 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -159,6 +159,13 @@ config HISILICON_IRQ_MBIGEN select ARM_GIC_V3 select ARM_GIC_V3_ITS
+config ASCEND_INIT_ALL_GICR + bool "Enable init all GICR for Ascend" + depends on ASCEND_FEATURES + depends on ARM_GIC_V3 + depends on ARM_GIC_V3_ITS + default n + config IMGPDC_IRQ bool select GENERIC_IRQ_CHIP diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index a8c89df1a997..b7c5bbd209f3 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -193,6 +193,14 @@ static DEFINE_RAW_SPINLOCK(vmovp_lock);
static DEFINE_IDA(its_vpeid_ida);
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR +static bool init_all_gicr; +static int nr_gicr; +#else +#define init_all_gicr false +#define nr_gicr 0 +#endif + #define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) #define gic_data_rdist_cpu(cpu) (per_cpu_ptr(gic_rdists->rdist, cpu)) #define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) @@ -1558,6 +1566,11 @@ static __maybe_unused u32 its_read_lpi_count(struct irq_data *d, int cpu)
static void its_inc_lpi_count(struct irq_data *d, int cpu) { +#ifdef CONFIG_ASCEND_INIT_ALL_GICR + if (cpu >= nr_cpu_ids) + return; +#endif + if (irqd_affinity_is_managed(d)) atomic_inc(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed); else @@ -1566,6 +1579,11 @@ static void its_inc_lpi_count(struct irq_data *d, int cpu)
static void its_dec_lpi_count(struct irq_data *d, int cpu) { +#ifdef CONFIG_ASCEND_INIT_ALL_GICR + if (cpu >= nr_cpu_ids) + return; +#endif + if (irqd_affinity_is_managed(d)) atomic_dec(&per_cpu_ptr(&cpu_lpi_count, cpu)->managed); else @@ -1665,6 +1683,26 @@ static int its_select_cpu(struct irq_data *d, return cpu; }
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR +static int its_select_cpu_other(const struct cpumask *mask_val) +{ + int cpu; + + if (!init_all_gicr) + return -EINVAL; + + cpu = find_first_bit(cpumask_bits(mask_val), NR_CPUS); + if (cpu >= nr_gicr) + cpu = -EINVAL; + return cpu; +} +#else +static int its_select_cpu_other(const struct cpumask *mask_val) +{ + return -EINVAL; +} +#endif + static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { @@ -1686,6 +1724,9 @@ static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val, cpu = cpumask_pick_least_loaded(d, mask_val);
if (cpu < 0 || cpu >= nr_cpu_ids) + cpu = its_select_cpu_other(mask_val); + + if (cpu < 0) goto err;
/* don't set the affinity when the target cpu is same as current one */ @@ -2951,8 +2992,12 @@ static int allocate_vpe_l1_table(void) static int its_alloc_collections(struct its_node *its) { int i; + int cpu_nr = nr_cpu_ids; + + if (init_all_gicr) + cpu_nr = CONFIG_NR_CPUS;
- its->collections = kcalloc(nr_cpu_ids, sizeof(*its->collections), + its->collections = kcalloc(cpu_nr, sizeof(*its->collections), GFP_KERNEL); if (!its->collections) return -ENOMEM; @@ -3263,6 +3308,186 @@ static void its_cpu_init_collections(void) raw_spin_unlock(&its_lock); }
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR +void its_set_gicr_nr(int nr) +{ + nr_gicr = nr; +} + +static int __init its_enable_init_all_gicr(char *str) +{ + init_all_gicr = true; + return 1; +} + +__setup("init_all_gicr", its_enable_init_all_gicr); + +bool its_init_all_gicr(void) +{ + return init_all_gicr; +} + +static void its_cpu_init_lpis_others(void __iomem *rbase, int cpu) +{ + struct page *pend_page; + phys_addr_t paddr; + u64 val, tmp; + + if (!init_all_gicr) + return; + + val = readl_relaxed(rbase + GICR_CTLR); + if ((gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED) && + (val & GICR_CTLR_ENABLE_LPIS)) { + /* + * Check that we get the same property table on all + * RDs. If we don't, this is hopeless. + */ + paddr = gicr_read_propbaser(rbase + GICR_PROPBASER); + paddr &= GENMASK_ULL(51, 12); + if (WARN_ON(gic_rdists->prop_table_pa != paddr)) + add_taint(TAINT_CRAP, LOCKDEP_STILL_OK); + + paddr = gicr_read_pendbaser(rbase + GICR_PENDBASER); + paddr &= GENMASK_ULL(51, 16); + + WARN_ON(!gic_check_reserved_range(paddr, LPI_PENDBASE_SZ)); + + goto out; + } + + /* If we didn't allocate the pending table yet, do it now */ + pend_page = its_allocate_pending_table(GFP_NOWAIT); + if (!pend_page) { + pr_err("Failed to allocate PENDBASE for GICR:%p\n", rbase); + return; + } + + paddr = page_to_phys(pend_page); + pr_info("GICR:%p using LPI pending table @%pa\n", + rbase, &paddr); + + WARN_ON(gic_reserve_range(paddr, LPI_PENDBASE_SZ)); + + /* Disable LPIs */ + val = readl_relaxed(rbase + GICR_CTLR); + val &= ~GICR_CTLR_ENABLE_LPIS; + writel_relaxed(val, rbase + GICR_CTLR); + + /* + * Make sure any change to the table is observable by the GIC. 
+ */ + dsb(sy); + + /* set PROPBASE */ + val = (gic_rdists->prop_table_pa | + GICR_PROPBASER_InnerShareable | + GICR_PROPBASER_RaWaWb | + ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK)); + + gicr_write_propbaser(val, rbase + GICR_PROPBASER); + tmp = gicr_read_propbaser(rbase + GICR_PROPBASER); + + if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) { + if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must + * remove the cacheability attributes as + * well. + */ + val &= ~(GICR_PROPBASER_SHAREABILITY_MASK | + GICR_PROPBASER_CACHEABILITY_MASK); + val |= GICR_PROPBASER_nC; + gicr_write_propbaser(val, rbase + GICR_PROPBASER); + } + pr_info_once("GIC: using cache flushing for LPI property table\n"); + gic_rdists->flags |= RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING; + } + + /* set PENDBASE */ + val = (page_to_phys(pend_page) | + GICR_PENDBASER_InnerShareable | + GICR_PENDBASER_RaWaWb); + + gicr_write_pendbaser(val, rbase + GICR_PENDBASER); + tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER); + + if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must remove the + * cacheability attributes as well. + */ + val &= ~(GICR_PENDBASER_SHAREABILITY_MASK | + GICR_PENDBASER_CACHEABILITY_MASK); + val |= GICR_PENDBASER_nC; + gicr_write_pendbaser(val, rbase + GICR_PENDBASER); + } + + /* Enable LPIs */ + val = readl_relaxed(rbase + GICR_CTLR); + val |= GICR_CTLR_ENABLE_LPIS; + writel_relaxed(val, rbase + GICR_CTLR); + + /* Make sure the GIC has seen the above */ + dsb(sy); +out: + pr_info("GICv3: CPU%d: using %s LPI pending table @%pa\n", + cpu, pend_page ? 
"allocated" : "reserved", &paddr); +} + +static void its_cpu_init_collection_others(void __iomem *rbase, + phys_addr_t phys_base, int cpu) +{ + struct its_node *its; + + if (!init_all_gicr) + return; + + raw_spin_lock(&its_lock); + + list_for_each_entry(its, &its_nodes, entry) { + u64 target; + + /* + * We now have to bind each collection to its target + * redistributor. + */ + if (gic_read_typer(its->base + GITS_TYPER) & GITS_TYPER_PTA) { + /* + * This ITS wants the physical address of the + * redistributor. + */ + target = phys_base; + } else { + /* + * This ITS wants a linear CPU number. + */ + target = gic_read_typer(rbase + GICR_TYPER); + target = GICR_TYPER_CPU_NUMBER(target) << 16; + } + + /* Perform collection mapping */ + its->collections[cpu].target_address = target; + its->collections[cpu].col_id = cpu; + + its_send_mapc(its, &its->collections[cpu], 1); + its_send_invall(its, &its->collections[cpu]); + } + + raw_spin_unlock(&its_lock); +} + +int its_cpu_init_others(void __iomem *base, phys_addr_t phys_base, int cpu) +{ + if (!list_empty(&its_nodes)) { + its_cpu_init_lpis_others(base, cpu); + its_cpu_init_collection_others(base, phys_base, cpu); + } + + return 0; +} +#endif + static struct its_device *its_find_device(struct its_node *its, u32 dev_id) { struct its_device *its_dev = NULL, *tmp; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index f59ac9586b7b..59d5c736be81 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -281,17 +281,11 @@ static u64 __maybe_unused gic_read_iar(void) } #endif
-static void gic_enable_redist(bool enable) +static void __gic_enable_redist(void __iomem *rbase, bool enable) { - void __iomem *rbase; u32 count = 1000000; /* 1s! */ u32 val;
- if (gic_data.flags & FLAGS_WORKAROUND_GICR_WAKER_MSM8996) - return; - - rbase = gic_data_rdist_rd_base(); - val = readl_relaxed(rbase + GICR_WAKER); if (enable) /* Wake up this CPU redistributor */ @@ -318,6 +312,14 @@ static void gic_enable_redist(bool enable) enable ? "wakeup" : "sleep"); }
+static void gic_enable_redist(bool enable) +{ + if (gic_data.flags & FLAGS_WORKAROUND_GICR_WAKER_MSM8996) + return; + + __gic_enable_redist(gic_data_rdist_rd_base(), enable); +} + /* * Routines to disable, enable, EOI and route interrupts */ @@ -1288,6 +1290,89 @@ static void gic_cpu_init(void) gic_cpu_sys_reg_init(); }
+#ifdef CONFIG_ASCEND_INIT_ALL_GICR +static int __gic_compute_nr_gicr(struct redist_region *region, void __iomem *ptr) +{ + static int gicr_nr; + + its_set_gicr_nr(++gicr_nr); + + return 1; +} + +static void gic_compute_nr_gicr(void) +{ + gic_iterate_rdists(__gic_compute_nr_gicr); +} + +static int gic_rdist_cpu(void __iomem *ptr, unsigned int cpu) +{ + unsigned long mpidr = cpu_logical_map(cpu); + u64 typer; + u32 aff; + + /* + * Convert affinity to a 32bit value that can be matched to + * GICR_TYPER bits [63:32]. + */ + aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 | + MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | + MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | + MPIDR_AFFINITY_LEVEL(mpidr, 0)); + + typer = gic_read_typer(ptr + GICR_TYPER); + if ((typer >> 32) == aff) + return 0; + + return 1; +} + +static int gic_rdist_cpus(void __iomem *ptr) +{ + unsigned int i; + + for (i = 0; i < nr_cpu_ids; i++) { + if (gic_rdist_cpu(ptr, i) == 0) + return 0; + } + + return 1; +} + +static int gic_cpu_init_other(struct redist_region *region, void __iomem *ptr) +{ + u64 offset; + phys_addr_t phys_base; + static int cpu; + + if (cpu == 0) + cpu = nr_cpu_ids; + + if (gic_rdist_cpus(ptr) == 1) { + offset = ptr - region->redist_base; + phys_base = region->phys_base + offset; + __gic_enable_redist(ptr, true); + if (gic_dist_supports_lpis()) + its_cpu_init_others(ptr, phys_base, cpu); + cpu++; + } + + return 1; +} + +static void gic_cpu_init_others(void) +{ + if (!its_init_all_gicr()) + return; + + gic_iterate_rdists(gic_cpu_init_other); +} +#else +static inline void gic_compute_nr_gicr(void) {} + +static inline void gic_cpu_init_others(void) {} +#endif + #ifdef CONFIG_SMP
#define MPIDR_TO_SGI_RS(mpidr) (MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT) @@ -2052,6 +2137,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, gic_data.rdists.has_direct_lpi = true; gic_data.rdists.has_vpend_valid_dirty = true; } + gic_compute_nr_gicr();
if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) { err = -ENOMEM; @@ -2087,6 +2173,7 @@ static int __init gic_init_bases(phys_addr_t dist_phys_base, }
gic_enable_nmi_support(); + gic_cpu_init_others();
return 0;
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 728691365464..33e098c70952 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -637,6 +637,11 @@ struct irq_domain; struct fwnode_handle; int __init its_lpi_memreserve_init(void); int its_cpu_init(void); +#ifdef CONFIG_ASCEND_INIT_ALL_GICR +void its_set_gicr_nr(int nr); +bool its_init_all_gicr(void); +int its_cpu_init_others(void __iomem *base, phys_addr_t phys_base, int idx); +#endif int its_init(struct fwnode_handle *handle, struct rdists *rdists, struct irq_domain *domain); int mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent);
From: Xu Qiang xuqiang36@huawei.com
ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E CVE: NA
------------
On FPGA, we need to check whether the GICR has been cut; if it has, it cannot be initialized.
Signed-off-by: Xu Qiang xuqiang36@huawei.com --- drivers/irqchip/irq-gic-v3-its.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+)
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index b7c5bbd209f3..e45dc125723e 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -3438,6 +3438,7 @@ static void its_cpu_init_lpis_others(void __iomem *rbase, int cpu) static void its_cpu_init_collection_others(void __iomem *rbase, phys_addr_t phys_base, int cpu) { + u32 count; struct its_node *its;
if (!init_all_gicr) @@ -3466,6 +3467,32 @@ static void its_cpu_init_collection_others(void __iomem *rbase, target = GICR_TYPER_CPU_NUMBER(target) << 16; }
+ dsb(sy); + + /* In FPGA, We need to check if the gicr has been cut, + * and if it is, it can't be initialized + */ + count = 2000; + while (1) { + if (readl_relaxed(rbase + GICR_SYNCR) == 0) + break; + + count--; + if (!count) { + pr_err("this gicr does not exist, or it's abnormal:%pK\n", + &phys_base); + break; + } + cpu_relax(); + udelay(1); + } + + if (count == 0) + break; + + pr_info("its init other collection table, ITS:%pK, GICR:%pK, coreId:%u\n", + &its->phys_base, &phys_base, cpu); + /* Perform collection mapping */ its->collections[cpu].target_address = target; its->collections[cpu].col_id = cpu;
From: Weilong Chen chenweilong@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E CVE: NA
-------------------------------------------------
Customization: deliver all types of errors to the driver, because the driver needs to process the errors in process context.
Signed-off-by: Weilong Chen chenweilong@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + drivers/acpi/apei/Kconfig | 7 +++++++ drivers/acpi/apei/ghes.c | 8 +++++++- 3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index d769a82d9a40..7d3e69dcc71f 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -653,6 +653,7 @@ CONFIG_ACPI_HMAT=y CONFIG_HAVE_ACPI_APEI=y CONFIG_ACPI_APEI=y CONFIG_ACPI_APEI_GHES=y +CONFIG_ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR=y CONFIG_ACPI_APEI_PCIEAER=y CONFIG_ACPI_APEI_SEA=y CONFIG_ACPI_APEI_MEMORY_FAILURE=y diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 6b18f8bc7be3..1dce3ad7c9bd 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -33,6 +33,13 @@ config ACPI_APEI_GHES by firmware to produce more valuable hardware error information for Linux.
+config ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR + bool "Notify all ras err to driver" + depends on ARM64 && ACPI_APEI_GHES + default n + help + Deliver all types of error to driver. + config ACPI_APEI_PCIEAER bool "APEI PCIe AER logging/recovering support" depends on ACPI_APEI && PCIEAER diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 63ad0541db38..bf1b9252a8da 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -692,12 +692,18 @@ static bool ghes_do_proc(struct ghes *ghes, queued = ghes_handle_arm_hw_error(gdata, sev); } else { void *err = acpi_hest_get_payload(gdata); - +#ifndef CONFIG_ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR ghes_defer_non_standard_event(gdata, sev); +#endif log_non_standard_event(sec_type, fru_id, fru_text, sec_sev, err, gdata->error_data_length); } + +#ifdef CONFIG_ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR + /* Customization deliver all types error to driver. */ + ghes_defer_non_standard_event(gdata, sev); +#endif }
return queued;
From: Weilong Chen chenweilong@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E CVE: NA
-------------------------------------------------
Support disabling the oom-killer and reporting oom events to bbox. vm.enable_oom_killer: 0: disable oom killer; 1: enable oom killer (default, compatible with mainline)
Signed-off-by: Weilong Chen chenweilong@huawei.com --- include/linux/oom.h | 24 +++++++++++++++++++ mm/Kconfig | 10 ++++++++ mm/memcontrol.c | 20 ++++++++++++++++ mm/oom_kill.c | 57 +++++++++++++++++++++++++++++++++++++++++++++ mm/util.c | 2 ++ 5 files changed, 113 insertions(+)
diff --git a/include/linux/oom.h b/include/linux/oom.h index 7d0c9c48a0c5..b9210e272651 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -112,4 +112,28 @@ extern void oom_killer_enable(void);
extern struct task_struct *find_lock_task_mm(struct task_struct *p);
+#define OOM_TYPE_NOMEM 0 +#define OOM_TYPE_OVERCOMMIT 1 +#define OOM_TYPE_CGROUP 2 + +#ifdef CONFIG_ASCEND_OOM +int register_hisi_oom_notifier(struct notifier_block *nb); +int unregister_hisi_oom_notifier(struct notifier_block *nb); +int oom_type_notifier_call(unsigned int type, struct oom_control *oc); +#else +static inline int register_hisi_oom_notifier(struct notifier_block *nb) +{ + return -EINVAL; +} + +static inline int unregister_hisi_oom_notifier(struct notifier_block *nb) +{ + return -EINVAL; +} + +static inline int oom_type_notifier_call(unsigned int type, struct oom_control *oc) +{ + return -EINVAL; +} +#endif #endif /* _INCLUDE_LINUX_OOM_H */ diff --git a/mm/Kconfig b/mm/Kconfig index 0f68e5bbeb89..48f4aeeaeff9 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1302,6 +1302,16 @@ config SHARE_POOL in kernel and user level, which is only enabled for ascend platform. To enable this feature, enable_ascend_share_pool bootarg is needed.
+config ASCEND_OOM + bool "Enable support for disable oom killer" + default n + help + In some cases we hopes that the oom will not kill the process when it occurs, + be able to notify the black box to report the event, and be able to trigger + the panic to locate the problem. + vm.enable_oom_killer: + 0: disable oom killer + 1: enable oom killer (default,compatible with mainline)
source "mm/damon/Kconfig"
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8a881ab21f6c..fec6f37e61da 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1955,6 +1955,7 @@ static bool mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order) current->memcg_in_oom = memcg; current->memcg_oom_gfp_mask = mask; current->memcg_oom_order = order; + oom_type_notifier_call(OOM_TYPE_CGROUP, NULL); } return false; } @@ -2019,6 +2020,8 @@ bool mem_cgroup_oom_synchronize(bool handle) if (locked) mem_cgroup_oom_notify(memcg);
+ oom_type_notifier_call(OOM_TYPE_CGROUP, NULL); + schedule(); mem_cgroup_unmark_under_oom(memcg); finish_wait(&memcg_oom_waitq, &owait.wait); @@ -3140,6 +3143,20 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order) return ret; }
+#ifdef CONFIG_ASCEND_OOM +void hisi_oom_recover(struct obj_cgroup *objcg) +{ + struct mem_cgroup *memcg; + + memcg = get_mem_cgroup_from_objcg(objcg); + if (!mem_cgroup_is_root(memcg)) + memcg_oom_recover(memcg); + css_put(&memcg->css); +} +#else +static inline void hisi_oom_recover(struct obj_cgroup *objcg) { } +#endif + /** * __memcg_kmem_uncharge_page: uncharge a kmem page * @page: page to uncharge @@ -3156,6 +3173,9 @@ void __memcg_kmem_uncharge_page(struct page *page, int order)
objcg = __folio_objcg(folio); obj_cgroup_uncharge_pages(objcg, nr_pages); + + hisi_oom_recover(objcg); + folio->memcg_data = 0; obj_cgroup_put(objcg); } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 44bde56ecd02..607056d49595 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -55,6 +55,7 @@ static int sysctl_panic_on_oom; static int sysctl_oom_kill_allocating_task; static int sysctl_oom_dump_tasks = 1; +static int sysctl_enable_oom_killer = 1;
/* * Serializes oom killer invocations (out_of_memory()) from all contexts to @@ -724,6 +725,17 @@ static struct ctl_table vm_oom_kill_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_ASCEND_OOM + { + .procname = "enable_oom_killer", + .data = &sysctl_enable_oom_killer, + .maxlen = sizeof(sysctl_enable_oom_killer), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif {} }; #endif @@ -1073,6 +1085,7 @@ static void check_panic_on_oom(struct oom_control *oc) if (is_sysrq_oom(oc)) return; dump_header(oc, NULL); + oom_type_notifier_call(0, oc); panic("Out of memory: %s panic_on_oom is enabled\n", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } @@ -1091,6 +1104,45 @@ int unregister_oom_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_oom_notifier);
+#ifdef CONFIG_ASCEND_OOM +static BLOCKING_NOTIFIER_HEAD(oom_type_notify_list); + +int register_hisi_oom_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&oom_type_notify_list, nb); +} +EXPORT_SYMBOL_GPL(register_hisi_oom_notifier); + +int unregister_hisi_oom_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&oom_type_notify_list, nb); +} +EXPORT_SYMBOL_GPL(unregister_hisi_oom_notifier); + +int oom_type_notifier_call(unsigned int type, struct oom_control *oc) +{ + struct oom_control oc_tmp = { 0 }; + static unsigned long caller_jiffies; + + if (sysctl_enable_oom_killer) + return -EINVAL; + + if (oc) + type = is_memcg_oom(oc) ? OOM_TYPE_CGROUP : OOM_TYPE_NOMEM; + else + oc = &oc_tmp; + + if (printk_timed_ratelimit(&caller_jiffies, 10000)) { + pr_err("OOM_NOTIFIER: oom type %u\n", type); + dump_stack(); + show_mem(); + dump_tasks(oc); + } + + return blocking_notifier_call_chain(&oom_type_notify_list, type, NULL); +} +#endif + /** * out_of_memory - kill the "best" process when we run out of memory * @oc: pointer to struct oom_control @@ -1107,6 +1159,11 @@ bool out_of_memory(struct oom_control *oc) if (oom_killer_disabled) return false;
+ if (!sysctl_enable_oom_killer) { + oom_type_notifier_call(0, oc); + return false; + } + if (!is_memcg_oom(oc)) { blocking_notifier_call_chain(&oom_notify_list, 0, &freed); if (freed > 0 && !is_sysrq_oom(oc)) diff --git a/mm/util.c b/mm/util.c index 90250cbc82fe..e41ac8a58eb5 100644 --- a/mm/util.c +++ b/mm/util.c @@ -26,6 +26,7 @@ #include <linux/share_pool.h>
#include <linux/uaccess.h> +#include <linux/oom.h>
#include "internal.h" #include "swap.h" @@ -981,6 +982,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) error: pr_warn_ratelimited("%s: pid: %d, comm: %s, not enough memory for the allocation\n", __func__, current->pid, current->comm); + oom_type_notifier_call(OOM_TYPE_OVERCOMMIT, NULL); vm_unacct_memory(pages);
return -ENOMEM;
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8NC0E CVE: NA
--------------------------------
Export oom_type_notifier_call and __get_vm_area_caller. Export pm_autosleep_set_state and __vmalloc_node_range. Export alloc_workqueue_attrs, free_workqueue_attrs and apply_workqueue_attrs.
Signed-off-by: Yuan Can yuancan@huawei.com --- kernel/power/autosleep.c | 1 + kernel/workqueue.c | 3 +++ mm/oom_kill.c | 1 + mm/vmalloc.c | 2 ++ 4 files changed, 7 insertions(+)
diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index b29c8aca7486..80ba474daa40 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -113,6 +113,7 @@ int pm_autosleep_set_state(suspend_state_t state) mutex_unlock(&autosleep_lock); return 0; } +EXPORT_SYMBOL_GPL(pm_autosleep_set_state);
int __init pm_autosleep_init(void) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0f682da96e1c..3eabf97c4e9a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3713,6 +3713,7 @@ void free_workqueue_attrs(struct workqueue_attrs *attrs) kfree(attrs); } } +EXPORT_SYMBOL_GPL(free_workqueue_attrs);
/** * alloc_workqueue_attrs - allocate a workqueue_attrs @@ -3741,6 +3742,7 @@ struct workqueue_attrs *alloc_workqueue_attrs(void) free_workqueue_attrs(attrs); return NULL; } +EXPORT_SYMBOL_GPL(alloc_workqueue_attrs);
static void copy_workqueue_attrs(struct workqueue_attrs *to, const struct workqueue_attrs *from) @@ -4482,6 +4484,7 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
return ret; } +EXPORT_SYMBOL_GPL(apply_workqueue_attrs);
/** * wq_update_pod - update pod affinity of a wq for CPU hot[un]plug diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 607056d49595..224c135978dd 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -1141,6 +1141,7 @@ int oom_type_notifier_call(unsigned int type, struct oom_control *oc)
return blocking_notifier_call_chain(&oom_type_notify_list, type, NULL); } +EXPORT_SYMBOL_GPL(oom_type_notifier_call); #endif
/** diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 30665fb33589..719539b32488 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2622,6 +2622,7 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end, NUMA_NO_NODE, GFP_KERNEL, caller); } +EXPORT_SYMBOL(__get_vm_area_caller);
/** * get_vm_area - reserve a contiguous kernel virtual area @@ -3362,6 +3363,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
return NULL; } +EXPORT_SYMBOL_GPL(__vmalloc_node_range);
/** * __vmalloc_node - allocate virtually contiguous memory
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3364 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/E...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3364 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/E...