- Kernel - mailweb.openeuler.org

[openEuler-1.0-LTS] media: v4l2-mem2mem: fix a memleak in v4l2_m2m_register_entity
by Yipeng Zou 07 May '24

07 May '24

From: Zhipeng Lu <alexious(a)zju.edu.cn> stable inclusion from stable-v5.10.214 commit afd2a82fe300032f63f8be5d6cd6981e75f8bbf2 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9L9JK CVE: CVE-2024-27077 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id… -------------------------------- The entity->name (i.e. name) is allocated in v4l2_m2m_register_entity but isn't freed in its following error-handling paths. This patch adds such deallocation to prevent memleak of entity->name. Fixes: be2fff656322 ("media: add helpers for memory-to-memory media controller") Signed-off-by: Zhipeng Lu <alexious(a)zju.edu.cn> Signed-off-by: Hans Verkuil <hverkuil-cisco(a)xs4all.nl> Signed-off-by: Sasha Levin <sashal(a)kernel.org> Signed-off-by: Yipeng Zou <zouyipeng(a)huawei.com> --- drivers/media/v4l2-core/v4l2-mem2mem.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index 692b0597a35c..0a9be4c5f194 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -773,11 +773,17 @@ static int v4l2_m2m_register_entity(struct media_device *mdev, entity->function = function; ret = media_entity_pads_init(entity, num_pads, pads); - if (ret) + if (ret) { + kfree(entity->name); + entity->name = NULL; return ret; + } ret = media_device_register_entity(mdev, entity); - if (ret) + if (ret) { + kfree(entity->name); + entity->name = NULL; return ret; + } return 0; } -- 2.34.1

1 0

[PATCH OLK-6.6 0/2] Introduce CONFIG_ARCH_CUSTOM_NUMA_DISTANCE
by Hui Tang 07 May '24

07 May '24

This patchset allows an arch to adjust node_reclaim_distance, as follows: Hui Tang (2): arm64/numa: Support node_reclaim_distance adjust for arch config: enable CONFIG_ARCH_CUSTOM_NUMA_DISTANCE for arm64 arch/arm64/Kconfig | 13 ++++ arch/arm64/configs/openeuler_defconfig | 1 + drivers/base/arch_numa.c | 85 ++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) -- 2.34.1

2 3

[openeuler:openEuler-1.0-LTS] BUILD SUCCESS 48d720404c6ef5c0c29a6019fa34590b3632cf4c
by kernel test robot 07 May '24

07 May '24

tree/branch: https://gitee.com/openeuler/kernel.git openEuler-1.0-LTS branch HEAD: 48d720404c6ef5c0c29a6019fa34590b3632cf4c !6705 quota: fix CVE-2024-26878 Warning ids grouped by kconfigs: gcc_recent_errors |-- x86_64-buildonly-randconfig-002-20240506 | |-- fs-proc-array.c:warning:gtime-may-be-used-uninitialized-in-this-function | |-- fs-proc-array.c:warning:maj_flt-may-be-used-uninitialized-in-this-function | `-- fs-proc-array.c:warning:min_flt-may-be-used-uninitialized-in-this-function |-- x86_64-buildonly-randconfig-006-20240506 | |-- fs-proc-array.c:warning:gtime-may-be-used-uninitialized | |-- fs-proc-array.c:warning:maj_flt-may-be-used-uninitialized | `-- fs-proc-array.c:warning:min_flt-may-be-used-uninitialized |-- x86_64-defconfig | |-- include-linux-list.h:warning:array-subscript-pfo_ret__-is-outside-array-bounds-of-struct-plist_node | |-- include-linux-plist.h:warning:array-subscript-pfo_ret__-is-outside-array-bounds-of-struct-plist_node | `-- mm-swapfile.c:warning:array-subscript-pfo_ret__-is-outside-array-bounds-of-struct-plist_node |-- x86_64-randconfig-014-20240506 | |-- include-linux-compiler.h:warning:array-subscript-index-is-outside-array-bounds-of-u32-aka-unsigned-int | `-- include-linux-compiler.h:warning:array-subscript-unknown-is-outside-array-bounds-of-const-u32-aka-const-unsigned-int |-- x86_64-randconfig-071-20240506 | |-- include-linux-compiler.h:warning:array-subscript-index-is-outside-array-bounds-of-u32-aka-unsigned-int | `-- include-linux-compiler.h:warning:array-subscript-unknown-is-outside-array-bounds-of-const-u32-aka-const-unsigned-int |-- x86_64-randconfig-073-20240506 | |-- include-linux-compiler.h:warning:array-subscript-index-is-outside-array-bounds-of-u32-aka-unsigned-int | |-- include-linux-compiler.h:warning:array-subscript-unknown-is-outside-array-bounds-of-const-u32-aka-const-unsigned-int | |-- include-linux-list.h:warning:array-subscript-pfo_ret__-is-outside-array-bounds-of-struct-plist_node | |-- 
include-linux-plist.h:warning:array-subscript-pfo_ret__-is-outside-array-bounds-of-struct-plist_node | `-- mm-swapfile.c:warning:array-subscript-pfo_ret__-is-outside-array-bounds-of-struct-plist_node |-- x86_64-randconfig-104-20240506 | |-- include-linux-compiler.h:warning:array-subscript-index-is-outside-array-bounds-of-u32-aka-unsigned-int | `-- include-linux-compiler.h:warning:array-subscript-unknown-is-outside-array-bounds-of-const-u32-aka-const-unsigned-int |-- x86_64-randconfig-121-20240507 | `-- net-ipv4-arp.c:sparse:sparse:incompatible-types-in-comparison-expression-(different-type-sizes): `-- x86_64-randconfig-123-20240507 |-- include-linux-compiler.h:warning:array-subscript-index-is-outside-array-bounds-of-u32-aka-unsigned-int |-- include-linux-compiler.h:warning:array-subscript-unknown-is-outside-array-bounds-of-const-u32-aka-const-unsigned-int |-- include-linux-list.h:warning:array-subscript-pfo_ret__-is-outside-array-bounds-of-struct-plist_node |-- include-linux-plist.h:warning:array-subscript-pfo_ret__-is-outside-array-bounds-of-struct-plist_node |-- mm-swapfile.c:warning:array-subscript-pfo_ret__-is-outside-array-bounds-of-struct-plist_node `-- net-ipv4-arp.c:sparse:sparse:incompatible-types-in-comparison-expression-(different-type-sizes): clang_recent_errors |-- x86_64-allyesconfig | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-001-20240506 | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-002-20240506 | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-003-20240506 | `-- 
net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-004-20240506 | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-005-20240506 | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-006-20240506 | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-072-20240506 | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-074-20240506 | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-076-20240506 | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-101-20240506 | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) |-- x86_64-randconfig-122-20240507 | |-- net-ipv4-arp.c:sparse:sparse:incompatible-types-in-comparison-expression-(different-type-sizes): | `-- net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) `-- x86_64-rhel-8.3-rust `-- 
net-ipv4-arp.c:warning:comparison-of-distinct-pointer-types-(-typeof-(dev-addr_len)-(aka-unsigned-char-)-and-typeof-(sizeof-(r-arp_ha.sa_data))-(aka-unsigned-long-)) elapsed time: 721m configs tested: 35 configs skipped: 148 The following configs have been built successfully. More configs may be tested in the coming days. tested configs: arm64 allmodconfig gcc arm64 allnoconfig gcc arm64 defconfig gcc arm64 randconfig-001-20240506 gcc arm64 randconfig-002-20240506 gcc arm64 randconfig-003-20240506 gcc arm64 randconfig-004-20240506 gcc x86_64 allnoconfig clang x86_64 allyesconfig clang x86_64 buildonly-randconfig-001-20240506 gcc x86_64 buildonly-randconfig-002-20240506 gcc x86_64 buildonly-randconfig-003-20240506 gcc x86_64 buildonly-randconfig-004-20240506 clang x86_64 buildonly-randconfig-005-20240506 clang x86_64 buildonly-randconfig-006-20240506 gcc x86_64 defconfig gcc x86_64 randconfig-001-20240506 clang x86_64 randconfig-002-20240506 clang x86_64 randconfig-003-20240506 clang x86_64 randconfig-004-20240506 clang x86_64 randconfig-005-20240506 clang x86_64 randconfig-006-20240506 clang x86_64 randconfig-011-20240506 gcc x86_64 randconfig-012-20240506 gcc x86_64 randconfig-013-20240506 gcc x86_64 randconfig-014-20240506 gcc x86_64 randconfig-015-20240506 gcc x86_64 randconfig-016-20240506 gcc x86_64 randconfig-071-20240506 gcc x86_64 randconfig-072-20240506 clang x86_64 randconfig-073-20240506 gcc x86_64 randconfig-074-20240506 clang x86_64 randconfig-075-20240506 gcc x86_64 randconfig-076-20240506 clang x86_64 rhel-8.3-rust clang -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki

1 0

[PATCH OLK-6.6] sched/fair: Optimize performance by simplifying cpu_util_without()
by Li Zetao 06 May '24

06 May '24

hulk inclusion category: performance bugzilla: https://gitee.com/openeuler/kernel/issues/I9MSNE?from=project-issue CVE: NA -------------------------------- Considering that the high-frequency function cpu_util_without() is only called when a task is woken up or created for the first time, performance in this scenario can be optimized by simplifying the function. Signed-off-by: Zhang Qiao <zhangqiao22(a)huawei.com> Signed-off-by: Li Zetao <lizetao1(a)huawei.com> --- kernel/sched/fair.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7b0cb2f090da..010dbf2047e5 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8530,13 +8530,23 @@ unsigned long cpu_util_cfs_boost(int cpu) * utilization of the specified task, whenever the task is currently * contributing to the CPU utilization. */ -static unsigned long cpu_util_without(int cpu, struct task_struct *p) +static inline unsigned long cpu_util_without(int cpu, struct task_struct *p) { - /* Task has no contribution or is new */ - if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time)) - p = NULL; + struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs; + unsigned long util = READ_ONCE(cfs_rq->avg.util_avg); + /* + * If @dst_cpu is -1 or @p migrates from @cpu to @dst_cpu remove its + * contribution. If @p migrates from another CPU to @cpu add its + * contribution. In all the other cases @cpu is not impacted by the + * migration so its util_avg is already correct. + */ + if (sched_feat(UTIL_EST)) { + unsigned long util_est; + util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued); + util = max(util, util_est); + } - return cpu_util(cpu, p, -1, 0); + return min(util, capacity_orig_of(cpu)); } /* -- 2.34.1

2 1

[PATCH OLK-6.6 v3 0/2] block: fix discard ioctl
by Li Nan 06 May '24

06 May '24

Fix patch format. Li Nan (2): block: fix overflow in blk_ioctl_discard() block: check io size before submit discard block/blk-lib.c | 8 ++++++++ block/ioctl.c | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) -- 2.39.2

2 3

[PATCH openEuler-1.0-LTS] s390/dasd: fix Oops in dasd_alias_get_start_dev due to missing pavgroup
by Li Zetao 06 May '24

06 May '24

From: Stefan Haberland <sth(a)linux.ibm.com> stable inclusion from stable-v4.19.260 commit aaba5ff2742043705bc4c02fd0b2b246e2e16da1 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9KHGT CVE: CVE-2022-48636 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id… -------------------------------- commit db7ba07108a48c0f95b74fabbfd5d63e924f992d upstream. Fix Oops in dasd_alias_get_start_dev() function caused by the pavgroup pointer being NULL. The pavgroup pointer is checked on the entrance of the function but without the lcu->lock being held. Therefore there is a race window between dasd_alias_get_start_dev() and _lcu_update() which sets pavgroup to NULL with the lcu->lock held. Fix by checking the pavgroup pointer with lcu->lock held. Cc: <stable(a)vger.kernel.org> # 2.6.25+ Fixes: 8e09f21574ea ("[S390] dasd: add hyper PAV support to DASD device driver, part 1") Signed-off-by: Stefan Haberland <sth(a)linux.ibm.com> Reviewed-by: Jan Hoeppner <hoeppner(a)linux.ibm.com> Link: https://lore.kernel.org/r/20220919154931.4123002-2-sth@linux.ibm.com Signed-off-by: Jens Axboe <axboe(a)kernel.dk> Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Signed-off-by: Li Zetao <lizetao1(a)huawei.com> --- drivers/s390/block/dasd_alias.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 99f86612f775..88530a4952a9 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -657,12 +657,12 @@ int dasd_alias_remove_device(struct dasd_device *device) struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) { struct dasd_eckd_private *alias_priv, *private = base_device->private; - struct alias_pav_group *group = private->pavgroup; struct alias_lcu *lcu = private->lcu; struct dasd_device *alias_device; + struct alias_pav_group *group; unsigned long flags; - if (!group || !lcu) + 
if (!lcu) return NULL; if (lcu->pav == NO_PAV || lcu->flags & (NEED_UAC_UPDATE | UPDATE_PENDING)) @@ -679,6 +679,11 @@ struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) } spin_lock_irqsave(&lcu->lock, flags); + group = private->pavgroup; + if (!group) { + spin_unlock_irqrestore(&lcu->lock, flags); + return NULL; + } alias_device = group->next; if (!alias_device) { if (list_empty(&group->aliaslist)) { -- 2.34.1

2 3

[PATCH OLK-6.6 v2 0/2] block: fix discard ioctl
by Li Nan 06 May '24

06 May '24

Li Nan (2): block: fix overflow in blk_ioctl_discard() block: check io size before submit discard block/blk-lib.c | 8 ++++++++ block/ioctl.c | 5 +++-- 2 files changed, 11 insertions(+), 2 deletions(-) -- 2.39.2

2 3

[PATCH OLK-5.10] mm: memcg: fix stale protection of reclaim target memcg
by Cai Xinchen 06 May '24

06 May '24

From: Yosry Ahmed <yosryahmed(a)google.com> mainline inclusion from mainline-v6.2-rc1 commit adb8213014b25c7f1d75d5b219becaadcd695efb category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9MD18 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- Patch series "mm: memcg: fix protection of reclaim target memcg", v3. This series fixes a bug in calculating the protection of the reclaim target memcg where we end up using stale effective protection values from the last reclaim operation, instead of completely ignoring the protection of the reclaim target as intended. More detailed explanation and examples in patch 1, which includes the fix. Patches 2 & 3 introduce a selftest case that catches the bug. This patch (of 3): When we are doing memcg reclaim, the intended behavior is that we ignore any protection (memory.min, memory.low) of the target memcg (but not its children). Ever since the patch pointed to by the "Fixes" tag, we actually read a stale value for the target memcg protection when deciding whether to skip the memcg or not because it is protected. If the stale value happens to be high enough, we don't reclaim from the target memcg. Essentially, in some cases we may falsely skip reclaiming from the target memcg of reclaim because we read a stale protection value from last time we reclaimed from it. During reclaim, mem_cgroup_calculate_protection() is used to determine the effective protection (emin and elow) values of a memcg. The protection of the reclaim target is ignored, but we cannot set their effective protection to 0 due to a limitation of the current implementation (see comment in mem_cgroup_protection()). Instead, we leave their effective protection values unchanged, and later ignore it in mem_cgroup_protection(). 
However, mem_cgroup_protection() is called later in shrink_lruvec()->get_scan_count(), which is after the mem_cgroup_below_{min/low}() checks in shrink_node_memcgs(). As a result, the stale effective protection values of the target memcg may lead us to skip reclaiming from the target memcg entirely, before calling shrink_lruvec(). This can be even worse with recursive protection, where the stale target memcg protection can be higher than its standalone protection. See two examples below (a similar version of example (a) is added to test_memcontrol in a later patch). (a) A simple example with proactive reclaim is as follows. Consider the following hierarchy: ROOT | A | B (memory.min = 10M) Consider the following scenario: - B has memory.current = 10M. - The system undergoes global reclaim (or memcg reclaim in A). - In shrink_node_memcgs(): - mem_cgroup_calculate_protection() calculates the effective min (emin) of B as 10M. - mem_cgroup_below_min() returns true for B, we do not reclaim from B. - Now if we want to reclaim 5M from B using proactive reclaim (memory.reclaim), we should be able to, as the protection of the target memcg should be ignored. - In shrink_node_memcgs(): - mem_cgroup_calculate_protection() immediately returns for B without doing anything, as B is the target memcg, relying on mem_cgroup_protection() to ignore B's stale effective min (still 10M). - mem_cgroup_below_min() reads the stale effective min for B and we skip it instead of ignoring its protection as intended, as we never reach mem_cgroup_protection(). (b) An more complex example with recursive protection is as follows. Consider the following hierarchy with memory_recursiveprot: ROOT | A (memory.min = 50M) | B (memory.min = 10M, memory.high = 40M) Consider the following scenario: - B has memory.current = 35M. - The system undergoes global reclaim (target memcg is NULL). - B will have an effective min of 50M (all of A's unclaimed protection). - B will not be reclaimed from. 
- Now allocate 10M more memory in B, pushing it above its high limit. - The system undergoes memcg reclaim from B (target memcg is B). - Like example (a), we do nothing in mem_cgroup_calculate_protection(), then call mem_cgroup_below_min(), which will read the stale effective min for B (50M) and skip it. In this case, it's even worse because we are not just considering B's standalone protection (10M), but we are reading a much higher stale protection (50M) which will cause us to not reclaim from B at all. This is an artifact of commit 45c7f7e1ef17 ("mm, memcg: decouple e{low,min} state mutations from protection checks") which made mem_cgroup_calculate_protection() only change the state without returning any value. Before that commit, we used to return MEMCG_PROT_NONE for the target memcg, which would cause us to skip the mem_cgroup_below_{min/low}() checks. After that commit we do not return anything and we end up checking the min & low effective protections for the target memcg, which are stale. Update mem_cgroup_supports_protection() to also check if we are reclaiming from the target, and rename it to mem_cgroup_unprotected() (now returns true if we should not protect the memcg, much simpler logic). 
Link: https://lkml.kernel.org/r/20221202031512.1365483-1-yosryahmed@google.com Link: https://lkml.kernel.org/r/20221202031512.1365483-2-yosryahmed@google.com Fixes: 45c7f7e1ef17 ("mm, memcg: decouple e{low,min} state mutations from protection checks") Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com> Reviewed-by: Roman Gushchin <roman.gushchin(a)linux.dev> Cc: Chris Down <chris(a)chrisdown.name> Cc: David Rientjes <rientjes(a)google.com> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Muchun Song <songmuchun(a)bytedance.com> Cc: Shakeel Butt <shakeelb(a)google.com> Cc: Tejun Heo <tj(a)kernel.org> Cc: Vasily Averin <vasily.averin(a)linux.dev> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Yu Zhao <yuzhao(a)google.com> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> Conflicts: mm/vmscan.c Signed-off-by: Cai Xinchen <caixinchen1(a)huawei.com> --- include/linux/memcontrol.h | 31 +++++++++++++++++++++---------- mm/vmscan.c | 4 ++-- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 24794b56eaaf..287c54141a90 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -769,28 +769,32 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root, void mem_cgroup_calculate_protection(struct mem_cgroup *root, struct mem_cgroup *memcg); -static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg) +static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, + struct mem_cgroup *memcg) { /* * The root memcg doesn't account charges, and doesn't support - * protection. + * protection. 
The target memcg's protection is ignored, see + * mem_cgroup_calculate_protection() and mem_cgroup_protection() */ - return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg); - + return mem_cgroup_disabled() || mem_cgroup_is_root(memcg) || + memcg == target; } -static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_low(struct mem_cgroup *target, + struct mem_cgroup *memcg) { - if (!mem_cgroup_supports_protection(memcg)) + if (mem_cgroup_unprotected(target, memcg)) return false; return READ_ONCE(memcg->memory.elow) >= page_counter_read(&memcg->memory); } -static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_min(struct mem_cgroup *target, + struct mem_cgroup *memcg) { - if (!mem_cgroup_supports_protection(memcg)) + if (mem_cgroup_unprotected(target, memcg)) return false; return READ_ONCE(memcg->memory.emin) >= @@ -1378,12 +1382,19 @@ static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root, { } -static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg) +static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, + struct mem_cgroup *memcg) +{ + return true; +} +static inline bool mem_cgroup_below_low(struct mem_cgroup *target, + struct mem_cgroup *memcg) { return false; } -static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_min(struct mem_cgroup *target, + struct mem_cgroup *memcg) { return false; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 3d383c7126e3..044bf496885b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2860,13 +2860,13 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) mem_cgroup_calculate_protection(target_memcg, memcg); - if (mem_cgroup_below_min(memcg)) { + if (mem_cgroup_below_min(target_memcg, memcg)) { /* * Hard protection. * If there is no reclaimable memory, OOM. 
*/ continue; - } else if (mem_cgroup_below_low(memcg)) { + } else if (mem_cgroup_below_low(target_memcg, memcg)) { /* * Soft protection. * Respect the protection only as long as -- 2.34.1

2 1

[PATCH OLK-5.10] drivers: base: transport_class: fix possible memory leak
by dinglongwei 06 May '24

06 May '24

From: Yang Yingliang <yangyingliang(a)huawei.com> mainline inclusion from mainline-v6.3-rc1 commit a86367803838b369fe5486ac18771d14723c258c category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9MMNG Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- Currently, when some drivers (like iscsi) call transport_register_device() and it fails, they don't call transport_destroy_device() to release the memory allocated in transport_setup_device(), because they don't know what was done; releasing the resource should be internal to the register function. So fix this leak by calling the destroy function inside the register function. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com> Link: https://lore.kernel.org/r/20221110102307.3492557-1-yangyingliang@huawei.com Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org> Signed-off-by: dinglongwei <dinglongwei1(a)huawei.com> --- include/linux/transport_class.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/transport_class.h b/include/linux/transport_class.h index 63076fb835e3..2efc271a96fa 100644 --- a/include/linux/transport_class.h +++ b/include/linux/transport_class.h @@ -70,8 +70,14 @@ void transport_destroy_device(struct device *); static inline int transport_register_device(struct device *dev) { + int ret; + transport_setup_device(dev); - return transport_add_device(dev); + ret = transport_add_device(dev); + if (ret) + transport_destroy_device(dev); + + return ret; } static inline void -- 2.17.1

2 1

[PATCH] mm: memcg: fix stale protection of reclaim target memcg
by Cai Xinchen 06 May '24

06 May '24

From: Yosry Ahmed <yosryahmed(a)google.com> mainline inclusion from mainline-v6.2-rc1 commit adb8213014b25c7f1d75d5b219becaadcd695efb category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I9MD18 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?… -------------------------------- Patch series "mm: memcg: fix protection of reclaim target memcg", v3. This series fixes a bug in calculating the protection of the reclaim target memcg where we end up using stale effective protection values from the last reclaim operation, instead of completely ignoring the protection of the reclaim target as intended. More detailed explanation and examples in patch 1, which includes the fix. Patches 2 & 3 introduce a selftest case that catches the bug. This patch (of 3): When we are doing memcg reclaim, the intended behavior is that we ignore any protection (memory.min, memory.low) of the target memcg (but not its children). Ever since the patch pointed to by the "Fixes" tag, we actually read a stale value for the target memcg protection when deciding whether to skip the memcg or not because it is protected. If the stale value happens to be high enough, we don't reclaim from the target memcg. Essentially, in some cases we may falsely skip reclaiming from the target memcg of reclaim because we read a stale protection value from last time we reclaimed from it. During reclaim, mem_cgroup_calculate_protection() is used to determine the effective protection (emin and elow) values of a memcg. The protection of the reclaim target is ignored, but we cannot set their effective protection to 0 due to a limitation of the current implementation (see comment in mem_cgroup_protection()). Instead, we leave their effective protection values unchanged, and later ignore it in mem_cgroup_protection(). 
However, mem_cgroup_protection() is called later in shrink_lruvec()->get_scan_count(), which is after the mem_cgroup_below_{min/low}() checks in shrink_node_memcgs(). As a result, the stale effective protection values of the target memcg may lead us to skip reclaiming from the target memcg entirely, before calling shrink_lruvec(). This can be even worse with recursive protection, where the stale target memcg protection can be higher than its standalone protection. See two examples below (a similar version of example (a) is added to test_memcontrol in a later patch). (a) A simple example with proactive reclaim is as follows. Consider the following hierarchy: ROOT | A | B (memory.min = 10M) Consider the following scenario: - B has memory.current = 10M. - The system undergoes global reclaim (or memcg reclaim in A). - In shrink_node_memcgs(): - mem_cgroup_calculate_protection() calculates the effective min (emin) of B as 10M. - mem_cgroup_below_min() returns true for B, we do not reclaim from B. - Now if we want to reclaim 5M from B using proactive reclaim (memory.reclaim), we should be able to, as the protection of the target memcg should be ignored. - In shrink_node_memcgs(): - mem_cgroup_calculate_protection() immediately returns for B without doing anything, as B is the target memcg, relying on mem_cgroup_protection() to ignore B's stale effective min (still 10M). - mem_cgroup_below_min() reads the stale effective min for B and we skip it instead of ignoring its protection as intended, as we never reach mem_cgroup_protection(). (b) An more complex example with recursive protection is as follows. Consider the following hierarchy with memory_recursiveprot: ROOT | A (memory.min = 50M) | B (memory.min = 10M, memory.high = 40M) Consider the following scenario: - B has memory.current = 35M. - The system undergoes global reclaim (target memcg is NULL). - B will have an effective min of 50M (all of A's unclaimed protection). - B will not be reclaimed from. 
- Now allocate 10M more memory in B, pushing it above its high limit. - The system undergoes memcg reclaim from B (target memcg is B). - Like example (a), we do nothing in mem_cgroup_calculate_protection(), then call mem_cgroup_below_min(), which will read the stale effective min for B (50M) and skip it. In this case, it's even worse because we are not just considering B's standalone protection (10M), but we are reading a much higher stale protection (50M) which will cause us to not reclaim from B at all. This is an artifact of commit 45c7f7e1ef17 ("mm, memcg: decouple e{low,min} state mutations from protection checks") which made mem_cgroup_calculate_protection() only change the state without returning any value. Before that commit, we used to return MEMCG_PROT_NONE for the target memcg, which would cause us to skip the mem_cgroup_below_{min/low}() checks. After that commit we do not return anything and we end up checking the min & low effective protections for the target memcg, which are stale. Update mem_cgroup_supports_protection() to also check if we are reclaiming from the target, and rename it to mem_cgroup_unprotected() (now returns true if we should not protect the memcg, much simpler logic). 
Link: https://lkml.kernel.org/r/20221202031512.1365483-1-yosryahmed@google.com Link: https://lkml.kernel.org/r/20221202031512.1365483-2-yosryahmed@google.com Fixes: 45c7f7e1ef17 ("mm, memcg: decouple e{low,min} state mutations from protection checks") Signed-off-by: Yosry Ahmed <yosryahmed(a)google.com> Reviewed-by: Roman Gushchin <roman.gushchin(a)linux.dev> Cc: Chris Down <chris(a)chrisdown.name> Cc: David Rientjes <rientjes(a)google.com> Cc: Johannes Weiner <hannes(a)cmpxchg.org> Cc: Matthew Wilcox <willy(a)infradead.org> Cc: Michal Hocko <mhocko(a)suse.com> Cc: Muchun Song <songmuchun(a)bytedance.com> Cc: Shakeel Butt <shakeelb(a)google.com> Cc: Tejun Heo <tj(a)kernel.org> Cc: Vasily Averin <vasily.averin(a)linux.dev> Cc: Vlastimil Babka <vbabka(a)suse.cz> Cc: Yu Zhao <yuzhao(a)google.com> Signed-off-by: Andrew Morton <akpm(a)linux-foundation.org> Conflicts: mm/vmscan.c Signed-off-by: Cai Xinchen <caixinchen1(a)huawei.com> --- include/linux/memcontrol.h | 31 +++++++++++++++++++++---------- mm/vmscan.c | 4 ++-- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 24794b56eaaf..287c54141a90 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -769,28 +769,32 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root, void mem_cgroup_calculate_protection(struct mem_cgroup *root, struct mem_cgroup *memcg); -static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg) +static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, + struct mem_cgroup *memcg) { /* * The root memcg doesn't account charges, and doesn't support - * protection. + * protection. 
The target memcg's protection is ignored, see + * mem_cgroup_calculate_protection() and mem_cgroup_protection() */ - return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg); - + return mem_cgroup_disabled() || mem_cgroup_is_root(memcg) || + memcg == target; } -static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_low(struct mem_cgroup *target, + struct mem_cgroup *memcg) { - if (!mem_cgroup_supports_protection(memcg)) + if (mem_cgroup_unprotected(target, memcg)) return false; return READ_ONCE(memcg->memory.elow) >= page_counter_read(&memcg->memory); } -static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_min(struct mem_cgroup *target, + struct mem_cgroup *memcg) { - if (!mem_cgroup_supports_protection(memcg)) + if (mem_cgroup_unprotected(target, memcg)) return false; return READ_ONCE(memcg->memory.emin) >= @@ -1378,12 +1382,19 @@ static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root, { } -static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg) +static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, + struct mem_cgroup *memcg) +{ + return true; +} +static inline bool mem_cgroup_below_low(struct mem_cgroup *target, + struct mem_cgroup *memcg) { return false; } -static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg) +static inline bool mem_cgroup_below_min(struct mem_cgroup *target, + struct mem_cgroup *memcg) { return false; } diff --git a/mm/vmscan.c b/mm/vmscan.c index 3d383c7126e3..044bf496885b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2860,13 +2860,13 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) mem_cgroup_calculate_protection(target_memcg, memcg); - if (mem_cgroup_below_min(memcg)) { + if (mem_cgroup_below_min(target_memcg, memcg)) { /* * Hard protection. * If there is no reclaimable memory, OOM. 
*/ continue; - } else if (mem_cgroup_below_low(memcg)) { + } else if (mem_cgroup_below_low(target_memcg, memcg)) { /* * Soft protection. * Respect the protection only as long as -- 2.34.1

1 0