From: Linus Torvalds torvalds@linux-foundation.org
mainline inclusion from mainline-5.16-rc6 commit e386dfc56f837da66d00a078e5314bc8382fab83 category: perf bugzilla: https://gitee.com/openeuler/kernel/issues/I4S0SZ Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e386dfc56f837da66d00a078e5314bc8382fab83
-------------------------------------------------
Commit 054aa8d439b9 ("fget: check that the fd still exists after getting a ref to it") fixed a race with getting a reference to a file just as it was being closed. It was a fairly minimal patch, and I didn't think re-checking the file pointer lookup would be a measurable overhead, since it was all right there and cached.
But I was wrong, as pointed out by the kernel test robot.
The 'poll2' case of the will-it-scale.per_thread_ops benchmark regressed quite noticeably. Admittedly it seems to be a very artificial test: doing "poll()" system calls on regular files in a very tight loop in multiple threads.
That means that basically all the time is spent just looking up file descriptors without ever doing anything useful with them (not that doing 'poll()' on a regular file is useful to begin with). And as a result it shows the extra "re-check fd" cost as a sore thumb.
Happily, the regression is fixable by just writing the code to look up the fd to be better and clearer. There's still a cost to verify the file pointer, but now it's basically in the noise even for that benchmark that does nothing else - and the code is more understandable and has better comments too.
[ Side note: this patch is also a classic case of one that looks very messy with the default greedy Myers diff - it's much more legible with either the patience or histogram diff algorithm ]
Link: https://lore.kernel.org/lkml/20211210053743.GA36420@xsang-OptiPlex-9020/ Link: https://lore.kernel.org/lkml/20211213083154.GA20853@linux.intel.com/ Reported-by: kernel test robot oliver.sang@intel.com Tested-by: Carel Si beibei.si@intel.com Cc: Jann Horn jannh@google.com Cc: Miklos Szeredi mszeredi@redhat.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Conflicts: fs/file.c
Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/file.c | 72 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 56 insertions(+), 16 deletions(-)
diff --git a/fs/file.c b/fs/file.c index e4c168ddd8e7..0aa251ca02a6 100644 --- a/fs/file.c +++ b/fs/file.c @@ -866,28 +866,68 @@ void do_close_on_exec(struct files_struct *files) spin_unlock(&files->file_lock); }
-static struct file *__fget_files(struct files_struct *files, unsigned int fd, - fmode_t mask, unsigned int refs) +static inline struct file *__fget_files_rcu(struct files_struct *files, + unsigned int fd, fmode_t mask, unsigned int refs) { - struct file *file; + for (;;) { + struct file *file; + struct fdtable *fdt = rcu_dereference_raw(files->fdt); + struct file __rcu **fdentry;
- rcu_read_lock(); -loop: - file = fcheck_files(files, fd); - if (file) { - /* File object ref couldn't be taken. - * dup2() atomicity guarantee is the reason - * we loop to catch the new file (or NULL pointer) + if (unlikely(fd >= fdt->max_fds)) + return NULL; + + fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds); + file = rcu_dereference_raw(*fdentry); + if (unlikely(!file)) + return NULL; + + if (unlikely(file->f_mode & mask)) + return NULL; + + /* + * Ok, we have a file pointer. However, because we do + * this all locklessly under RCU, we may be racing with + * that file being closed. + * + * Such a race can take two forms: + * + * (a) the file ref already went down to zero, + * and get_file_rcu_many() fails. Just try + * again: */ - if (file->f_mode & mask) - file = NULL; - else if (!get_file_rcu_many(file, refs)) - goto loop; - else if (__fcheck_files(files, fd) != file) { + if (unlikely(!get_file_rcu_many(file, refs))) + continue; + + /* + * (b) the file table entry has changed under us. + * Note that we don't need to re-check the 'fdt->fd' + * pointer having changed, because it always goes + * hand-in-hand with 'fdt'. + * + * If so, we need to put our refs and try again. + */ + if (unlikely(rcu_dereference_raw(files->fdt) != fdt) || + unlikely(rcu_dereference_raw(*fdentry) != file)) { fput_many(file, refs); - goto loop; + continue; } + + /* + * Ok, we have a ref to the file, and checked that it + * still exists. + */ + return file; } +} + +static struct file *__fget_files(struct files_struct *files, unsigned int fd, + fmode_t mask, unsigned int refs) +{ + struct file *file; + + rcu_read_lock(); + file = __fget_files_rcu(files, fd, mask, refs); rcu_read_unlock();
return file;
From: Yu Kuai yukuai3@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SDBD
--------------------------------
Our tests reported a use-after-free (UAF):
[ 2073.019181] ================================================================== [ 2073.019188] BUG: KASAN: use-after-free in __bfq_put_async_bfqq+0xa0/0x168 [ 2073.019191] Write of size 8 at addr ffff8000ccf64128 by task rmmod/72584 [ 2073.019192] [ 2073.019196] CPU: 0 PID: 72584 Comm: rmmod Kdump: loaded Not tainted 4.19.90-yk #5 [ 2073.019198] Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 [ 2073.019200] Call trace: [ 2073.019203] dump_backtrace+0x0/0x310 [ 2073.019206] show_stack+0x28/0x38 [ 2073.019210] dump_stack+0xec/0x15c [ 2073.019216] print_address_description+0x68/0x2d0 [ 2073.019220] kasan_report+0x238/0x2f0 [ 2073.019224] __asan_store8+0x88/0xb0 [ 2073.019229] __bfq_put_async_bfqq+0xa0/0x168 [ 2073.019233] bfq_put_async_queues+0xbc/0x208 [ 2073.019236] bfq_pd_offline+0x178/0x238 [ 2073.019240] blkcg_deactivate_policy+0x1f0/0x420 [ 2073.019244] bfq_exit_queue+0x128/0x178 [ 2073.019249] blk_mq_exit_sched+0x12c/0x160 [ 2073.019252] elevator_exit+0xc8/0xd0 [ 2073.019256] blk_exit_queue+0x50/0x88 [ 2073.019259] blk_cleanup_queue+0x228/0x3d8 [ 2073.019267] null_del_dev+0xfc/0x1e0 [null_blk] [ 2073.019274] null_exit+0x90/0x114 [null_blk] [ 2073.019278] __arm64_sys_delete_module+0x358/0x5a0 [ 2073.019282] el0_svc_common+0xc8/0x320 [ 2073.019287] el0_svc_handler+0xf8/0x160 [ 2073.019290] el0_svc+0x10/0x218 [ 2073.019291] [ 2073.019294] Allocated by task 14163: [ 2073.019301] kasan_kmalloc+0xe0/0x190 [ 2073.019305] kmem_cache_alloc_node_trace+0x1cc/0x418 [ 2073.019308] bfq_pd_alloc+0x54/0x118 [ 2073.019313] blkcg_activate_policy+0x250/0x460 [ 2073.019317] bfq_create_group_hierarchy+0x38/0x110 [ 2073.019321] bfq_init_queue+0x6d0/0x948 [ 2073.019325] blk_mq_init_sched+0x1d8/0x390 [ 2073.019330] elevator_switch_mq+0x88/0x170 [ 2073.019334] elevator_switch+0x140/0x270 [ 2073.019338] elv_iosched_store+0x1a4/0x2a0 [ 2073.019342] queue_attr_store+0x90/0xe0 [ 2073.019348] sysfs_kf_write+0xa8/0xe8 [ 2073.019351] kernfs_fop_write+0x1f8/0x378 [ 
2073.019359] __vfs_write+0xe0/0x360 [ 2073.019363] vfs_write+0xf0/0x270 [ 2073.019367] ksys_write+0xdc/0x1b8 [ 2073.019371] __arm64_sys_write+0x50/0x60 [ 2073.019375] el0_svc_common+0xc8/0x320 [ 2073.019380] el0_svc_handler+0xf8/0x160 [ 2073.019383] el0_svc+0x10/0x218 [ 2073.019385] [ 2073.019387] Freed by task 72584: [ 2073.019391] __kasan_slab_free+0x120/0x228 [ 2073.019394] kasan_slab_free+0x10/0x18 [ 2073.019397] kfree+0x94/0x368 [ 2073.019400] bfqg_put+0x64/0xb0 [ 2073.019404] bfqg_and_blkg_put+0x90/0xb0 [ 2073.019408] bfq_put_queue+0x220/0x228 [ 2073.019413] __bfq_put_async_bfqq+0x98/0x168 [ 2073.019416] bfq_put_async_queues+0xbc/0x208 [ 2073.019420] bfq_pd_offline+0x178/0x238 [ 2073.019424] blkcg_deactivate_policy+0x1f0/0x420 [ 2073.019429] bfq_exit_queue+0x128/0x178 [ 2073.019433] blk_mq_exit_sched+0x12c/0x160 [ 2073.019437] elevator_exit+0xc8/0xd0 [ 2073.019440] blk_exit_queue+0x50/0x88 [ 2073.019443] blk_cleanup_queue+0x228/0x3d8 [ 2073.019451] null_del_dev+0xfc/0x1e0 [null_blk] [ 2073.019459] null_exit+0x90/0x114 [null_blk] [ 2073.019462] __arm64_sys_delete_module+0x358/0x5a0 [ 2073.019467] el0_svc_common+0xc8/0x320 [ 2073.019471] el0_svc_handler+0xf8/0x160 [ 2073.019474] el0_svc+0x10/0x218 [ 2073.019475] [ 2073.019479] The buggy address belongs to the object at ffff8000ccf63f00 which belongs to the cache kmalloc-1024 of size 1024 [ 2073.019484] The buggy address is located 552 bytes inside of 1024-byte region [ffff8000ccf63f00, ffff8000ccf64300) [ 2073.019486] The buggy address belongs to the page: [ 2073.019492] page:ffff7e000333d800 count:1 mapcount:0 mapping:ffff8000c0003a00 index:0x0 compound_mapcount: 0 [ 2073.020123] flags: 0x7ffff0000008100(slab|head) [ 2073.020403] raw: 07ffff0000008100 ffff7e0003334c08 ffff7e00001f5a08 ffff8000c0003a00 [ 2073.020409] raw: 0000000000000000 00000000001c001c 00000001ffffffff 0000000000000000 [ 2073.020411] page dumped because: kasan: bad access detected [ 2073.020412] [ 2073.020414] Memory state around the buggy 
address: [ 2073.020420] ffff8000ccf64000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020424] ffff8000ccf64080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020428] >ffff8000ccf64100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020430] ^ [ 2073.020434] ffff8000ccf64180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020438] ffff8000ccf64200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 2073.020439] ==================================================================
After adding some debug info, we found that the root cause is that the oom_bfqq is moved to a non-root bfqg.
Thus fix the problem by not moving the oom_bfqq.
Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- block/bfq-cgroup.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index ee9f24ca7eea..a942899273c0 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -645,6 +645,8 @@ void bfq_bfqq_move(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_entity *entity = &bfqq->entity; struct bfq_group *old_parent = bfqq_group(bfqq);
+ if (bfqq == &bfqd->oom_bfqq) + return; /* * Get extra reference to prevent bfqq from being freed in * next possible expire or deactivate.
From: Xingang Wang wangxingang5@huawei.com
ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4L735 CVE: NA
---------------------------------------------------
Fix modpost Section mismatch errors in __mpam_device_create() and others. These warnings occur with newer gcc versions, for example 10.1.0.
[...] WARNING: vmlinux.o(.text+0x2ed88): Section mismatch in reference from the function __mpam_device_create() to the function .init.text:mpam_device_alloc() The function __mpam_device_create() references the function __init mpam_device_alloc(). This is often because __mpam_device_create lacks a __init annotation or the annotation of mpam_device_alloc is wrong.
WARNING: vmlinux.o(.text.unlikely+0xa5c): Section mismatch in reference from the function mpam_resctrl_init() to the function .init.text:mpam_init_padding() The function mpam_resctrl_init() references the function __init mpam_init_padding(). This is often because mpam_resctrl_init lacks a __init annotation or the annotation of mpam_init_padding is wrong.
WARNING: vmlinux.o(.text.unlikely+0x5a9c): Section mismatch in reference from the function resctrl_group_init() to the function .init.text:resctrl_group_setup_root() The function resctrl_group_init() references the function __init resctrl_group_setup_root(). This is often because resctrl_group_init lacks a __init annotation or the annotation of resctrl_group_setup_root is wrong. [...]
Fixes: 682eefba7fc4 ("arm64/mpam: remove __init macro to support driver probe") Signed-off-by: Xingang Wang wangxingang5@huawei.com Signed-off-by: Wang ShaoBo bobo.shaobowang@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/kernel/mpam/mpam_device.c | 8 ++++---- arch/arm64/kernel/mpam/mpam_resctrl.c | 2 +- fs/resctrlfs.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/kernel/mpam/mpam_device.c b/arch/arm64/kernel/mpam/mpam_device.c index 85b5c415fdc2..f2cc6e9b2ed4 100644 --- a/arch/arm64/kernel/mpam/mpam_device.c +++ b/arch/arm64/kernel/mpam/mpam_device.c @@ -621,7 +621,7 @@ static void mpam_failed(struct work_struct *work) mutex_unlock(&mpam_cpuhp_lock); }
-static struct mpam_device * __init +static struct mpam_device * mpam_device_alloc(struct mpam_component *comp) { struct mpam_device *dev; @@ -656,7 +656,7 @@ static void mpam_devices_destroy(struct mpam_component *comp) } }
-static struct mpam_component * __init mpam_component_alloc(int id) +static struct mpam_component *mpam_component_alloc(int id) { struct mpam_component *comp;
@@ -694,7 +694,7 @@ struct mpam_component *mpam_component_get(struct mpam_class *class, int id, return comp; }
-static struct mpam_class * __init mpam_class_alloc(u8 level_idx, +static struct mpam_class *mpam_class_alloc(u8 level_idx, enum mpam_class_types type) { struct mpam_class *class; @@ -733,7 +733,7 @@ static void mpam_class_destroy(struct mpam_class *class) } }
-static struct mpam_class * __init mpam_class_get(u8 level_idx, +static struct mpam_class *mpam_class_get(u8 level_idx, enum mpam_class_types type, bool alloc) { diff --git a/arch/arm64/kernel/mpam/mpam_resctrl.c b/arch/arm64/kernel/mpam/mpam_resctrl.c index 53789acaae20..932d04484982 100644 --- a/arch/arm64/kernel/mpam/mpam_resctrl.c +++ b/arch/arm64/kernel/mpam/mpam_resctrl.c @@ -1130,7 +1130,7 @@ void closid_free(int closid) * Choose a width for the resource name and resource data based on the * resource that has widest name and cbm. */ -static __init void mpam_init_padding(void) +static void mpam_init_padding(void) { int cl; struct mpam_resctrl_res *res; diff --git a/fs/resctrlfs.c b/fs/resctrlfs.c index a18933a11437..8956237de47f 100644 --- a/fs/resctrlfs.c +++ b/fs/resctrlfs.c @@ -1116,7 +1116,7 @@ static void resctrl_group_default_init(struct resctrl_group *r) r->type = RDTCTRL_GROUP; }
-static int __init resctrl_group_setup_root(void) +static int resctrl_group_setup_root(void) { int ret;
From: Xingang Wang wangxingang5@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4L735 CVE: NA
---------------------------------------------------
The process of MPAM device tree initialization is like this: arm_mpam_device_probe() // driver probe mpam_discovery_start() // start discover mpam devices [...] // find and add mpam devices mpam_discovery_complete() // trigger mpam_enable
When there are multiple mpam device nodes, the driver probe procedure will execute more than once. However, mpam_discovery_start() and mpam_discovery_complete() should each only run once. Besides, the start should run first, and the complete should run after all devices have been added.
So we reorganize the device tree structure, so that there will be only one mpam device parent node, and the probe procedure will only run once. We add child nodes to represent the mpam devices, and traverse and add all mpam devices in the middle of the driver probe procedure.
Signed-off-by: Xingang Wang wangxingang5@huawei.com Signed-off-by: Wang ShaoBo bobo.shaobowang@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/kernel/mpam/mpam_device.c | 59 +++++++++++++++------------- 1 file changed, 32 insertions(+), 27 deletions(-)
diff --git a/arch/arm64/kernel/mpam/mpam_device.c b/arch/arm64/kernel/mpam/mpam_device.c index f2cc6e9b2ed4..2aa9a3ab59f2 100644 --- a/arch/arm64/kernel/mpam/mpam_device.c +++ b/arch/arm64/kernel/mpam/mpam_device.c @@ -34,6 +34,7 @@ #include <linux/arm_mpam.h> #include <linux/of.h> #include <linux/of_platform.h> +#include <linux/of_address.h>
#include "mpam_resource.h" #include "mpam_device.h" @@ -1708,10 +1709,9 @@ static const struct of_device_id arm_mpam_of_device_ids[] = { { } };
-static int of_mpam_parse_irq(struct platform_device *pdev, +static int of_mpam_parse_irq(struct device_node *node, struct mpam_device *dev) { - struct device_node *node = pdev->dev.of_node; u32 overflow_interrupt, overflow_flags; u32 error_interrupt, error_interrupt_flags;
@@ -1726,12 +1726,12 @@ static int of_mpam_parse_irq(struct platform_device *pdev, error_interrupt, error_interrupt_flags); }
-static int of_mpam_parse_cache(struct platform_device *pdev) +static int of_mpam_parse_cache(struct platform_device *pdev, + struct device_node *node) { struct mpam_device *dev; - struct device_node *node = pdev->dev.of_node; int cache_level, cache_id; - struct resource *res; + u64 reg_value[2];
if (of_property_read_u32(node, "cache-level", &cache_level)) { dev_err(&pdev->dev, "missing cache level property\n"); @@ -1744,27 +1744,27 @@ static int of_mpam_parse_cache(struct platform_device *pdev) }
/* Base address */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { + if (of_property_read_u64_array(node, "reg", reg_value, 2)) { dev_err(&pdev->dev, "missing io resource property\n"); return -EINVAL; }
- dev = mpam_device_create_cache(cache_level, cache_id, NULL, res->start); + dev = mpam_device_create_cache(cache_level, cache_id, NULL, + reg_value[0]); if (IS_ERR(dev)) { dev_err(&pdev->dev, "Failed to create cache node\n"); return -EINVAL; }
- return of_mpam_parse_irq(pdev, dev); + return of_mpam_parse_irq(node, dev); }
-static int of_mpam_parse_memory(struct platform_device *pdev) +static int of_mpam_parse_memory(struct platform_device *pdev, + struct device_node *node) { struct mpam_device *dev; - struct device_node *node = pdev->dev.of_node; int numa_id; - struct resource *res; + u64 reg_value[2];
if (of_property_read_u32(node, "numa-node-id", &numa_id)) { dev_err(&pdev->dev, "missing numa node id property\n"); @@ -1772,40 +1772,35 @@ static int of_mpam_parse_memory(struct platform_device *pdev) }
/* Base address */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { + if (of_property_read_u64_array(node, "reg", reg_value, 2)) { dev_err(&pdev->dev, "missing io resource property\n"); return -EINVAL; }
- dev = mpam_device_create_memory(numa_id, res->start); + dev = mpam_device_create_memory(numa_id, reg_value[0]); if (IS_ERR(dev)) { dev_err(&pdev->dev, "Failed to create memory node\n"); return -EINVAL; }
- return of_mpam_parse_irq(pdev, dev); + return of_mpam_parse_irq(node, dev); }
-static int of_mpam_parse(struct platform_device *pdev) +static int of_mpam_add_child(struct platform_device *pdev, + struct device_node *node) { - struct device *dev = &pdev->dev; - struct device_node *node = dev->of_node; enum mpam_class_types type;
- if (!node || !of_match_node(arm_mpam_of_device_ids, pdev->dev.of_node)) - return -EINVAL; - - if (of_property_read_u32(dev->of_node, "type", &type)) { - dev_err(dev, "missing type property\n"); + if (of_property_read_u32(node, "type", &type)) { + dev_err(&pdev->dev, "missing type property\n"); return -EINVAL; }
switch (type) { case MPAM_CLASS_CACHE: - return of_mpam_parse_cache(pdev); + return of_mpam_parse_cache(pdev, node); case MPAM_CLASS_MEMORY: - return of_mpam_parse_memory(pdev); + return of_mpam_parse_memory(pdev, node); default: pr_warn_once("Unknown node type %u.\n", type); return -EINVAL; @@ -1823,6 +1818,9 @@ static int of_mpam_parse(struct platform_device *pdev) static int arm_mpam_device_probe(struct platform_device *pdev) { int ret; + struct device *dev = &pdev->dev; + struct device_node *node = dev->of_node; + struct device_node *child = NULL;
if (!cpus_have_const_cap(ARM64_HAS_MPAM)) return 0; @@ -1830,11 +1828,18 @@ static int arm_mpam_device_probe(struct platform_device *pdev) if (!acpi_disabled || mpam_enabled != MPAM_ENABLE_OF) return 0;
+ if (!node || !of_match_node(arm_mpam_of_device_ids, pdev->dev.of_node)) + return -EINVAL; + ret = mpam_discovery_start(); if (ret) return ret;
- ret = of_mpam_parse(pdev); + for_each_available_child_of_node(node, child) { + ret = of_mpam_add_child(pdev, child); + if (ret) + break; + }
if (ret) { mpam_discovery_failed();
From: Xingang Wang wangxingang5@huawei.com
arm64/mpam: refactor device tree structure to support multiple devices
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I49RB2 CVE: NA
---------------------------------------------------
To support multiple mpam device nodes, all nodes should be organized as children of the same parent node. This makes sure that the mpam discovery start and complete procedures run in the right execution order. Add a modification to the devicetree documentation to record this.
Signed-off-by: Xingang Wang wangxingang5@huawei.com Signed-off-by: Wang ShaoBo bobo.shaobowang@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- .../devicetree/bindings/arm/arm,mpam.txt | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-)
diff --git a/Documentation/devicetree/bindings/arm/arm,mpam.txt b/Documentation/devicetree/bindings/arm/arm,mpam.txt index 65c1e6809685..e9ba09bb3159 100644 --- a/Documentation/devicetree/bindings/arm/arm,mpam.txt +++ b/Documentation/devicetree/bindings/arm/arm,mpam.txt @@ -28,27 +28,30 @@ and Monitoring (MPAM), for Armv8-A", MPAM interrupts(section 8.8).
Example:
-mpam_memory0 { +mpam { compatible = "arm,mpam"; - reg = <0x0 0x10000000 0x0 0x10000>; - type = <2>; /* memory type */ - numa-node-id = <0>; - overflow-interrupt = <0>; - overflow-flags = <0>; - error-interrupt = <0>; - error-interrupt-flags = <0>; - not-ready-max = <0>; -};
-mpam_cache0 { - compatible = "arm,mpam"; - reg = <0x0 0x20000000 0x0 0x10000>; - type = <1>; /* cache type */ - cache-id = <0>; - cache-level = <3>; - overflow-interrupt = <0>; - overflow-flags = <0>; - error-interrupt = <0>; - error-interrupt-flags = <0>; - not-ready-max = <0>; + mpam_memory0 { + reg = <0x0 0x10000000 0x0 0x10000>; + type = <2>; /* memory type */ + numa-node-id = <0>; + overflow-interrupt = <0>; + overflow-flags = <0>; + error-interrupt = <0>; + error-interrupt-flags = <0>; + not-ready-max = <0>; + }; + + mpam_cache0 { + reg = <0x0 0x20000000 0x0 0x10000>; + type = <1>; /* cache type */ + cache-id = <0>; + cache-level = <3>; + overflow-interrupt = <0>; + overflow-flags = <0>; + error-interrupt = <0>; + error-interrupt-flags = <0>; + not-ready-max = <0>; + }; + };
From: Wang ShaoBo bobo.shaobowang@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4SE03 CVE: NA
---------------------------------------------------
This makes the step entry aligned with step_size*step_cnt rather than step_size, and checks for alignment before traversing rmid_transform.
When modifying rmid with a value not aligned with step_size*step_cnt, for_each_rmid_transform_point_step_from might miss the next step point if it has been occupied, in case step_cnt or step_size does not equal 1, which will cause the actually allocated rmid to be inconsistent with the expected one.
Fixes: 8a2c07b5b84f ("arm64/mpam: rmid: refine allocation and release process") Signed-off-by: Wang ShaoBo bobo.shaobowang@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/kernel/mpam/mpam_resctrl.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/kernel/mpam/mpam_resctrl.c b/arch/arm64/kernel/mpam/mpam_resctrl.c index 932d04484982..e9e77064bdb2 100644 --- a/arch/arm64/kernel/mpam/mpam_resctrl.c +++ b/arch/arm64/kernel/mpam/mpam_resctrl.c @@ -834,7 +834,8 @@ static inline unsigned long **__rmid_remap_bmp(u32 col) #define __step_xy_initialize(step, x, y, from) \ (x = from, step = 1, y = 0) #define __step_align(from) \ - (!(from % rmid_remap_matrix.step_size)) + (!(from % (rmid_remap_matrix.step_size * \ + rmid_remap_matrix.step_cnt))) #define __step_overflow(step) \ (__xy_overflow(x, y) || \ (step > rmid_remap_matrix.step_cnt)) @@ -908,7 +909,7 @@ static int is_rmid_remap_bmp_full(unsigned long *bmp) bitmap_full(bmp, rmid_remap_matrix.rows)); }
-static int rmid_remap_bmp_find_first_avail_partid(int partid) +static int rmid_remap_bmp_find_step_entry(int partid) { int x, y; unsigned long **bmp; @@ -917,17 +918,18 @@ static int rmid_remap_bmp_find_first_avail_partid(int partid) rmid_remap_matrix.cols) return 0;
+ /* step entry should be non-occupied and aligned */ bmp = __rmid_remap_bmp(partid); - if (bmp && !is_rmid_remap_bmp_occ(*bmp)) - return partid; + if (bmp) + return (is_rmid_remap_bmp_occ(*bmp) || + !__step_align(partid)) ? -ENOSPC : partid;
for_each_rmid_transform_point_from(bmp, x, y, 0) { /* * do not waste partid resource, start - * from step_size aligned position. + * from step aligned position. */ - if (!is_rmid_remap_bmp_occ(*bmp) && - (x % rmid_remap_matrix.step_size) == 0) + if (__step_align(x) && !is_rmid_remap_bmp_occ(*bmp)) return x; }
@@ -1021,8 +1023,8 @@ static int __rmid_alloc(int partid, int pmg) if (pmg >= 0) checkpmg = true;
- /* traverse from first non-occupied and step_size aligned entry */ - ret = rmid_remap_bmp_find_first_avail_partid(partid); + /* traverse from first non-occupied and step-aligned entry */ + ret = rmid_remap_bmp_find_step_entry(partid); if (ret < 0) goto out; partid = ret;
From: Zheng Yejian zhengyejian1@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SFHQ
--------------------------------
Fix following reference count issues where get 'obj->mod' once but put twice: 1) klp_register_patch klp_init_patch klp_init_object klp_find_object_module try_module_get <-- 1. Get refcount once klp_init_object_loaded <-- 2. If fail here!!! module_put <-- 3. Put refcount first time klp_free_patch_start klp_free_objects __klp_free_objects module_put <-- 4. 'obj->mod' not null, put twice!!!
2) klp_register_patch klp_init_patch klp_init_object klp_find_object_module try_module_get <-- 1. Get refcount once kobject_add <-- 2. If other objs fail here!!! klp_free_objects_mod_limited module_put <-- 3. Put refcount first time klp_free_patch_start klp_free_objects __klp_free_objects module_put <-- 4. 'obj->mod' not null, put twice!!!
Fixes: c33e42836a74 ("livepatch/core: Allow implementation without ftrace") Signed-off-by: Zheng Yejian zhengyejian1@huawei.com Reviewed-by: Xu Kuohai xukuohai@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- kernel/livepatch/core.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 1fde6ba196a4..e964d834203a 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -811,8 +811,10 @@ static void __klp_free_objects(struct klp_patch *patch, bool nops_only)
klp_for_each_object_safe(patch, obj, tmp_obj) { #ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY - if (klp_is_module(obj)) + if (klp_is_module(obj) && obj->mod) { module_put(obj->mod); + obj->mod = NULL; + } #endif __klp_free_funcs(obj, nops_only);
@@ -1118,8 +1120,10 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
out: #ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY - if (klp_is_module(obj)) + if (klp_is_module(obj)) { module_put(obj->mod); + obj->mod = NULL; + } #endif return ret; } @@ -1137,6 +1141,9 @@ static void klp_init_object_early(struct klp_patch *patch, INIT_LIST_HEAD(&obj->func_list); kobject_init(&obj->kobj, &klp_ktype_object); list_add_tail(&obj->node, &patch->obj_list); +#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY + obj->mod = NULL; +#endif }
static int klp_init_patch_early(struct klp_patch *patch) @@ -1187,8 +1194,10 @@ static void klp_free_objects_mod_limited(struct klp_patch *patch, klp_for_each_object_safe(patch, obj, tmp_obj) { if (limit == obj) break; - if (klp_is_module(obj)) + if (klp_is_module(obj) && obj->mod) { module_put(obj->mod); + obj->mod = NULL; + } } } #endif
From: Zheng Yejian zhengyejian1@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SFHQ
--------------------------------
Refer to the following function procedure; klp_free_objects_mod_limited seems redundant, so remove it: klp_register_patch klp_init_patch klp_init_object <--- klp_find_object_module \ try_module_get |<-- 1. If something wrong here jump_label_register <---- klp_free_objects_mod_limited <-- 2. Check and put 'obj->mod' module_put klp_free_patch_start <-- 3. Check and put 'obj->mod' again klp_free_objects __klp_free_objects module_put
Signed-off-by: Zheng Yejian zhengyejian1@huawei.com Reviewed-by: Xu Kuohai xukuohai@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- kernel/livepatch/core.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-)
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index e964d834203a..c9c76b440177 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -1185,23 +1185,6 @@ static int klp_init_patch_early(struct klp_patch *patch) return 0; }
-#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY -static void klp_free_objects_mod_limited(struct klp_patch *patch, - struct klp_object *limit) -{ - struct klp_object *obj, *tmp_obj; - - klp_for_each_object_safe(patch, obj, tmp_obj) { - if (limit == obj) - break; - if (klp_is_module(obj) && obj->mod) { - module_put(obj->mod); - obj->mod = NULL; - } - } -} -#endif - static int klp_init_patch(struct klp_patch *patch) { struct klp_object *obj; @@ -1220,7 +1203,7 @@ static int klp_init_patch(struct klp_patch *patch) klp_for_each_object(patch, obj) { ret = klp_init_object(patch, obj); if (ret) - goto out; + return ret; }
flush_module_icache(patch->mod); @@ -1230,7 +1213,7 @@ static int klp_init_patch(struct klp_patch *patch) ret = jump_label_register(patch->mod); if (ret) { module_enable_ro(patch->mod, true); - goto out; + return ret; } module_enable_ro(patch->mod, true);
@@ -1242,11 +1225,6 @@ static int klp_init_patch(struct klp_patch *patch) list_add_tail(&patch->list, &klp_patches);
return 0; -out: -#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY - klp_free_objects_mod_limited(patch, obj); -#endif - return ret; }
#ifdef CONFIG_LIVEPATCH_PER_TASK_CONSISTENCY
From: Zheng Yejian zhengyejian1@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SFHQ
--------------------------------
Refer to the following function procedure: 'obj->mod' is got if CONFIG_LIVEPATCH_FTRACE is not defined, but it is put if CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY is defined. If the enable state of these two macros changes, the reference count of 'obj->mod' would be wrong.
klp_register_patch klp_init_patch klp_init_object klp_find_object_module try_module_get <-- !CONFIG_LIVEPATCH_FTRACE module_put <-- CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY klp_free_patch_start klp_free_objects __klp_free_objects module_put <-- CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY
So we use CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY uniformly.
Fixes: c33e42836a74 ("livepatch/core: Allow implementation without ftrace") Signed-off-by: Zheng Yejian zhengyejian1@huawei.com Reviewed-by: Xu Kuohai xukuohai@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- kernel/livepatch/core.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index c9c76b440177..b46ef236424d 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -122,10 +122,7 @@ static int klp_find_object_module(struct klp_object *obj) * until mod->exit() finishes. This is especially important for * patches that modify semantic of the functions. */ -#ifdef CONFIG_LIVEPATCH_FTRACE - if (mod && mod->klp_alive) - obj->mod = mod; -#else +#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY if (!mod) { pr_err("module '%s' not loaded\n", obj->name); mutex_unlock(&module_mutex); @@ -138,6 +135,9 @@ static int klp_find_object_module(struct klp_object *obj) }
obj->mod = mod; +#else + if (mod && mod->klp_alive) + obj->mod = mod; #endif
mutex_unlock(&module_mutex);