From: Wang ShaoBo bobo.shaobowang@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I3YAI3 CVE: NA
-------------------------------------------------
The following error occurred occasionally on a machine that supports MPAM:
[ 13.321386][ T658] Unable to handle kernel paging request at virtual address ffff80001115816c [ 13.326013][ T684] hid-generic 0003:12D1:0003.0002: input,hidraw1: USB HID v1.10 Mouse [Keyboard/Mouse KVM 1.1.0] on usb-0000:7a:01.0-1.1/input1 [ 13.340558][ T658] Mem abort info: [ 13.340563][ T658] ESR = 0x86000007 [ 13.352567][ T5] hub 6-1:1.0: USB hub found [ 13.364750][ T658] EC = 0x21: IABT (current EL), IL = 32 bits [ 13.369891][ T5] hub 6-1:1.0: 4 ports detected [ 13.373871][ T658] SET = 0, FnV = 0 [ 13.396107][ T658] EA = 0, S1PTW = 0 [ 13.400599][ T658] swapper pgtable: 64k pages, 48-bit VAs, pgdp=0000000029540000 [ 13.408726][ T658] [ffff80001115816c] pgd=0000205fffff0003, p4d=0000205fffff0003, pud=0000205fffff0003, pmd=0000205ffffe0003, pte=0000000000000000 [ 13.423346][ T658] Internal error: Oops: 86000007 [#1] SMP [ 13.429720][ T658] Modules linked in: [ 13.434243][ T658] CPU: 72 PID: 658 Comm: kworker/72:1 Not tainted 5.10.0-4.17.0.28.oe1.aarch64 #1 [ 13.443966][ T658] Hardware name: Huawei TaiShan 200 (Model 2280)/BC82AMDDA, BIOS 1.70 01/07/2021 [ 13.453683][ T658] Workqueue: events mpam_enable [ 13.459206][ T658] pstate: 20c00009 (nzCv daif +PAN +UAO -TCO BTYPE=--) [ 13.466625][ T658] pc : mpam_enable+0x194/0x1d8 [ 13.472019][ T658] lr : mpam_enable+0x194/0x1d8 [ 13.477301][ T658] sp : ffff80004664fd70 [ 13.481937][ T658] x29: ffff80004664fd70 x28: 0000000000000000 [ 13.488578][ T658] x27: ffff00400484a648 x26: ffff800011b71080 [ 13.495306][ T658] x25: 0000000000000000 x24: ffff800011b6cda0 [ 13.502001][ T658] x23: ffff800011646f18 x22: ffff800011b6cd80 [ 13.508684][ T658] x21: ffff800011b6c000 x20: ffff800011646f08 [ 13.515425][ T658] x19: ffff800011646f70 x18: 0000000000000020 [ 13.522075][ T658] x17: 000000001790b332 x16: 0000000000000001 [ 13.528785][ T658] x15: ffffffffffffffff x14: ff00000000000000 [ 13.535464][ T658] x13: ffffffffffffffff x12: 0000000000000006 [ 13.542045][ T658] x11: 00000091cea718e2 x10: 0000000000000b90 [ 13.548735][ T658] x9 
: ffff80001009ebac x8 : ffff2040061aabf0 [ 13.555383][ T658] x7 : ffffa05f8dca0000 x6 : 000000000000000f [ 13.561924][ T658] x5 : 0000000000000000 x4 : ffff2040061aa000 [ 13.568613][ T658] x3 : ffff80001164dfa0 x2 : 00000000ffffffff [ 13.575267][ T658] x1 : ffffa05f8dca0000 x0 : 00000000000000c1 [ 13.581813][ T658] Call trace: [ 13.585600][ T658] mpam_enable+0x194/0x1d8 [ 13.590450][ T658] process_one_work+0x1cc/0x390 [ 13.595654][ T658] worker_thread+0x70/0x2f0 [ 13.600499][ T658] kthread+0x118/0x120 [ 13.604935][ T658] ret_from_fork+0x10/0x18 [ 13.609717][ T658] Code: bad PC value [ 13.613944][ T658] ---[ end trace f1e305d2c339f67f ]--- [ 13.753818][ T658] Kernel panic - not syncing: Oops: Fatal exception [ 13.760885][ T658] SMP: stopping secondary CPUs [ 13.765933][ T658] Kernel Offset: disabled [ 13.770516][ T658] CPU features: 0x8040002,22208a38 [ 13.775862][ T658] Memory Limit: none [ 13.913929][ T658] ---[ end Kernel panic - not syncing:
The MPAM device initialization process is as follows:
  mpam_discovery_start()
      ...                          // discover devices
  mpam_discovery_complete()        // hang up the mpam_online/offline_cpu callbacks
    -=> mpam_cpu_online()          // probe all devices
      -=> mpam_enable()            // prepare for resctrl
  (1)   -=> cpuhp_remove_state()   // clean resctrl internal structure
  (2)   -=> cpuhp_setup_state()    // rehang mpam_online/offline_cpu callbacks
          -=> mpam_cpu_online()    // it does not call mpam_enable again
            -=> mpam_resctrl_cpu_online()  // pull up resctrl
The re-hang process of the mpam_cpu_online/offline callbacks should not be disturbed by IRQs, to ensure that the CPU context is reliable before re-entering mpam_cpu_online(), which always happens between (1) and (2).
Fixes: 2ab89c893faf ("arm64/mpam: resctrl: Re-synchronise resctrl's view of online CPUs") Signed-off-by: Wang ShaoBo bobo.shaobowang@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/mpam/mpam_device.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/arm64/kernel/mpam/mpam_device.c b/arch/arm64/kernel/mpam/mpam_device.c index f8840274b902f..c0615f6947a1f 100644 --- a/arch/arm64/kernel/mpam/mpam_device.c +++ b/arch/arm64/kernel/mpam/mpam_device.c @@ -593,9 +593,11 @@ static void __init mpam_enable(struct work_struct *work) pr_err("Failed to setup/init resctrl\n"); mutex_unlock(&mpam_devices_lock);
+ local_irq_disable(); mpam_cpuhp_state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mpam:online", mpam_cpu_online, mpam_cpu_offline); + local_irq_enable(); if (mpam_cpuhp_state <= 0) pr_err("Failed to re-register 'dyn' cpuhp callbacks"); mutex_unlock(&mpam_cpuhp_lock);
From: Wang ShaoBo bobo.shaobowang@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4LMMF CVE: NA
-------------------------------------------------
This adds hint messages that are reported when rmid modification fails.
Fixes: a85aba6a1d67 ("mpam: Add support for group rmid modify") Signed-off-by: Wang ShaoBo bobo.shaobowang@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/mpam/mpam_resctrl.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/arm64/kernel/mpam/mpam_resctrl.c b/arch/arm64/kernel/mpam/mpam_resctrl.c index 79b711e2b7a5c..80f402476abbf 100644 --- a/arch/arm64/kernel/mpam/mpam_resctrl.c +++ b/arch/arm64/kernel/mpam/mpam_resctrl.c @@ -1890,12 +1890,14 @@ static ssize_t resctrl_group_rmid_write(struct kernfs_open_file *of,
if (rmid == 0 || rdtgrp->mon.rmid == 0) { ret = -EINVAL; + rdt_last_cmd_puts("default rmid 0 is always kept\n"); goto unlock; }
ret = rmid_to_partid_pmg(rmid, &partid, &pmg); if (ret < 0) { ret = -EINVAL; + rdt_last_cmd_puts("invalid rmid\n"); goto unlock; }
@@ -1904,6 +1906,7 @@ static ssize_t resctrl_group_rmid_write(struct kernfs_open_file *of,
if (rdtgrp->type != RDTCTRL_GROUP || !list_empty(&rdtgrp->mon.crdtgrp_list)) { + ret = -EINVAL; rdt_last_cmd_puts("unsupported operation\n"); goto unlock; }
From: Wang ShaoBo bobo.shaobowang@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4LL14 CVE: NA
-------------------------------------------------
Unlike Intel RDT, MPAM needs to handle more cases when monitoring. Two labels, PARTID and PMG, are embedded into a single data stream; they may work at the same time, or only PMG may work. If only PMG works, the number of PMGs determines the number of resources that can be monitored at the same time.
For instance (NR_PARTID equals 2, NR_PMG equals 2):
(1) PARTID and PMG work together

    RMID = PARTID + PMG*NR_PARTID
    0      0        0
    1      1        0
    2      0        1
    3      1        1
(2) only PMG works

    RMID = PARTID + PMG*NR_PARTID
    0      0        0
    0      1        0        (PARTID=1 makes no sense)
    1      0        1
    1      1        1        (PARTID=1 makes no sense)
Given those reasons, we should take care with the usage of the rmid remap matrix. Two fields (
    @step_size: Step size from traversing the point of matrix once
    @step_cnt: Indicates how many times to traverse (e.g. if cdp; step_cnt=2)
) are added to struct rmid_transform for measuring allocation and release of monitor resources (RMIDs).
step_size is set to 1 by default; if only PMG (NR_PMG=4) works, it is made equal to the number of columns. step_cnt indicates how many steps are allocated and released each time. In this case the rmid remap matrix looks like:
    ^
    |
    ------column------>

    RMID 0   1   2   3 (step_size=1)
         `---'
           `--> (step_cnt=2 if cdp enabled)

    RMID 0   1   2   3 (step_size=1)
         `--
          `--> (step_cnt=1 if cdp disabled)
If PARTID (NR_PARTID=4) and PMG (NR_PMG=4) work together, the rmid remap matrix looks like:
     ------------row------------>
    |
    |   RMID 0   1   2   3 (step_size=1)
    |        `---'
    |          `--> (step_cnt=2 if cdp enabled)
    |        4   5   6   7
    |        8   9   10  11
    v        12  13  14  15
In addition, a step_size not equal to 1 (cross-line traversal) is also supported, but this scenario has not occurred yet.
Signed-off-by: Wang ShaoBo bobo.shaobowang@huawei.com Reviewed-by: Cheng Jian cj.chengjian@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/mpam/mpam_resctrl.c | 283 ++++++++++++++++---------- 1 file changed, 178 insertions(+), 105 deletions(-)
diff --git a/arch/arm64/kernel/mpam/mpam_resctrl.c b/arch/arm64/kernel/mpam/mpam_resctrl.c index 80f402476abbf..51cdefebaeba8 100644 --- a/arch/arm64/kernel/mpam/mpam_resctrl.c +++ b/arch/arm64/kernel/mpam/mpam_resctrl.c @@ -637,23 +637,24 @@ int closid_bitmap_init(void) * @rows: Number of bits for remap_body[:] bitmap * @clos: Number of bitmaps * @nr_usage: Number rmid we have - * @stride: Step stride from transforming rmid to partid and pmg + * @step_size: Step size from traversing the point of matrix once + * @step_cnt: Indicates how many times to traverse(.e.g if cdp;step_cnt=2) * @remap_body: Storing bitmaps' entry and itself - * @remap_enabled: Does remap_body init done */ struct rmid_transform { u32 rows; u32 cols; u32 nr_usage; - int stride; + int step_size; + int step_cnt; unsigned long **remap_body; - bool remap_enabled; }; static struct rmid_transform rmid_remap_matrix; +DEFINE_STATIC_KEY_FALSE(rmid_remap_enable_key);
static u32 get_nr_rmids(void) { - if (!rmid_remap_matrix.remap_enabled) + if (!static_branch_likely(&rmid_remap_enable_key)) return 0;
return rmid_remap_matrix.nr_usage; @@ -692,9 +693,17 @@ static int set_rmid_remap_matrix(u32 rows, u32 cols) */ hw_alloc_times_validate(times, flag); rmid_remap_matrix.cols = rounddown(cols, times); - rmid_remap_matrix.stride = times; + rmid_remap_matrix.step_cnt = times; if (times > rmid_remap_matrix.cols) return -EINVAL; + /* + * if only pmg(Performance Monitor Group) + * work on the monitor, step_size must be + * set to maximum number of columns, + * otherwise set it to 1, such as kunpeng + * 920 does. + */ + rmid_remap_matrix.step_size = 1;
/* * first row of rmid remap matrix is used for indicating @@ -738,7 +747,8 @@ static int set_rmid_remap_matrix(u32 rows, u32 cols) 0, rmid_remap_matrix.rows); }
- rmid_remap_matrix.remap_enabled = 1; + /* make column entry of rmid matrix visible */ + static_branch_enable_cpuslocked(&rmid_remap_enable_key);
return 0; clean: @@ -753,6 +763,9 @@ static int set_rmid_remap_matrix(u32 rows, u32 cols) rmid_remap_matrix.remap_body = NULL; }
+ /* if recreation failed, cannot use rmid remap matrix */ + static_branch_disable_cpuslocked(&rmid_remap_enable_key); + return ret; }
@@ -766,37 +779,101 @@ static u32 probe_rmid_remap_matrix_rows(void) return (u32)mpam_sysprops_num_pmg(); }
-static inline unsigned long **__rmid_remap_bmp(int col) +static inline unsigned long **__rmid_remap_bmp(u32 col) { - if (!rmid_remap_matrix.remap_enabled) + if (!static_branch_likely(&rmid_remap_enable_key)) return NULL;
- if ((u32)col >= rmid_remap_matrix.cols) + if (col >= rmid_remap_matrix.cols) return NULL;
return rmid_remap_matrix.remap_body + col; }
-#define for_each_rmid_remap_bmp(bmp) \ - for (bmp = __rmid_remap_bmp(0); \ - bmp <= __rmid_remap_bmp(rmid_remap_matrix.cols - 1); \ - bmp++) - -#define for_each_valid_rmid_remap_bmp(bmp) \ - for_each_rmid_remap_bmp(bmp) \ - if (bmp && *bmp) - -#define STRIDE_CHK(stride) \ - (stride == rmid_remap_matrix.stride) - -#define STRIDE_INC_CHK(stride) \ - (++stride == rmid_remap_matrix.stride) +/* + * these macros defines how can we traverse rmid remap matrix, there are + * three scenarios: + * + * (1) step_size is default set to 1, if only PMG(NR_PMG=4) works, makes + * it equals to number of columns, step_cnt means how many times are + * allocated and released each time, at this time rmid remap matrix + * looks like: + * + * ^ + * | + * ------column------> + * + * RMID 0 1 2 3 (step_size=1) + * `---' + * `--> (step_cnt=2 if cdp enabled) + * + * RMID 0 1 2 3 (step_size=1) + * `-- + * `--> (step_cnt=1 if cdp disabled) + * + * (2) if PARTID(NR_PARTID=4) and PMG(NR_PMG=4) works together, at this + * time rmid remap matrix looks like: + * + * ------------row------------> + * | + * | RMID 0 1 2 3 (step_size=1) + * | `---' + * | `--> (step_cnt=2 if cdp enabled) + * | 4 5 6 7 + * | 8 9 10 11 + * v 12 13 14 15 + * + * (3) step_size not equal to 1, cross-line traversal, but this scenario + * did not happen yet. + */
-#define STRIDE_CHK_AND_WARN(stride) \ -do { \ - if (!STRIDE_CHK(stride)) \ - WARN_ON_ONCE("Unexpected stride\n"); \ -} while (0) +#define __xy_initialize(x, y, from) \ + (x = from, y = 0) +#define __xy_overflow(x, y) \ + (y >= rmid_remap_matrix.cols) +#define __x_forward(x) \ + (x = (x + 1) % rmid_remap_matrix.cols) +#define __y_forward(x, y) \ + (y += ((x) ? 0 : 1)) + +#define __step_xy_initialize(step, x, y, from) \ + (x = from, step = 1, y = 0) +#define __step_align(from) \ + (!(from % rmid_remap_matrix.step_size)) +#define __step_overflow(step) \ + (__xy_overflow(x, y) || \ + (step > rmid_remap_matrix.step_cnt)) +#define __step_x_forward(x) \ + __x_forward(x) +#define __step_forward(step, x) \ + (step += ((x % rmid_remap_matrix.step_size) ? 0 : 1)) +#define __step_y_forward(x, y) \ + __y_forward(x, y) + +#define for_each_rmid_transform_point_step_from(p_entry, step, x, y, from) \ + for (__step_xy_initialize(step, x, y, from), \ + (p_entry) = __rmid_remap_bmp((from)); \ + __step_align(from) && !__step_overflow(step); \ + __step_x_forward(x), \ + __step_forward(step, x), \ + __step_y_forward(x, y), \ + (p_entry) = __rmid_remap_bmp(x)) \ + if (unlikely(((p_entry) == NULL) || \ + (*p_entry) == NULL)) \ + WARN_ON_ONCE(1); \ + else + +#define for_each_rmid_transform_point_from(p_entry, x, y, from) \ + for (__xy_initialize(x, y, from), \ + (p_entry) = __rmid_remap_bmp((from)); \ + !__xy_overflow(x, y); \ + __x_forward(x), \ + __y_forward(x, y), \ + (p_entry) = __rmid_remap_bmp(x)) \ + if (unlikely(((p_entry) == NULL) || \ + (*p_entry) == NULL)) \ + WARN_ON_ONCE(1); \ + else
static void set_rmid_remap_bmp_occ(unsigned long *bmp) { @@ -836,6 +913,32 @@ static int is_rmid_remap_bmp_full(unsigned long *bmp) bitmap_full(bmp, rmid_remap_matrix.rows)); }
+static int rmid_remap_bmp_find_first_avail_partid(int partid) +{ + int x, y; + unsigned long **bmp; + + if (rmid_remap_matrix.step_size == + rmid_remap_matrix.cols) + return 0; + + bmp = __rmid_remap_bmp(partid); + if (bmp && !is_rmid_remap_bmp_occ(*bmp)) + return partid; + + for_each_rmid_transform_point_from(bmp, x, y, 0) { + /* + * do not waste partid resource, start + * from step_size aligned position. + */ + if (!is_rmid_remap_bmp_occ(*bmp) && + (x % rmid_remap_matrix.step_size) == 0) + return x; + } + + return -ENOSPC; +} + static int rmid_remap_bmp_alloc_pmg(unsigned long *bmp) { int pos; @@ -850,8 +953,7 @@ static int rmid_remap_bmp_alloc_pmg(unsigned long *bmp)
static int rmid_remap_matrix_init(void) { - int stride = 0; - int ret; + int x, y, step, ret; u32 cols, rows; unsigned long **bmp;
@@ -868,15 +970,11 @@ static int rmid_remap_matrix_init(void) * default rmid, otherwise drop partid = 0 and * partid = 1 for LxCACHE, LxDATA reservation. */ - for_each_valid_rmid_remap_bmp(bmp) { + for_each_rmid_transform_point_step_from(bmp, step, x, y, 0) { set_rmid_remap_bmp_occ(*bmp); - rmid_remap_bmp_bdr_clear(*bmp, 0); - if (STRIDE_INC_CHK(stride)) - break; + rmid_remap_bmp_alloc_pmg(*bmp); }
- STRIDE_CHK_AND_WARN(stride); - ret = rmid_mon_ptrs_init(rmid_remap_matrix.nr_usage); if (ret) goto out; @@ -921,70 +1019,59 @@ static int rmid_to_partid_pmg(int rmid, int *partid, int *pmg)
static int __rmid_alloc(int partid, int pmg) { - int stride = 0; - int partid_sel = 0; - int ret; - int rmid[2] = {-1, -1}; - unsigned long **cmp, **bmp; + int x, y, step, ret, rmid; + bool checkpmg = false; + unsigned long **bmp;
- if (partid >= 0) { - cmp = __rmid_remap_bmp(partid); - if (!cmp) { - ret = -EINVAL; - goto out; - } - for_each_valid_rmid_remap_bmp(bmp) { - if (bmp < cmp) - continue; - set_rmid_remap_bmp_occ(*bmp); - - if (pmg >= 0) { - if (is_rmid_remap_bmp_bdr_set(*bmp, pmg)) { - ret = -EEXIST; - goto out; - } - rmid_remap_bmp_bdr_clear(*bmp, pmg); - } else { - ret = rmid_remap_bmp_alloc_pmg(*bmp); - if (ret < 0) - goto out; - pmg = ret; - } + if (pmg >= 0) + checkpmg = true;
- rmid[stride] = to_rmid(partid + stride, pmg); - if (STRIDE_INC_CHK(stride)) - break; - } - } else { - for_each_valid_rmid_remap_bmp(bmp) { - partid_sel++; + /* traverse from first non-occupied and step_size aligned entry */ + ret = rmid_remap_bmp_find_first_avail_partid(partid); + if (ret < 0) + goto out; + partid = ret;
- if (is_rmid_remap_bmp_occ(*bmp)) - continue; - set_rmid_remap_bmp_occ(*bmp); + for_each_rmid_transform_point_step_from(bmp, step, x, y, partid) { + set_rmid_remap_bmp_occ(*bmp);
- ret = rmid_remap_bmp_alloc_pmg(*bmp); - if (ret < 0) + /* checking if the given pmg is available */ + if (checkpmg) { + /* + * it can only happened in step_size aligned + * position, so it does not exist pmgs cleared + * before. + */ + if (is_rmid_remap_bmp_bdr_set(*bmp, pmg + y)) { + ret = -EEXIST; goto out; - pmg = ret; - rmid[stride] = to_rmid(partid_sel - 1, pmg); - if (STRIDE_INC_CHK(stride)) - break; + } + rmid_remap_bmp_bdr_clear(*bmp, pmg + y); + continue; } + + /* alloc available pmg */ + ret = rmid_remap_bmp_alloc_pmg(*bmp); + if (ret < 0) + goto out; + /* always return first pmg */ + if (pmg < 0) + pmg = ret; }
- if (!STRIDE_CHK(stride)) { + rmid = to_rmid(partid, pmg); + if (!is_rmid_valid(rmid)) { ret = -ENOSPC; goto out; } - - ret = assoc_rmid_with_mon(rmid[0]); - if (ret) + ret = assoc_rmid_with_mon(rmid); + if (ret) { + rmid_free(rmid); goto out; + }
- return rmid[0]; + return rmid; out: - rmid_free(rmid[0]); return ret; }
@@ -995,32 +1082,18 @@ int rmid_alloc(int partid)
void rmid_free(int rmid) { - int stride = 0; - int partid, pmg; - unsigned long **bmp, **cmp; + int x, y, step, partid, pmg; + unsigned long **bmp;
if (rmid_to_partid_pmg(rmid, &partid, &pmg)) return;
- cmp = __rmid_remap_bmp(partid); - if (!cmp) - return; - - for_each_valid_rmid_remap_bmp(bmp) { - if (bmp < cmp) - continue; - - rmid_remap_bmp_bdr_set(*bmp, pmg); - + for_each_rmid_transform_point_step_from(bmp, step, x, y, partid) { + rmid_remap_bmp_bdr_set(*bmp, pmg + y); if (is_rmid_remap_bmp_full(*bmp)) unset_rmid_remap_bmp_occ(*bmp); - - if (STRIDE_INC_CHK(stride)) - break; }
- STRIDE_CHK_AND_WARN(stride); - deassoc_rmid_with_mon(rmid); }