kernel@openeuler.org
[PATCH openEuler-5.10] cpuidle: add cpuidle-haltpoll driver module parameter
by Zheng Zengkai, 19 Dec '22

From: Xiangyou Xie <xiexiangyou(a)huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I6404X?from=project-issue
CVE: NA

---------------------------

To ensure energy efficiency, haltpoll is disabled by default. But in
some performance-sensitive scenarios, haltpoll can be enabled as
follows:

  echo Y > /sys/module/cpuidle_haltpoll/parameters/force

Signed-off-by: Xiangyou Xie <xiexiangyou(a)huawei.com>
Reviewed-by: Keqian Zhu <zhukeqian1(a)huawei.com>
Reviewed-by: Qingqing Li <liqingqing3(a)huawei.com>
Reviewed-by: chenhui <judy.chenhui(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
 drivers/cpuidle/cpuidle-haltpoll.c   | 97 +++++++++++++++++++++++-----
 drivers/cpuidle/governors/haltpoll.c |  4 +-
 2 files changed, 82 insertions(+), 19 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c
index e22f7867609b..902af11bd0e0 100644
--- a/drivers/cpuidle/cpuidle-haltpoll.c
+++ b/drivers/cpuidle/cpuidle-haltpoll.c
@@ -18,9 +18,17 @@
 #include <linux/kvm_para.h>
 #include <linux/cpuidle_haltpoll.h>
 
-static bool force __read_mostly;
-module_param(force, bool, 0444);
-MODULE_PARM_DESC(force, "Load unconditionally");
+static bool force;
+MODULE_PARM_DESC(force, "bool, enable haltpoll driver");
+static int enable_haltpoll_driver(const char *val, const struct kernel_param *kp);
+static int register_haltpoll_driver(void);
+static void unregister_haltpoll_driver(void);
+
+static const struct kernel_param_ops enable_haltpoll_ops = {
+    .set = enable_haltpoll_driver,
+    .get = param_get_bool,
+};
+module_param_cb(force, &enable_haltpoll_ops, &force, 0644);
 
 static struct cpuidle_device __percpu *haltpoll_cpuidle_devices;
 static enum cpuhp_state haltpoll_hp_state;
@@ -36,6 +44,42 @@ static int default_enter_idle(struct cpuidle_device *dev,
     return index;
 }
 
+static int enable_haltpoll_driver(const char *val, const struct kernel_param *kp)
+{
+#ifdef CONFIG_ARM64
+    int ret;
+    bool do_enable;
+
+    if (!val)
+        return 0;
+
+    ret = strtobool(val, &do_enable);
+
+    if (ret || force == do_enable)
+        return ret;
+
+    if (do_enable) {
+        ret = register_haltpoll_driver();
+
+        if (!ret) {
+            pr_info("Enable haltpoll driver.\n");
+            force = 1;
+        } else {
+            pr_err("Fail to enable haltpoll driver.\n");
+        }
+    } else {
+        unregister_haltpoll_driver();
+        force = 0;
+        pr_info("Unregister haltpoll driver.\n");
+    }
+
+    return ret;
+#else
+    return -1;
+#endif
+}
+
 static struct cpuidle_driver haltpoll_driver = {
     .name = "haltpoll",
     .governor = "haltpoll",
@@ -84,22 +128,18 @@ static int haltpoll_cpu_offline(unsigned int cpu)
     return 0;
 }
 
-static void haltpoll_uninit(void)
-{
-    if (haltpoll_hp_state)
-        cpuhp_remove_state(haltpoll_hp_state);
-    cpuidle_unregister_driver(&haltpoll_driver);
-
-    free_percpu(haltpoll_cpuidle_devices);
-    haltpoll_cpuidle_devices = NULL;
-}
 
 static bool haltpoll_want(void)
 {
     return kvm_para_has_hint(KVM_HINTS_REALTIME);
 }
 
-static int __init haltpoll_init(void)
+static void haltpoll_uninit(void)
+{
+    unregister_haltpoll_driver();
+}
+
+static int register_haltpoll_driver(void)
 {
     int ret;
     struct cpuidle_driver *drv = &haltpoll_driver;
@@ -112,9 +152,6 @@ static int __init haltpoll_init(void)
 
     cpuidle_poll_state_init(drv);
 
-    if (!force && (!kvm_para_available() || !haltpoll_want()))
-        return -ENODEV;
-
     ret = cpuidle_register_driver(drv);
     if (ret < 0)
         return ret;
@@ -137,9 +174,35 @@ static int __init haltpoll_init(void)
     return ret;
 }
 
+static void unregister_haltpoll_driver(void)
+{
+    if (haltpoll_hp_state)
+        cpuhp_remove_state(haltpoll_hp_state);
+    cpuidle_unregister_driver(&haltpoll_driver);
+
+    free_percpu(haltpoll_cpuidle_devices);
+    haltpoll_cpuidle_devices = NULL;
+
+}
+
+static int __init haltpoll_init(void)
+{
+    int ret = 0;
+#ifdef CONFIG_X86
+    /* Do not load haltpoll if idle= is passed */
+    if (boot_option_idle_override != IDLE_NO_OVERRIDE)
+        return -ENODEV;
+#endif
+    if (force || (haltpoll_want() && kvm_para_available()))
+        ret = register_haltpoll_driver();
+
+    return ret;
+}
+
 static void __exit haltpoll_exit(void)
 {
-    haltpoll_uninit();
+    if (haltpoll_cpuidle_devices)
+        haltpoll_uninit();
 }
 
 module_init(haltpoll_init);
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
index 4e814683509e..415004793bf1 100644
--- a/drivers/cpuidle/governors/haltpoll.c
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -39,7 +39,7 @@ module_param(guest_halt_poll_grow_start, uint, 0644);
 static bool guest_halt_poll_allow_shrink __read_mostly = true;
 module_param(guest_halt_poll_allow_shrink, bool, 0644);
 
-static bool enable __read_mostly;
+static bool enable __read_mostly = true;
 module_param(enable, bool, 0444);
 MODULE_PARM_DESC(enable, "Load unconditionally");
@@ -144,7 +144,7 @@ static struct cpuidle_governor haltpoll_governor = {
 
 static int __init init_haltpoll(void)
 {
-    if (kvm_para_available() || enable)
+    if (enable)
        return cpuidle_register_governor(&haltpoll_governor);
 
     return 0;
-- 
2.20.1
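
The mechanism the patch relies on, replacing module_param() with module_param_cb() so that a write to /sys/module/.../parameters/ runs driver code, is the standard kernel_param_ops pattern. A minimal, self-contained module sketch of that pattern follows; the module and parameter names here are illustrative, not taken from the patch:

#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/string.h>

static bool demo_on;

/* Custom setter: runs on every write to the sysfs parameter file. */
static int demo_on_set(const char *val, const struct kernel_param *kp)
{
	bool want;
	int ret = strtobool(val, &want);

	if (ret)
		return ret;
	if (want == demo_on)
		return 0;	/* no state change requested */

	pr_info("demo: switching %s\n", want ? "on" : "off");
	demo_on = want;		/* a real driver would (un)register here */
	return 0;
}

static const struct kernel_param_ops demo_on_ops = {
	.set = demo_on_set,
	.get = param_get_bool,	/* default bool formatting for reads */
};
/* 0644: root may flip the switch at runtime, as with haltpoll's "force". */
module_param_cb(on, &demo_on_ops, &demo_on, 0644);
MODULE_PARM_DESC(on, "bool, demo runtime switch");

MODULE_LICENSE("GPL");

The design choice mirrored here is that the setter, not the reader, carries the side effect, so the parameter file doubles as a runtime enable/disable knob.
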
[PATCH openEuler-5.10 1/2] block: fix crash on cmpxchg for request_wrapper
by Zheng Zengkai, 19 Dec '22

From: Yu Kuai <yukuai3(a)huawei.com>

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I65K8D
CVE: NA

--------------------------------

Now that the address of request_wrapper is calculated as the address of
the request plus cmd_size, if cmd_size is not aligned to 8 bytes,
request_wrapper ends up unaligned as well, which crashes on arm64
because the assembly instruction casal requires its operand address to
be aligned to 8 bytes:

Internal error: Oops: 96000021 [#1] SMP
pc : blk_account_io_latency+0x54/0x134
Call trace:
 blk_account_io_latency+0x54/0x134
 blk_account_io_done+0x3c/0x4c
 __blk_mq_end_request+0x78/0x134
 scsi_end_request+0xcc/0x1f0
 scsi_io_completion+0x88/0x240
 scsi_finish_command+0x104/0x140
 scsi_softirq_done+0x90/0x180
 blk_mq_complete_request+0x5c/0x70
 scsi_mq_done+0x4c/0x100

Fix the problem by declaring request_wrapper as cacheline-aligned and
placing it before the request.

Fixes: 82327165da5c ("blk-mq: don't access request_wrapper if request is not allocated from block layer")
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Reviewed-by: Hou Tao <houtao1(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
 block/blk-flush.c | 8 +++++---
 block/blk-mq.c    | 2 +-
 block/blk-mq.h    | 9 +++------
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index 65753f781c20..093c581a2651 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -470,6 +470,7 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
                                               gfp_t flags)
 {
     struct blk_flush_queue *fq;
+    struct request_wrapper *wrapper;
     int rq_sz = sizeof(struct request) + sizeof(struct request_wrapper);
 
     fq = kzalloc_node(sizeof(*fq), flags, node);
@@ -479,10 +480,11 @@ struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size,
     spin_lock_init(&fq->mq_flush_lock);
 
     rq_sz = round_up(rq_sz + cmd_size, cache_line_size());
-    fq->flush_rq = kzalloc_node(rq_sz, flags, node);
-    if (!fq->flush_rq)
+    wrapper = kzalloc_node(rq_sz, flags, node);
+    if (!wrapper)
         goto fail_rq;
 
+    fq->flush_rq = (struct request *)(wrapper + 1);
     INIT_LIST_HEAD(&fq->flush_queue[0]);
     INIT_LIST_HEAD(&fq->flush_queue[1]);
     INIT_LIST_HEAD(&fq->flush_data_in_flight);
@@ -501,7 +503,7 @@ void blk_free_flush_queue(struct blk_flush_queue *fq)
     if (!fq)
         return;
 
-    kfree(fq->flush_rq);
+    kfree(request_to_wrapper(fq->flush_rq));
     kfree(fq);
 }
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1c4a4e197e65..ffabe9c3de31 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2642,7 +2642,7 @@ static int blk_mq_alloc_rqs(struct blk_mq_tag_set *set,
         to_do = min(entries_per_page, depth - i);
         left -= to_do * rq_size;
         for (j = 0; j < to_do; j++) {
-            struct request *rq = p;
+            struct request *rq = p + sizeof(struct request_wrapper);
 
             tags->static_rqs[i] = rq;
             if (blk_mq_init_request(set, rq, hctx_idx, node)) {
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 6254abe9c112..dcb2077e4db6 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -40,14 +40,11 @@ struct blk_mq_ctx {
 struct request_wrapper {
     /* Time that I/O was counted in part_get_stat_info(). */
     u64 stat_time_ns;
-};
+} ____cacheline_aligned_in_smp;
 
-static inline struct request_wrapper *request_to_wrapper(struct request *rq)
+static inline struct request_wrapper *request_to_wrapper(void *rq)
 {
-    unsigned long addr = (unsigned long)rq;
-
-    addr += sizeof(*rq) + rq->q->tag_set->cmd_size;
-    return (struct request_wrapper *)addr;
+    return rq - sizeof(struct request_wrapper);
 }
 
 void blk_mq_exit_queue(struct request_queue *q);
-- 
2.20.1
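
The layout trick the patch relies on, an aligned wrapper placed in front of the object so that `wrapper + 1` yields the object and `obj - sizeof(wrapper)` recovers the wrapper, can be illustrated in plain userspace C. Names are illustrative, and `alignas(64)` stands in for ____cacheline_aligned_in_smp:

#include <stdio.h>
#include <stdlib.h>
#include <stdalign.h>
#include <stdint.h>

/* Stand-ins for struct request_wrapper / struct request. */
struct wrapper { alignas(64) uint64_t stat_time_ns; };
struct payload { char cmd[13]; };	/* deliberately odd size */

static struct wrapper *to_wrapper(void *obj)
{
	/* Mirror of request_to_wrapper(): step back over the prefix. */
	return (struct wrapper *)((char *)obj - sizeof(struct wrapper));
}

int main(void)
{
	/* One allocation: wrapper first, payload immediately after it. */
	struct wrapper *w = aligned_alloc(64, 2 * sizeof(struct wrapper));
	struct payload *p = (struct payload *)(w + 1);

	/*
	 * The wrapper keeps its alignment no matter how odd
	 * sizeof(struct payload) or cmd_size is, so atomic updates of
	 * stat_time_ns stay legal; that is exactly the property the
	 * arm64 casal crash depended on.
	 */
	printf("wrapper aligned: %d\n",
	       (int)(((uintptr_t)to_wrapper(p) % 64) == 0));
	free(w);
	return 0;
}

Placing the wrapper in front rather than behind means its offset no longer depends on the driver-controlled cmd_size, which is why the unaligned-address crash disappears.
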
[PATCH openEuler-5.10 1/7] Bluetooth: L2CAP: Fix build errors in some archs
by Zheng Zengkai, 19 Dec '22

From: Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>

stable inclusion
from stable-v5.10.141
commit 38267d266336a7fb9eae9be23567a44776c6e4ca
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I64WCC
CVE: CVE-2022-20566
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…

-------------------------------

commit b840304fb46cdf7012722f456bce06f151b3e81b upstream.

This attempts to fix the following errors:

In function 'memcmp',
    inlined from 'bacmp' at ./include/net/bluetooth/bluetooth.h:347:9,
    inlined from 'l2cap_global_chan_by_psm' at
    net/bluetooth/l2cap_core.c:2003:15:
./include/linux/fortify-string.h:44:33: error: '__builtin_memcmp'
specified bound 6 exceeds source size 0 [-Werror=stringop-overread]
   44 | #define __underlying_memcmp __builtin_memcmp
      |                             ^
./include/linux/fortify-string.h:420:16: note: in expansion of macro
'__underlying_memcmp'
  420 |  return __underlying_memcmp(p, q, size);
      |         ^~~~~~~~~~~~~~~~~~~

In function 'memcmp',
    inlined from 'bacmp' at ./include/net/bluetooth/bluetooth.h:347:9,
    inlined from 'l2cap_global_chan_by_psm' at
    net/bluetooth/l2cap_core.c:2004:15:
./include/linux/fortify-string.h:44:33: error: '__builtin_memcmp'
specified bound 6 exceeds source size 0 [-Werror=stringop-overread]
   44 | #define __underlying_memcmp __builtin_memcmp
      |                             ^
./include/linux/fortify-string.h:420:16: note: in expansion of macro
'__underlying_memcmp'
  420 |  return __underlying_memcmp(p, q, size);
      |         ^~~~~~~~~~~~~~~~~~~

Fixes: 332f1795ca20 ("Bluetooth: L2CAP: Fix l2cap_global_chan_by_psm regression")
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz(a)intel.com>
Cc: Sudip Mukherjee <sudipm.mukherjee(a)gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Lu Jialin <lujialin4(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
 net/bluetooth/l2cap_core.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 70bfd9e8913e..f78ad8f536f7 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -1988,11 +1988,11 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
         src_match = !bacmp(&c->src, src);
         dst_match = !bacmp(&c->dst, dst);
         if (src_match && dst_match) {
-            c = l2cap_chan_hold_unless_zero(c);
-            if (c) {
-                read_unlock(&chan_list_lock);
-                return c;
-            }
+            if (!l2cap_chan_hold_unless_zero(c))
+                continue;
+
+            read_unlock(&chan_list_lock);
+            return c;
         }
 
         /* Closest match */
-- 
2.20.1
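
Beyond silencing the fortify warning, the restructured loop preserves the "take a reference only if the object is still live" contract without ever overwriting the loop variable with NULL. A userspace analogue of that lookup pattern, using C11 atomics (all names illustrative, not the L2CAP API):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct chan { atomic_int refs; int psm; };

/* Same contract as l2cap_chan_hold_unless_zero(): grab a reference
 * only while the count is still above zero. */
static bool hold_unless_zero(struct chan *c)
{
	int old = atomic_load(&c->refs);

	while (old > 0)
		if (atomic_compare_exchange_weak(&c->refs, &old, old + 1))
			return true;
	return false;	/* already dying; caller must skip it */
}

static struct chan *lookup(struct chan *tbl, size_t n, int psm)
{
	for (size_t i = 0; i < n; i++) {
		if (tbl[i].psm != psm)
			continue;
		/* As in the patched code: skip dead entries with
		 * "continue" instead of clobbering the pointer. */
		if (!hold_unless_zero(&tbl[i]))
			continue;
		return &tbl[i];
	}
	return NULL;
}

int main(void)
{
	struct chan tbl[2] = { { 0, 5 }, { 1, 5 } };	/* first entry is dead */
	struct chan *c = lookup(tbl, 2, 5);

	printf("got live chan with refs=%d\n", c ? atomic_load(&c->refs) : -1);
	return 0;
}
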
[PATCH openEuler-1.0-LTS] hv_netvsc: Add check for kvmalloc_array
by Yongqiang Liu, 19 Dec '22

From: Jiasheng Jiang <jiasheng(a)iscas.ac.cn>

mainline inclusion
from mainline-v5.17
commit 886e44c9298a6b428ae046e2fa092ca52e822e6a
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6694H
CVE: CVE-2022-3107

--------------------------------

Since kvmalloc_array() can fail, check its return value and bail out
early in order to avoid dereferencing a NULL pointer.

Fixes: 6ae746711263 ("hv_netvsc: Add per-cpu ethtool stats for netvsc")
Signed-off-by: Jiasheng Jiang <jiasheng(a)iscas.ac.cn>
Link: https://lore.kernel.org/r/20220314020125.2365084-1-jiasheng@iscas.ac.cn
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Baisong Zhong <zhongbaisong(a)huawei.com>
Reviewed-by: Liu Jian <liujian56(a)huawei.com>
Reviewed-by: Yue Haibing <yuehaibing(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
 drivers/net/hyperv/netvsc_drv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1f9f7fcdb0eb..0de42e8ec2bb 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -1453,6 +1453,9 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
     pcpu_sum = kvmalloc_array(num_possible_cpus(),
                               sizeof(struct netvsc_ethtool_pcpu_stats),
                               GFP_KERNEL);
+    if (!pcpu_sum)
+        return;
+
     netvsc_get_pcpu_stats(dev, pcpu_sum);
     for_each_present_cpu(cpu) {
         struct netvsc_ethtool_pcpu_stats *this_sum = &pcpu_sum[cpu];
-- 
2.25.1
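
The shape of the fix, a void stats routine that simply refuses to continue when its scratch allocation fails, in a small userspace analogue (names illustrative):

#include <stdio.h>
#include <stdlib.h>

struct pcpu_stats { unsigned long tx, rx; };

static void get_stats(unsigned int ncpus, unsigned long *out)
{
	struct pcpu_stats *sum = calloc(ncpus, sizeof(*sum));

	/* Mirror of the fix: kvmalloc_array()/calloc() can fail, and a
	 * void function can only bail out before touching the buffer. */
	if (!sum)
		return;

	for (unsigned int cpu = 0; cpu < ncpus; cpu++) {
		sum[cpu].tx = cpu;	/* stand-in for per-cpu totals */
		out[cpu] = sum[cpu].tx + sum[cpu].rx;
	}
	free(sum);
}

int main(void)
{
	unsigned long out[4] = { 0 };

	get_stats(4, out);
	printf("cpu3: %lu\n", out[3]);
	return 0;
}
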
[PATCH openEuler-5.10 01/10] sched/qos: Don't unthrottle cfs_rq when cfs_rq is throttled by qos
by Zheng Zengkai, 16 Dec '22

From: Zhang Qiao <zhangqiao22(a)huawei.com>

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I64OUS
CVE: NA

-------------------------------

When a cfs_rq is throttled by qos, cfs_rq->throttled is marked as 1, so
cfs bandwidth control may unthrottle this cfs_rq by mistake, which
causes a list_del_valid warning. Add the macro QOS_THROTTLED (= 2):
when a cfs_rq is throttled by qos, mark cfs_rq->throttled as
QOS_THROTTLED, and check the value of cfs_rq->throttled before
unthrottling a cfs_rq.

Signed-off-by: Zhang Qiao <zhangqiao22(a)huawei.com>
Reviewed-by: Chen Hui <judy.chenhui(a)huawei.com>
Reviewed-by: zheng zucheng <zhengzucheng(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
 kernel/sched/fair.c | 102 +++++++++++++++++++++++++++++---------------
 1 file changed, 68 insertions(+), 34 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 654e964b5c31..d3c4b945c019 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -124,6 +124,13 @@ int __weak arch_asym_cpu_priority(int cpu)
 #endif
 
 #ifdef CONFIG_QOS_SCHED
+
+/*
+ * To distinguish cfs bw, use QOS_THROTTLED mark cfs_rq->throttled
+ * when qos throttled(and cfs bw throttle mark cfs_rq->throttled as 1).
+ */
+#define QOS_THROTTLED	2
+
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct list_head, qos_throttled_cfs_rq);
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct hrtimer, qos_overload_timer);
 static DEFINE_PER_CPU(int, qos_cpu_overload);
@@ -4932,6 +4939,14 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
     se = cfs_rq->tg->se[cpu_of(rq)];
 
+#ifdef CONFIG_QOS_SCHED
+    /*
+     * if this cfs_rq throttled by qos, not need unthrottle it.
+     */
+    if (cfs_rq->throttled == QOS_THROTTLED)
+        return;
+#endif
+
     cfs_rq->throttled = 0;
 
     update_rq_clock(rq);
@@ -7278,26 +7293,6 @@ static inline bool is_offline_task(struct task_struct *p)
 
 static void start_qos_hrtimer(int cpu);
 
-static int qos_tg_unthrottle_up(struct task_group *tg, void *data)
-{
-    struct rq *rq = data;
-    struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
-
-    cfs_rq->throttle_count--;
-
-    return 0;
-}
-
-static int qos_tg_throttle_down(struct task_group *tg, void *data)
-{
-    struct rq *rq = data;
-    struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
-
-    cfs_rq->throttle_count++;
-
-    return 0;
-}
-
 static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 {
     struct rq *rq = rq_of(cfs_rq);
@@ -7309,7 +7304,7 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
     /* freeze hierarchy runnable averages while throttled */
     rcu_read_lock();
-    walk_tg_tree_from(cfs_rq->tg, qos_tg_throttle_down, tg_nop, (void *)rq);
+    walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
     rcu_read_unlock();
 
     task_delta = cfs_rq->h_nr_running;
@@ -7320,8 +7315,13 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
         if (!se->on_rq)
             break;
 
-        if (dequeue)
+        if (dequeue) {
             dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
+        } else {
+            update_load_avg(qcfs_rq, se, 0);
+            se_update_runnable(se);
+        }
+
         qcfs_rq->h_nr_running -= task_delta;
         qcfs_rq->idle_h_nr_running -= idle_task_delta;
 
@@ -7339,7 +7339,7 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
     if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq))))
         start_qos_hrtimer(cpu_of(rq));
 
-    cfs_rq->throttled = 1;
+    cfs_rq->throttled = QOS_THROTTLED;
 
     list_add(&cfs_rq->qos_throttled_list,
              &per_cpu(qos_throttled_cfs_rq, cpu_of(rq)));
@@ -7349,12 +7349,14 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 {
     struct rq *rq = rq_of(cfs_rq);
     struct sched_entity *se;
-    int enqueue = 1;
     unsigned int prev_nr = cfs_rq->h_nr_running;
     long task_delta, idle_task_delta;
 
     se = cfs_rq->tg->se[cpu_of(rq)];
 
+    if (cfs_rq->throttled != QOS_THROTTLED)
+        return;
+
     cfs_rq->throttled = 0;
 
     update_rq_clock(rq);
@@ -7362,7 +7364,7 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
     /* update hierarchical throttle state */
     rcu_read_lock();
-    walk_tg_tree_from(cfs_rq->tg, tg_nop, qos_tg_unthrottle_up, (void *)rq);
+    walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
     rcu_read_unlock();
 
     if (!cfs_rq->load.weight)
@@ -7372,26 +7374,58 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
     idle_task_delta = cfs_rq->idle_h_nr_running;
     for_each_sched_entity(se) {
         if (se->on_rq)
-            enqueue = 0;
+            break;
 
         cfs_rq = cfs_rq_of(se);
-        if (enqueue)
-            enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+        enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+
         cfs_rq->h_nr_running += task_delta;
         cfs_rq->idle_h_nr_running += idle_task_delta;
 
         if (cfs_rq_throttled(cfs_rq))
-            break;
+            goto unthrottle_throttle;
     }
 
-    assert_list_leaf_cfs_rq(rq);
+    for_each_sched_entity(se) {
+        cfs_rq = cfs_rq_of(se);
 
-    if (!se) {
-        add_nr_running(rq, task_delta);
-        if (prev_nr < 2 && prev_nr + task_delta >= 2)
-            overload_set(rq);
+        update_load_avg(cfs_rq, se, UPDATE_TG);
+        se_update_runnable(se);
+
+        cfs_rq->h_nr_running += task_delta;
+        cfs_rq->idle_h_nr_running += idle_task_delta;
+
+        /* end evaluation on encountering a throttled cfs_rq */
+        if (cfs_rq_throttled(cfs_rq))
+            goto unthrottle_throttle;
+
+        /*
+         * One parent has been throttled and cfs_rq removed from the
+         * list. Add it back to not break the leaf list.
+         */
+        if (throttled_hierarchy(cfs_rq))
+            list_add_leaf_cfs_rq(cfs_rq);
+    }
+
+    add_nr_running(rq, task_delta);
+    if (prev_nr < 2 && prev_nr + task_delta >= 2)
+        overload_set(rq);
+
+unthrottle_throttle:
+    /*
+     * The cfs_rq_throttled() breaks in the above iteration can result in
+     * incomplete leaf list maintenance, resulting in triggering the
+     * assertion below.
+     */
+    for_each_sched_entity(se) {
+        cfs_rq = cfs_rq_of(se);
+
+        if (list_add_leaf_cfs_rq(cfs_rq))
+            break;
     }
 
+    assert_list_leaf_cfs_rq(rq);
+
     /* Determine whether we need to wake up potentially idle CPU: */
     if (rq->curr == rq->idle && rq->cfs.nr_running)
         resched_curr(rq);
-- 
2.20.1
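
The core idea of the fix is that two independent throttling mechanisms share one field, disambiguated by distinct non-zero values, so each unthrottle path only undoes its own throttle. A compact userspace sketch of that ownership check (names illustrative, not the scheduler API):

#include <stdio.h>

/* Mirrors the patch: cfs bandwidth uses 1, qos uses QOS_THROTTLED (2). */
enum { NOT_THROTTLED = 0, BW_THROTTLED = 1, QOS_THROTTLED = 2 };

struct cfs_rq { int throttled; };

static void unthrottle_bw(struct cfs_rq *rq)
{
	/* The guard added by the patch: cfs bandwidth must not undo a
	 * qos throttle, or the qos throttled-list gets corrupted. */
	if (rq->throttled == QOS_THROTTLED)
		return;
	rq->throttled = NOT_THROTTLED;
}

static void unthrottle_qos(struct cfs_rq *rq)
{
	if (rq->throttled != QOS_THROTTLED)
		return;	/* not ours to unthrottle */
	rq->throttled = NOT_THROTTLED;
}

int main(void)
{
	struct cfs_rq rq = { .throttled = QOS_THROTTLED };

	unthrottle_bw(&rq);	/* ignored: wrong owner */
	printf("after bw unthrottle: %d\n", rq.throttled);	/* still 2 */
	unthrottle_qos(&rq);
	printf("after qos unthrottle: %d\n", rq.throttled);	/* 0 */
	return 0;
}

Encoding the throttle reason in the value, rather than adding a second flag field, keeps every existing "is it throttled" truth test working unchanged.
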
[PATCH openEuler-5.10-LTS 1/7] efi/libstub: Fix build error in efi-stub.c for riscv
by Zheng Zengkai, 16 Dec '22

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I66G0M

-----------------------------------------------

Commit 36a1a8916de5 ("efi/loongarch: Add efistub booting support")
introduced the following macro definitions for riscv in file
drivers/firmware/efi/libstub/efi-stub.c:

 # define EFI_RT_VIRTUAL_LIMIT DEFAULT_MAP_WINDOW_64
 # define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_MIN
 # define EFI_RT_VIRTUAL_LIMIT TASK_SIZE

but the related patches have not been merged for riscv, such as
e8a62cc26ddf ("riscv: Implement sv48 support") and 01abdfeac81b ("riscv:
compat: Support TASK_SIZE for compat mode"), causing a build error on
riscv:

build failed: riscv, allmodconfig
<line too long ...> n this function); did you mean 'TASK_SIZE_MAX'?
 # define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_MIN
                               ^
drivers/firmware/efi/libstub/efi-stub.c:289:31: note: in expansion of macro 'EFI_RT_VIRTUAL_LIMIT'
  static const u64 headroom = EFI_RT_VIRTUAL_LIMIT -
                              ^~~~~~~~~~~~~~~~~~~~
drivers/firmware/efi/libstub/efi-stub.c:44:32: note: each undeclared identifier is reported only once for each function it appears in
 # define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_MIN
                               ^
drivers/firmware/efi/libstub/efi-stub.c:289:31: note: in expansion of macro 'EFI_RT_VIRTUAL_LIMIT'
  static const u64 headroom = EFI_RT_VIRTUAL_LIMIT -
                              ^~~~~~~~~~~~~~~~~~~~
make[4]: *** [drivers/firmware/efi/libstub/efi-stub.o] Error 1
make[3]: *** [drivers/firmware/efi/libstub] Error 2
make[2]: *** [drivers/firmware/efi] Error 2
make[1]: *** [drivers/firmware] Error 2
make[1]: *** Waiting for unfinished jobs....
make: *** [drivers] Error 2

Fix it by removing the CONFIG_RISCV condition from the
EFI_RT_VIRTUAL_LIMIT definition.

Fixes: 36a1a8916de5 ("efi/loongarch: Add efistub booting support")
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
Reviewed-by: Wei Li <liwei391(a)huawei.com>
---
 drivers/firmware/efi/libstub/efi-stub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c
index 8e0f64b3db69..96129f0fc60e 100644
--- a/drivers/firmware/efi/libstub/efi-stub.c
+++ b/drivers/firmware/efi/libstub/efi-stub.c
@@ -40,7 +40,7 @@
 
 #ifdef CONFIG_ARM64
 # define EFI_RT_VIRTUAL_LIMIT	DEFAULT_MAP_WINDOW_64
-#elif defined(CONFIG_RISCV) || defined(CONFIG_LOONGARCH)
+#elif defined(CONFIG_LOONGARCH)
 # define EFI_RT_VIRTUAL_LIMIT	TASK_SIZE_MIN
 #else
 # define EFI_RT_VIRTUAL_LIMIT	TASK_SIZE
-- 
2.20.1
[PATCH openEuler-1.0-LTS 1/3] xen/netback: Ensure protocol headers don't fall in the non-linear area
by Yongqiang Liu, 16 Dec '22

From: Ross Lagerwall <ross.lagerwall(a)citrix.com>

mainline inclusion
from mainline-v6.1
commit ad7f402ae4f466647c3a669b8a6f3e5d4271c84a
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I651EB
CVE: CVE-2022-3643

--------------------------------

In some cases, the frontend may send a packet where the protocol headers
are spread across multiple slots. This would result in netback creating
an skb where the protocol headers spill over into the non-linear area.
Some drivers and NICs don't handle this properly, resulting in an
interface reset or worse.

This issue was introduced by the removal of an unconditional skb pull in
the tx path to improve performance.

Fix this without reintroducing the pull by setting up grant copy ops for
as many slots as needed to reach the XEN_NETBACK_TX_COPY_LEN size.
Adjust the rest of the code to handle multiple copy operations per skb.

This is XSA-423 / CVE-2022-3643.

Fixes: 7e5d7753956b ("xen-netback: remove unconditional __pskb_pull_tail() in guest Tx path")
Signed-off-by: Ross Lagerwall <ross.lagerwall(a)citrix.com>
Reviewed-by: Paul Durrant <paul(a)xen.org>
Signed-off-by: Juergen Gross <jgross(a)suse.com>
Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com>
Reviewed-by: Yue Haibing <yuehaibing(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
 drivers/net/xen-netback/netback.c | 223 ++++++++++++++++--------------
 1 file changed, 123 insertions(+), 100 deletions(-)

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 41bdfb684d46..1036d903038c 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -323,10 +323,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue,
 
 struct xenvif_tx_cb {
-    u16 pending_idx;
+    u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1];
+    u8 copy_count;
 };
 
 #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
+#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i])
+#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count)
 
 static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue,
                                            u16 pending_idx,
@@ -361,31 +364,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
     return skb;
 }
 
-static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue,
-                                                        struct sk_buff *skb,
-                                                        struct xen_netif_tx_request *txp,
-                                                        struct gnttab_map_grant_ref *gop,
-                                                        unsigned int frag_overflow,
-                                                        struct sk_buff *nskb)
+static void xenvif_get_requests(struct xenvif_queue *queue,
+                                struct sk_buff *skb,
+                                struct xen_netif_tx_request *first,
+                                struct xen_netif_tx_request *txfrags,
+                                unsigned *copy_ops,
+                                unsigned *map_ops,
+                                unsigned int frag_overflow,
+                                struct sk_buff *nskb,
+                                unsigned int extra_count,
+                                unsigned int data_len)
 {
     struct skb_shared_info *shinfo = skb_shinfo(skb);
     skb_frag_t *frags = shinfo->frags;
-    u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
-    int start;
+    u16 pending_idx;
     pending_ring_idx_t index;
     unsigned int nr_slots;
+    struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops;
+    struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
+    struct xen_netif_tx_request *txp = first;
+
+    nr_slots = shinfo->nr_frags + 1;
+
+    copy_count(skb) = 0;
+
+    /* Create copy ops for exactly data_len bytes into the skb head. */
+    __skb_put(skb, data_len);
+    while (data_len > 0) {
+        int amount = data_len > txp->size ? txp->size : data_len;
+
+        cop->source.u.ref = txp->gref;
+        cop->source.domid = queue->vif->domid;
+        cop->source.offset = txp->offset;
+
+        cop->dest.domid = DOMID_SELF;
+        cop->dest.offset = (offset_in_page(skb->data +
+                                           skb_headlen(skb) -
+                                           data_len)) & ~XEN_PAGE_MASK;
+        cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb)
+                               - data_len);
+
+        cop->len = amount;
+        cop->flags = GNTCOPY_source_gref;
 
-    nr_slots = shinfo->nr_frags;
+        index = pending_index(queue->pending_cons);
+        pending_idx = queue->pending_ring[index];
+        callback_param(queue, pending_idx).ctx = NULL;
+        copy_pending_idx(skb, copy_count(skb)) = pending_idx;
+        copy_count(skb)++;
+
+        cop++;
+        data_len -= amount;
 
-    /* Skip first skb fragment if it is on same page as header fragment. */
-    start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
+        if (amount == txp->size) {
+            /* The copy op covered the full tx_request */
+
+            memcpy(&queue->pending_tx_info[pending_idx].req,
+                   txp, sizeof(*txp));
+            queue->pending_tx_info[pending_idx].extra_count =
+                (txp == first) ? extra_count : 0;
+
+            if (txp == first)
+                txp = txfrags;
+            else
+                txp++;
+            queue->pending_cons++;
+            nr_slots--;
+        } else {
+            /* The copy op partially covered the tx_request.
+             * The remainder will be mapped.
+             */
+            txp->offset += amount;
+            txp->size -= amount;
+        }
+    }
 
-    for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
-         shinfo->nr_frags++, txp++, gop++) {
+    for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
+         shinfo->nr_frags++, gop++) {
         index = pending_index(queue->pending_cons++);
         pending_idx = queue->pending_ring[index];
-        xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop);
+        xenvif_tx_create_map_op(queue, pending_idx, txp,
+                                txp == first ? extra_count : 0, gop);
         frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
+
+        if (txp == first)
+            txp = txfrags;
+        else
+            txp++;
     }
 
     if (frag_overflow) {
@@ -406,7 +471,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que
         skb_shinfo(skb)->frag_list = nskb;
     }
 
-    return gop;
+    (*copy_ops) = cop - queue->tx_copy_ops;
+    (*map_ops) = gop - queue->tx_map_ops;
 }
 
 static inline void xenvif_grant_handle_set(struct xenvif_queue *queue,
@@ -442,7 +508,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
                                struct gnttab_copy **gopp_copy)
 {
     struct gnttab_map_grant_ref *gop_map = *gopp_map;
-    u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+    u16 pending_idx;
     /* This always points to the shinfo of the skb being checked, which
      * could be either the first or the one on the frag_list
      */
@@ -453,24 +519,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
     struct skb_shared_info *first_shinfo = NULL;
     int nr_frags = shinfo->nr_frags;
     const bool sharedslot = nr_frags &&
-                frag_get_pending_idx(&shinfo->frags[0]) == pending_idx;
+                frag_get_pending_idx(&shinfo->frags[0]) ==
+                    copy_pending_idx(skb, copy_count(skb) - 1);
     int i, err;
 
-    /* Check status of header. */
-    err = (*gopp_copy)->status;
-    if (unlikely(err)) {
-        if (net_ratelimit())
-            netdev_dbg(queue->vif->dev,
-                       "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
-                       (*gopp_copy)->status,
-                       pending_idx,
-                       (*gopp_copy)->source.u.ref);
-        /* The first frag might still have this slot mapped */
-        if (!sharedslot)
-            xenvif_idx_release(queue, pending_idx,
-                               XEN_NETIF_RSP_ERROR);
+    for (i = 0; i < copy_count(skb); i++) {
+        int newerr;
+
+        /* Check status of header. */
+        pending_idx = copy_pending_idx(skb, i);
+
+        newerr = (*gopp_copy)->status;
+        if (likely(!newerr)) {
+            /* The first frag might still have this slot mapped */
+            if (i < copy_count(skb) - 1 || !sharedslot)
+                xenvif_idx_release(queue, pending_idx,
+                                   XEN_NETIF_RSP_OKAY);
+        } else {
+            err = newerr;
+            if (net_ratelimit())
+                netdev_dbg(queue->vif->dev,
+                           "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n",
+                           (*gopp_copy)->status,
+                           pending_idx,
+                           (*gopp_copy)->source.u.ref);
+            /* The first frag might still have this slot mapped */
+            if (i < copy_count(skb) - 1 || !sharedslot)
+                xenvif_idx_release(queue, pending_idx,
+                                   XEN_NETIF_RSP_ERROR);
+        }
+        (*gopp_copy)++;
     }
-    (*gopp_copy)++;
 
 check_frags:
     for (i = 0; i < nr_frags; i++, gop_map++) {
@@ -517,14 +596,6 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue,
         if (err)
             continue;
 
-        /* First error: if the header haven't shared a slot with the
-         * first frag, release it as well.
-         */
-        if (!sharedslot)
-            xenvif_idx_release(queue,
-                               XENVIF_TX_CB(skb)->pending_idx,
-                               XEN_NETIF_RSP_OKAY);
-
         /* Invalidate preceding fragments of this skb. */
         for (j = 0; j < i; j++) {
             pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
@@ -794,7 +865,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
                                  unsigned *copy_ops,
                                  unsigned *map_ops)
 {
-    struct gnttab_map_grant_ref *gop = queue->tx_map_ops;
     struct sk_buff *skb, *nskb;
     int ret;
     unsigned int frag_overflow;
@@ -876,8 +946,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
             continue;
         }
 
+        data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ?
+            XEN_NETBACK_TX_COPY_LEN : txreq.size;
+
         ret = xenvif_count_requests(queue, &txreq, extra_count,
                                     txfrags, work_to_do);
+
         if (unlikely(ret < 0))
             break;
 
@@ -903,9 +977,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
         index = pending_index(queue->pending_cons);
         pending_idx = queue->pending_ring[index];
 
-        data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN &&
-                    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
-            XEN_NETBACK_TX_COPY_LEN : txreq.size;
+        if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size)
+            data_len = txreq.size;
 
         skb = xenvif_alloc_skb(data_len);
         if (unlikely(skb == NULL)) {
@@ -916,8 +989,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
         }
 
         skb_shinfo(skb)->nr_frags = ret;
-        if (data_len < txreq.size)
-            skb_shinfo(skb)->nr_frags++;
         /* At this point shinfo->nr_frags is in fact the number of
          * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
         */
@@ -979,54 +1050,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
                                  type);
         }
 
-        XENVIF_TX_CB(skb)->pending_idx = pending_idx;
-
-        __skb_put(skb, data_len);
-        queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref;
-        queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid;
-        queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset;
-
-        queue->tx_copy_ops[*copy_ops].dest.u.gmfn =
-            virt_to_gfn(skb->data);
-        queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF;
-        queue->tx_copy_ops[*copy_ops].dest.offset =
-            offset_in_page(skb->data) & ~XEN_PAGE_MASK;
-
-        queue->tx_copy_ops[*copy_ops].len = data_len;
-        queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref;
-
-        (*copy_ops)++;
-
-        if (data_len < txreq.size) {
-            frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
-                                 pending_idx);
-            xenvif_tx_create_map_op(queue, pending_idx, &txreq,
-                                    extra_count, gop);
-            gop++;
-        } else {
-            frag_set_pending_idx(&skb_shinfo(skb)->frags[0],
-                                 INVALID_PENDING_IDX);
-            memcpy(&queue->pending_tx_info[pending_idx].req,
-                   &txreq, sizeof(txreq));
-            queue->pending_tx_info[pending_idx].extra_count =
-                extra_count;
-        }
-
-        queue->pending_cons++;
-
-        gop = xenvif_get_requests(queue, skb, txfrags, gop,
-                                  frag_overflow, nskb);
+        xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops,
+                            map_ops, frag_overflow, nskb, extra_count,
+                            data_len);
 
         __skb_queue_tail(&queue->tx_queue, skb);
 
         queue->tx.req_cons = idx;
 
-        if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) ||
+        if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) ||
             (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops)))
             break;
     }
 
-    (*map_ops) = gop - queue->tx_map_ops;
     return;
 }
 
@@ -1105,9 +1141,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
     while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) {
         struct xen_netif_tx_request *txp;
         u16 pending_idx;
-        unsigned data_len;
 
-        pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+        pending_idx = copy_pending_idx(skb, 0);
         txp = &queue->pending_tx_info[pending_idx].req;
 
         /* Check the remap error code. */
@@ -1126,18 +1161,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
             continue;
         }
 
-        data_len = skb->len;
-        callback_param(queue, pending_idx).ctx = NULL;
-        if (data_len < txp->size) {
-            /* Append the packet payload as a fragment. */
-            txp->offset += data_len;
-            txp->size -= data_len;
-        } else {
-            /* Schedule a response immediately. */
-            xenvif_idx_release(queue, pending_idx,
-                               XEN_NETIF_RSP_OKAY);
-        }
-
         if (txp->flags & XEN_NETTXF_csum_blank)
             skb->ip_summed = CHECKSUM_PARTIAL;
         else if (txp->flags & XEN_NETTXF_data_validated)
@@ -1314,7 +1337,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
 /* Called after netfront has transmitted */
 int xenvif_tx_action(struct xenvif_queue *queue, int budget)
 {
-    unsigned nr_mops, nr_cops = 0;
+    unsigned nr_mops = 0, nr_cops = 0;
     int work_done, ret;
 
     if (unlikely(!tx_work_todo(queue)))
-- 
2.25.1
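
The slot-consumption arithmetic at the heart of the fix, keep creating copy ops until data_len header bytes are covered, consuming whole slots where possible and leaving a partially used slot (offset advanced, size shrunk) for the mapping path, can be sketched in isolation. Types and names are illustrative, not the Xen grant API:

#include <stdio.h>

struct slot { unsigned int offset, size; };

/* Consume bytes for "grant copy" ops until data_len is covered.
 * Returns the index of the first slot the mapping path should use;
 * a partially consumed slot is left with offset/size adjusted,
 * mirroring "txp->offset += amount; txp->size -= amount;". */
static unsigned int plan_copies(struct slot *s, unsigned int nslots,
				unsigned int data_len)
{
	unsigned int i = 0;

	while (data_len > 0 && i < nslots) {
		unsigned int amount =
			data_len > s[i].size ? s[i].size : data_len;

		printf("copy %u bytes from slot %u\n", amount, i);
		data_len -= amount;
		if (amount == s[i].size) {
			i++;			/* slot fully covered */
		} else {
			s[i].offset += amount;	/* remainder gets mapped */
			s[i].size -= amount;
		}
	}
	return i;
}

int main(void)
{
	/* A 128-byte header spread over three small slots. */
	struct slot slots[3] = { { 0, 40 }, { 0, 40 }, { 0, 1000 } };
	unsigned int first_mapped = plan_copies(slots, 3, 128);

	printf("map from slot %u at offset %u\n",
	       first_mapped, slots[first_mapped].offset);
	return 0;
}

This guarantees the skb's linear area always holds at least the first XEN_NETBACK_TX_COPY_LEN bytes, so protocol headers can no longer land in the non-linear fragments, without the pull the old code removed for performance.
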
[PATCH openEuler-1.0-LTS] arm64: fix a concurrency issue in emulation_proc_handler()
by Yongqiang Liu, 16 Dec '22

From: ruanjinjie <ruanjinjie(a)huawei.com>

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I65T0J
CVE: NA

-------------------------------

In emulation_proc_handler(), read and write operations are performed on
insn->current_mode. In the concurrent scenario, the mutex only protects
the write to insn->current_mode, not the read. Suppose there are two
concurrent tasks: task1 updates insn->current_mode to INSN_EMULATE in
the critical section, while the prev_mode of task2 still holds the old
value INSN_UNDEF of insn->current_mode. As a result, both tasks call
update_insn_emulation_mode() with prev_mode = INSN_UNDEF and
current_mode = INSN_EMULATE, then call register_emulation_hooks() twice,
resulting in a list_add double-add problem.

Call trace:
 __list_add_valid+0xd8/0xe4
 register_undef_hook+0x94/0x13c
 update_insn_emulation_mode+0xd0/0x12c
 emulation_proc_handler+0xd8/0xf4
 proc_sys_call_handler+0x140/0x250
 proc_sys_write+0x1c/0x2c
 new_sync_write+0xec/0x18c
 vfs_write+0x214/0x2ac
 ksys_write+0x70/0xfc
 __arm64_sys_write+0x24/0x30
 el0_svc_common.constprop.0+0x7c/0x1bc
 do_el0_svc+0x2c/0x94
 el0_svc+0x20/0x30
 el0_sync_handler+0xb0/0xb4
 el0_sync+0x160/0x180

Fixes: 08f3f0b2fe7e ("arm64: fix oops in concurrently setting insn_emulation sysctls")
Signed-off-by: ruanjinjie <ruanjinjie(a)huawei.com>
Reviewed-by: Zhang Jianhua <chris.zjh(a)huawei.com>
Reviewed-by: Liao Chang <liaochang1(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
 arch/arm64/kernel/armv8_deprecated.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index 88d3454f2848..07a0095956f1 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -211,10 +211,12 @@ static int emulation_proc_handler(struct ctl_table *table, int write,
                                   loff_t *ppos)
 {
     int ret = 0;
-    struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode);
-    enum insn_emulation_mode prev_mode = insn->current_mode;
+    struct insn_emulation *insn;
+    enum insn_emulation_mode prev_mode;
 
     mutex_lock(&insn_emulation_mutex);
+    insn = container_of(table->data, struct insn_emulation, current_mode);
+    prev_mode = insn->current_mode;
     ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
     if (ret || !write || prev_mode == insn->current_mode)
-- 
2.25.1
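
The race and its fix reduce to a classic pattern: a read-compare-update of shared state must take its snapshot inside the same critical section as the update. A runnable pthread sketch of the before/after behavior (names illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int current_mode;	/* stand-in for insn->current_mode */
static int registrations;	/* must end up exactly 1 */

static void *writer(void *arg)
{
	int new_mode = 1;

	(void)arg;
	pthread_mutex_lock(&lock);
	/* The fix: snapshot prev_mode *inside* the lock. Reading it
	 * before pthread_mutex_lock() is the original bug; two threads
	 * could both see prev_mode == 0 and both "register". */
	int prev_mode = current_mode;

	current_mode = new_mode;
	if (prev_mode != new_mode)
		registrations++;	/* register_emulation_hooks() analogue */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t[2];

	for (int i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, writer, NULL);
	for (int i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	printf("registrations: %d (must be 1)\n", registrations);
	return 0;
}

With the snapshot hoisted above the lock, both threads can observe the stale mode and double-register, which in the kernel manifests as the list_add corruption in the trace above.
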
[PATCH openEuler-1.0-LTS 1/2] sched/qos: Don't unthrottle cfs_rq when cfs_rq is throttled by qos
by Yongqiang Liu, 16 Dec '22

From: Zhang Qiao <zhangqiao22(a)huawei.com>

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I64OUS
CVE: NA

-------------------------------

When a cfs_rq is throttled by qos, cfs_rq->throttled is marked as 1, so
cfs bandwidth control may unthrottle this cfs_rq by mistake, which
causes a list_del_valid warning. Add the macro QOS_THROTTLED (= 2):
when a cfs_rq is throttled by qos, mark cfs_rq->throttled as
QOS_THROTTLED, and check the value of cfs_rq->throttled before
unthrottling a cfs_rq.

Signed-off-by: Zhang Qiao <zhangqiao22(a)huawei.com>
Reviewed-by: Chen Hui <judy.chenhui(a)huawei.com>
Reviewed-by: zheng zucheng <zhengzucheng(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
 kernel/sched/fair.c | 59 ++++++++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 25 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a34ca843bf0a..0dba06ce0677 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -103,6 +103,13 @@ int __weak arch_asym_cpu_priority(int cpu)
 #endif
 
 #ifdef CONFIG_QOS_SCHED
+
+/*
+ * To distinguish cfs bw, use QOS_THROTTLED mark cfs_rq->throttled
+ * when qos throttled(and cfs bw throttle mark cfs_rq->throttled as 1).
+ */
+#define QOS_THROTTLED	2
+
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct list_head, qos_throttled_cfs_rq);
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct hrtimer, qos_overload_timer);
 static DEFINE_PER_CPU(int, qos_cpu_overload);
@@ -4649,6 +4656,14 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
     se = cfs_rq->tg->se[cpu_of(rq)];
 
+#ifdef CONFIG_QOS_SCHED
+    /*
+     * if this cfs_rq throttled by qos, not need unthrottle it.
+     */
+    if (cfs_rq->throttled == QOS_THROTTLED)
+        return;
+#endif
+
     cfs_rq->throttled = 0;
 
     update_rq_clock(rq);
@@ -7066,37 +7081,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 #ifdef CONFIG_QOS_SCHED
 static void start_qos_hrtimer(int cpu);
 
-static int qos_tg_unthrottle_up(struct task_group *tg, void *data)
-{
-    struct rq *rq = data;
-    struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
-
-    cfs_rq->throttle_count--;
-
-    return 0;
-}
-
-static int qos_tg_throttle_down(struct task_group *tg, void *data)
-{
-    struct rq *rq = data;
-    struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
-
-    cfs_rq->throttle_count++;
-
-    return 0;
-}
-
 static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 {
     struct rq *rq = rq_of(cfs_rq);
     struct sched_entity *se;
     long task_delta, idle_task_delta, dequeue = 1;
+    unsigned int prev_nr = rq->cfs.h_nr_running;
 
     se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
 
     /* freeze hierarchy runnable averages while throttled */
     rcu_read_lock();
-    walk_tg_tree_from(cfs_rq->tg, qos_tg_throttle_down, tg_nop, (void *)rq);
+    walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
     rcu_read_unlock();
 
     task_delta = cfs_rq->h_nr_running;
@@ -7118,12 +7114,14 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
     if (!se) {
         sub_nr_running(rq, task_delta);
+        if (prev_nr >= 2 && prev_nr - task_delta < 2)
+            overload_clear(rq);
     }
 
     if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq))))
         start_qos_hrtimer(cpu_of(rq));
 
-    cfs_rq->throttled = 1;
+    cfs_rq->throttled = QOS_THROTTLED;
 
     list_add(&cfs_rq->qos_throttled_list,
              &per_cpu(qos_throttled_cfs_rq, cpu_of(rq)));
@@ -7135,8 +7133,11 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
     struct sched_entity *se;
     int enqueue = 1;
     long task_delta, idle_task_delta;
+    unsigned int prev_nr = rq->cfs.h_nr_running;
 
     se = cfs_rq->tg->se[cpu_of(rq)];
+    if (cfs_rq->throttled != QOS_THROTTLED)
+        return;
 
     cfs_rq->throttled = 0;
 
@@ -7145,7 +7146,7 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
     /* update hierarchical throttle state */
     rcu_read_lock();
-    walk_tg_tree_from(cfs_rq->tg, tg_nop, qos_tg_unthrottle_up, (void *)rq);
+    walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
     rcu_read_unlock();
 
     if (!cfs_rq->load.weight)
@@ -7167,12 +7168,20 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
             break;
     }
 
-    assert_list_leaf_cfs_rq(rq);
-
     if (!se) {
         add_nr_running(rq, task_delta);
+        if (prev_nr < 2 && prev_nr + task_delta >= 2)
+            overload_set(rq);
+    }
+
+    for_each_sched_entity(se) {
+        cfs_rq = cfs_rq_of(se);
+
+        list_add_leaf_cfs_rq(cfs_rq);
     }
 
+    assert_list_leaf_cfs_rq(rq);
+
     /* Determine whether we need to wake up potentially idle CPU: */
     if (rq->curr == rq->idle && rq->cfs.nr_running)
         resched_curr(rq);
-- 
2.25.1
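
Besides the QOS_THROTTLED guard (illustrated after the openEuler-5.10 variant of this patch above), this backport also pairs the nr_running accounting with overload_set()/overload_clear() on crossings of the two-runnable-tasks threshold. The crossing test in isolation, as a small userspace sketch (names illustrative):

#include <stdbool.h>
#include <stdio.h>

static bool overloaded;

/* Mirrors "if (prev_nr >= 2 && prev_nr - task_delta < 2) overload_clear(rq)". */
static void sub_running(unsigned int *nr, unsigned int delta)
{
	unsigned int prev_nr = *nr;

	*nr -= delta;
	if (prev_nr >= 2 && prev_nr - delta < 2)
		overloaded = false;	/* dropped below two runnable tasks */
}

/* Mirrors "if (prev_nr < 2 && prev_nr + task_delta >= 2) overload_set(rq)". */
static void add_running(unsigned int *nr, unsigned int delta)
{
	unsigned int prev_nr = *nr;

	*nr += delta;
	if (prev_nr < 2 && prev_nr + delta >= 2)
		overloaded = true;	/* crossed up through the threshold */
}

int main(void)
{
	unsigned int nr = 3;

	overloaded = true;
	sub_running(&nr, 2);	/* 3 -> 1: clears */
	printf("nr=%u overloaded=%d\n", nr, overloaded);
	add_running(&nr, 1);	/* 1 -> 2: sets */
	printf("nr=%u overloaded=%d\n", nr, overloaded);
	return 0;
}

Testing the previous count against the threshold means the flag flips only on an actual crossing, never on updates that stay on one side of it.
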
[PATCH openEuler-5.10 1/3] RDMA/hns: Kernel notify usr space to stop ring db
by Zheng Zengkai, 15 Dec '22

From: Guofeng Yue <yueguofeng(a)hisilicon.com>

driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I65TUL

---------------------------------------------------------------

In the reset scenario, when the kernel receives the reset signal, it
needs to notify user space to stop ringing the doorbell.

Signed-off-by: Yixing Liu <liuyixing1(a)huawei.com>
Signed-off-by: Guofeng Yue <yueguofeng(a)hisilicon.com>
Reviewed-by: Yangyang Li <liyangyang20(a)huawei.com>
Reviewed-by: Yue Haibing <yuehaibing(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  5 ++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 55 ++++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  4 ++
 drivers/infiniband/hw/hns/hns_roce_main.c   | 47 +++++++++++++++++-
 include/uapi/rdma/hns-abi.h                 |  1 +
 5 files changed, 109 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 5b5f6c5920f1..69a14b1ca99c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -59,6 +59,7 @@
 
 #define HNS_ROCE_CEQ 0
 #define HNS_ROCE_AEQ 1
+#define HNS_ROCE_IS_RESETTING 1
 
 #define HNS_ROCE_CEQE_SIZE 0x4
 #define HNS_ROCE_AEQE_SIZE 0x10
@@ -206,6 +207,7 @@ enum hns_roce_mmap_type {
     HNS_ROCE_MMAP_TYPE_DB = 1,
     HNS_ROCE_MMAP_TYPE_DWQE,
     HNS_ROCE_MMAP_TYPE_DCA,
+    HNS_ROCE_MMAP_TYPE_RESET,
 };
 
 struct hns_user_mmap_entry {
@@ -248,6 +250,7 @@ struct hns_roce_ucontext {
     struct list_head	page_list;
     struct mutex	page_mutex;
     struct hns_user_mmap_entry *db_mmap_entry;
+    struct hns_user_mmap_entry *reset_mmap_entry;
     u32			config;
     struct hns_roce_dca_ctx	dca_ctx;
     void		*dca_dbgfs;
@@ -1027,6 +1030,8 @@ struct hns_roce_dev {
     int			loop_idc;
     u32			sdb_offset;
     u32			odb_offset;
+    struct page		*reset_page; /* store reset state */
+    void		*reset_kaddr; /* addr of reset page */
     const struct hns_roce_hw *hw;
     void		*priv;
     struct workqueue_struct *irq_workq;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 5c58fb2070c4..ecd38c2a7baa 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2812,6 +2812,31 @@ static void free_dip_list(struct hns_roce_dev *hr_dev)
     spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
 }
 
+static int hns_roce_v2_get_reset_page(struct hns_roce_dev *hr_dev)
+{
+    hr_dev->reset_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+    if (!hr_dev->reset_page)
+        return -ENOMEM;
+
+    hr_dev->reset_kaddr = vmap(&hr_dev->reset_page, 1, VM_MAP, PAGE_KERNEL);
+    if (!hr_dev->reset_kaddr)
+        goto err_with_vmap;
+
+    return 0;
+
+err_with_vmap:
+    put_page(hr_dev->reset_page);
+    return -ENOMEM;
+}
+
+static void hns_roce_v2_put_reset_page(struct hns_roce_dev *hr_dev)
+{
+    vunmap(hr_dev->reset_kaddr);
+    hr_dev->reset_kaddr = NULL;
+    put_page(hr_dev->reset_page);
+    hr_dev->reset_page = NULL;
+}
+
 static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev)
 {
     struct hns_roce_v2_priv *priv = hr_dev->priv;
@@ -3168,16 +3193,23 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
 {
     int ret;
 
+    ret = hns_roce_v2_get_reset_page(hr_dev);
+    if (ret) {
+        dev_err(hr_dev->dev,
+                "reset state init failed, ret = %d.\n", ret);
+        return ret;
+    }
+
     /* The hns ROCEE requires the extdb info to be cleared before using */
     ret = hns_roce_clear_extdb_list_info(hr_dev);
     if (ret)
-        return ret;
+        goto err_clear_extdb_failed;
 
     hns_roce_set_mac_type(hr_dev);
 
     ret = get_hem_table(hr_dev);
     if (ret)
-        return ret;
+        goto err_clear_extdb_failed;
 
     if (hr_dev->is_vf)
         return 0;
@@ -3192,6 +3224,8 @@ static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
 
 err_llm_init_failed:
     put_hem_table(hr_dev);
+err_clear_extdb_failed:
+    hns_roce_v2_put_reset_page(hr_dev);
 
     return ret;
 }
@@ -3203,6 +3237,8 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
     if (!hr_dev->is_vf)
         hns_roce_free_link_table(hr_dev);
 
+    hns_roce_v2_put_reset_page(hr_dev);
+
     if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09)
         free_dip_list(hr_dev);
 }
@@ -7282,6 +7318,18 @@ void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset)
     handle->rinfo.instance_state = HNS_ROCE_STATE_NON_INIT;
 }
 
+static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev)
+{
+    struct hns_roce_v2_reset_state *state;
+
+    state = (struct hns_roce_v2_reset_state *)hr_dev->reset_kaddr;
+
+    state->reset_state = HNS_ROCE_IS_RESETTING;
+    /* Ensure reset state was flushed in memory */
+    wmb();
+}
+
 static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 {
     struct hns_roce_dev *hr_dev;
@@ -7300,6 +7348,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
 
     hr_dev->active = false;
     hr_dev->dis_db = true;
+
+    hns_roce_v2_reset_notify_user(hr_dev);
+
     hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
 
     return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 28381993278f..7751b3de2ff0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -1340,6 +1340,10 @@ struct hns_roce_link_table {
 #define HNS_ROCE_EXT_LLM_ENTRY(addr, id) (((id) << (64 - 12)) | ((addr) >> 12))
 #define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2)
 
+struct hns_roce_v2_reset_state {
+    u32 reset_state; /* stored to use in user space */
+};
+
 struct hns_roce_v2_free_mr {
     struct hns_roce_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
     struct hns_roce_cq *rsv_cq;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index f8fc6c905e39..9daf9b7f0976 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -367,6 +367,7 @@ hns_roce_user_mmap_entry_insert(struct ib_ucontext *ucontext, u64 address,
         break;
     case HNS_ROCE_MMAP_TYPE_DWQE:
     case HNS_ROCE_MMAP_TYPE_DCA:
+    case HNS_ROCE_MMAP_TYPE_RESET:
         ret = rdma_user_mmap_entry_insert_range(
                 ucontext, &entry->rdma_entry, length, 1,
                 U32_MAX);
@@ -408,6 +409,26 @@ static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
     return 0;
 }
 
+static void hns_roce_dealloc_reset_entry(struct hns_roce_ucontext *context)
+{
+    if (context->reset_mmap_entry)
+        rdma_user_mmap_entry_remove(&context->reset_mmap_entry->rdma_entry);
+}
+
+static int hns_roce_alloc_reset_entry(struct ib_ucontext *uctx)
+{
+    struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
+    struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+
+    context->reset_mmap_entry = hns_roce_user_mmap_entry_insert(uctx,
+        (u64)hr_dev->reset_kaddr, PAGE_SIZE, HNS_ROCE_MMAP_TYPE_RESET);
+
+    if (!context->reset_mmap_entry)
+        return -ENOMEM;
+
+    return 0;
+}
+
 static void ucontext_set_resp(struct ib_ucontext *uctx,
                               struct hns_roce_ib_alloc_ucontext_resp *resp)
 {
@@ -425,6 +446,11 @@ static void ucontext_set_resp(struct ib_ucontext *uctx,
         rdma_entry = &context->dca_ctx.dca_mmap_entry->rdma_entry;
         resp->dca_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
     }
+
+    if (context->reset_mmap_entry) {
+        rdma_entry = &context->reset_mmap_entry->rdma_entry;
+        resp->reset_mmap_key = rdma_user_mmap_get_offset(rdma_entry);
+    }
 }
 
 static u32 get_udca_max_qps(struct hns_roce_dev *hr_dev,
@@ -503,6 +529,10 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
         hns_roce_register_udca(hr_dev, get_udca_max_qps(hr_dev, &ucmd),
                                context);
 
+    ret = hns_roce_alloc_reset_entry(uctx);
+    if (ret)
+        goto error_fail_reset_entry;
+
     ucontext_set_resp(uctx, &resp);
     ret = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp)));
     if (ret)
@@ -518,7 +548,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 
 error_fail_copy_to_udata:
     hns_roce_unregister_udca(hr_dev, context);
+    hns_roce_dealloc_reset_entry(context);
 
+error_fail_reset_entry:
     hns_roce_dealloc_uar_entry(context);
 
 error_fail_uar_entry:
@@ -542,6 +574,7 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext)
     hns_roce_unregister_udca(hr_dev, context);
 
     hns_roce_dealloc_uar_entry(context);
+    hns_roce_dealloc_reset_entry(context);
 
     ida_free(&hr_dev->uar_ida.ida, (int)context->uar.logic_idx);
 }
@@ -578,6 +611,7 @@ static int mmap_dca(struct ib_ucontext *context, struct vm_area_struct *vma)
 
 static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
 {
+    struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
     struct rdma_user_mmap_entry *rdma_entry;
     struct hns_user_mmap_entry *entry;
     phys_addr_t pfn;
@@ -599,8 +633,19 @@ static int hns_roce_mmap(struct ib_ucontext *uctx, struct vm_area_struct *vma)
     case HNS_ROCE_MMAP_TYPE_DCA:
         ret = mmap_dca(uctx, vma);
         goto out;
+    case HNS_ROCE_MMAP_TYPE_RESET:
+        if (vma->vm_flags & (VM_WRITE | VM_EXEC)) {
+            ret = -EINVAL;
+            goto out;
+        }
+
+        ret = remap_pfn_range(vma, vma->vm_start,
+                              page_to_pfn(hr_dev->reset_page),
+                              PAGE_SIZE, vma->vm_page_prot);
+        goto out;
     default:
-        return -EINVAL;
+        ret = -EINVAL;
+        goto out;
     }
 
     ret = rdma_user_mmap_io(uctx, vma, pfn,
                             rdma_entry->npages * PAGE_SIZE,
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index 69508419d3a0..5988a6288d14 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -127,6 +127,7 @@ struct hns_roce_ib_alloc_ucontext_resp {
     __u32	dca_qps;
     __u32	dca_mmap_size;
     __aligned_u64 dca_mmap_key;
+    __aligned_u64 reset_mmap_key;
 };
 
 enum hns_roce_uctx_comp_mask {
-- 
2.20.1
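
The user-visible side of this design is simple: the driver exposes one read-only page, on reset it stores HNS_ROCE_IS_RESETTING there (followed by a write barrier), and user space checks the flag before ringing a doorbell. A userspace-only analogue of that publish/poll contract, using a shared mapping between two processes (names illustrative; the real rdma mmap-key handshake is omitted):

#include <stdatomic.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#define IS_RESETTING 1u

struct reset_state { _Atomic unsigned int reset_state; };

int main(void)
{
	/* Stand-in for the page the driver maps into user space. */
	struct reset_state *st = mmap(NULL, sizeof(*st),
				      PROT_READ | PROT_WRITE,
				      MAP_SHARED | MAP_ANONYMOUS, -1, 0);

	if (st == MAP_FAILED)
		return 1;

	if (fork() == 0) {
		/* "Kernel" side: publish the flag. The release store
		 * plays the role of the wmb() in
		 * hns_roce_v2_reset_notify_user(). */
		atomic_store_explicit(&st->reset_state, IS_RESETTING,
				      memory_order_release);
		_exit(0);
	}
	wait(NULL);

	/* "User" side: consult the flag before touching the doorbell. */
	if (atomic_load_explicit(&st->reset_state, memory_order_acquire)
	    == IS_RESETTING)
		printf("device resetting: stop ringing the doorbell\n");
	return 0;
}

Mapping the page read-only into user space (the VM_WRITE | VM_EXEC check in hns_roce_mmap()) keeps the flag one-directional: only the kernel may declare a reset.
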