From: Chen Hui <judy.chenhui@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
Add four helper functions to get CPU statistics, as follows:
1. acquire cfs/rt/irq CPU load statistics.
2. acquire multiple types of nr_running statistics.
3. acquire CPU idle statistics.
4. acquire CPU capacity.
Based on CPU statistics in these different dimensions, specific scheduling policies can be implemented in BPF programs.
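For example, a program could prefer the less loaded of two candidate CPUs. The sketch below is illustrative only and not part of this patch: the "sched" section name and the vmlinux.h include are assumptions (the struct definitions are expected to be visible via BTF, and the helper declarations via a bpf_helper_defs.h regenerated from the updated bpf_helpers_doc.py); the helper signature and struct fields are the ones added here.

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  SEC("sched")
  int prefer_less_loaded(void *ctx)
  {
  	struct bpf_sched_cpu_load load0 = {}, load1 = {};

  	/* len must equal sizeof(*ctx), otherwise the helper returns -EINVAL */
  	if (bpf_sched_cpu_load_of(0, &load0, sizeof(load0)) ||
  	    bpf_sched_cpu_load_of(1, &load1, sizeof(load1)))
  		return -1;

  	/* pick the CPU with the lower CFS load average */
  	return load0.cfs_load_avg <= load1.cfs_load_avg ? 0 : 1;
  }

  char LICENSE[] SEC("license") = "GPL";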
Signed-off-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Ren Zhijie <renzhijie2@huawei.com>
---
 include/linux/sched.h          |  33 +++++++
 include/uapi/linux/bpf.h       |  28 ++++++
 kernel/sched/bpf_sched.c       | 155 +++++++++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |   8 ++
 tools/include/uapi/linux/bpf.h |  28 ++++++
 5 files changed, 252 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d6db602da068..c969af754263 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2185,5 +2185,38 @@ static inline int sched_qos_cpu_overload(void)
 #ifdef CONFIG_BPF_SCHED
 extern void sched_settag(struct task_struct *tsk, s64 tag);
+
+struct bpf_sched_cpu_load {
+	unsigned long cfs_load_avg;
+	unsigned long cfs_runnable_avg;
+	unsigned long cfs_util_avg;
+	unsigned long rt_load_avg;
+	unsigned long rt_runnable_avg;
+	unsigned long rt_util_avg;
+	unsigned long irq_load_avg;
+	unsigned long irq_runnable_avg;
+	unsigned long irq_util_avg;
+};
+
+struct bpf_sched_cpu_nr_running {
+	unsigned int nr_running;
+	unsigned int cfs_nr_running;
+	unsigned int cfs_h_nr_running;
+	unsigned int cfs_idle_h_nr_running;
+	unsigned int rt_nr_running;
+	unsigned int rr_nr_running;
+};
+
+struct bpf_sched_cpu_idle_stat {
+	int available_idle;
+	unsigned int exit_latency;
+	unsigned long idle_stamp;
+	unsigned long avg_idle;
+};
+
+struct bpf_sched_cpu_capacity {
+	unsigned long capacity;
+	unsigned long capacity_orig;
+};
 #endif
 
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 39e69583e8ff..b898cae70b0a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3822,6 +3822,30 @@ union bpf_attr {
  *		Set tag to *tsk*.
  *	Return
  *		Nothing. Always succeeds.
+ *
+ * int bpf_sched_cpu_load_of(int cpu, struct bpf_sched_cpu_load *ctx, int len)
+ *	Description
+ *		Get multiple types of *cpu* load and store in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_sched_cpu_nr_running_of(int cpu, struct bpf_sched_cpu_nr_running *ctx, int len)
+ *	Description
+ *		Get multiple types of *cpu* nr running and store in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_sched_cpu_idle_stat_of(int cpu, struct bpf_sched_cpu_idle_stat *ctx, int len)
+ *	Description
+ *		Get *cpu* idle state and store in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_sched_cpu_capacity_of(int cpu, struct bpf_sched_cpu_capacity *ctx, int len)
+ *	Description
+ *		Get *cpu* capacity and store in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3992,6 +4016,10 @@ union bpf_attr {
 	FN(sched_entity_to_tg),		\
 	FN(sched_set_tg_tag),		\
 	FN(sched_set_task_tag),		\
+	FN(sched_cpu_load_of),		\
+	FN(sched_cpu_nr_running_of),	\
+	FN(sched_cpu_idle_stat_of),	\
+	FN(sched_cpu_capacity_of),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c
index 4446bb702c30..db2ca47f2937 100644
--- a/kernel/sched/bpf_sched.c
+++ b/kernel/sched/bpf_sched.c
@@ -248,6 +248,153 @@ const struct bpf_func_proto bpf_sched_set_task_tag_proto = {
 	.arg1_btf_id	= &btf_sched_task_ids[0],
 	.arg2_type	= ARG_ANYTHING,
 };
+
+BPF_CALL_3(bpf_sched_cpu_load_of, int, cpu,
+	   struct bpf_sched_cpu_load *, ctx,
+	   int, len)
+{
+	struct rq *rq;
+
+	if (len != sizeof(*ctx))
+		return -EINVAL;
+
+	if ((unsigned int)cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	memset(ctx, 0, sizeof(struct bpf_sched_cpu_load));
+#ifdef CONFIG_SMP
+	rq = cpu_rq(cpu);
+	SCHED_WARN_ON(!rcu_read_lock_held());
+	ctx->cfs_load_avg = rq->cfs.avg.load_avg;
+	ctx->cfs_runnable_avg = rq->cfs.avg.runnable_avg;
+	ctx->cfs_util_avg = rq->cfs.avg.util_avg;
+	ctx->rt_load_avg = rq->avg_rt.load_avg;
+	ctx->rt_runnable_avg = rq->avg_rt.runnable_avg;
+	ctx->rt_util_avg = rq->avg_rt.util_avg;
+#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
+	ctx->irq_load_avg = rq->avg_irq.load_avg;
+	ctx->irq_runnable_avg = rq->avg_irq.runnable_avg;
+	ctx->irq_util_avg = rq->avg_irq.util_avg;
+#endif
+#endif
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sched_cpu_load_of_proto = {
+	.func		= bpf_sched_cpu_load_of,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_3(bpf_sched_cpu_nr_running_of, int, cpu,
+	   struct bpf_sched_cpu_nr_running *, ctx,
+	   int, len)
+{
+	struct rq *rq;
+
+	if (len != sizeof(*ctx))
+		return -EINVAL;
+
+	if ((unsigned int)cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	SCHED_WARN_ON(!rcu_read_lock_held());
+
+	rq = cpu_rq(cpu);
+	ctx->nr_running = rq->nr_running;
+	ctx->cfs_nr_running = rq->cfs.nr_running;
+	ctx->cfs_h_nr_running = rq->cfs.h_nr_running;
+	ctx->cfs_idle_h_nr_running = rq->cfs.idle_h_nr_running;
+	ctx->rt_nr_running = rq->rt.rt_nr_running;
+	ctx->rr_nr_running = rq->rt.rr_nr_running;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sched_cpu_nr_running_of_proto = {
+	.func		= bpf_sched_cpu_nr_running_of,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_3(bpf_sched_cpu_idle_stat_of, int, cpu,
+	   struct bpf_sched_cpu_idle_stat *, ctx,
+	   int, len)
+{
+	struct cpuidle_state *idle;
+	struct rq *rq;
+
+	if (len != sizeof(*ctx))
+		return -EINVAL;
+
+	if ((unsigned int)cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	memset(ctx, 0, sizeof(struct bpf_sched_cpu_idle_stat));
+	SCHED_WARN_ON(!rcu_read_lock_held());
+	ctx->available_idle = available_idle_cpu(cpu);
+	rq = cpu_rq(cpu);
+	idle = idle_get_state(rq);
+	if (idle)
+		ctx->exit_latency = idle->exit_latency;
+
+#ifdef CONFIG_SMP
+	ctx->idle_stamp = rq->idle_stamp;
+	ctx->avg_idle = rq->avg_idle;
+#endif
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sched_cpu_idle_stat_of_proto = {
+	.func		= bpf_sched_cpu_idle_stat_of,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_3(bpf_sched_cpu_capacity_of, int, cpu,
+	   struct bpf_sched_cpu_capacity *, ctx,
+	   int, len)
+{
+	struct rq *rq;
+
+	if (len != sizeof(*ctx))
+		return -EINVAL;
+
+	if ((unsigned int)cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	memset(ctx, 0, sizeof(struct bpf_sched_cpu_capacity));
+#ifdef CONFIG_SMP
+	SCHED_WARN_ON(!rcu_read_lock_held());
+	rq = cpu_rq(cpu);
+	ctx->capacity = rq->cpu_capacity;
+	ctx->capacity_orig = rq->cpu_capacity_orig;
+#endif
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_sched_cpu_capacity_of_proto = {
+	.func		= bpf_sched_cpu_capacity_of,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+
+
 static const struct bpf_func_proto *
 bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -266,6 +413,14 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sched_entity_to_task_proto;
 	case BPF_FUNC_sched_entity_to_tg:
 		return &bpf_sched_entity_to_tg_proto;
+	case BPF_FUNC_sched_cpu_load_of:
+		return &bpf_sched_cpu_load_of_proto;
+	case BPF_FUNC_sched_cpu_nr_running_of:
+		return &bpf_sched_cpu_nr_running_of_proto;
+	case BPF_FUNC_sched_cpu_idle_stat_of:
+		return &bpf_sched_cpu_idle_stat_of_proto;
+	case BPF_FUNC_sched_cpu_capacity_of:
+		return &bpf_sched_cpu_capacity_of_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index f8a778ac9ce1..f2b5e63801ca 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -437,6 +437,10 @@ class PrinterHelpers(Printer):
             'struct btf_ptr',
             'struct sched_entity',
             'struct task_group',
+            'struct bpf_sched_cpu_load',
+            'struct bpf_sched_cpu_nr_running',
+            'struct bpf_sched_cpu_idle_stat',
+            'struct bpf_sched_cpu_capacity',
     ]
     known_types = {
             '...',
@@ -482,6 +486,10 @@ class PrinterHelpers(Printer):
             'struct btf_ptr',
             'struct sched_entity',
             'struct task_group',
+            'struct bpf_sched_cpu_load',
+            'struct bpf_sched_cpu_nr_running',
+            'struct bpf_sched_cpu_idle_stat',
+            'struct bpf_sched_cpu_capacity',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 8844f900bf83..97295cd863c4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3822,6 +3822,30 @@ union bpf_attr {
  *		Set tag to *tsk*.
  *	Return
  *		Nothing. Always succeeds.
+ *
+ * int bpf_sched_cpu_load_of(int cpu, struct bpf_sched_cpu_load *ctx, int len)
+ *	Description
+ *		Get multiple types of *cpu* load and store in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_sched_cpu_nr_running_of(int cpu, struct bpf_sched_cpu_nr_running *ctx, int len)
+ *	Description
+ *		Get multiple types of *cpu* nr running and store in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_sched_cpu_idle_stat_of(int cpu, struct bpf_sched_cpu_idle_stat *ctx, int len)
+ *	Description
+ *		Get *cpu* idle state and store in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_sched_cpu_capacity_of(int cpu, struct bpf_sched_cpu_capacity *ctx, int len)
+ *	Description
+ *		Get *cpu* capacity and store in *ctx*.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3992,6 +4016,10 @@ union bpf_attr {
 	FN(sched_entity_to_tg),		\
 	FN(sched_set_tg_tag),		\
 	FN(sched_set_task_tag),		\
+	FN(sched_cpu_load_of),		\
+	FN(sched_cpu_nr_running_of),	\
+	FN(sched_cpu_idle_stat_of),	\
+	FN(sched_cpu_capacity_of),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
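
A further illustrative sketch, not part of the patch: scanning a fixed range of CPUs for one that is available-idle with nothing queued, combining the idle-stat and nr_running helpers. NR_SCAN, the "sched" section name, and the includes are assumptions; the helper signatures and struct fields are the ones added above.

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>

  #define NR_SCAN 4	/* illustrative fixed scan width */

  SEC("sched")
  int find_idle_cpu(void *ctx)
  {
  	struct bpf_sched_cpu_idle_stat idle = {};
  	struct bpf_sched_cpu_nr_running nr = {};
  	int cpu;

  #pragma unroll
  	for (cpu = 0; cpu < NR_SCAN; cpu++) {
  		/* helper fails for cpu >= nr_cpu_ids or a bad len */
  		if (bpf_sched_cpu_idle_stat_of(cpu, &idle, sizeof(idle)))
  			continue;
  		if (!idle.available_idle)
  			continue;
  		if (bpf_sched_cpu_nr_running_of(cpu, &nr, sizeof(nr)))
  			continue;
  		if (nr.nr_running == 0)
  			return cpu;	/* idle and nothing queued */
  	}
  	return -1;	/* no idle CPU in the scanned range */
  }

  char LICENSE[] SEC("license") = "GPL";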