From: Roman Gushchin guro@fb.com
maillist inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5F6X6
CVE: NA
Reference: https://lore.kernel.org/all/20210916162451.709260-1-guro@fb.com/
-------------------
This patch adds three helpers for working with sched entities:
  u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se);
  u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se);
  long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid);
A sched entity is a basic structure used by the scheduler to represent schedulable objects: tasks and cgroups (if CONFIG_FAIR_GROUP_SCHED is enabled). It will be passed as an argument to many bpf hooks, so scheduler bpf programs need a convenient way to deal with it.
bpf_sched_entity_to_tgidpid() and bpf_sched_entity_to_cgrpid() are useful for identifying a sched entity in userspace terms (pid, tgid and cgroup id). bpf_sched_entity_belongs_to_cgrp() checks whether a sched entity belongs to a cgroup or its sub-tree. This makes it possible to write cgroup-specific scheduler policies even without enabling the cgroup cpu controller.
Signed-off-by: Roman Gushchin guro@fb.com
Signed-off-by: Chen Hui judy.chenhui@huawei.com
Signed-off-by: Ren Zhijie renzhijie2@huawei.com
---
 include/uapi/linux/bpf.h       | 23 ++++++++++
 kernel/sched/bpf_sched.c       | 76 +++++++++++++++++++++++++++++++++-
 scripts/bpf_helpers_doc.py     |  2 +
 tools/include/uapi/linux/bpf.h | 23 ++++++++++
 4 files changed, 123 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4f8f3f2113a7..d5fbbc28b6a0 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3757,6 +3757,26 @@ union bpf_attr { * Get Ipv4 origdst or replysrc. Works with IPv4. * Return * 0 on success, or a negative error in case of failure. + * + * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se) + * Description + * Return task's encoded tgid and pid if the sched entity is a task. + * Return + * Tgid and pid encoded as tgid << 32 | pid, if *se* is a task. (u64)-1 otherwise. + * + * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se) + * Description + * Return cgroup id if the given sched entity is a cgroup. + * Return + * Cgroup id, if *se* is a cgroup. (u64)-1 otherwise. + * + * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid) + * Description + * Checks whether the sched entity belongs to a cgroup or + * it's sub-tree. It doesn't require a cgroup CPU controller + * to be enabled. + * Return + * 1 if the sched entity belongs to a cgroup, 0 otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3917,6 +3937,9 @@ union bpf_attr { FN(redirect_peer), \ FN(get_sockops_uid_gid), \ FN(sk_original_addr), \ + FN(sched_entity_to_tgidpid), \ + FN(sched_entity_to_cgrpid), \ + FN(sched_entity_belongs_to_cgrp), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index 2f05c186cfd0..831b5917fcda 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -42,14 +42,88 @@ int bpf_sched_verify_prog(struct bpf_verifier_log *vlog, return 0; }
+BPF_CALL_1(bpf_sched_entity_to_tgidpid, struct sched_entity *, se) +{ + if (entity_is_task(se)) { + struct task_struct *task = task_of(se); + + return (u64) task->tgid << 32 | task->pid; + } else { + return (u64) -1; + } +} + +BPF_CALL_1(bpf_sched_entity_to_cgrpid, struct sched_entity *, se) +{ +#ifdef CONFIG_FAIR_GROUP_SCHED + if (!entity_is_task(se)) + return cgroup_id(se->my_q->tg->css.cgroup); +#endif + return (u64) -1; +} + +BPF_CALL_2(bpf_sched_entity_belongs_to_cgrp, struct sched_entity *, se, + u64, cgrpid) +{ +#ifdef CONFIG_CGROUPS + struct cgroup *cgrp; + int level; + + if (entity_is_task(se)) + cgrp = task_dfl_cgroup(task_of(se)); +#ifdef CONFIG_FAIR_GROUP_SCHED + else + cgrp = se->my_q->tg->css.cgroup; +#endif + + for (level = cgrp->level; level; level--) + if (cgrp->ancestor_ids[level] == cgrpid) + return 1; +#endif + return 0; +} + +BTF_ID_LIST_SINGLE(btf_sched_entity_ids, struct, sched_entity) + +static const struct bpf_func_proto bpf_sched_entity_to_tgidpid_proto = { + .func = bpf_sched_entity_to_tgidpid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_entity_ids[0], +}; + +static const struct bpf_func_proto bpf_sched_entity_to_cgrpid_proto = { + .func = bpf_sched_entity_to_cgrpid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_entity_ids[0], +}; + +static const struct bpf_func_proto bpf_sched_entity_belongs_to_cgrp_proto = { + .func = bpf_sched_entity_belongs_to_cgrp, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_entity_ids[0], + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_trace_printk: return bpf_get_trace_printk_proto(); + case BPF_FUNC_sched_entity_to_tgidpid: + return &bpf_sched_entity_to_tgidpid_proto; + case 
BPF_FUNC_sched_entity_to_cgrpid: + return &bpf_sched_entity_to_cgrpid_proto; + case BPF_FUNC_sched_entity_belongs_to_cgrp: + return &bpf_sched_entity_belongs_to_cgrp_proto; default: - return NULL; + return bpf_base_func_proto(func_id); } }
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 31484377b8b1..be21512ee7be 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -435,6 +435,7 @@ class PrinterHelpers(Printer): 'struct xdp_md', 'struct path', 'struct btf_ptr', + 'struct sched_entity', ] known_types = { '...', @@ -478,6 +479,7 @@ class PrinterHelpers(Printer): 'struct task_struct', 'struct path', 'struct btf_ptr', + 'struct sched_entity', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7d7c04f698ec..b2a0b189b797 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3757,6 +3757,26 @@ union bpf_attr { * Get Ipv4 origdst or replysrc. Works with IPv4. * Return * 0 on success, or a negative error in case of failure. + * + * u64 bpf_sched_entity_to_tgidpid(struct sched_entity *se) + * Description + * Return task's encoded tgid and pid if the sched entity is a task. + * Return + * Tgid and pid encoded as tgid << 32 | pid, if *se* is a task. (u64)-1 otherwise. + * + * u64 bpf_sched_entity_to_cgrpid(struct sched_entity *se) + * Description + * Return cgroup id if the given sched entity is a cgroup. + * Return + * Cgroup id, if *se* is a cgroup. (u64)-1 otherwise. + * + * long bpf_sched_entity_belongs_to_cgrp(struct sched_entity *se, u64 cgrpid) + * Description + * Checks whether the sched entity belongs to a cgroup or + * it's sub-tree. It doesn't require a cgroup CPU controller + * to be enabled. + * Return + * 1 if the sched entity belongs to a cgroup, 0 otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3917,6 +3937,9 @@ union bpf_attr { FN(redirect_peer), \ FN(get_sockops_uid_gid), \ FN(sk_original_addr), \ + FN(sched_entity_to_tgidpid), \ + FN(sched_entity_to_cgrpid), \ + FN(sched_entity_belongs_to_cgrp), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper