
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add BPF_PROG_TYPE_HISOCK program type and BPF_HISOCK_EGRESS attach type to allow a BPF program to be attached on the network egress side and bypass netfilter rules for specific connections. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/linux/bpf-cgroup-defs.h | 3 ++ include/linux/bpf-cgroup.h | 25 +++++++++++++ include/linux/bpf_types.h | 4 +++ include/uapi/linux/bpf.h | 9 +++++ kernel/bpf/cgroup.c | 43 +++++++++++++++++++++++ kernel/bpf/syscall.c | 19 ++++++++++ kernel/bpf/verifier.c | 3 ++ net/core/filter.c | 62 +++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 9 +++++ tools/lib/bpf/libbpf.c | 3 ++ 10 files changed, 180 insertions(+) diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index fb6adb1c3889..2bd35e802b0a 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -45,6 +45,9 @@ enum cgroup_bpf_attach_type { CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, +#ifdef CONFIG_HISOCK + HISOCK_EGRESS, +#endif MAX_CGROUP_BPF_ATTACH_TYPE }; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d4f2c8706042..f94f57d185b8 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -62,6 +62,9 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); +#ifdef CONFIG_HISOCK + CGROUP_ATYPE(HISOCK_EGRESS); +#endif default: return CGROUP_BPF_ATTACH_TYPE_INVALID; } @@ -150,6 +153,11 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, int optname, void *optval, int *optlen, int retval); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype); +#endif + static 
inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -401,6 +409,20 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, __ret; \ }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) \ +({ \ + int __ret = HISOCK_PASS; \ + if (cgroup_bpf_enabled(HISOCK_EGRESS) && sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb)) \ + __ret = __cgroup_bpf_run_hisock_egress(__sk, skb, \ + HISOCK_EGRESS); \ + } \ + __ret; \ +}) +#endif + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -498,6 +520,9 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) ({ HISOCK_PASS; }) +#endif #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index f5cdd5a9e268..15809bc5eff4 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -17,6 +17,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock, struct bpf_sock, struct sock) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr, struct bpf_sock_addr, struct bpf_sock_addr_kern) +#ifdef CONFIG_HISOCK +BPF_PROG_TYPE(BPF_PROG_TYPE_HISOCK, hisock, + struct __sk_buff, struct sk_buff) +#endif #endif BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in, struct __sk_buff, struct sk_buff) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8619baf2d64b..52f376f9428c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1006,6 +1006,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, BPF_PROG_TYPE_SCHED, + 
BPF_PROG_TYPE_HISOCK, }; enum bpf_attach_type { @@ -1059,6 +1060,7 @@ enum bpf_attach_type { BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, BPF_SCHED, + BPF_HISOCK_EGRESS, __MAX_BPF_ATTACH_TYPE }; @@ -7353,4 +7355,11 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index cf2eb0895d40..90cc73c762cf 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -663,6 +663,12 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, */ return -EPERM; +#ifdef CONFIG_HISOCK + /* Only one bpf program can be attached to HISOCK_EGRESS */ + if (atype == HISOCK_EGRESS && prog_list_length(progs) >= 1) + return -EEXIST; +#endif + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) return -E2BIG; @@ -1548,6 +1554,43 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array_item *item; + struct bpf_prog *prog; + struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_cg_run_ctx run_ctx; + void *saved_data_end; + u32 ret = HISOCK_PASS; + + bpf_compute_and_save_data_end(skb, &saved_data_end); + + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(cgrp->bpf.effective[atype]); + item = &array->items[0]; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + /* Only one bpf program can be attached to HISOCK_EGRESS */ + prog = READ_ONCE(item->prog); + if (prog) { + run_ctx.prog_item = item; + ret = __bpf_prog_run_save_cb(prog, skb); + } + bpf_reset_run_ctx(old_run_ctx); + rcu_read_unlock(); + migrate_enable(); + + bpf_restore_data_end(skb, saved_data_end); + + return ret < 
__MAX_HISOCK_ACTION ? ret : -EPERM; +} +EXPORT_SYMBOL(__cgroup_bpf_run_hisock_egress); +#endif + int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum cgroup_bpf_attach_type atype) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 266e84baea84..7131d7bf92d7 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2554,6 +2554,9 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_EXT: /* extends any prog */ case BPF_PROG_TYPE_NETFILTER: @@ -3820,6 +3823,10 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: return BPF_PROG_TYPE_CGROUP_SOCKOPT; +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: + return BPF_PROG_TYPE_HISOCK; +#endif case BPF_TRACE_ITER: case BPF_TRACE_RAW_TP: case BPF_TRACE_FENTRY: @@ -3978,6 +3985,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (ptype == BPF_PROG_TYPE_LSM && prog->expected_attach_type != BPF_LSM_CGROUP) ret = -EINVAL; @@ -4043,6 +4053,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_prog_detach(attr, ptype); break; case BPF_PROG_TYPE_SCHED_CLS: @@ -4094,6 +4107,9 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: case BPF_LSM_CGROUP: +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: +#endif return cgroup_bpf_prog_query(attr, uattr); case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); @@ 
-5054,6 +5070,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_link_attach(attr, prog); break; case BPF_PROG_TYPE_EXT: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dbf2df404460..3a85c3ff4a43 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5576,6 +5576,9 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, return true; case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (t == BPF_WRITE) env->seen_direct_write = true; diff --git a/net/core/filter.c b/net/core/filter.c index 2968f1f8dd47..5abdd9a0a2ac 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8152,6 +8152,29 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +#ifdef CONFIG_HISOCK +static const struct bpf_func_proto * +hisock_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return &bpf_skb_change_head_proto; + case BPF_FUNC_skb_adjust_room: + return &bpf_skb_adjust_room_proto; + default: + return bpf_base_func_proto(func_id); + } +} +#endif + static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -8741,6 +8764,33 @@ static bool cg_skb_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, prog, info); } +#ifdef CONFIG_HISOCK +static bool hisock_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct 
bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): + return false; + } + + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, prog, info); +} +#endif + static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -11086,6 +11136,18 @@ const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; +#ifdef CONFIG_HISOCK +const struct bpf_verifier_ops hisock_verifier_ops = { + .get_func_proto = hisock_func_proto, + .is_valid_access = hisock_is_valid_access, + .convert_ctx_access = bpf_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, +}; + +const struct bpf_prog_ops hisock_prog_ops = { +}; +#endif + const struct bpf_verifier_ops lwt_in_verifier_ops = { .get_func_proto = lwt_in_func_proto, .is_valid_access = lwt_is_valid_access, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6873e3acbdef..9796dfba3b38 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1006,6 +1006,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, BPF_PROG_TYPE_SCHED, + BPF_PROG_TYPE_HISOCK, }; enum bpf_attach_type { @@ -1059,6 +1060,7 @@ enum bpf_attach_type { BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, BPF_SCHED, + BPF_HISOCK_EGRESS, __MAX_BPF_ATTACH_TYPE }; @@ -7356,4 +7358,11 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git 
a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a57f9afffe98..828c1d2f173e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -123,6 +123,7 @@ static const char * const attach_type_name[] = { [BPF_TCX_EGRESS] = "tcx_egress", [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", [BPF_SCHED] = "sched", + [BPF_HISOCK_EGRESS] = "hisock_egress", }; static const char * const link_type_name[] = { @@ -212,6 +213,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SYSCALL] = "syscall", [BPF_PROG_TYPE_NETFILTER] = "netfilter", [BPF_PROG_TYPE_SCHED] = "sched", + [BPF_PROG_TYPE_HISOCK] = "hisock", }; static int __base_pr(enum libbpf_print_level level, const char *format, @@ -8873,6 +8875,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), SEC_DEF("sched/", SCHED, BPF_SCHED, SEC_ATTACH_BTF, attach_sched), + SEC_DEF("hisock_egress", HISOCK, BPF_HISOCK_EGRESS, SEC_ATTACHABLE_OPT), }; int libbpf_register_prog_handler(const char *sec, -- 2.34.1