
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICJ716 -------------------------------- Add BPF_PROG_TYPE_HISOCK program type and BPF_HISOCK_EGRESS attach type to allow bpf program to be attached on network egress side and bypass netfilter rules for the specific connections. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/linux/bpf-cgroup.h | 35 +++++++++++++++++++ include/linux/bpf_types.h | 4 +++ include/uapi/linux/bpf.h | 9 +++++ kernel/bpf/cgroup.c | 43 +++++++++++++++++++++++ kernel/bpf/syscall.c | 19 +++++++++++ kernel/bpf/verifier.c | 3 ++ net/core/filter.c | 62 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 9 +++++ tools/lib/bpf/libbpf.c | 2 ++ 9 files changed, 186 insertions(+) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8347817d713c..948ab36ae29e 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -50,7 +50,12 @@ enum cgroup_bpf_attach_type { CGROUP_INET6_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, #ifdef CONFIG_KABI_RESERVE +#ifdef CONFIG_HISOCK + KABI_BROKEN_REMOVE_ENUM(CGROUP_ATTACH_TYPE_KABI_RESERVE_1) + KABI_BROKEN_INSERT_ENUM(HISOCK_EGRESS) +#else CGROUP_ATTACH_TYPE_KABI_RESERVE_1, +#endif CGROUP_ATTACH_TYPE_KABI_RESERVE_2, CGROUP_ATTACH_TYPE_KABI_RESERVE_3, CGROUP_ATTACH_TYPE_KABI_RESERVE_4, @@ -58,6 +63,10 @@ enum cgroup_bpf_attach_type { CGROUP_ATTACH_TYPE_KABI_RESERVE_6, CGROUP_ATTACH_TYPE_KABI_RESERVE_7, CGROUP_ATTACH_TYPE_KABI_RESERVE_8, +#else +#ifdef CONFIG_HISOCK + HISOCK_EGRESS, +#endif #endif MAX_CGROUP_BPF_ATTACH_TYPE }; @@ -92,6 +101,9 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); +#ifdef CONFIG_HISOCK + CGROUP_ATYPE(HISOCK_EGRESS); +#endif default: return CGROUP_BPF_ATTACH_TYPE_INVALID; } @@ -237,6 +249,11 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, int __user 
*optlen, int max_optlen, int retval); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype); +#endif + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -446,6 +463,21 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, __ret; \ }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) \ +({ \ + int __ret = HISOCK_PASS; \ + if (cgroup_bpf_enabled(HISOCK_EGRESS) && \ + sk && sk == skb->sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk)) \ + __ret = __cgroup_bpf_run_hisock_egress(__sk, skb, \ + HISOCK_EGRESS); \ + } \ + __ret; \ +}) +#endif + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -526,6 +558,9 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, optlen, max_optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) ({ HISOCK_PASS; }) +#endif #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 57954e35fd36..188c19e1142d 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -17,6 +17,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock, struct bpf_sock, struct sock) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr, struct bpf_sock_addr, struct bpf_sock_addr_kern) +#ifdef CONFIG_HISOCK +BPF_PROG_TYPE(BPF_PROG_TYPE_HISOCK, hisock, + struct __sk_buff, struct sk_buff) +#endif #endif BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in, struct __sk_buff, struct sk_buff) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b907c8e3b107..7906c799f6d7 100644 --- a/include/uapi/linux/bpf.h +++ 
b/include/uapi/linux/bpf.h @@ -202,6 +202,7 @@ enum bpf_prog_type { #ifndef __GENKSYMS__ BPF_PROG_TYPE_SCHED, BPF_PROG_TYPE_NET_GLOBAL, + BPF_PROG_TYPE_HISOCK, #endif }; @@ -250,6 +251,7 @@ enum bpf_attach_type { BPF_GNET_SK_DST_SET, BPF_GNET_RCV_NIC_NODE, BPF_GNET_SEND_NIC_NODE, + BPF_HISOCK_EGRESS, #endif __MAX_BPF_ATTACH_TYPE }; @@ -5284,4 +5286,11 @@ struct bpf_gnet_ctx { __u64 rx_dev_netns_cookie; }; +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 3b9addda8765..41bd6032c6b4 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -488,6 +488,12 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, */ return -EPERM; +#ifdef CONFIG_HISOCK + /* Only one bpf program can be attached to HISOCK_EGRESS */ + if (atype == HISOCK_EGRESS && prog_list_length(progs) >= 1) + return -EEXIST; +#endif + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) return -E2BIG; @@ -1221,6 +1227,43 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array_item *item; + struct bpf_prog *prog; + struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_cg_run_ctx run_ctx; + void *saved_data_end; + u32 ret = HISOCK_PASS; + + bpf_compute_and_save_data_end(skb, &saved_data_end); + + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(cgrp->bpf.effective[atype]); + item = &array->items[0]; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + /* Only one bpf program can be attached to HISOCK_EGRESS */ + prog = READ_ONCE(item->prog); + if (prog) { + run_ctx.prog_item = item; + ret = __bpf_prog_run_save_cb(prog, skb); + } + bpf_reset_run_ctx(old_run_ctx); 
+ rcu_read_unlock(); + migrate_enable(); + + bpf_restore_data_end(skb, saved_data_end); + + return ret < __MAX_HISOCK_ACTION ? ret : -EPERM; +} +EXPORT_SYMBOL(__cgroup_bpf_run_hisock_egress); +#endif + int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum cgroup_bpf_attach_type atype) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b9846d8a8bde..3b414e36df6b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2111,6 +2111,9 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_EXT: /* extends any prog */ #ifdef CONFIG_BPF_NET_GLOBAL_PROG @@ -3002,6 +3005,10 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: return BPF_PROG_TYPE_CGROUP_SOCKOPT; +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: + return BPF_PROG_TYPE_HISOCK; +#endif case BPF_TRACE_ITER: return BPF_PROG_TYPE_TRACING; case BPF_SK_LOOKUP: @@ -3098,6 +3105,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_prog_attach(attr, ptype, prog); break; #ifdef CONFIG_BPF_NET_GLOBAL_PROG @@ -3140,6 +3150,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif return cgroup_bpf_prog_detach(attr, ptype); #ifdef CONFIG_BPF_NET_GLOBAL_PROG case BPF_PROG_TYPE_NET_GLOBAL: @@ -3186,6 +3199,9 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_SYSCTL: case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: 
+#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: +#endif return cgroup_bpf_prog_query(attr, uattr); case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); @@ -4169,6 +4185,9 @@ static int link_create(union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_link_attach(attr, prog); break; case BPF_PROG_TYPE_TRACING: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 11414b1efc50..6341764e0ab3 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3359,6 +3359,9 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, return true; case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (t == BPF_WRITE) env->seen_direct_write = true; diff --git a/net/core/filter.c b/net/core/filter.c index 62d09520a55d..b6f71d2b0039 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7292,6 +7292,29 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +#ifdef CONFIG_HISOCK +static const struct bpf_func_proto * +hisock_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return &bpf_skb_change_head_proto; + case BPF_FUNC_skb_adjust_room: + return &bpf_skb_adjust_room_proto; + default: + return bpf_base_func_proto(func_id); + } +} +#endif + static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -7829,6 +7852,33 @@ static bool cg_skb_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, prog, info); 
} +#ifdef CONFIG_HISOCK +static bool hisock_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): + return false; + } + + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, prog, info); +} +#endif + static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -9952,6 +10002,18 @@ const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; +#ifdef CONFIG_HISOCK +const struct bpf_verifier_ops hisock_verifier_ops = { + .get_func_proto = hisock_func_proto, + .is_valid_access = hisock_is_valid_access, + .convert_ctx_access = bpf_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, +}; + +const struct bpf_prog_ops hisock_prog_ops = { +}; +#endif + const struct bpf_verifier_ops lwt_in_verifier_ops = { .get_func_proto = lwt_in_func_proto, .is_valid_access = lwt_is_valid_access, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0dfa6f3d6b20..cde0d087bd1c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -202,6 +202,7 @@ enum bpf_prog_type { #ifndef __GENKSYMS__ BPF_PROG_TYPE_SCHED, BPF_PROG_TYPE_NET_GLOBAL, + BPF_PROG_TYPE_HISOCK, #endif }; @@ -250,6 +251,7 @@ enum bpf_attach_type { BPF_GNET_SK_DST_SET, BPF_GNET_RCV_NIC_NODE, BPF_GNET_SEND_NIC_NODE, + BPF_HISOCK_EGRESS, #endif __MAX_BPF_ATTACH_TYPE }; @@ -5288,4 +5290,11 @@ struct bpf_gnet_ctx { __u64 rx_dev_netns_cookie; }; +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + 
HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b7f71d2d7d53..3eaf5c353008 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8502,6 +8502,8 @@ static const struct bpf_sec_def section_defs[] = { BPF_GNET_RCV_NIC_NODE), BPF_EAPROG_SEC("gnet/send_nic_node", BPF_PROG_TYPE_NET_GLOBAL, BPF_GNET_SEND_NIC_NODE), + BPF_APROG_SEC("hisock_egress", BPF_PROG_TYPE_HISOCK, + BPF_HISOCK_EGRESS), }; #undef BPF_PROG_SEC_IMPL -- 2.34.1