[PATCH OLK-6.6 v3 00/11] HiSock Redirect Framework

Matteo Croce (1): bpf: Enable generic kfuncs for BPF_CGROUP_* programs Pu Lehui (10): bpf: Add CONFIG_HISOCK bpf: Add XDP_HISOCK_REDIRECT action bpf: Add BPF_PROG_TYPE_HISOCK prog type bpf: Add HISOCK_EGRESS hook on network egress path bpf: Add bpf_skops_get_ingress_dst kfunc bpf: Add hisock_xdp_buff wrapper for xdp_buff bpf: Add bpf_xdp_set_ingress_dst kfunc bpf: Add bpf_*_change_dev kfunc openeuler_defconfig: Enable CONFIG_HISOCK samples/bpf: Add HiSock Redirect sample arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + include/linux/bpf-cgroup-defs.h | 3 + include/linux/bpf-cgroup.h | 25 ++ include/linux/bpf_types.h | 4 + include/net/xdp.h | 5 + include/uapi/linux/bpf.h | 14 + kernel/bpf/btf.c | 18 +- kernel/bpf/cgroup.c | 43 +++ kernel/bpf/helpers.c | 1 + kernel/bpf/syscall.c | 19 ++ kernel/bpf/verifier.c | 3 + net/Kconfig | 10 + net/core/dev.c | 76 ++++- net/core/filter.c | 167 ++++++++++ net/ipv4/ip_output.c | 68 +++++ samples/bpf/.gitignore | 1 + samples/bpf/Makefile | 3 + samples/bpf/hisock/bpf.c | 251 +++++++++++++++ samples/bpf/hisock/hisock_cmd.c | 405 +++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 14 + tools/lib/bpf/libbpf.c | 3 + 22 files changed, 1130 insertions(+), 5 deletions(-) create mode 100644 samples/bpf/hisock/bpf.c create mode 100644 samples/bpf/hisock/hisock_cmd.c -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add CONFIG_HISOCK to enable the HiSock redirect framework, which bypasses net filter rules for specific connections selected by bpf prog on both TX and RX directions. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- net/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/Kconfig b/net/Kconfig index 2fc1860faeb4..fd08800cb130 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -348,6 +348,16 @@ config BPF_STREAM_PARSER Enabling this allows a TCP stream parser to be used with BPF_MAP_TYPE_SOCKMAP. +config HISOCK + bool "enable HiSock Redirect Framework" + depends on INET + depends on CGROUP_BPF + depends on BPF_SYSCALL + default n + help + Enable HiSock, which bypasses net filter rules for specific + connections selected by bpf prog on both TX and RX directions. + config NET_FLOW_LIMIT bool depends on RPS -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add the XDP_HISOCK_REDIRECT xdp action to bypass net filter rules for specific connections. The XDP_HISOCK_REDIRECT action is only supported for generic XDP. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/uapi/linux/bpf.h | 1 + net/core/dev.c | 68 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 70 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a660cb68c853..8619baf2d64b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6310,6 +6310,7 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, + XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook diff --git a/net/core/dev.c b/net/core/dev.c index cbb4bd4718cd..e36056866543 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5029,6 +5029,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, case XDP_REDIRECT: case XDP_TX: case XDP_PASS: +#ifdef CONFIG_HISOCK + case XDP_HISOCK_REDIRECT: +#endif break; default: bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act); @@ -5074,6 +5077,64 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) } } +#ifdef CONFIG_HISOCK +static int generic_xdp_hisock_redirect(struct sk_buff *skb) +{ + const struct iphdr *iph; + u32 len; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto free_skb; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4 || + ip_is_fragment(iph)) + return -EOPNOTSUPP; + + if (!pskb_may_pull(skb, iph->ihl * 4)) + goto free_skb; + + iph = ip_hdr(skb); + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto free_skb; + + len = ntohs(iph->tot_len); + if (skb->len < len || len < (iph->ihl * 4)) + goto free_skb; + + if (pskb_trim_rcsum(skb, len)) + goto free_skb; + + iph = ip_hdr(skb); + skb->transport_header = 
skb->network_header + iph->ihl * 4; + + skb_orphan(skb); + + if (!skb_valid_dst(skb)) { + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) + goto free_skb; + } + + __skb_pull(skb, skb_network_header_len(skb)); + + rcu_read_lock(); + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, iph->protocol); + rcu_read_unlock(); + + return 0; + +free_skb: + kfree_skb(skb); +out: + return -EFAULT; +} +#endif + static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) @@ -5095,6 +5156,13 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) case XDP_TX: generic_xdp_tx(skb, xdp_prog); break; +#ifdef CONFIG_HISOCK + case XDP_HISOCK_REDIRECT: + err = generic_xdp_hisock_redirect(skb); + if (err == -EOPNOTSUPP) + return XDP_PASS; + break; +#endif } return XDP_DROP; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9b302242be6c..6873e3acbdef 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6313,6 +6313,7 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, + XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add the BPF_PROG_TYPE_HISOCK program type and the BPF_HISOCK_EGRESS attach type to allow a bpf program to be attached on the network egress side and bypass net filter rules for the specific connections. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/linux/bpf-cgroup-defs.h | 3 ++ include/linux/bpf-cgroup.h | 25 +++++++++++++ include/linux/bpf_types.h | 4 +++ include/uapi/linux/bpf.h | 13 +++++++ kernel/bpf/cgroup.c | 43 +++++++++++++++++++++++ kernel/bpf/syscall.c | 19 ++++++++++ kernel/bpf/verifier.c | 3 ++ net/core/filter.c | 62 +++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 13 +++++++ tools/lib/bpf/libbpf.c | 3 ++ 10 files changed, 188 insertions(+) diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index fb6adb1c3889..a9c988c8e217 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -45,6 +45,9 @@ enum cgroup_bpf_attach_type { CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, +#ifdef CONFIG_HISOCK + KABI_BROKEN_INSERT_ENUM(HISOCK_EGRESS) +#endif MAX_CGROUP_BPF_ATTACH_TYPE }; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d4f2c8706042..f94f57d185b8 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -62,6 +62,9 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); +#ifdef CONFIG_HISOCK + CGROUP_ATYPE(HISOCK_EGRESS); +#endif default: return CGROUP_BPF_ATTACH_TYPE_INVALID; } @@ -150,6 +153,11 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, int optname, void *optval, int *optlen, int retval); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum 
cgroup_bpf_attach_type atype); +#endif + static inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -401,6 +409,20 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, __ret; \ }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) \ +({ \ + int __ret = HISOCK_PASS; \ + if (cgroup_bpf_enabled(HISOCK_EGRESS) && sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb)) \ + __ret = __cgroup_bpf_run_hisock_egress(__sk, skb, \ + HISOCK_EGRESS); \ + } \ + __ret; \ +}) +#endif + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -498,6 +520,9 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) ({ HISOCK_PASS; }) +#endif #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index f5cdd5a9e268..15809bc5eff4 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -17,6 +17,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock, struct bpf_sock, struct sock) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr, struct bpf_sock_addr, struct bpf_sock_addr_kern) +#ifdef CONFIG_HISOCK +BPF_PROG_TYPE(BPF_PROG_TYPE_HISOCK, hisock, + struct __sk_buff, struct sk_buff) +#endif #endif BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in, struct __sk_buff, struct sk_buff) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8619baf2d64b..782d56c4ebca 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1006,6 +1006,9 @@ enum bpf_prog_type { BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ 
BPF_PROG_TYPE_NETFILTER, BPF_PROG_TYPE_SCHED, +#ifndef __GENKSYMS__ + BPF_PROG_TYPE_HISOCK, +#endif }; enum bpf_attach_type { @@ -1059,6 +1062,9 @@ enum bpf_attach_type { BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, BPF_SCHED, +#ifndef __GENKSYMS__ + BPF_HISOCK_EGRESS, +#endif __MAX_BPF_ATTACH_TYPE }; @@ -7353,4 +7359,11 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index cf2eb0895d40..90cc73c762cf 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -663,6 +663,12 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, */ return -EPERM; +#ifdef CONFIG_HISOCK + /* Only one bpf program can be attached to HISOCK_EGRESS */ + if (atype == HISOCK_EGRESS && prog_list_length(progs) >= 1) + return -EEXIST; +#endif + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) return -E2BIG; @@ -1548,6 +1554,43 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array_item *item; + struct bpf_prog *prog; + struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_cg_run_ctx run_ctx; + void *saved_data_end; + u32 ret = HISOCK_PASS; + + bpf_compute_and_save_data_end(skb, &saved_data_end); + + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(cgrp->bpf.effective[atype]); + item = &array->items[0]; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + /* Only one bpf program can be attached to HISOCK_EGRESS */ + prog = READ_ONCE(item->prog); + if (prog) { + run_ctx.prog_item = item; + ret = __bpf_prog_run_save_cb(prog, skb); + } + bpf_reset_run_ctx(old_run_ctx); + 
rcu_read_unlock(); + migrate_enable(); + + bpf_restore_data_end(skb, saved_data_end); + + return ret < __MAX_HISOCK_ACTION ? ret : -EPERM; +} +EXPORT_SYMBOL(__cgroup_bpf_run_hisock_egress); +#endif + int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum cgroup_bpf_attach_type atype) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 266e84baea84..7131d7bf92d7 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2554,6 +2554,9 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_EXT: /* extends any prog */ case BPF_PROG_TYPE_NETFILTER: @@ -3820,6 +3823,10 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: return BPF_PROG_TYPE_CGROUP_SOCKOPT; +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: + return BPF_PROG_TYPE_HISOCK; +#endif case BPF_TRACE_ITER: case BPF_TRACE_RAW_TP: case BPF_TRACE_FENTRY: @@ -3978,6 +3985,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (ptype == BPF_PROG_TYPE_LSM && prog->expected_attach_type != BPF_LSM_CGROUP) ret = -EINVAL; @@ -4043,6 +4053,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_prog_detach(attr, ptype); break; case BPF_PROG_TYPE_SCHED_CLS: @@ -4094,6 +4107,9 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: case BPF_LSM_CGROUP: +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: +#endif 
return cgroup_bpf_prog_query(attr, uattr); case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); @@ -5054,6 +5070,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_link_attach(attr, prog); break; case BPF_PROG_TYPE_EXT: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dbf2df404460..3a85c3ff4a43 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5576,6 +5576,9 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, return true; case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (t == BPF_WRITE) env->seen_direct_write = true; diff --git a/net/core/filter.c b/net/core/filter.c index 2968f1f8dd47..5abdd9a0a2ac 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8152,6 +8152,29 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +#ifdef CONFIG_HISOCK +static const struct bpf_func_proto * +hisock_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return &bpf_skb_change_head_proto; + case BPF_FUNC_skb_adjust_room: + return &bpf_skb_adjust_room_proto; + default: + return bpf_base_func_proto(func_id); + } +} +#endif + static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -8741,6 +8764,33 @@ static bool cg_skb_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, prog, info); } +#ifdef CONFIG_HISOCK +static bool 
hisock_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): + return false; + } + + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, prog, info); +} +#endif + static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -11086,6 +11136,18 @@ const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; +#ifdef CONFIG_HISOCK +const struct bpf_verifier_ops hisock_verifier_ops = { + .get_func_proto = hisock_func_proto, + .is_valid_access = hisock_is_valid_access, + .convert_ctx_access = bpf_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, +}; + +const struct bpf_prog_ops hisock_prog_ops = { +}; +#endif + const struct bpf_verifier_ops lwt_in_verifier_ops = { .get_func_proto = lwt_in_func_proto, .is_valid_access = lwt_is_valid_access, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6873e3acbdef..a457d4f82da2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1006,6 +1006,9 @@ enum bpf_prog_type { BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, BPF_PROG_TYPE_SCHED, +#ifndef __GENKSYMS__ + BPF_PROG_TYPE_HISOCK, +#endif }; enum bpf_attach_type { @@ -1059,6 +1062,9 @@ enum bpf_attach_type { BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, BPF_SCHED, +#ifndef __GENKSYMS__ + BPF_HISOCK_EGRESS, +#endif __MAX_BPF_ATTACH_TYPE }; @@ -7356,4 +7362,11 @@ struct bpf_iter_num { __u64 __opaque[1]; } 
__attribute__((aligned(8))); +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a57f9afffe98..828c1d2f173e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -123,6 +123,7 @@ static const char * const attach_type_name[] = { [BPF_TCX_EGRESS] = "tcx_egress", [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", [BPF_SCHED] = "sched", + [BPF_HISOCK_EGRESS] = "hisock_egress", }; static const char * const link_type_name[] = { @@ -212,6 +213,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SYSCALL] = "syscall", [BPF_PROG_TYPE_NETFILTER] = "netfilter", [BPF_PROG_TYPE_SCHED] = "sched", + [BPF_PROG_TYPE_HISOCK] = "hisock", }; static int __base_pr(enum libbpf_print_level level, const char *format, @@ -8873,6 +8875,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), SEC_DEF("sched/", SCHED, BPF_SCHED, SEC_ATTACH_BTF, attach_sched), + SEC_DEF("hisock_egress", HISOCK, BPF_HISOCK_EGRESS, SEC_ATTACHABLE_OPT), }; int libbpf_register_prog_handler(const char *sec, -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add the HISOCK_EGRESS hook on the network egress path to bypass net filter rules for specific connections. If the L2 header has been encapsulated in the bpf program, the skb is sent directly. Otherwise, fall back to the neighbor lookup and send it out. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- net/ipv4/ip_output.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f49570e2f713..89f5f3b178e1 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -457,6 +457,55 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) iph->daddr = fl4->daddr; } +#ifdef CONFIG_HISOCK +static int hisock_egress_redirect_xmit(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + rcu_read_lock_bh(); + + txq = netdev_core_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + HARD_TX_UNLOCK(dev, txq); + + rcu_read_unlock_bh(); + + if (free_skb) { + rc = -ENETDOWN; + kfree_skb(skb); + } + + return rc; +} + +static int do_hisock_egress_redirect(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct iphdr *iph; + + skb->protocol = htons(ETH_P_IP); + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + + if (skb_mac_header_was_set(skb)) + return hisock_egress_redirect_xmit(skb); + + iph = ip_hdr(skb); + iph_set_totlen(iph, skb->len); + ip_send_check(iph); + + return ip_finish_output2(net, sk, skb); +} +#endif + /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, __u8 tos) @@ -537,6 +586,25 @@ int __ip_queue_xmit(struct sock 
*sk, struct sk_buff *skb, struct flowi *fl, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); +#ifdef CONFIG_HISOCK + res = BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb); + switch (res) { + case HISOCK_PASS: + break; + case HISOCK_REDIRECT: + res = do_hisock_egress_redirect(net, sk, skb); + rcu_read_unlock(); + return res; + default: + pr_warn_once("Illegal HiSock return value %d, expect packet loss!", res); + fallthrough; + case HISOCK_DROP: + kfree_skb(skb); + rcu_read_unlock(); + return NET_XMIT_DROP; + } +#endif + res = ip_local_out(net, sk, skb); rcu_read_unlock(); return res; -- 2.34.1

From: Matteo Croce <teknoraver@meta.com> mainline inclusion from mainline-v6.12-rc1 commit 67666479edf1e2b732f4d0ac797885e859a78de4 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- These kfuncs are enabled even in BPF_PROG_TYPE_TRACING, so they should be safe also in BPF_CGROUP_* programs. Since all BPF_CGROUP_* programs share the same hook, call register_btf_kfunc_id_set() only once. In enum btf_kfunc_hook, rename BTF_KFUNC_HOOK_CGROUP_SKB to a more generic BTF_KFUNC_HOOK_CGROUP, since it's used for all the cgroup related program types. Signed-off-by: Matteo Croce <teknoraver@meta.com> Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20240819162805.78235-2-technoboy85@gmail.com Conflicts: kernel/bpf/helpers.c [context conflicts] Signed-off-by: Pu Lehui <pulehui@huawei.com> --- kernel/bpf/btf.c | 8 ++++++-- kernel/bpf/helpers.c | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 39fc33e6368f..68c6e3817ce6 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -211,7 +211,7 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_TRACING, BTF_KFUNC_HOOK_SYSCALL, BTF_KFUNC_HOOK_FMODRET, - BTF_KFUNC_HOOK_CGROUP_SKB, + BTF_KFUNC_HOOK_CGROUP, BTF_KFUNC_HOOK_SCHED_ACT, BTF_KFUNC_HOOK_SK_SKB, BTF_KFUNC_HOOK_SOCKET_FILTER, @@ -8093,8 +8093,12 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: - return BTF_KFUNC_HOOK_CGROUP_SKB; + case BPF_PROG_TYPE_CGROUP_SOCKOPT: + case BPF_PROG_TYPE_CGROUP_SYSCTL: + return BTF_KFUNC_HOOK_CGROUP; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; case BPF_PROG_TYPE_SK_SKB: diff --git 
a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 41f049ecb5c8..19a13ed5406b 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2696,6 +2696,7 @@ static int __init kfunc_init(void) ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &generic_kfunc_set); ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors, ARRAY_SIZE(generic_dtors), THIS_MODULE); -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add the bpf_skops_get_ingress_dst kfunc for sock_ops bpf programs. It is used to get the receive dst entry of the full sock. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- kernel/bpf/btf.c | 3 +++ net/core/filter.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 68c6e3817ce6..c2059bf7c71d 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -8098,6 +8098,9 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_SOCK_OPS: +#endif return BTF_KFUNC_HOOK_CGROUP; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; diff --git a/net/core/filter.c b/net/core/filter.c index 5abdd9a0a2ac..fd717806586c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -12057,6 +12057,27 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } + +#ifdef CONFIG_HISOCK +__bpf_kfunc struct dst_entry * +bpf_skops_get_ingress_dst(struct bpf_sock_ops *skops_ctx) +{ + struct bpf_sock_ops_kern *skops = (struct bpf_sock_ops_kern *)skops_ctx; + struct sock *sk = skops->sk; + struct dst_entry *dst; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!sk || !sk_fullsock(sk)) + return NULL; + + dst = rcu_dereference(sk->sk_rx_dst); + if (dst) + dst = dst_check(dst, 0); + + return dst; +} +#endif __diag_pop(); int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, @@ -12085,6 +12106,12 @@ BTF_SET8_START(bpf_kfunc_check_set_sock_addr) BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) BTF_SET8_END(bpf_kfunc_check_set_sock_addr) +#ifdef CONFIG_HISOCK +BTF_SET8_START(bpf_kfunc_check_set_sock_ops) +BTF_ID_FLAGS(func, bpf_skops_get_ingress_dst, KF_RET_NULL) 
+BTF_SET8_END(bpf_kfunc_check_set_sock_ops) +#endif + static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_skb, @@ -12100,6 +12127,13 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { .set = &bpf_kfunc_check_set_sock_addr, }; +#ifdef CONFIG_HISOCK +static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_sock_ops, +}; +#endif + static int __init bpf_kfunc_init(void) { int ret; @@ -12115,6 +12149,9 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_LWT_SEG6LOCAL, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); +#ifdef CONFIG_HISOCK + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops); +#endif return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, &bpf_kfunc_set_sock_addr); } -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add a hisock_xdp_buff wrapper for xdp_buff to bind the associated skb. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/net/xdp.h | 5 +++++ net/core/dev.c | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/net/xdp.h b/include/net/xdp.h index 31698ef493b3..4ca0a42e55c6 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -150,6 +150,11 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, xdp->data_meta = meta_valid ? data : data + 1; } +struct hisock_xdp_buff { + struct xdp_buff xdp; + struct sk_buff *skb; +}; + /* Reserve memory area at end-of data area. * * This macro reserves tailroom in the XDP buffer by limiting the diff --git a/net/core/dev.c b/net/core/dev.c index e36056866543..22025bab8907 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5140,16 +5140,18 @@ static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) { if (xdp_prog) { - struct xdp_buff xdp; + struct hisock_xdp_buff hxdp; + struct xdp_buff *xdp = &hxdp.xdp; u32 act; int err; - act = netif_receive_generic_xdp(skb, &xdp, xdp_prog); + hxdp.skb = skb; + act = netif_receive_generic_xdp(skb, xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { case XDP_REDIRECT: err = xdp_do_generic_redirect(skb->dev, skb, - &xdp, xdp_prog); + xdp, xdp_prog); if (err) goto out_redir; break; -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add the bpf_xdp_set_ingress_dst kfunc for xdp programs. It is used to set the receive dst entry on the skb associated with the xdp_buff. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- net/core/filter.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index fd717806586c..3ed92fbe3cfa 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -12077,6 +12077,26 @@ bpf_skops_get_ingress_dst(struct bpf_sock_ops *skops_ctx) return dst; } + +__bpf_kfunc int bpf_xdp_set_ingress_dst(struct xdp_md *xdp_ctx, void *dst__ign) +{ + struct xdp_buff *xdp = (struct xdp_buff *)xdp_ctx; + struct hisock_xdp_buff *hxdp = (struct hisock_xdp_buff *)xdp; + struct dst_entry *_dst = (struct dst_entry *)dst__ign; + + if (!hxdp->skb) + return -EOPNOTSUPP; + + if (!_dst || !virt_addr_valid(_dst)) + return -EFAULT; + + /* same as skb_valid_dst */ + if (_dst->flags & DST_METADATA) + return -EINVAL; + + skb_dst_set_noref(hxdp->skb, _dst); + return 0; +} #endif __diag_pop(); @@ -12100,6 +12120,9 @@ BTF_SET8_END(bpf_kfunc_check_set_skb) BTF_SET8_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) +#ifdef CONFIG_HISOCK +BTF_ID_FLAGS(func, bpf_xdp_set_ingress_dst) +#endif BTF_SET8_END(bpf_kfunc_check_set_xdp) BTF_SET8_START(bpf_kfunc_check_set_sock_addr) -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add bpf_*_change_dev kfuncs for xdp and hisock_egress programs. They are used to change the ingress or egress device of the associated skb. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- kernel/bpf/btf.c | 7 +++++++ net/core/filter.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index c2059bf7c71d..b9a0ae4e4a6f 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -218,6 +218,9 @@ enum btf_kfunc_hook { BTF_KFUNC_HOOK_LWT, BTF_KFUNC_HOOK_NETFILTER, BTF_KFUNC_HOOK_SCHED, +#ifdef CONFIG_HISOCK + BTF_KFUNC_HOOK_HISOCK, +#endif BTF_KFUNC_HOOK_MAX, }; @@ -8117,6 +8120,10 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type) return BTF_KFUNC_HOOK_NETFILTER; case BPF_PROG_TYPE_SCHED: return BTF_KFUNC_HOOK_SCHED; +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + return BTF_KFUNC_HOOK_HISOCK; +#endif default: return BTF_KFUNC_HOOK_MAX; } diff --git a/net/core/filter.c b/net/core/filter.c index 3ed92fbe3cfa..eb888d2913e5 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -12097,6 +12097,40 @@ __bpf_kfunc int bpf_xdp_set_ingress_dst(struct xdp_md *xdp_ctx, void *dst__ign) skb_dst_set_noref(hxdp->skb, _dst); return 0; } + +__bpf_kfunc int bpf_xdp_change_dev(struct xdp_md *xdp_ctx, u32 ifindex) +{ + struct xdp_buff *xdp = (struct xdp_buff *)xdp_ctx; + struct hisock_xdp_buff *hxdp = (void *)xdp; + struct net_device *dev; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!hxdp->skb) + return -EOPNOTSUPP; + + dev = dev_get_by_index_rcu(&init_net, ifindex); + if (!dev) + return -ENODEV; + + hxdp->skb->dev = dev; + return 0; +} + +__bpf_kfunc int bpf_skb_change_dev(struct __sk_buff *skb_ctx, u32 ifindex) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct net_device *dev; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + dev = 
dev_get_by_index_rcu(&init_net, ifindex); + if (!dev) + return -ENODEV; + + skb->dev = dev; + return 0; +} #endif __diag_pop(); @@ -12122,6 +12156,7 @@ BTF_SET8_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) #ifdef CONFIG_HISOCK BTF_ID_FLAGS(func, bpf_xdp_set_ingress_dst) +BTF_ID_FLAGS(func, bpf_xdp_change_dev) #endif BTF_SET8_END(bpf_kfunc_check_set_xdp) @@ -12133,6 +12168,10 @@ BTF_SET8_END(bpf_kfunc_check_set_sock_addr) BTF_SET8_START(bpf_kfunc_check_set_sock_ops) BTF_ID_FLAGS(func, bpf_skops_get_ingress_dst, KF_RET_NULL) BTF_SET8_END(bpf_kfunc_check_set_sock_ops) + +BTF_SET8_START(bpf_kfunc_check_set_hisock) +BTF_ID_FLAGS(func, bpf_skb_change_dev) +BTF_SET8_END(bpf_kfunc_check_set_hisock) #endif static const struct btf_kfunc_id_set bpf_kfunc_set_skb = { @@ -12155,6 +12194,11 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_sock_ops, }; + +static const struct btf_kfunc_id_set bpf_kfunc_set_hisock = { + .owner = THIS_MODULE, + .set = &bpf_kfunc_check_set_hisock, +}; #endif static int __init bpf_kfunc_init(void) @@ -12174,6 +12218,7 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); #ifdef CONFIG_HISOCK ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_HISOCK, &bpf_kfunc_set_hisock); #endif return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, &bpf_kfunc_set_sock_addr); -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Enable CONFIG_HISOCK in openeuler_defconfig. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 8848ca6fdda0..4bf7ddd03d93 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1927,6 +1927,7 @@ CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y +CONFIG_HISOCK=y CONFIG_NET_FLOW_LIMIT=y # diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index d00c4dd20b0f..1c595f3a89b5 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1894,6 +1894,7 @@ CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y +CONFIG_HISOCK=y CONFIG_NET_FLOW_LIMIT=y # -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add End-to-End HiSock Redirect sample. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- samples/bpf/.gitignore | 1 + samples/bpf/Makefile | 3 + samples/bpf/hisock/bpf.c | 251 ++++++++++++++++++++ samples/bpf/hisock/hisock_cmd.c | 405 ++++++++++++++++++++++++++++++++ 4 files changed, 660 insertions(+) create mode 100644 samples/bpf/hisock/bpf.c create mode 100644 samples/bpf/hisock/hisock_cmd.c diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 0002cd359fb1..4505e51c3f4d 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -41,6 +41,7 @@ xdp_adjust_tail xdp_fwd xdp_router_ipv4 xdp_tx_iptunnel +hisock/hisock_cmd testfile.img hbm_out.log iperf.* diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3fa16412db15..2f19faa7fb63 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -46,6 +46,7 @@ tprogs-y += xdp_fwd tprogs-y += task_fd_query tprogs-y += ibumad tprogs-y += hbm +tprogs-y += hisock/hisock_cmd # Libbpf dependencies LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf @@ -96,6 +97,7 @@ xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) +hisock_cmd-objs := hisock/hisock_cmd.o xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE) @@ -149,6 +151,7 @@ always-y += task_fd_query_kern.o always-y += ibumad_kern.o always-y += hbm_out_kern.o always-y += hbm_edt_kern.o +always-y += hisock/bpf.o ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux diff --git a/samples/bpf/hisock/bpf.c b/samples/bpf/hisock/bpf.c new file mode 100644 index 000000000000..46159c96cb18 --- /dev/null +++ b/samples/bpf/hisock/bpf.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
+ * + * Description: End-to-End HiSock Redirect sample. + */ +#include <linux/if_vlan.h> +#include <net/dst.h> + +#include <uapi/linux/in.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/tcp.h> +#include <uapi/linux/bpf.h> + +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define CSUM_SHIFT_BITS 16 + +#define SOCKOPS_SUCC 1 +#define SOCKOPS_FAIL 0 + +#define PORT_LOCAL 1 +#define PORT_REMOTE 2 + +#define MAX_NUMA 8 +#define MAX_CONN_NUMA 4096 +#define MAX_CONN (MAX_CONN_NUMA * MAX_NUMA * 2) + +struct sock_tuple { + u32 saddr; + u32 daddr; + u16 sport; + u16 dport; +}; + +struct sock_value { + struct dst_entry *ingress_dst; + struct ethhdr ingress_eth; + bool eth_updated; + u32 ingress_ifindex; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct sock_tuple)); + __uint(value_size, sizeof(struct sock_value)); + __uint(max_entries, MAX_CONN); +} connmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(u16)); + __uint(value_size, sizeof(u8)); + __uint(max_entries, 128); +} speed_port SEC(".maps"); + +struct dst_entry *bpf_skops_get_ingress_dst(struct bpf_sock_ops *skops) __ksym; +int bpf_xdp_set_ingress_dst(struct xdp_md *xdp, void *dst) __ksym; +int bpf_skb_change_dev(struct __sk_buff *skb, u32 ifindex) __ksym; + +static inline bool is_speed_flow(u32 local, u32 remote) +{ + u8 *val; + + val = bpf_map_lookup_elem(&speed_port, &local); + if (val && *val == PORT_LOCAL) + return true; + + val = bpf_map_lookup_elem(&speed_port, &remote); + if (val && *val == PORT_REMOTE) + return true; + + return false; +} + +SEC("hisock_sockops") +int hisock_sockops_prog(struct bpf_sock_ops *skops) +{ + struct sock_tuple key = { 0 }; + struct sock_value val = { 0 }; + struct dst_entry *dst; + + if (!is_speed_flow(skops->local_port, bpf_ntohl(skops->remote_port))) + return SOCKOPS_SUCC; + + switch (skops->op) { + case 
BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + dst = bpf_skops_get_ingress_dst(skops); + if (!dst) + return SOCKOPS_FAIL; + + key.saddr = skops->remote_ip4; + key.sport = bpf_ntohl(skops->remote_port); + key.daddr = skops->local_ip4; + key.dport = skops->local_port; + + val.ingress_dst = dst; + bpf_map_update_elem(&connmap, &key, &val, BPF_ANY); + + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + break; + case BPF_SOCK_OPS_STATE_CB: + if (skops->args[1] != BPF_TCP_CLOSE_WAIT && + skops->args[1] != BPF_TCP_FIN_WAIT1 && + skops->args[1] != BPF_TCP_CLOSE) + break; + + key.saddr = skops->remote_ip4; + key.sport = bpf_ntohl(skops->remote_port); + key.daddr = skops->local_ip4; + key.dport = skops->local_port; + + bpf_map_delete_elem(&connmap, &key); + + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_STATE_CB_FLAG); + break; + default: + break; + } + + return SOCKOPS_SUCC; +} + +SEC("hisock_ingress") +int hisock_ingress_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct sock_tuple key = { 0 }; + struct sock_value *val; + struct ethhdr *ehdr; + struct tcphdr *thdr; + struct iphdr *ihdr; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return XDP_PASS; + + if (ehdr->h_proto != bpf_htons(ETH_P_IP)) + return XDP_PASS; + + ihdr = (struct iphdr *)(ehdr + 1); + if (ihdr + 1 > data_end) + return XDP_PASS; + + if (ihdr->ihl != 5 || ihdr->protocol != IPPROTO_TCP) + return XDP_PASS; + + if (ihdr->frag_off & bpf_htons(IP_MF | IP_OFFSET)) + return XDP_PASS; + + thdr = (struct tcphdr *)(ihdr + 1); + if (thdr + 1 > data_end) + return XDP_PASS; + + if (thdr->syn || thdr->fin || thdr->rst) + return XDP_PASS; + + key.saddr = ihdr->saddr; + key.sport = bpf_ntohs(thdr->source); + key.daddr = ihdr->daddr; + key.dport = bpf_ntohs(thdr->dest); + + val = bpf_map_lookup_elem(&connmap, &key); + if (!val) + return XDP_PASS; + + 
if (unlikely(!val->eth_updated)) { + memcpy(val->ingress_eth.h_source, ehdr->h_dest, ETH_ALEN); + memcpy(val->ingress_eth.h_dest, ehdr->h_source, ETH_ALEN); + val->ingress_eth.h_proto = ehdr->h_proto; + val->eth_updated = true; + } + + if (unlikely(!val->ingress_ifindex)) + val->ingress_ifindex = ctx->ingress_ifindex; + + if (likely(val->ingress_dst)) + bpf_xdp_set_ingress_dst(ctx, val->ingress_dst); + + return XDP_HISOCK_REDIRECT; +} + +static inline void ipv4_csum(struct iphdr *ihdr) +{ + u32 csum = 0; + u16 *next_ip_u16 = (u16 *)ihdr; + + ihdr->check = 0; + for (size_t i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_ip_u16++; + + ihdr->check = ~((csum & 0xffff) + (csum >> CSUM_SHIFT_BITS)); +} + +SEC("hisock_egress") +int hisock_egress_prog(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct sock_tuple key = { 0 }; + struct sock_value *val; + struct ethhdr *ehdr; + struct iphdr *ihdr; + int ret; + + key.saddr = skb->remote_ip4; + key.sport = bpf_ntohl(skb->remote_port); + key.daddr = skb->local_ip4; + key.dport = skb->local_port; + + val = bpf_map_lookup_elem(&connmap, &key); + if (!val) + return HISOCK_PASS; + + if (unlikely(!val->eth_updated)) + goto redirect; + + ihdr = (struct iphdr *)data; + if (ihdr + 1 > data_end) + return HISOCK_PASS; + + ihdr->tot_len = bpf_htons(skb->len); + ipv4_csum(ihdr); + + ret = bpf_skb_change_head(skb, ETH_HLEN, 0); + if (ret < 0) + goto redirect; + + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return HISOCK_DROP; + + memcpy(ehdr, &val->ingress_eth, ETH_HLEN); +redirect: + if (likely(val->ingress_ifindex)) + bpf_skb_change_dev(skb, val->ingress_ifindex); + + return HISOCK_REDIRECT; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/hisock/hisock_cmd.c b/samples/bpf/hisock/hisock_cmd.c new file mode 100644 index 000000000000..6b64c008b6c7 
--- /dev/null +++ b/samples/bpf/hisock/hisock_cmd.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. + * + * Description: End-to-End HiSock Redirect sample. + */ +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include <libgen.h> +#include <limits.h> +#include <sys/mount.h> +#include <sys/resource.h> +#include <net/if.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "bpf_util.h" +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#define DEF_BPF_PATH "bpf.o" +#define PORT_LOCAL 1 +#define PORT_REMOTE 2 +#define MAX_IF_NUM 8 + +struct { + __u32 ifindex[MAX_IF_NUM]; + int if_num; + char *local_port; + char *remote_port; + char *cgrp_path; + char *bpf_path; + bool unload; + bool help; +} hisock; + +struct hisock_prog_info { + const char *prog_name; + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + int attach_flag; + int prog_fd; + bool is_xdp; +}; + +static struct hisock_prog_info prog_infos[] = { + { + .prog_name = "hisock_sockops_prog", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + .attach_type = BPF_CGROUP_SOCK_OPS, + .attach_flag = 0, + .is_xdp = false, + }, + { + .prog_name = "hisock_ingress_prog", + .prog_type = BPF_PROG_TYPE_XDP, + .attach_type = BPF_XDP, + .attach_flag = XDP_FLAGS_SKB_MODE, + .is_xdp = true, + }, + { + .prog_name = "hisock_egress_prog", + .prog_type = BPF_PROG_TYPE_HISOCK, + .attach_type = BPF_HISOCK_EGRESS, + .attach_flag = 0, + .is_xdp = false, + }, +}; + +static int set_prog_type(struct bpf_object *obj) +{ + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; + struct bpf_program *prog; + const char *prog_name; + int i; + + bpf_object__for_each_program(prog, obj) { + prog_name = bpf_program__name(prog); + for (i = 0; i < 
ARRAY_SIZE(prog_infos); i++) { + if (!strcmp(prog_infos[i].prog_name, prog_name)) { + prog_type = prog_infos[i].prog_type; + attach_type = prog_infos[i].attach_type; + break; + } + } + + if (i == ARRAY_SIZE(prog_infos)) + return -1; + + bpf_program__set_type(prog, prog_type); + bpf_program__set_expected_attach_type(prog, attach_type); + } + + return 0; +} + +static int find_progs(struct bpf_object *obj) +{ + struct hisock_prog_info *info; + struct bpf_program *prog; + int i, prog_fd; + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + prog = bpf_object__find_program_by_name(obj, info->prog_name); + if (!prog) { + fprintf(stderr, "ERROR: failed to find prog sec %s\n", info->prog_name); + return -1; + } + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + fprintf(stderr, "ERROR: failed to get fd of prog %s\n", info->prog_name); + return -1; + } + + info->prog_fd = prog_fd; + } + + return 0; +} + +static int parse_port_range(const char *port_str, __u8 status, int map_fd) +{ + char *str = strdup(port_str); + char *token, *rest = str; + __u16 port; + + while ((token = strtok_r(rest, ",", &rest))) { + char *dash = strchr(token, '-'); + + if (dash) { + *dash = '\0'; + __u16 start = atoi(token); + __u16 end = atoi(dash + 1); + + if (start > end || start == 0 || end > 65535) { + fprintf(stderr, "Invalid port range: %s\n", token); + return -1; + } + + for (port = start; port <= end; port++) + bpf_map_update_elem(map_fd, &port, &status, BPF_ANY); + + printf("Speed port range %u-%u:%u\n", start, end, status); + } else { + port = atoi(token); + if (port == 0 || port > 65535) { + fprintf(stderr, "Invalid port: %s\n", token); + return -1; + } + bpf_map_update_elem(map_fd, &port, &status, BPF_ANY); + printf("Speed port %u:%u\n", port, status); + } + } + + free(str); + return 0; +} + +static int set_speed_port(struct bpf_object *obj) +{ + int map_fd; + + map_fd = bpf_object__find_map_fd_by_name(obj, "speed_port"); + if (map_fd < 0) { + 
fprintf(stderr, "ERROR: failed to find map fd\n"); + return -1; + } + + if (hisock.local_port && + parse_port_range(hisock.local_port, PORT_LOCAL, map_fd)) { + fprintf(stderr, "ERROR: failed to update local port\n"); + return -1; + } + + if (hisock.remote_port && + parse_port_range(hisock.remote_port, PORT_REMOTE, map_fd)) { + fprintf(stderr, "ERROR: failed to update remote port\n"); + return -1; + } + + return 0; +} + +static int detach_progs(void) +{ + struct hisock_prog_info *info; + int i, j, cgrp_fd; + int err_cnt = 0; + + cgrp_fd = open(hisock.cgrp_path, O_DIRECTORY, O_RDONLY); + if (cgrp_fd < 0) { + fprintf(stderr, "ERROR: failed to open cgrp %s\n", hisock.cgrp_path); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + if (info->is_xdp) { + for (j = 0; j < hisock.if_num; j++) { + if (bpf_xdp_detach(hisock.ifindex[j], + info->attach_flag, NULL)) { + fprintf(stderr, + "ERROR: failed to detach prog %s\n", + info->prog_name); + err_cnt++; + } + } + continue; + } + + if (bpf_prog_detach(cgrp_fd, info->attach_type)) { + fprintf(stderr, "ERROR: failed to detach prog %s\n", info->prog_name); + err_cnt++; + } + } + + close(cgrp_fd); + return -err_cnt; +} + +static int attach_progs(void) +{ + struct hisock_prog_info *info; + int i, j, cgrp_fd; + + cgrp_fd = open(hisock.cgrp_path, O_DIRECTORY, O_RDONLY); + if (cgrp_fd < 0) { + fprintf(stderr, "ERROR: failed to open cgrp %s\n", hisock.cgrp_path); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + if (info->is_xdp) { + for (j = 0; j < hisock.if_num; j++) { + if (bpf_xdp_attach(hisock.ifindex[j], info->prog_fd, + info->attach_flag, NULL)) + goto fail; + } + continue; + } + + if (bpf_prog_attach(info->prog_fd, cgrp_fd, info->attach_type, + info->attach_flag)) + goto fail; + } + + close(cgrp_fd); + return 0; +fail: + fprintf(stderr, "ERROR: failed to attach prog %s\n", info->prog_name); + close(cgrp_fd); + detach_progs(); + return -1; +} 
+ +static int do_hisock(void) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_object *obj; + + setrlimit(RLIMIT_MEMLOCK, &r); + + obj = bpf_object__open(hisock.bpf_path); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: failed to open bpf file\n"); + return -1; + } + + if (set_prog_type(obj)) { + fprintf(stderr, "ERROR: failed to set prog type\n"); + bpf_object__close(obj); + return -1; + } + + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: failed to load bpf obj\n"); + bpf_object__close(obj); + return -1; + } + + if (find_progs(obj)) { + fprintf(stderr, "ERROR: failed to find progs\n"); + bpf_object__close(obj); + return -1; + } + + if (set_speed_port(obj)) { + fprintf(stderr, "ERROR: failed to set speed port\n"); + bpf_object__close(obj); + return -1; + } + + if (attach_progs()) { + fprintf(stderr, "ERROR: failed to attach progs\n"); + bpf_object__close(obj); + return -1; + } + + bpf_object__close(obj); + return 0; +} + +static void do_help(void) +{ + fprintf(stderr, + "Load: hisock_cmd [-f BPF_FILE] [-c CGRP_PATH] " + "[-p LOCAL_PORT] [-r REMOTE_PORT] [-i INTERFACE]\n" + "Unload: hisock_cmd -u [-c CGRP_PATH] [-i INTERFACE]\n"); +} + +static int parse_args(int argc, char **argv) +{ + char *ifname; + int opt; + + hisock.bpf_path = DEF_BPF_PATH; + hisock.if_num = 0; + + while ((opt = getopt(argc, argv, "f:c:p:r:i:uh")) != -1) { + switch (opt) { + case 'f': + hisock.bpf_path = optarg; + break; + case 'c': + hisock.cgrp_path = optarg; + break; + case 'p': + hisock.local_port = optarg; + break; + case 'r': + hisock.remote_port = optarg; + break; + case 'i': + ifname = optarg; + hisock.ifindex[hisock.if_num] = if_nametoindex(ifname); + hisock.if_num++; + break; + case 'u': + hisock.unload = true; + break; + case 'h': + hisock.help = true; + break; + default: + fprintf(stderr, "ERROR: unknown option %c\n", opt); + return -1; + } + } + + if (hisock.cgrp_path == NULL || + hisock.if_num == 0 || + (!hisock.unload && + hisock.local_port == 
NULL && + hisock.remote_port == NULL)) { + do_help(); + return -1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + if (parse_args(argc, argv)) { + fprintf(stderr, "ERROR: failed to parse args\n"); + return -1; + } + + if (hisock.help) { + do_help(); + return 0; + } + + if (hisock.unload) { + if (detach_progs()) { + fprintf(stderr, "ERROR: failed to detach progs\n"); + return -1; + } + + printf("Unload HiSock successfully\n"); + return 0; + } + + if (do_hisock()) { + fprintf(stderr, "ERROR: failed to do hisock\n"); + return -1; + } + + printf("Load HiSock successfully\n"); + return 0; +} -- 2.34.1

反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/17797 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/IWR... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/17797 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/IWR...
participants (2)
-
patchwork bot
-
Pu Lehui