[PATCH OLK-6.6 00/11] HiSock Redirect Framework

Pu Lehui (10): bpf: Add CONFIG_HISOCK bpf: Add XDP_HISOCK_REDIRECT action bpf: Add BPF_PROG_TYPE_HISOCK prog type bpf: Add HISOCK_EGRESS hook on network egress path bpf: Add bpf_get_ingress_dst helper bpf: Add hisock_xdp_buff wrapper for xdp_buff bpf: Add bpf_set_ingress_dst helper bpf: Add bpf_change_skb_dev helper openeuler_defconfig: Enable CONFIG_HISOCK samples/bpf: Add HiSock Redirect sample Xu Kuohai (1): bpf: Add bpf_ext_memcpy extension helper for arm64 arch/arm64/configs/openeuler_defconfig | 1 + arch/arm64/include/asm/insn.h | 4 + arch/arm64/lib/insn.c | 8 + arch/arm64/net/bpf_jit.h | 15 + arch/arm64/net/bpf_jit_comp.c | 266 ++++++++++++++++ arch/x86/configs/openeuler_defconfig | 1 + include/linux/bpf-cgroup-defs.h | 3 + include/linux/bpf-cgroup.h | 25 ++ include/linux/bpf_types.h | 4 + include/linux/filter.h | 3 + include/net/xdp.h | 5 + include/uapi/linux/bpf.h | 45 +++ kernel/bpf/cgroup.c | 43 +++ kernel/bpf/core.c | 7 + kernel/bpf/helpers.c | 27 ++ kernel/bpf/syscall.c | 19 ++ kernel/bpf/verifier.c | 27 ++ net/Kconfig | 10 + net/core/dev.c | 76 ++++- net/core/filter.c | 182 +++++++++++ net/ipv4/ip_output.c | 68 +++++ samples/bpf/.gitignore | 1 + samples/bpf/Makefile | 3 + samples/bpf/hisock/bpf.c | 247 +++++++++++++++ samples/bpf/hisock/hisock_cmd.c | 405 +++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 45 +++ tools/lib/bpf/libbpf.c | 3 + 27 files changed, 1540 insertions(+), 3 deletions(-) create mode 100644 samples/bpf/hisock/bpf.c create mode 100644 samples/bpf/hisock/hisock_cmd.c -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add CONFIG_HISOCK to enable HiSock redirect framework, which bypasses net filter rules for specific connections selected by bpf prog on both TX and RX directions. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- net/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/net/Kconfig b/net/Kconfig index 2fc1860faeb4..fd08800cb130 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -348,6 +348,16 @@ config BPF_STREAM_PARSER Enabling this allows a TCP stream parser to be used with BPF_MAP_TYPE_SOCKMAP. +config HISOCK + bool "enable HiSock Redirect Framework" + depends on INET + depends on CGROUP_BPF + depends on BPF_SYSCALL + default n + help + Enable HiSock, which bypasses net filter rules for specific + connections selected by bpf prog on both TX and RX directions. + config NET_FLOW_LIMIT bool depends on RPS -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add XDP_HISOCK_REDIRECT xdp action to bypass net filter rules for specific connections. The XDP_HISOCK_REDIRECT action is only supported for generic XDP. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/uapi/linux/bpf.h | 1 + net/core/dev.c | 68 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 1 + 3 files changed, 70 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a660cb68c853..8619baf2d64b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6310,6 +6310,7 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, + XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook diff --git a/net/core/dev.c b/net/core/dev.c index cbb4bd4718cd..e36056866543 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5029,6 +5029,9 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, case XDP_REDIRECT: case XDP_TX: case XDP_PASS: +#ifdef CONFIG_HISOCK + case XDP_HISOCK_REDIRECT: +#endif break; default: bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act); @@ -5074,6 +5077,64 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) } } +#ifdef CONFIG_HISOCK +static int generic_xdp_hisock_redirect(struct sk_buff *skb) +{ + const struct iphdr *iph; + u32 len; + + skb = skb_share_check(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto free_skb; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4 || + ip_is_fragment(iph)) + return -EOPNOTSUPP; + + if (!pskb_may_pull(skb, iph->ihl * 4)) + goto free_skb; + + iph = ip_hdr(skb); + if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) + goto free_skb; + + len = ntohs(iph->tot_len); + if (skb->len < len || len < (iph->ihl * 4)) + goto free_skb; + + if (pskb_trim_rcsum(skb, len)) + goto free_skb; + + iph = ip_hdr(skb); + skb->transport_header = 
skb->network_header + iph->ihl * 4; + + skb_orphan(skb); + + if (!skb_valid_dst(skb)) { + if (ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) + goto free_skb; + } + + __skb_pull(skb, skb_network_header_len(skb)); + + rcu_read_lock(); + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, iph->protocol); + rcu_read_unlock(); + + return 0; + +free_skb: + kfree_skb(skb); +out: + return -EFAULT; +} +#endif + static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) @@ -5095,6 +5156,13 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) case XDP_TX: generic_xdp_tx(skb, xdp_prog); break; +#ifdef CONFIG_HISOCK + case XDP_HISOCK_REDIRECT: + err = generic_xdp_hisock_redirect(skb); + if (err == -EOPNOTSUPP) + return XDP_PASS; + break; +#endif } return XDP_DROP; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9b302242be6c..6873e3acbdef 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6313,6 +6313,7 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, + XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook -- 2.34.1

hulk inclusion category: featrue bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add BPF_PROG_TYPE_HISOCK program type and BPF_HISOCK_EGRESS attach type to allow bpf program to be attached on network egress side and bypass net filter rules for the specific connections. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/linux/bpf-cgroup-defs.h | 3 ++ include/linux/bpf-cgroup.h | 25 +++++++++++++ include/linux/bpf_types.h | 4 +++ include/uapi/linux/bpf.h | 9 +++++ kernel/bpf/cgroup.c | 43 +++++++++++++++++++++++ kernel/bpf/syscall.c | 19 ++++++++++ kernel/bpf/verifier.c | 3 ++ net/core/filter.c | 62 +++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 9 +++++ tools/lib/bpf/libbpf.c | 3 ++ 10 files changed, 180 insertions(+) diff --git a/include/linux/bpf-cgroup-defs.h b/include/linux/bpf-cgroup-defs.h index fb6adb1c3889..2bd35e802b0a 100644 --- a/include/linux/bpf-cgroup-defs.h +++ b/include/linux/bpf-cgroup-defs.h @@ -45,6 +45,9 @@ enum cgroup_bpf_attach_type { CGROUP_INET_SOCK_RELEASE, CGROUP_LSM_START, CGROUP_LSM_END = CGROUP_LSM_START + CGROUP_LSM_NUM - 1, +#ifdef CONFIG_HISOCK + HISOCK_EGRESS, +#endif MAX_CGROUP_BPF_ATTACH_TYPE }; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d4f2c8706042..f94f57d185b8 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -62,6 +62,9 @@ to_cgroup_bpf_attach_type(enum bpf_attach_type attach_type) CGROUP_ATYPE(CGROUP_INET4_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET6_GETSOCKNAME); CGROUP_ATYPE(CGROUP_INET_SOCK_RELEASE); +#ifdef CONFIG_HISOCK + CGROUP_ATYPE(HISOCK_EGRESS); +#endif default: return CGROUP_BPF_ATTACH_TYPE_INVALID; } @@ -150,6 +153,11 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level, int optname, void *optval, int *optlen, int retval); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype); +#endif + static 
inline enum bpf_cgroup_storage_type cgroup_storage_type( struct bpf_map *map) { @@ -401,6 +409,20 @@ static inline bool cgroup_bpf_sock_enabled(struct sock *sk, __ret; \ }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) \ +({ \ + int __ret = HISOCK_PASS; \ + if (cgroup_bpf_enabled(HISOCK_EGRESS) && sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk) && __sk == skb_to_full_sk(skb)) \ + __ret = __cgroup_bpf_run_hisock_egress(__sk, skb, \ + HISOCK_EGRESS); \ + } \ + __ret; \ +}) +#endif + int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, struct bpf_prog *prog); int cgroup_bpf_prog_detach(const union bpf_attr *attr, @@ -498,6 +520,9 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, optlen, retval) ({ retval; }) #define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \ kernel_optval) ({ 0; }) +#ifdef CONFIG_HISOCK +#define BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb) ({ HISOCK_PASS; }) +#endif #define for_each_cgroup_storage_type(stype) for (; false; ) diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index f5cdd5a9e268..15809bc5eff4 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -17,6 +17,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock, struct bpf_sock, struct sock) BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr, struct bpf_sock_addr, struct bpf_sock_addr_kern) +#ifdef CONFIG_HISOCK +BPF_PROG_TYPE(BPF_PROG_TYPE_HISOCK, hisock, + struct __sk_buff, struct sk_buff) +#endif #endif BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in, struct __sk_buff, struct sk_buff) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8619baf2d64b..52f376f9428c 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1006,6 +1006,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, BPF_PROG_TYPE_SCHED, + 
BPF_PROG_TYPE_HISOCK, }; enum bpf_attach_type { @@ -1059,6 +1060,7 @@ enum bpf_attach_type { BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, BPF_SCHED, + BPF_HISOCK_EGRESS, __MAX_BPF_ATTACH_TYPE }; @@ -7353,4 +7355,11 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index cf2eb0895d40..90cc73c762cf 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -663,6 +663,12 @@ static int __cgroup_bpf_attach(struct cgroup *cgrp, */ return -EPERM; +#ifdef CONFIG_HISOCK + /* Only one bpf program can be attached to HISOCK_EGRESS */ + if (atype == HISOCK_EGRESS && prog_list_length(progs) >= 1) + return -EEXIST; +#endif + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) return -E2BIG; @@ -1548,6 +1554,43 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); +#ifdef CONFIG_HISOCK +int __cgroup_bpf_run_hisock_egress(struct sock *sk, struct sk_buff *skb, + enum cgroup_bpf_attach_type atype) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog_array_item *item; + struct bpf_prog *prog; + struct bpf_prog_array *array; + struct bpf_run_ctx *old_run_ctx; + struct bpf_cg_run_ctx run_ctx; + void *saved_data_end; + u32 ret = HISOCK_PASS; + + bpf_compute_and_save_data_end(skb, &saved_data_end); + + migrate_disable(); + rcu_read_lock(); + array = rcu_dereference(cgrp->bpf.effective[atype]); + item = &array->items[0]; + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); + /* Only one bpf program can be attached to HISOCK_EGRESS */ + prog = READ_ONCE(item->prog); + if (prog) { + run_ctx.prog_item = item; + ret = __bpf_prog_run_save_cb(prog, skb); + } + bpf_reset_run_ctx(old_run_ctx); + rcu_read_unlock(); + migrate_enable(); + + bpf_restore_data_end(skb, saved_data_end); + + return ret < 
__MAX_HISOCK_ACTION ? ret : -EPERM; +} +EXPORT_SYMBOL(__cgroup_bpf_run_hisock_egress); +#endif + int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, short access, enum cgroup_bpf_attach_type atype) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 266e84baea84..7131d7bf92d7 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2554,6 +2554,9 @@ static bool is_net_admin_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_EXT: /* extends any prog */ case BPF_PROG_TYPE_NETFILTER: @@ -3820,6 +3823,10 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: return BPF_PROG_TYPE_CGROUP_SOCKOPT; +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: + return BPF_PROG_TYPE_HISOCK; +#endif case BPF_TRACE_ITER: case BPF_TRACE_RAW_TP: case BPF_TRACE_FENTRY: @@ -3978,6 +3985,9 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (ptype == BPF_PROG_TYPE_LSM && prog->expected_attach_type != BPF_LSM_CGROUP) ret = -EINVAL; @@ -4043,6 +4053,9 @@ static int bpf_prog_detach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_prog_detach(attr, ptype); break; case BPF_PROG_TYPE_SCHED_CLS: @@ -4094,6 +4107,9 @@ static int bpf_prog_query(const union bpf_attr *attr, case BPF_CGROUP_GETSOCKOPT: case BPF_CGROUP_SETSOCKOPT: case BPF_LSM_CGROUP: +#ifdef CONFIG_HISOCK + case BPF_HISOCK_EGRESS: +#endif return cgroup_bpf_prog_query(attr, uattr); case BPF_LIRC_MODE2: return lirc_prog_query(attr, uattr); @@ 
-5054,6 +5070,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif ret = cgroup_bpf_link_attach(attr, prog); break; case BPF_PROG_TYPE_EXT: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dbf2df404460..3a85c3ff4a43 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5576,6 +5576,9 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, return true; case BPF_PROG_TYPE_CGROUP_SOCKOPT: +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: +#endif if (t == BPF_WRITE) env->seen_direct_write = true; diff --git a/net/core/filter.c b/net/core/filter.c index 2968f1f8dd47..5abdd9a0a2ac 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8152,6 +8152,29 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } } +#ifdef CONFIG_HISOCK +static const struct bpf_func_proto * +hisock_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return &bpf_skb_change_head_proto; + case BPF_FUNC_skb_adjust_room: + return &bpf_skb_adjust_room_proto; + default: + return bpf_base_func_proto(func_id); + } +} +#endif + static const struct bpf_func_proto * tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -8741,6 +8764,33 @@ static bool cg_skb_is_valid_access(int off, int size, return bpf_skb_is_valid_access(off, size, type, prog, info); } +#ifdef CONFIG_HISOCK +static bool hisock_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct 
bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): + return false; + } + + switch (off) { + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, prog, info); +} +#endif + static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, @@ -11086,6 +11136,18 @@ const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; +#ifdef CONFIG_HISOCK +const struct bpf_verifier_ops hisock_verifier_ops = { + .get_func_proto = hisock_func_proto, + .is_valid_access = hisock_is_valid_access, + .convert_ctx_access = bpf_convert_ctx_access, + .gen_prologue = bpf_noop_prologue, +}; + +const struct bpf_prog_ops hisock_prog_ops = { +}; +#endif + const struct bpf_verifier_ops lwt_in_verifier_ops = { .get_func_proto = lwt_in_func_proto, .is_valid_access = lwt_is_valid_access, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6873e3acbdef..9796dfba3b38 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1006,6 +1006,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, BPF_PROG_TYPE_SCHED, + BPF_PROG_TYPE_HISOCK, }; enum bpf_attach_type { @@ -1059,6 +1060,7 @@ enum bpf_attach_type { BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, BPF_SCHED, + BPF_HISOCK_EGRESS, __MAX_BPF_ATTACH_TYPE }; @@ -7356,4 +7358,11 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +enum hisock_action { + HISOCK_PASS, + HISOCK_DROP, + HISOCK_REDIRECT, + __MAX_HISOCK_ACTION, +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git 
a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a57f9afffe98..828c1d2f173e 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -123,6 +123,7 @@ static const char * const attach_type_name[] = { [BPF_TCX_EGRESS] = "tcx_egress", [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", [BPF_SCHED] = "sched", + [BPF_HISOCK_EGRESS] = "hisock_egress", }; static const char * const link_type_name[] = { @@ -212,6 +213,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SYSCALL] = "syscall", [BPF_PROG_TYPE_NETFILTER] = "netfilter", [BPF_PROG_TYPE_SCHED] = "sched", + [BPF_PROG_TYPE_HISOCK] = "hisock", }; static int __base_pr(enum libbpf_print_level level, const char *format, @@ -8873,6 +8875,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), SEC_DEF("sched/", SCHED, BPF_SCHED, SEC_ATTACH_BTF, attach_sched), + SEC_DEF("hisock_egress", HISOCK, BPF_HISOCK_EGRESS, SEC_ATTACHABLE_OPT), }; int libbpf_register_prog_handler(const char *sec, -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add HISOCK_EGRESS hook on network egress path to bypass net filter rules for specific connections. If the L2 header has been encapsulated in the bpf program, skb is sent directly. Otherwise, fall back to the neighbor lookup and send it out. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- net/ipv4/ip_output.c | 68 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f49570e2f713..89f5f3b178e1 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -457,6 +457,55 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) iph->daddr = fl4->daddr; } +#ifdef CONFIG_HISOCK +static int hisock_egress_redirect_xmit(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + rcu_read_lock_bh(); + + txq = netdev_core_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + HARD_TX_UNLOCK(dev, txq); + + rcu_read_unlock_bh(); + + if (free_skb) { + rc = -ENETDOWN; + kfree_skb(skb); + } + + return rc; +} + +static int do_hisock_egress_redirect(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct iphdr *iph; + + skb->protocol = htons(ETH_P_IP); + if (!skb->dev) + skb->dev = skb_dst(skb)->dev; + + if (skb_mac_header_was_set(skb)) + return hisock_egress_redirect_xmit(skb); + + iph = ip_hdr(skb); + iph_set_totlen(iph, skb->len); + ip_send_check(iph); + + return ip_finish_output2(net, sk, skb); +} +#endif + /* Note: skb->sk can be different from sk, in case of tunnels */ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, __u8 tos) @@ -537,6 +586,25 @@ int __ip_queue_xmit(struct sock 
*sk, struct sk_buff *skb, struct flowi *fl, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); +#ifdef CONFIG_HISOCK + res = BPF_CGROUP_RUN_PROG_HISOCK_EGRESS(sk, skb); + switch (res) { + case HISOCK_PASS: + break; + case HISOCK_REDIRECT: + res = do_hisock_egress_redirect(net, sk, skb); + rcu_read_unlock(); + return res; + default: + pr_warn_once("Illegal HiSock return value %d, expect packet loss!", res); + fallthrough; + case HISOCK_DROP: + kfree_skb(skb); + rcu_read_unlock(); + return NET_XMIT_DROP; + } +#endif + res = ip_local_out(net, sk, skb); rcu_read_unlock(); return res; -- 2.34.1

hulk inclusion category: featrue bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add bpf_get_ingress_dst helper for sock_ops bpf program. It is used to get the receive dst entry of the full sock. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/uapi/linux/bpf.h | 8 ++++++++ net/core/filter.c | 32 ++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 8 ++++++++ 3 files changed, 48 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 52f376f9428c..d37a33a5c155 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5671,6 +5671,13 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * void *bpf_get_ingress_dst(struct bpf_sock_ops *skops) + * Description + * Get the ingress dst entry of the full sock. + * Return + * Valid ingress dst on success, or negative error + * in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5885,6 +5892,7 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ + FN(get_ingress_dst, 212, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/net/core/filter.c b/net/core/filter.c index 5abdd9a0a2ac..1e168cec0178 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7786,6 +7786,34 @@ static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = { .arg3_type = ARG_ANYTHING, }; +#ifdef CONFIG_HISOCK +BTF_ID_LIST_SINGLE(btf_dst_entity_ids, struct, dst_entry) +BPF_CALL_1(bpf_sock_ops_get_ingress_dst, struct bpf_sock_ops_kern *, sops) +{ + struct sock *sk = sops->sk; + struct dst_entry *dst; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!sk || !sk_fullsock(sk)) + return (unsigned long)NULL; + + dst = rcu_dereference(sk->sk_rx_dst); + if (dst) + dst = dst_check(dst, 0); + + return (unsigned long)dst; +} + +const 
struct bpf_func_proto bpf_sock_ops_get_ingress_dst_proto = { + .func = bpf_sock_ops_get_ingress_dst, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, + .ret_btf_id = &btf_dst_entity_ids[0], +}; +#endif + BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, u64, tstamp, u32, tstamp_type) { @@ -8422,6 +8450,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_ops_store_hdr_opt_proto; case BPF_FUNC_reserve_hdr_opt: return &bpf_sock_ops_reserve_hdr_opt_proto; +#ifdef CONFIG_HISOCK + case BPF_FUNC_get_ingress_dst: + return &bpf_sock_ops_get_ingress_dst_proto; +#endif case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 9796dfba3b38..3f6d740bace1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5671,6 +5671,13 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * void *bpf_get_ingress_dst(struct bpf_sock_ops *skops) + * Description + * Get the ingress dst entry of the full sock. + * Return + * Valid ingress dst on success, or negative error + * in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5885,6 +5892,7 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ + FN(get_ingress_dst, 212, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add hisock_xdp_buff wrapper for xdp_buff to bind the associated skb. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/net/xdp.h | 5 +++++ net/core/dev.c | 8 +++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/net/xdp.h b/include/net/xdp.h index 31698ef493b3..4ca0a42e55c6 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -150,6 +150,11 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, xdp->data_meta = meta_valid ? data : data + 1; } +struct hisock_xdp_buff { + struct xdp_buff xdp; + struct sk_buff *skb; +}; + /* Reserve memory area at end-of data area. * * This macro reserves tailroom in the XDP buffer by limiting the diff --git a/net/core/dev.c b/net/core/dev.c index e36056866543..22025bab8907 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5140,16 +5140,18 @@ static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) { if (xdp_prog) { - struct xdp_buff xdp; + struct hisock_xdp_buff hxdp; + struct xdp_buff *xdp = &hxdp.xdp; u32 act; int err; - act = netif_receive_generic_xdp(skb, &xdp, xdp_prog); + hxdp.skb = skb; + act = netif_receive_generic_xdp(skb, xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { case XDP_REDIRECT: err = xdp_do_generic_redirect(skb->dev, skb, - &xdp, xdp_prog); + xdp, xdp_prog); if (err) goto out_redir; break; -- 2.34.1

hulk inclusion category: featrue bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add bpf_set_ingress_dst helper for xdp program. It is used to set the receive dst entry to the skb associated with xdp_buff. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/uapi/linux/bpf.h | 8 ++++++++ net/core/filter.c | 34 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 8 ++++++++ 3 files changed, 50 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d37a33a5c155..7aabd5d39e2d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5678,6 +5678,13 @@ union bpf_attr { * Return * Valid ingress dst on success, or negative error * in case of failure. + * + * int bpf_set_ingress_dst(struct xdp_buff *xdp, void *dst) + * Description + * Set valid ingress dst entry to the skb associated + * with xdp_buff. + * Return + * 0 on success, or a negative error in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ FN(unspec, 0, ##ctx) \ @@ -5893,6 +5900,7 @@ union bpf_attr { FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ FN(get_ingress_dst, 212, ##ctx) \ + FN(set_ingress_dst, 213, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/net/core/filter.c b/net/core/filter.c index 1e168cec0178..dceed41f7b58 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7082,6 +7082,36 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .arg5_type = ARG_ANYTHING, }; +#ifdef CONFIG_HISOCK +BPF_CALL_2(bpf_xdp_set_ingress_dst, struct xdp_buff *, xdp, void *, dst) +{ + struct hisock_xdp_buff *hxdp = (struct hisock_xdp_buff *)xdp; + struct dst_entry *_dst = (struct dst_entry *)dst; + + if (!hxdp->skb) + return -EOPNOTSUPP; + + if (!_dst || !virt_addr_valid(_dst)) + return -EFAULT; + + /* same as skb_valid_dst */ + if (_dst->flags & DST_METADATA) + return -EINVAL; + + skb_dst_set_noref(hxdp->skb, _dst); + return 0; +} + +static const struct bpf_func_proto bpf_xdp_set_ingress_dst_proto = { + .func = bpf_xdp_set_ingress_dst, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { @@ -8381,6 +8411,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; +#ifdef CONFIG_HISOCK + case BPF_FUNC_set_ingress_dst: + return &bpf_xdp_set_ingress_dst_proto; +#endif #ifdef CONFIG_SYN_COOKIES case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: return &bpf_tcp_raw_gen_syncookie_ipv4_proto; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3f6d740bace1..164e30c6d37b 100644 --- a/tools/include/uapi/linux/bpf.h +++ 
b/tools/include/uapi/linux/bpf.h @@ -5678,6 +5678,13 @@ union bpf_attr { * Return * Valid ingress dst on success, or negative error * in case of failure. + * + * int bpf_set_ingress_dst(struct xdp_buff *xdp, void *dst) + * Description + * Set valid ingress dst entry to the skb associated + * with xdp_buff. + * Return + * 0 on success, or a negative error in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5893,6 +5900,7 @@ union bpf_attr { FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ FN(get_ingress_dst, 212, ##ctx) \ + FN(set_ingress_dst, 213, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't -- 2.34.1

hulk inclusion category: featrue bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add bpf_change_skb_dev helper for xdp and hisock_egress program. It is used to change ingress or egress device of associated skb. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/uapi/linux/bpf.h | 12 ++++++++ net/core/filter.c | 54 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 12 ++++++++ 3 files changed, 78 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7aabd5d39e2d..04eacc3d1b7e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5685,6 +5685,17 @@ union bpf_attr { * with xdp_buff. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_change_skb_dev(void *ctx, u32 ifindex) + * Description + * Change ingress or egress device of the associated skb. + * Supports only BPF_PROG_TYPE_HISOCK and BPF_PROG_TYPE_XDP + * program types. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct __sk_buff** hisock_egress programs. + * Return + * 0 on success, or negative error in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ FN(unspec, 0, ##ctx) \ @@ -5901,6 +5912,7 @@ union bpf_attr { FN(cgrp_storage_delete, 211, ##ctx) \ FN(get_ingress_dst, 212, ##ctx) \ FN(set_ingress_dst, 213, ##ctx) \ + FN(change_skb_dev, 214, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/net/core/filter.c b/net/core/filter.c index dceed41f7b58..9b2ecc4a8ecb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3736,6 +3736,30 @@ static const struct bpf_func_proto bpf_skb_adjust_room_proto = { .arg4_type = ARG_ANYTHING, }; +#ifdef CONFIG_HISOCK +BPF_CALL_2(bpf_skb_change_skb_dev, struct sk_buff *, skb, u32, ifindex) +{ + struct net_device *dev; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + dev = dev_get_by_index_rcu(&init_net, ifindex); + if (!dev) + return -ENODEV; + + skb->dev = dev; + return 0; +} + +static const struct bpf_func_proto bpf_skb_change_skb_dev_proto = { + .func = bpf_skb_change_skb_dev, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; +#endif + static u32 __bpf_skb_min_len(const struct sk_buff *skb) { int offset = skb_network_offset(skb); @@ -7110,6 +7134,32 @@ static const struct bpf_func_proto bpf_xdp_set_ingress_dst_proto = { .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, }; + +BPF_CALL_2(bpf_xdp_change_skb_dev, struct xdp_buff *, xdp, u32, ifindex) +{ + struct hisock_xdp_buff *hxdp = (void *)xdp; + struct net_device *dev; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!hxdp->skb) + return -EOPNOTSUPP; + + dev = dev_get_by_index_rcu(&init_net, ifindex); + if (!dev) + return -ENODEV; + + hxdp->skb->dev = dev; + return 0; +} + +static const struct bpf_func_proto bpf_xdp_change_skb_dev_proto = { + .func = bpf_xdp_change_skb_dev, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; #endif BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, @@ -8227,6 +8277,8 @@ 
hisock_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_skb_change_head_proto; case BPF_FUNC_skb_adjust_room: return &bpf_skb_adjust_room_proto; + case BPF_FUNC_change_skb_dev: + return &bpf_skb_change_skb_dev_proto; default: return bpf_base_func_proto(func_id); } @@ -8414,6 +8466,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #ifdef CONFIG_HISOCK case BPF_FUNC_set_ingress_dst: return &bpf_xdp_set_ingress_dst_proto; + case BPF_FUNC_change_skb_dev: + return &bpf_xdp_change_skb_dev_proto; #endif #ifdef CONFIG_SYN_COOKIES case BPF_FUNC_tcp_raw_gen_syncookie_ipv4: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 164e30c6d37b..2b8d560fa908 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5685,6 +5685,17 @@ union bpf_attr { * with xdp_buff. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_change_skb_dev(void *ctx, u32 ifindex) + * Description + * Change ingress or egress device of the associated skb. + * Supports only BPF_PROG_TYPE_HISOCK and BPF_PROG_TYPE_XDP + * program types. + * + * *ctx* is either **struct xdp_md** for XDP programs or + * **struct __sk_buff** hisock_egress programs. + * Return + * 0 on success, or negative error in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5901,6 +5912,7 @@ union bpf_attr { FN(cgrp_storage_delete, 211, ##ctx) \ FN(get_ingress_dst, 212, ##ctx) \ FN(set_ingress_dst, 213, ##ctx) \ + FN(change_skb_dev, 214, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't -- 2.34.1

From: Xu Kuohai <xukuohai@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add bpf_ext_memcpy extension helper, which will be available if arch supports extension helper. Currently, only arm64 is supported. Signed-off-by: Xu Kuohai <xukuohai@huawei.com> Signed-off-by: Pu Lehui <pulehui@huawei.com> --- arch/arm64/include/asm/insn.h | 4 + arch/arm64/lib/insn.c | 8 + arch/arm64/net/bpf_jit.h | 15 ++ arch/arm64/net/bpf_jit_comp.c | 266 +++++++++++++++++++++++++++++++++ include/linux/filter.h | 3 + include/uapi/linux/bpf.h | 7 + kernel/bpf/core.c | 7 + kernel/bpf/helpers.c | 27 ++++ kernel/bpf/verifier.c | 24 +++ tools/include/uapi/linux/bpf.h | 7 + 10 files changed, 368 insertions(+) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 12c0278294e3..0c54a15945ac 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -188,6 +188,10 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_STORE_REL_EX, AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET, AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET, +#ifdef CONFIG_HISOCK + AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET, + AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET, +#endif }; enum aarch64_insn_adsb_type { diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 7232b1e70a12..4609c550c055 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -500,6 +500,14 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, case AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX: insn = aarch64_insn_get_stp_post_value(); break; +#ifdef CONFIG_HISOCK + case AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET: + insn = aarch64_insn_get_ldp_value(); + break; + case AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET: + insn = aarch64_insn_get_stp_value(); + break; +#endif default: pr_err("%s: unknown load/store encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; diff --git a/arch/arm64/net/bpf_jit.h 
b/arch/arm64/net/bpf_jit.h index 23b1b34db088..e30549bacf72 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -102,6 +102,21 @@ /* Rt = Rn[0]; Rt2 = Rn[8]; Rn += 16; */ #define A64_POP(Rt, Rt2, Rn) A64_LS_PAIR(Rt, Rt2, Rn, 16, LOAD, POST_INDEX) +#ifdef CONFIG_HISOCK +#define A64_STP(Rt, Rt2, Rn, offset) \ + A64_LS_PAIR(Rt, Rt2, Rn, offset, STORE, SIGNED_OFFSET) +#define A64_LDP(Rt, Rt2, Rn, offset) \ + A64_LS_PAIR(Rt, Rt2, Rn, offset, LOAD, SIGNED_OFFSET) +#define A64_STP32(Wt, Wt2, Rn, offset) \ + aarch64_insn_gen_load_store_pair(Wt, Wt2, Rn, offset, \ + AARCH64_INSN_VARIANT_32BIT, \ + AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET) +#define A64_LDP32(Wt, Wt2, Rn, offset) \ + aarch64_insn_gen_load_store_pair(Wt, Wt2, Rn, offset, \ + AARCH64_INSN_VARIANT_32BIT, \ + AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET) +#endif + /* Load/store exclusive */ #define A64_SIZE(sf) \ ((sf) ? AARCH64_INSN_SIZE_64 : AARCH64_INSN_SIZE_32) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 8b957d2f60eb..92ea5a964b1b 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -26,11 +26,26 @@ #include "bpf_jit.h" +#ifdef CONFIG_HISOCK +#define TCALL_CNT (MAX_BPF_JIT_REG + 0) +#define FP_BOTTOM (MAX_BPF_JIT_REG + 1) +#define TMP_REG_1 (MAX_BPF_JIT_REG + 2) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 3) +#define TMP_REG_3 (MAX_BPF_JIT_REG + 4) +#define TMP_REG_4 (MAX_BPF_JIT_REG + 5) +#define TMP_REG_5 (MAX_BPF_JIT_REG + 6) +#define TMP_REG_6 (MAX_BPF_JIT_REG + 7) +#define TMP_REG_7 (MAX_BPF_JIT_REG + 8) +#define TMP_REG_8 (MAX_BPF_JIT_REG + 9) +#define TMP_REG_9 (MAX_BPF_JIT_REG + 10) +#define TMP_REG_10 (MAX_BPF_JIT_REG + 11) +#else #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TCALL_CNT (MAX_BPF_JIT_REG + 2) #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) #define FP_BOTTOM (MAX_BPF_JIT_REG + 4) +#endif #define check_imm(bits, imm) do { \ if ((((imm) > 0) && ((imm) >> (bits))) || \ @@ -64,6 
+79,15 @@ static const int bpf2a64[] = { [TMP_REG_1] = A64_R(10), [TMP_REG_2] = A64_R(11), [TMP_REG_3] = A64_R(12), +#ifdef CONFIG_HISOCK + [TMP_REG_4] = A64_R(13), + [TMP_REG_5] = A64_R(14), + [TMP_REG_6] = A64_R(15), + [TMP_REG_7] = A64_R(5), + [TMP_REG_8] = A64_R(6), + [TMP_REG_9] = A64_R(7), + [TMP_REG_10] = A64_R(28), +#endif /* tail_call_cnt */ [TCALL_CNT] = A64_R(26), /* temporary register for blinding constants */ @@ -801,6 +825,234 @@ static int add_exception_handler(const struct bpf_insn *insn, return 0; } +#ifdef CONFIG_HISOCK +static bool support_unaligned_access(void) +{ + unsigned long sctlr = SCTLR_ELx_A; + + switch (read_sysreg(CurrentEL)) { + case CurrentEL_EL1: + sctlr = read_sysreg(sctlr_el1); + break; + case CurrentEL_EL2: + sctlr = read_sysreg(sctlr_el2); + break; + default: + /* not EL1 and EL2 ? */ + break; + } + + return (sctlr & SCTLR_ELx_A) ? false : true; +} + +extern u64 bpf_ext_memcpy(void *dst, size_t dst_sz, + const void *src, size_t src_sz); + +static void emit_memcpy(struct jit_ctx *ctx, int size) +{ + u8 dst = bpf2a64[BPF_REG_1]; + u8 src = bpf2a64[BPF_REG_3]; + u8 tmp1 = bpf2a64[TMP_REG_1]; + u8 tmp2 = bpf2a64[TMP_REG_2]; + u8 tmp3 = bpf2a64[TMP_REG_3]; + u8 tmp4 = bpf2a64[TMP_REG_4]; + u8 tmp5 = bpf2a64[TMP_REG_5]; + u8 tmp6 = bpf2a64[TMP_REG_6]; + u8 tmp7 = bpf2a64[TMP_REG_7]; + u8 tmp8 = bpf2a64[TMP_REG_8]; + u8 tmp9 = bpf2a64[TMP_REG_9]; + u8 tmp10 = bpf2a64[TMP_REG_10]; + + if (!support_unaligned_access()) { + emit_call((u64)bpf_ext_memcpy, ctx); + return; + } + + switch (size) { + case 0: + break; + case 1: + emit(A64_LDRBI(tmp1, src, 0), ctx); + emit(A64_STRBI(tmp1, dst, 0), ctx); + break; + case 2: + emit(A64_LDRHI(tmp1, src, 0), ctx); + emit(A64_STRHI(tmp1, dst, 0), ctx); + break; + case 3: + emit(A64_LDRHI(tmp1, src, 0), ctx); + emit(A64_LDRBI(tmp2, src, 2), ctx); + emit(A64_STRHI(tmp1, dst, 0), ctx); + emit(A64_STRBI(tmp2, dst, 2), ctx); + break; + case 4: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_STR32I(tmp1, 
dst, 0), ctx); + break; + case 5: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_LDRBI(tmp2, src, 4), ctx); + emit(A64_STR32I(tmp1, dst, 0), ctx); + emit(A64_STRBI(tmp2, dst, 4), ctx); + break; + case 6: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_LDRHI(tmp2, src, 4), ctx); + emit(A64_STR32I(tmp1, dst, 0), ctx); + emit(A64_STRHI(tmp2, dst, 4), ctx); + break; + case 7: + emit(A64_LDR32I(tmp1, src, 0), ctx); + emit(A64_LDRHI(tmp2, src, 4), ctx); + emit(A64_LDRBI(tmp3, src, 6), ctx); + emit(A64_STR32I(tmp1, dst, 0), ctx); + emit(A64_STRHI(tmp2, dst, 4), ctx); + emit(A64_STRBI(tmp3, dst, 6), ctx); + break; + case 8: + emit(A64_LDR64I(tmp1, src, 0), ctx); + emit(A64_STR64I(tmp1, dst, 0), ctx); + break; + case 9 ... 15: + emit(A64_ADD_I(1, tmp1, src, size), ctx); + emit(A64_ADD_I(1, tmp2, dst, size), ctx); + emit(A64_LDR64I(tmp3, src, 0), ctx); + emit(A64_LDP32(tmp4, tmp5, tmp1, -8), ctx); + emit(A64_STR64I(tmp3, dst, 0), ctx); + emit(A64_STP32(tmp4, tmp5, tmp2, -8), ctx); + break; + case 16: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + break; + case 17 ... 31: + emit(A64_ADD_I(1, tmp1, src, size), ctx); + emit(A64_ADD_I(1, tmp2, dst, size), ctx); + emit(A64_LDP(tmp3, tmp4, src, 0), ctx); + emit(A64_LDP(tmp5, tmp6, tmp1, -16), ctx); + emit(A64_STP(tmp3, tmp4, dst, 0), ctx); + emit(A64_STP(tmp5, tmp6, tmp2, -16), ctx); + break; + case 32: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + break; + case 33 ... 
63: + emit(A64_ADD_I(1, tmp1, src, size), ctx); + emit(A64_ADD_I(1, tmp2, dst, size), ctx); + emit(A64_LDP(tmp3, tmp4, src, 0), ctx); + emit(A64_LDP(tmp5, tmp6, src, 16), ctx); + emit(A64_STP(tmp3, tmp4, dst, 0), ctx); + emit(A64_STP(tmp5, tmp6, dst, 16), ctx); + emit(A64_LDP(tmp3, tmp4, tmp1, -32), ctx); + emit(A64_LDP(tmp5, tmp6, tmp1, -16), ctx); + emit(A64_STP(tmp3, tmp4, tmp2, -32), ctx); + emit(A64_STP(tmp5, tmp6, tmp2, -16), ctx); + break; + case 64: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + break; + case 65 ... 95: + /* copy first 48 bytes */ + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + + /* copy last 48 bytes */ + emit(A64_ADD_I(1, tmp7, src, size), ctx); + emit(A64_ADD_I(1, tmp8, dst, size), ctx); + + emit(A64_LDP(tmp1, tmp2, tmp7, -48), ctx); + emit(A64_LDP(tmp3, tmp4, tmp7, -32), ctx); + emit(A64_LDP(tmp5, tmp6, tmp7, -16), ctx); + + emit(A64_STP(tmp1, tmp2, tmp8, -48), ctx); + emit(A64_STP(tmp3, tmp4, tmp8, -32), ctx); + emit(A64_STP(tmp5, tmp6, tmp8, -16), ctx); + break; + case 96: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + + emit(A64_LDP(tmp1, tmp2, src, 64), ctx); + emit(A64_LDP(tmp3, tmp4, src, 80), ctx); + emit(A64_STP(tmp1, tmp2, dst, 64), 
ctx); + emit(A64_STP(tmp3, tmp4, dst, 80), ctx); + break; + case 97 ... 127: + emit(A64_ADD_I(1, tmp9, src, size), ctx); + emit(A64_ADD_I(1, tmp10, dst, size), ctx); + + /* copy first 64 bytes */ + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + + /* copy last 64 bytes */ + emit(A64_LDP(tmp1, tmp2, tmp9, -64), ctx); + emit(A64_LDP(tmp3, tmp4, tmp9, -48), ctx); + emit(A64_LDP(tmp5, tmp6, tmp9, -32), ctx); + emit(A64_LDP(tmp7, tmp8, tmp9, -16), ctx); + + emit(A64_STP(tmp1, tmp2, tmp10, -64), ctx); + emit(A64_STP(tmp3, tmp4, tmp10, -48), ctx); + emit(A64_STP(tmp5, tmp6, tmp10, -32), ctx); + emit(A64_STP(tmp7, tmp8, tmp10, -16), ctx); + break; + case 128: + emit(A64_LDP(tmp1, tmp2, src, 0), ctx); + emit(A64_LDP(tmp3, tmp4, src, 16), ctx); + emit(A64_LDP(tmp5, tmp6, src, 32), ctx); + emit(A64_LDP(tmp7, tmp8, src, 48), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 0), ctx); + emit(A64_STP(tmp3, tmp4, dst, 16), ctx); + emit(A64_STP(tmp5, tmp6, dst, 32), ctx); + emit(A64_STP(tmp7, tmp8, dst, 48), ctx); + + emit(A64_LDP(tmp1, tmp2, src, 64), ctx); + emit(A64_LDP(tmp3, tmp4, src, 80), ctx); + emit(A64_LDP(tmp5, tmp6, src, 96), ctx); + emit(A64_LDP(tmp7, tmp8, src, 112), ctx); + + emit(A64_STP(tmp1, tmp2, dst, 64), ctx); + emit(A64_STP(tmp3, tmp4, dst, 80), ctx); + emit(A64_STP(tmp5, tmp6, dst, 96), ctx); + emit(A64_STP(tmp7, tmp8, dst, 112), ctx); + break; + default: + emit_call((u64)bpf_ext_memcpy, ctx); + break; + } +} +#endif + /* JITs an eBPF instruction. * Returns: * 0 - successfully JITed an 8-byte eBPF instruction. 
@@ -1164,6 +1416,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool func_addr_fixed; u64 func_addr; +#ifdef CONFIG_HISOCK + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_ext_memcpy) { + emit_memcpy(ctx, insn->off); + break; + } +#endif + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &func_addr, &func_addr_fixed); if (ret < 0) @@ -1738,6 +1997,13 @@ bool bpf_jit_supports_kfunc_call(void) return true; } +#ifdef CONFIG_HISOCK +bool bpf_jit_supports_ext_helper(void) +{ + return true; +} +#endif + u64 bpf_jit_alloc_exec_limit(void) { return VMALLOC_END - VMALLOC_START; diff --git a/include/linux/filter.h b/include/linux/filter.h index a7c0caa8b7ad..9f3b71bee822 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -917,6 +917,9 @@ bool bpf_jit_needs_zext(void); bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_jit_supports_far_kfunc_call(void); +#ifdef CONFIG_HISOCK +bool bpf_jit_supports_ext_helper(void); +#endif u64 bpf_arch_uaddress_limit(void); bool bpf_helper_changes_pkt_data(enum bpf_func_id func_id); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 04eacc3d1b7e..e32c1f62fc44 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5696,6 +5696,12 @@ union bpf_attr { * **struct __sk_buff** hisock_egress programs. * Return * 0 on success, or negative error in case of failure. + * + * int bpf_ext_memcpy(void *dst, size_t dst_sz, const void *src, size_t src_sz) + * Description + * Copy *src_sz* bytes from *src* to *dst* if *dst_sz* >= *src_sz*. + * Return + * 0 on success, or negative error in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) 
\ FN(unspec, 0, ##ctx) \ @@ -5913,6 +5919,7 @@ union bpf_attr { FN(get_ingress_dst, 212, ##ctx) \ FN(set_ingress_dst, 213, ##ctx) \ FN(change_skb_dev, 214, ##ctx) \ + FN(ext_memcpy, 215, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2ecaf891a167..5adf49397a67 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2939,6 +2939,13 @@ u64 __weak bpf_arch_uaddress_limit(void) #endif } +#ifdef CONFIG_HISOCK +bool __weak bpf_jit_supports_ext_helper(void) +{ + return false; +} +#endif + /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call * skb_copy_bits(), so provide a weak definition of it for NET-less config. */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 41f049ecb5c8..1da2fd748714 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1799,6 +1799,29 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = { .arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO, }; +#ifdef CONFIG_HISOCK +BPF_CALL_4(bpf_ext_memcpy, void *, dst, size_t, dst_sz, + const void *, src, size_t, src_sz) +{ + if (dst_sz < src_sz) + return -EINVAL; + + memcpy(dst, src, src_sz); + return 0; +} + +const struct bpf_func_proto bpf_ext_memcpy_proto = { + .func = bpf_ext_memcpy, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM | MEM_UNINIT, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE, +}; +#endif + const struct bpf_func_proto bpf_get_current_task_proto __weak; const struct bpf_func_proto bpf_get_current_task_btf_proto __weak; const struct bpf_func_proto bpf_probe_read_user_proto __weak; @@ -1855,6 +1878,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_strtol_proto; case BPF_FUNC_strtoul: return &bpf_strtoul_proto; +#ifdef CONFIG_HISOCK + case BPF_FUNC_ext_memcpy: + return &bpf_ext_memcpy_proto; +#endif default: break; } diff --git 
a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3a85c3ff4a43..85ca0f34c7c0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10314,6 +10314,21 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn err = push_callback_call(env, insn, insn_idx, meta.subprogno, set_user_ringbuf_callback_state); break; +#ifdef CONFIG_HISOCK + case BPF_FUNC_ext_memcpy: + { + /* XXX: cleanup & check if allowed to access dst mem */ + u32 regno = BPF_REG_1 + 3; + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx]; + + if (!bpf_jit_supports_ext_helper() || + reg->umax_value <= 0 || reg->umax_value > 4096) + return -ENOTSUPP; + + insn->off = reg->umax_value; + } +#endif } if (err) @@ -17359,6 +17374,9 @@ static int do_check(struct bpf_verifier_env *env) if (opcode == BPF_CALL) { if (BPF_SRC(insn->code) != BPF_K || (insn->src_reg != BPF_PSEUDO_KFUNC_CALL +#ifdef CONFIG_HISOCK + && insn->imm != BPF_FUNC_ext_memcpy +#endif && insn->off != 0) || (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL && @@ -19664,6 +19682,12 @@ static int do_misc_fixups(struct bpf_verifier_env *env) continue; } +#ifdef CONFIG_HISOCK + /* will fixup bpf extension helper in jit */ + if (insn->imm == BPF_FUNC_ext_memcpy) + continue; +#endif + patch_call_imm: fn = env->ops->get_func_proto(insn->imm, env->prog); /* all functions that have prototype and verifier allowed diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 2b8d560fa908..083d2997146d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5696,6 +5696,12 @@ union bpf_attr { * **struct __sk_buff** hisock_egress programs. * Return * 0 on success, or negative error in case of failure. + * + * int bpf_ext_memcpy(void *dst, size_t dst_sz, const void *src, size_t src_sz) + * Description + * Copy *src_sz* bytes from *src* to *dst* if *dst_sz* >= *src_sz*. 
+ * Return + * 0 on success, or negative error in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5913,6 +5919,7 @@ union bpf_attr { FN(get_ingress_dst, 212, ##ctx) \ FN(set_ingress_dst, 213, ##ctx) \ FN(change_skb_dev, 214, ##ctx) \ + FN(ext_memcpy, 215, ##ctx) \ /* */ /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Enable CONFIG_HISOCK in openeuler_defconfig. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index f25dd0bd7790..baa4851317b2 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1926,6 +1926,7 @@ CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y +CONFIG_HISOCK=y CONFIG_NET_FLOW_LIMIT=y # diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index d00c4dd20b0f..1c595f3a89b5 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1894,6 +1894,7 @@ CONFIG_CGROUP_NET_CLASSID=y CONFIG_NET_RX_BUSY_POLL=y CONFIG_BQL=y CONFIG_BPF_STREAM_PARSER=y +# CONFIG_HISOCK is not set CONFIG_NET_FLOW_LIMIT=y # -- 2.34.1

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICS15S -------------------------------- Add End-to-End HiSock Redirect sample. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- samples/bpf/.gitignore | 1 + samples/bpf/Makefile | 3 + samples/bpf/hisock/bpf.c | 247 +++++++++++++++++++ samples/bpf/hisock/hisock_cmd.c | 405 ++++++++++++++++++++++++++++++++ 4 files changed, 656 insertions(+) create mode 100644 samples/bpf/hisock/bpf.c create mode 100644 samples/bpf/hisock/hisock_cmd.c diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 0002cd359fb1..4505e51c3f4d 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -41,6 +41,7 @@ xdp_adjust_tail xdp_fwd xdp_router_ipv4 xdp_tx_iptunnel +hisock/hisock_cmd testfile.img hbm_out.log iperf.* diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3fa16412db15..2f19faa7fb63 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -46,6 +46,7 @@ tprogs-y += xdp_fwd tprogs-y += task_fd_query tprogs-y += ibumad tprogs-y += hbm +tprogs-y += hisock/hisock_cmd # Libbpf dependencies LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf @@ -96,6 +97,7 @@ xdp_fwd-objs := xdp_fwd_user.o task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) ibumad-objs := ibumad_user.o hbm-objs := hbm.o $(CGROUP_HELPERS) +hisock_cmd-objs := hisock/hisock_cmd.o xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE) @@ -149,6 +151,7 @@ always-y += task_fd_query_kern.o always-y += ibumad_kern.o always-y += hbm_out_kern.o always-y += hbm_edt_kern.o +always-y += hisock/bpf.o ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux diff --git a/samples/bpf/hisock/bpf.c b/samples/bpf/hisock/bpf.c new file mode 100644 index 000000000000..375f33fdf2e7 --- /dev/null +++ b/samples/bpf/hisock/bpf.c @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
+ * + * Description: End-to-End HiSock Redirect sample. + */ +#include <uapi/linux/in.h> +#include <uapi/linux/if_ether.h> +#include <linux/if_vlan.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/tcp.h> +#include <uapi/linux/bpf.h> + +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define CSUM_SHIFT_BITS 16 + +#define SOCKOPS_SUCC 1 +#define SOCKOPS_FAIL 0 + +#define PORT_LOCAL 1 +#define PORT_REMOTE 2 + +#define MAX_NUMA 8 +#define MAX_CONN_NUMA 4096 +#define MAX_CONN (MAX_CONN_NUMA * MAX_NUMA * 2) + +struct sock_tuple { + u32 saddr; + u32 daddr; + u16 sport; + u16 dport; +}; + +struct sock_value { + struct ethhdr ingress_eth; + bool eth_updated; + u32 ingress_ifindex; + void *ingress_dst; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(struct sock_tuple)); + __uint(value_size, sizeof(struct sock_value)); + __uint(max_entries, MAX_CONN); +} connmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(u16)); + __uint(value_size, sizeof(u8)); + __uint(max_entries, 128); +} speed_port SEC(".maps"); + +static inline bool is_speed_flow(u32 local, u32 remote) +{ + u8 *val; + + val = bpf_map_lookup_elem(&speed_port, &local); + if (val && *val == PORT_LOCAL) + return true; + + val = bpf_map_lookup_elem(&speed_port, &remote); + if (val && *val == PORT_REMOTE) + return true; + + return false; +} + +SEC("hisock_sockops") +int hisock_sockops_prog(struct bpf_sock_ops *skops) +{ + struct sock_tuple key = { 0 }; + struct sock_value val = { 0 }; + void *dst; + + if (!is_speed_flow(skops->local_port, bpf_ntohl(skops->remote_port))) + return SOCKOPS_SUCC; + + switch (skops->op) { + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + dst = bpf_get_ingress_dst(skops); + if (!dst) + return SOCKOPS_FAIL; + + key.saddr = skops->remote_ip4; + key.sport = bpf_ntohl(skops->remote_port); + key.daddr = skops->local_ip4; + 
key.dport = skops->local_port; + + val.ingress_dst = dst; + bpf_map_update_elem(&connmap, &key, &val, BPF_ANY); + + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + break; + case BPF_SOCK_OPS_STATE_CB: + if (skops->args[1] != BPF_TCP_CLOSE_WAIT && + skops->args[1] != BPF_TCP_FIN_WAIT1 && + skops->args[1] != BPF_TCP_CLOSE) + break; + + key.saddr = skops->remote_ip4; + key.sport = bpf_ntohl(skops->remote_port); + key.daddr = skops->local_ip4; + key.dport = skops->local_port; + + bpf_map_delete_elem(&connmap, &key); + + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_STATE_CB_FLAG); + break; + default: + break; + } + + return SOCKOPS_SUCC; +} + +SEC("hisock_ingress") +int hisock_ingress_prog(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct sock_tuple key = { 0 }; + struct sock_value *val; + struct ethhdr *ehdr; + struct tcphdr *thdr; + struct iphdr *ihdr; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return XDP_PASS; + + if (ehdr->h_proto != bpf_htons(ETH_P_IP)) + return XDP_PASS; + + ihdr = (struct iphdr *)(ehdr + 1); + if (ihdr + 1 > data_end) + return XDP_PASS; + + if (ihdr->ihl != 5 || ihdr->protocol != IPPROTO_TCP) + return XDP_PASS; + + if (ihdr->frag_off & bpf_htons(IP_MF | IP_OFFSET)) + return XDP_PASS; + + thdr = (struct tcphdr *)(ihdr + 1); + if (thdr + 1 > data_end) + return XDP_PASS; + + if (thdr->syn || thdr->fin || thdr->rst) + return XDP_PASS; + + key.saddr = ihdr->saddr; + key.sport = bpf_ntohs(thdr->source); + key.daddr = ihdr->daddr; + key.dport = bpf_ntohs(thdr->dest); + + val = bpf_map_lookup_elem(&connmap, &key); + if (!val) + return XDP_PASS; + + if (unlikely(!val->eth_updated)) { + bpf_ext_memcpy(val->ingress_eth.h_source, ETH_ALEN, + ehdr->h_dest, ETH_ALEN); + bpf_ext_memcpy(val->ingress_eth.h_dest, ETH_ALEN, + ehdr->h_source, ETH_ALEN); + val->ingress_eth.h_proto = ehdr->h_proto; + val->eth_updated = true; + } 
+ + if (unlikely(!val->ingress_ifindex)) + val->ingress_ifindex = ctx->ingress_ifindex; + + if (likely(val->ingress_dst)) + bpf_set_ingress_dst(ctx, val->ingress_dst); + + return XDP_HISOCK_REDIRECT; +} + +static inline void ipv4_csum(struct iphdr *ihdr) +{ + u32 csum = 0; + u16 *next_ip_u16 = (u16 *)ihdr; + + ihdr->check = 0; + for (size_t i = 0; i < sizeof(struct iphdr) >> 1; i++) + csum += *next_ip_u16++; + csum = (csum & 0xffff) + (csum >> CSUM_SHIFT_BITS); + ihdr->check = ~((csum & 0xffff) + (csum >> CSUM_SHIFT_BITS)); +} + +SEC("hisock_egress") +int hisock_egress_prog(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct sock_tuple key = { 0 }; + struct sock_value *val; + struct ethhdr *ehdr; + struct iphdr *ihdr; + int ret; + + key.saddr = skb->remote_ip4; + key.sport = bpf_ntohl(skb->remote_port); + key.daddr = skb->local_ip4; + key.dport = skb->local_port; + + val = bpf_map_lookup_elem(&connmap, &key); + if (!val) + return HISOCK_PASS; + + if (unlikely(!val->eth_updated)) + goto redirect; + + ihdr = (struct iphdr *)data; + if (ihdr + 1 > data_end) + return HISOCK_PASS; + + ihdr->tot_len = bpf_htons(skb->len); + ipv4_csum(ihdr); + + ret = bpf_skb_change_head(skb, ETH_HLEN, 0); + if (ret < 0) + goto redirect; + + data = (void *)(long)skb->data; + data_end = (void *)(long)skb->data_end; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return HISOCK_DROP; + + bpf_ext_memcpy(ehdr, ETH_HLEN, &val->ingress_eth, ETH_HLEN); +redirect: + if (likely(val->ingress_ifindex)) + bpf_change_skb_dev(skb, val->ingress_ifindex); + + return HISOCK_REDIRECT; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/hisock/hisock_cmd.c b/samples/bpf/hisock/hisock_cmd.c new file mode 100644 index 000000000000..6b64c008b6c7 --- /dev/null +++ b/samples/bpf/hisock/hisock_cmd.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2025-2025. All rights reserved. 
+ * + * Description: End-to-End HiSock Redirect sample. + */ +#include <linux/bpf.h> +#include <linux/if_link.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <signal.h> +#include <libgen.h> +#include <limits.h> +#include <sys/mount.h> +#include <sys/resource.h> +#include <net/if.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "bpf_util.h" +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#define DEF_BPF_PATH "bpf.o" +#define PORT_LOCAL 1 +#define PORT_REMOTE 2 +#define MAX_IF_NUM 8 + +struct { + __u32 ifindex[MAX_IF_NUM]; + int if_num; + char *local_port; + char *remote_port; + char *cgrp_path; + char *bpf_path; + bool unload; + bool help; +} hisock; + +struct hisock_prog_info { + const char *prog_name; + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + int attach_flag; + int prog_fd; + bool is_xdp; +}; + +static struct hisock_prog_info prog_infos[] = { + { + .prog_name = "hisock_sockops_prog", + .prog_type = BPF_PROG_TYPE_SOCK_OPS, + .attach_type = BPF_CGROUP_SOCK_OPS, + .attach_flag = 0, + .is_xdp = false, + }, + { + .prog_name = "hisock_ingress_prog", + .prog_type = BPF_PROG_TYPE_XDP, + .attach_type = BPF_XDP, + .attach_flag = XDP_FLAGS_SKB_MODE, + .is_xdp = true, + }, + { + .prog_name = "hisock_egress_prog", + .prog_type = BPF_PROG_TYPE_HISOCK, + .attach_type = BPF_HISOCK_EGRESS, + .attach_flag = 0, + .is_xdp = false, + }, +}; + +static int set_prog_type(struct bpf_object *obj) +{ + enum bpf_attach_type attach_type; + enum bpf_prog_type prog_type; + struct bpf_program *prog; + const char *prog_name; + int i; + + bpf_object__for_each_program(prog, obj) { + prog_name = bpf_program__name(prog); + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + if (!strcmp(prog_infos[i].prog_name, prog_name)) { + prog_type = prog_infos[i].prog_type; + attach_type = prog_infos[i].attach_type; + break; + } + } + + if (i 
== ARRAY_SIZE(prog_infos)) + return -1; + + bpf_program__set_type(prog, prog_type); + bpf_program__set_expected_attach_type(prog, attach_type); + } + + return 0; +} + +static int find_progs(struct bpf_object *obj) +{ + struct hisock_prog_info *info; + struct bpf_program *prog; + int i, prog_fd; + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + prog = bpf_object__find_program_by_name(obj, info->prog_name); + if (!prog) { + fprintf(stderr, "ERROR: failed to find prog sec %s\n", info->prog_name); + return -1; + } + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + fprintf(stderr, "ERROR: failed to get fd of prog %s\n", info->prog_name); + return -1; + } + + info->prog_fd = prog_fd; + } + + return 0; +} + +static int parse_port_range(const char *port_str, __u8 status, int map_fd) +{ + char *str = strdup(port_str); + char *token, *rest = str; + __u16 port; + + while ((token = strtok_r(rest, ",", &rest))) { + char *dash = strchr(token, '-'); + + if (dash) { + *dash = '\0'; + __u16 start = atoi(token); + __u16 end = atoi(dash + 1); + + if (start > end || start == 0 || end > 65535) { + fprintf(stderr, "Invalid port range: %s\n", token); + return -1; + } + + for (port = start; port <= end; port++) + bpf_map_update_elem(map_fd, &port, &status, BPF_ANY); + + printf("Speed port range %u-%u:%u\n", start, end, status); + } else { + port = atoi(token); + if (port == 0 || port > 65535) { + fprintf(stderr, "Invalid port: %s\n", token); + return -1; + } + bpf_map_update_elem(map_fd, &port, &status, BPF_ANY); + printf("Speed port %u:%u\n", port, status); + } + } + + free(str); + return 0; +} + +static int set_speed_port(struct bpf_object *obj) +{ + int map_fd; + + map_fd = bpf_object__find_map_fd_by_name(obj, "speed_port"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: failed to find map fd\n"); + return -1; + } + + if (hisock.local_port && + parse_port_range(hisock.local_port, PORT_LOCAL, map_fd)) { + fprintf(stderr, "ERROR: failed to update local 
port\n"); + return -1; + } + + if (hisock.remote_port && + parse_port_range(hisock.remote_port, PORT_REMOTE, map_fd)) { + fprintf(stderr, "ERROR: failed to update remote port\n"); + return -1; + } + + return 0; +} + +static int detach_progs(void) +{ + struct hisock_prog_info *info; + int i, j, cgrp_fd; + int err_cnt = 0; + + cgrp_fd = open(hisock.cgrp_path, O_DIRECTORY, O_RDONLY); + if (cgrp_fd < 0) { + fprintf(stderr, "ERROR: failed to open cgrp %s\n", hisock.cgrp_path); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + if (info->is_xdp) { + for (j = 0; j < hisock.if_num; j++) { + if (bpf_xdp_detach(hisock.ifindex[j], + info->attach_flag, NULL)) { + fprintf(stderr, + "ERROR: failed to detach prog %s\n", + info->prog_name); + err_cnt++; + } + } + continue; + } + + if (bpf_prog_detach(cgrp_fd, info->attach_type)) { + fprintf(stderr, "ERROR: failed to detach prog %s\n", info->prog_name); + err_cnt++; + } + } + + close(cgrp_fd); + return -err_cnt; +} + +static int attach_progs(void) +{ + struct hisock_prog_info *info; + int i, j, cgrp_fd; + + cgrp_fd = open(hisock.cgrp_path, O_DIRECTORY, O_RDONLY); + if (cgrp_fd < 0) { + fprintf(stderr, "ERROR: failed to open cgrp %s\n", hisock.cgrp_path); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + if (info->is_xdp) { + for (j = 0; j < hisock.if_num; j++) { + if (bpf_xdp_attach(hisock.ifindex[j], info->prog_fd, + info->attach_flag, NULL)) + goto fail; + } + continue; + } + + if (bpf_prog_attach(info->prog_fd, cgrp_fd, info->attach_type, + info->attach_flag)) + goto fail; + } + + close(cgrp_fd); + return 0; +fail: + fprintf(stderr, "ERROR: failed to attach prog %s\n", info->prog_name); + close(cgrp_fd); + detach_progs(); + return -1; +} + +static int do_hisock(void) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + struct bpf_object *obj; + + setrlimit(RLIMIT_MEMLOCK, &r); + + obj = bpf_object__open(hisock.bpf_path); + if 
(libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: failed to open bpf file\n"); + return -1; + } + + if (set_prog_type(obj)) { + fprintf(stderr, "ERROR: failed to set prog type\n"); + bpf_object__close(obj); + return -1; + } + + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: failed to load bpf obj\n"); + bpf_object__close(obj); + return -1; + } + + if (find_progs(obj)) { + fprintf(stderr, "ERROR: failed to find progs\n"); + bpf_object__close(obj); + return -1; + } + + if (set_speed_port(obj)) { + fprintf(stderr, "ERROR: failed to set speed port\n"); + bpf_object__close(obj); + return -1; + } + + if (attach_progs()) { + fprintf(stderr, "ERROR: failed to attach progs\n"); + bpf_object__close(obj); + return -1; + } + + bpf_object__close(obj); + return 0; +} + +static void do_help(void) +{ + fprintf(stderr, + "Load: hisock_cmd [-f BPF_FILE] [-c CGRP_PATH] " + "[-p LOCAL_PORT] [-r REMOTE_PORT] [-i INTERFACE]\n" + "Unload: hisock_cmd -u [-c CGRP_PATH] [-i INTERFACE]\n"); +} + +static int parse_args(int argc, char **argv) +{ + char *ifname; + int opt; + + hisock.bpf_path = DEF_BPF_PATH; + hisock.if_num = 0; + + while ((opt = getopt(argc, argv, "f:c:p:r:i:uh")) != -1) { + switch (opt) { + case 'f': + hisock.bpf_path = optarg; + break; + case 'c': + hisock.cgrp_path = optarg; + break; + case 'p': + hisock.local_port = optarg; + break; + case 'r': + hisock.remote_port = optarg; + break; + case 'i': + ifname = optarg; + hisock.ifindex[hisock.if_num] = if_nametoindex(ifname); + hisock.if_num++; + break; + case 'u': + hisock.unload = true; + break; + case 'h': + hisock.help = true; + break; + default: + fprintf(stderr, "ERROR: unknown option %c\n", opt); + return -1; + } + } + + if (hisock.cgrp_path == NULL || + hisock.if_num == 0 || + (!hisock.unload && + hisock.local_port == NULL && + hisock.remote_port == NULL)) { + do_help(); + return -1; + } + + return 0; +} + +int main(int argc, char **argv) +{ + if (parse_args(argc, argv)) { + fprintf(stderr, "ERROR: failed to 
parse args\n"); + return -1; + } + + if (hisock.help) { + do_help(); + return 0; + } + + if (hisock.unload) { + if (detach_progs()) { + fprintf(stderr, "ERROR: failed to detach progs\n"); + return -1; + } + + printf("Unload HiSock successfully\n"); + return 0; + } + + if (do_hisock()) { + fprintf(stderr, "ERROR: failed to do hisock\n"); + return -1; + } + + printf("Load HiSock successfully\n"); + return 0; +} -- 2.34.1

反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/17753 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/M4H... Feedback: The patch(es) you sent to the kernel@openeuler.org mailing list have been successfully converted to a pull request! Pull request link: https://gitee.com/openeuler/kernel/pulls/17753 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/M4H...
participants (2)
-
patchwork bot
-
Pu Lehui