Liu Jian (1): cgroup: make cgroup_bpf_prog_attach work when cgroup2 is not mounted Pu Lehui (12): bpf: Add BPF_HISOCK_INGRESS attach type bpf: Add bpf_set_ingress_dst kfunc bpf: Add bpf_set_ingress/egress_dev kfunc bpf: Add bpf_get_skb_ethhdr kfunc bpf: Add bpf_handle_ingress/egress_ptype kfunc bpf: Apply BPF_HISOCK_INGRESS to hisock ingress bpf: Only deploy hisock in server bpf: Add multi port parse to hisock_cmd bpf: Add target comm parse to hisock_cmd bpf: Add ipv4-mapped ipv6 addr support for hisock bpf: Add local connect support for hisock bpf: Deprecate hisock unused kfuncs and orig ingress logic include/linux/cgroup.h | 1 + include/linux/filter.h | 24 ++ include/linux/netdevice.h | 4 + include/net/xdp.h | 5 - include/uapi/linux/bpf.h | 2 +- kernel/bpf/cgroup.c | 8 +- kernel/bpf/syscall.c | 33 ++- kernel/bpf/verifier.c | 47 ++++ kernel/cgroup/cgroup.c | 22 ++ net/Kconfig | 1 + net/core/dev.c | 54 +++-- net/core/filter.c | 201 +++++++++++----- net/ipv4/ip_output.c | 31 +-- samples/bpf/hisock/bpf.c | 395 ++++++++++++++++++++++++-------- samples/bpf/hisock/hisock_cmd.c | 198 +++++++++++----- tools/include/uapi/linux/bpf.h | 2 +- tools/lib/bpf/libbpf.c | 4 +- 17 files changed, 750 insertions(+), 282 deletions(-) -- 2.34.1
From: Liu Jian <liujian56@huawei.com> hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- BPF_PROG_TYPE_CGROUP* bpf programs are associated with cgroup2. If cgroup2 is not mounted, associate the bpf program with cgrp_dfl_root.cgrp by default. It can then be used as follows: bpftool cgroup attach /sys/fs/cgroup/cpu sock_ops pinned /sys/fs/bpf/xxx Signed-off-by: Liu Jian <liujian56@huawei.com> Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/linux/cgroup.h | 1 + kernel/bpf/cgroup.c | 8 ++++---- kernel/cgroup/cgroup.c | 22 ++++++++++++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d4f69e89b3fb..e0be4e60aba8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -108,6 +108,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup *cgroup_get_from_path(const char *path); struct cgroup *cgroup_get_from_fd(int fd); +struct cgroup *cgroup_get_from_fd_v2(int fd); struct cgroup *cgroup_v1v2_get_from_fd(int fd); int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 1082f5906967..c2403457d4b2 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1157,7 +1157,7 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr, struct cgroup *cgrp; int ret; - cgrp = cgroup_get_from_fd(attr->target_fd); + cgrp = cgroup_get_from_fd_v2(attr->target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp); @@ -1185,7 +1185,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) struct cgroup *cgrp; int ret; - cgrp = cgroup_get_from_fd(attr->target_fd); + cgrp = cgroup_get_from_fd_v2(attr->target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp); @@ -1304,7 +1304,7 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (attr->link_create.flags) 
return -EINVAL; - cgrp = cgroup_get_from_fd(attr->link_create.target_fd); + cgrp = cgroup_get_from_fd_v2(attr->link_create.target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp); @@ -1344,7 +1344,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, struct cgroup *cgrp; int ret; - cgrp = cgroup_get_from_fd(attr->query.target_fd); + cgrp = cgroup_get_from_fd_v2(attr->query.target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index a14c7f53a602..17521bc192ee 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -7109,6 +7109,28 @@ struct cgroup *cgroup_get_from_fd(int fd) } EXPORT_SYMBOL_GPL(cgroup_get_from_fd); +/** + * same with cgroup_get_from_fd, only add cgrp_dfl_visible check + */ +struct cgroup *cgroup_get_from_fd_v2(int fd) +{ + struct cgroup *cgrp = cgroup_v1v2_get_from_fd(fd); + + if (IS_ERR(cgrp)) + return ERR_CAST(cgrp); + + if (!cgroup_on_dfl(cgrp)) { + cgroup_put(cgrp); + if (cgrp_dfl_visible) + return ERR_PTR(-EBADF); + + cgrp = &cgrp_dfl_root.cgrp; + cgroup_get(cgrp); + } + return cgrp; +} +EXPORT_SYMBOL_GPL(cgroup_get_from_fd_v2); + static u64 power_of_ten(int power) { u64 v = 1; -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add BPF_HISOCK_INGRESS attach type, which will replace the XDP_HISOCK_REDIRECT action for redirecting target received skbs to TCP. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/linux/filter.h | 24 ++++++++++++ include/linux/netdevice.h | 4 ++ include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 33 +++++++++++++--- net/Kconfig | 1 + net/core/dev.c | 38 ++++++++++++++++--- net/core/filter.c | 69 +++++++++++++++++++++++++++++++--- tools/include/uapi/linux/bpf.h | 1 + tools/lib/bpf/libbpf.c | 4 +- 9 files changed, 157 insertions(+), 18 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 27349660428a..53b43bd6b02d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1610,4 +1610,28 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi } #endif /* CONFIG_NET */ +#ifdef CONFIG_HISOCK +DECLARE_STATIC_KEY_FALSE(hisock_ingress_key); + +int hisock_ingress_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int hisock_ingress_prog_detach(const union bpf_attr *attr); + +static inline int hisock_ingress_bpf_run(struct bpf_prog *prog, struct sk_buff *skb) +{ + void *saved_data_end; + int ret; + + if (unlikely(!prog)) + return HISOCK_PASS; + + rcu_read_lock(); + bpf_compute_and_save_data_end(skb, &saved_data_end); + ret = bpf_prog_run_pin_on_cpu(prog, skb); + bpf_restore_data_end(skb, saved_data_end); + rcu_read_unlock(); + + return ret; +} +#endif + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a68c371a5d7a..fd043ed1a9d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2446,7 +2446,11 @@ struct net_device { struct devlink_port *devlink_port; +#ifdef CONFIG_HISOCK + KABI_USE(1, struct bpf_prog __rcu *hisock_ingress) +#else KABI_RESERVE(1) +#endif KABI_RESERVE(2) KABI_RESERVE(3) 
KABI_RESERVE(4) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cf088c780ab1..a513dbb6d520 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1064,6 +1064,7 @@ enum bpf_attach_type { BPF_SCHED, #ifndef __GENKSYMS__ BPF_HISOCK_EGRESS, + BPF_HISOCK_INGRESS, #endif __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6b28f4001758..042f44a27941 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2536,6 +2536,16 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, if (expected_attach_type == BPF_NETFILTER) return 0; return -EINVAL; +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + switch (expected_attach_type) { + case BPF_HISOCK_EGRESS: + case BPF_HISOCK_INGRESS: + return 0; + default: + return -EINVAL; + } +#endif case BPF_PROG_TYPE_SYSCALL: case BPF_PROG_TYPE_EXT: if (expected_attach_type) @@ -3835,6 +3845,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) return BPF_PROG_TYPE_CGROUP_SOCKOPT; #ifdef CONFIG_HISOCK case BPF_HISOCK_EGRESS: + case BPF_HISOCK_INGRESS: return BPF_PROG_TYPE_HISOCK; #endif case BPF_TRACE_ITER: @@ -3995,9 +4006,6 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: -#ifdef CONFIG_HISOCK - case BPF_PROG_TYPE_HISOCK: -#endif if (ptype == BPF_PROG_TYPE_LSM && prog->expected_attach_type != BPF_LSM_CGROUP) ret = -EINVAL; @@ -4007,6 +4015,14 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_SCHED_CLS: ret = tcx_prog_attach(attr, prog); break; +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + if (attr->attach_type == BPF_HISOCK_EGRESS) + ret = cgroup_bpf_prog_attach(attr, ptype, prog); + else if (attr->attach_type == BPF_HISOCK_INGRESS) + ret = hisock_ingress_prog_attach(attr, prog); + break; +#endif default: ret = -EINVAL; } @@ -4063,14 +4079,19 @@ static int bpf_prog_detach(const union bpf_attr *attr) case 
BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: -#ifdef CONFIG_HISOCK - case BPF_PROG_TYPE_HISOCK: -#endif ret = cgroup_bpf_prog_detach(attr, ptype); break; case BPF_PROG_TYPE_SCHED_CLS: ret = tcx_prog_detach(attr, prog); break; +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + if (attr->attach_type == BPF_HISOCK_EGRESS) + ret = cgroup_bpf_prog_detach(attr, ptype); + else if (attr->attach_type == BPF_HISOCK_INGRESS) + ret = hisock_ingress_prog_detach(attr); + break; +#endif default: ret = -EINVAL; } diff --git a/net/Kconfig b/net/Kconfig index 31e8a650bb7b..f738a37a68d0 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -360,6 +360,7 @@ config BPF_STREAM_PARSER config HISOCK bool "enable HiSock Redirect Framework" depends on INET + depends on ARM64 depends on CGROUP_BPF depends on BPF_SYSCALL default n diff --git a/net/core/dev.c b/net/core/dev.c index 5dbe8b7f2aa0..8fdc2e3ec300 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5102,7 +5102,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) } #ifdef CONFIG_HISOCK -static int generic_xdp_hisock_redirect(struct sk_buff *skb) +static int do_hisock_ingress_redirect(struct sk_buff *skb) { const struct iphdr *iph; u32 len; @@ -5126,7 +5126,7 @@ static int generic_xdp_hisock_redirect(struct sk_buff *skb) if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto free_skb; - len = ntohs(iph->tot_len); + len = iph_totlen(skb, iph); if (skb->len < len || len < (iph->ihl * 4)) goto free_skb; @@ -5136,9 +5136,10 @@ static int generic_xdp_hisock_redirect(struct sk_buff *skb) iph = ip_hdr(skb); skb->transport_header = skb->network_header + iph->ihl * 4; - skb_orphan(skb); + if (!skb_sk_is_prefetched(skb)) + skb_orphan(skb); - if (!skb_valid_dst(skb)) { + if (unlikely(!skb_valid_dst(skb))) { if (ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos, skb->dev)) goto free_skb; @@ -5155,7 +5156,7 @@ static int generic_xdp_hisock_redirect(struct sk_buff *skb) free_skb: 
kfree_skb(skb); out: - return -EFAULT; + return NET_RX_DROP; } #endif @@ -5184,7 +5185,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) break; #ifdef CONFIG_HISOCK case XDP_HISOCK_REDIRECT: - err = generic_xdp_hisock_redirect(skb); + err = do_hisock_ingress_redirect(skb); if (err == -EOPNOTSUPP) return XDP_PASS; break; @@ -5563,6 +5564,31 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, } skip_taps: +#ifdef CONFIG_HISOCK + if (static_branch_unlikely(&hisock_ingress_key)) { + int act; + + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + + act = hisock_ingress_bpf_run(rcu_dereference(skb->dev->hisock_ingress), skb); + switch (act) { + case HISOCK_PASS: + break; + case HISOCK_REDIRECT: + ret = do_hisock_ingress_redirect(skb); + if (ret != -EOPNOTSUPP) + goto out; + break; + case HISOCK_DROP: + default: + ret = NET_RX_DROP; + goto out; + } + } +#endif #ifdef CONFIG_NET_INGRESS if (static_branch_unlikely(&ingress_needed_key)) { bool another = false; diff --git a/net/core/filter.c b/net/core/filter.c index 00037445a6c6..acf1d574c3cb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8792,12 +8792,25 @@ static bool hisock_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - switch (off) { - case bpf_ctx_range(struct __sk_buff, tc_classid): - case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, tstamp): - case bpf_ctx_range(struct __sk_buff, wire_len): + if (type == BPF_WRITE) return false; + + if (prog->expected_attach_type == BPF_HISOCK_EGRESS) { + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): + return false; + } + } else if (prog->expected_attach_type == BPF_HISOCK_INGRESS) { + switch (off) { + case bpf_ctx_range_till(struct 
__sk_buff, mark, queue_mapping): + case bpf_ctx_range(struct __sk_buff, priority): + case bpf_ctx_range_till(struct __sk_buff, tc_index, tc_classid): + case bpf_ctx_range_till(struct __sk_buff, napi_id, gso_size): + return false; + } } switch (off) { @@ -11274,6 +11287,52 @@ const struct bpf_prog_ops flow_dissector_prog_ops = { .test_run = bpf_prog_test_run_flow_dissector, }; +#ifdef CONFIG_HISOCK +DEFINE_STATIC_KEY_FALSE(hisock_ingress_key); + +static int hisock_ingress_prog_install(const union bpf_attr *attr, struct bpf_prog *new) +{ + struct net *net = current->nsproxy->net_ns; + struct net_device *dev; + struct bpf_prog *old; + int ret = 0; + + if (attr->attach_type != BPF_HISOCK_INGRESS) + return -EINVAL; + + rtnl_lock(); + dev = __dev_get_by_index(net, attr->target_fd); + if (!dev) { + ret = -ENODEV; + goto out; + } + + old = rtnl_dereference(dev->hisock_ingress); + rcu_assign_pointer(dev->hisock_ingress, new); + + if (new && !old) + static_branch_inc(&hisock_ingress_key); + else if (!new && old) + static_branch_dec(&hisock_ingress_key); + + if (old) + bpf_prog_put(old); +out: + rtnl_unlock(); + return ret; +} + +int hisock_ingress_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return hisock_ingress_prog_install(attr, prog); +} + +int hisock_ingress_prog_detach(const union bpf_attr *attr) +{ + return hisock_ingress_prog_install(attr, NULL); +} +#endif + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 5e631ae65920..4036c80105a2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1064,6 +1064,7 @@ enum bpf_attach_type { BPF_SCHED, #ifndef __GENKSYMS__ BPF_HISOCK_EGRESS, + BPF_HISOCK_INGRESS, #endif __MAX_BPF_ATTACH_TYPE }; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5f268feb3075..5cda26728481 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -124,6 +124,7 @@ static const char 
* const attach_type_name[] = { [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", [BPF_SCHED] = "sched", [BPF_HISOCK_EGRESS] = "hisock_egress", + [BPF_HISOCK_INGRESS] = "hisock_ingress", }; static const char * const link_type_name[] = { @@ -8879,7 +8880,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), SEC_DEF("sched/", SCHED, BPF_SCHED, SEC_ATTACH_BTF, attach_sched), - SEC_DEF("hisock_egress", HISOCK, BPF_HISOCK_EGRESS, SEC_ATTACHABLE_OPT), + SEC_DEF("hisock_egress", HISOCK, BPF_HISOCK_EGRESS, SEC_ATTACHABLE), + SEC_DEF("hisock_ingress", HISOCK, BPF_HISOCK_INGRESS, SEC_ATTACHABLE), }; int libbpf_register_prog_handler(const char *sec, -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add bpf_set_ingress_dst kfunc to set the receive dst entry of the sock on the ingress skb. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- kernel/bpf/verifier.c | 24 ++++++++++++++++++++++++ net/core/filter.c | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1b7799a7ac71..67ff1bcfb277 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10811,6 +10811,9 @@ enum special_kfunc_type { KF_bpf_dynptr_slice, KF_bpf_dynptr_slice_rdwr, KF_bpf_dynptr_clone, +#ifdef CONFIG_HISOCK + KF_bpf_set_ingress_dst, +#endif }; BTF_SET_START(special_kfunc_set) @@ -10831,6 +10834,9 @@ BTF_ID(func, bpf_dynptr_from_xdp) BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) +#ifdef CONFIG_HISOCK +BTF_ID(func, bpf_set_ingress_dst) +#endif BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -10853,6 +10859,9 @@ BTF_ID(func, bpf_dynptr_from_xdp) BTF_ID(func, bpf_dynptr_slice) BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) +#ifdef CONFIG_HISOCK +BTF_ID(func, bpf_set_ingress_dst) +#endif static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -11829,6 +11838,16 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env, return 0; } +static int check_atype_kfunc_compatibility(struct bpf_verifier_env *env, u32 func_id) +{ +#ifdef CONFIG_HISOCK + if (func_id == special_kfunc_list[KF_bpf_set_ingress_dst] && + env->prog->expected_attach_type != BPF_HISOCK_INGRESS) + return -EACCES; +#endif + return 0; +} + static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx_p) { @@ -11858,6 +11877,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, insn_aux->is_iter_next = is_iter_next_kfunc(&meta); + if 
(check_atype_kfunc_compatibility(env, meta.func_id)) { + verbose(env, "calling kernel function %s is not allowed\n", func_name); + return -EACCES; + } + if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) { verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n"); return -EACCES; diff --git a/net/core/filter.c b/net/core/filter.c index acf1d574c3cb..ff0ec4621cc0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -12186,6 +12186,29 @@ __bpf_kfunc int bpf_xdp_set_ingress_dst(struct xdp_md *xdp_ctx, void *dst__ign) return 0; } +__bpf_kfunc int bpf_set_ingress_dst(struct __sk_buff *skb_ctx, unsigned long _sk) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct sock *sk = (struct sock *)_sk; + struct dst_entry *dst; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!sk || !virt_addr_valid(sk)) + return -EFAULT; + + if (!sk_fullsock(sk)) + return -EINVAL; + + dst = rcu_dereference(sk->sk_rx_dst); + if (dst) + dst = dst_check(dst, 0); + if (dst && sk->sk_rx_dst_ifindex == skb->skb_iif) + skb_dst_set_noref(skb, dst); + + return 0; +} + __bpf_kfunc int bpf_xdp_change_dev(struct xdp_md *xdp_ctx, u32 ifindex) { struct xdp_buff *xdp = (struct xdp_buff *)xdp_ctx; @@ -12258,6 +12281,7 @@ BTF_ID_FLAGS(func, bpf_skops_get_ingress_dst, KF_RET_NULL) BTF_SET8_END(bpf_kfunc_check_set_sock_ops) BTF_SET8_START(bpf_kfunc_check_set_hisock) +BTF_ID_FLAGS(func, bpf_set_ingress_dst) BTF_ID_FLAGS(func, bpf_skb_change_dev) BTF_SET8_END(bpf_kfunc_check_set_hisock) #endif -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add bpf_set_ingress_dev and bpf_set_egress_dev kfuncs to set the network device of the ingress or egress skb. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- kernel/bpf/verifier.c | 13 ++++++++++++- net/core/filter.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 67ff1bcfb277..f028f5650323 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10813,6 +10813,8 @@ enum special_kfunc_type { KF_bpf_dynptr_clone, #ifdef CONFIG_HISOCK KF_bpf_set_ingress_dst, + KF_bpf_set_ingress_dev, + KF_bpf_set_egress_dev, #endif }; @@ -10836,6 +10838,8 @@ BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) #ifdef CONFIG_HISOCK BTF_ID(func, bpf_set_ingress_dst) +BTF_ID(func, bpf_set_ingress_dev) +BTF_ID(func, bpf_set_egress_dev) #endif BTF_SET_END(special_kfunc_set) @@ -10861,6 +10865,8 @@ BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) #ifdef CONFIG_HISOCK BTF_ID(func, bpf_set_ingress_dst) +BTF_ID(func, bpf_set_ingress_dev) +BTF_ID(func, bpf_set_egress_dev) #endif static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) @@ -11841,9 +11847,14 @@ static int fetch_kfunc_meta(struct bpf_verifier_env *env, static int check_atype_kfunc_compatibility(struct bpf_verifier_env *env, u32 func_id) { #ifdef CONFIG_HISOCK - if (func_id == special_kfunc_list[KF_bpf_set_ingress_dst] && + if ((func_id == special_kfunc_list[KF_bpf_set_ingress_dst] || + func_id == special_kfunc_list[KF_bpf_set_ingress_dev]) && env->prog->expected_attach_type != BPF_HISOCK_INGRESS) return -EACCES; + + if (func_id == special_kfunc_list[KF_bpf_set_egress_dev] && + env->prog->expected_attach_type != BPF_HISOCK_EGRESS) + return -EACCES; #endif return 0; } diff --git a/net/core/filter.c b/net/core/filter.c index ff0ec4621cc0..442025f7841f 100644 --- 
a/net/core/filter.c +++ b/net/core/filter.c @@ -12242,6 +12242,34 @@ __bpf_kfunc int bpf_skb_change_dev(struct __sk_buff *skb_ctx, u32 ifindex) skb->dev = dev; return 0; } + +__bpf_kfunc int +bpf_set_ingress_dev(struct __sk_buff *skb_ctx, unsigned long _dev) +{ + struct net_device *dev = (struct net_device *)_dev; + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + + if (!dev || !virt_addr_valid(dev)) + return -EFAULT; + + skb->dev = dev; + skb->skb_iif = dev->ifindex; + skb->pkt_type = PACKET_HOST; + return 0; +} + +__bpf_kfunc int +bpf_set_egress_dev(struct __sk_buff *skb_ctx, unsigned long _dev) +{ + struct net_device *dev = (struct net_device *)_dev; + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + + if (!dev || !virt_addr_valid(dev)) + return -EFAULT; + + skb->dev = dev; + return 0; +} #endif __diag_pop(); @@ -12282,6 +12310,8 @@ BTF_SET8_END(bpf_kfunc_check_set_sock_ops) BTF_SET8_START(bpf_kfunc_check_set_hisock) BTF_ID_FLAGS(func, bpf_set_ingress_dst) +BTF_ID_FLAGS(func, bpf_set_ingress_dev) +BTF_ID_FLAGS(func, bpf_set_egress_dev) BTF_ID_FLAGS(func, bpf_skb_change_dev) BTF_SET8_END(bpf_kfunc_check_set_hisock) #endif -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add bpf_get_skb_ethhdr kfunc to fetch the Ethernet header of the ingress skb. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- kernel/bpf/verifier.c | 6 +++++- net/core/filter.c | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f028f5650323..4e0d6c2cb3ed 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10815,6 +10815,7 @@ enum special_kfunc_type { KF_bpf_set_ingress_dst, KF_bpf_set_ingress_dev, KF_bpf_set_egress_dev, + KF_bpf_get_skb_ethhdr, #endif }; @@ -10840,6 +10841,7 @@ BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_set_ingress_dst) BTF_ID(func, bpf_set_ingress_dev) BTF_ID(func, bpf_set_egress_dev) +BTF_ID(func, bpf_get_skb_ethhdr) #endif BTF_SET_END(special_kfunc_set) @@ -10867,6 +10869,7 @@ BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_set_ingress_dst) BTF_ID(func, bpf_set_ingress_dev) BTF_ID(func, bpf_set_egress_dev) +BTF_ID(func, bpf_get_skb_ethhdr) #endif static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) @@ -11848,7 +11851,8 @@ static int check_atype_kfunc_compatibility(struct bpf_verifier_env *env, u32 fun { #ifdef CONFIG_HISOCK if ((func_id == special_kfunc_list[KF_bpf_set_ingress_dst] || - func_id == special_kfunc_list[KF_bpf_set_ingress_dev]) && + func_id == special_kfunc_list[KF_bpf_set_ingress_dev] || + func_id == special_kfunc_list[KF_bpf_get_skb_ethhdr]) && env->prog->expected_attach_type != BPF_HISOCK_INGRESS) return -EACCES; diff --git a/net/core/filter.c b/net/core/filter.c index 442025f7841f..3530e5bde7bd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -12243,6 +12243,19 @@ __bpf_kfunc int bpf_skb_change_dev(struct __sk_buff *skb_ctx, u32 ifindex) return 0; } +__bpf_kfunc int +bpf_get_skb_ethhdr(struct __sk_buff *skb_ctx, struct ethhdr *peth, int size__sz) +{ + struct sk_buff *skb = (struct 
sk_buff *)skb_ctx; + struct ethhdr *eth = eth_hdr(skb); + + if (size__sz != sizeof(struct ethhdr)) + return -EINVAL; + + memcpy(peth, eth, size__sz); + return 0; +} + __bpf_kfunc int bpf_set_ingress_dev(struct __sk_buff *skb_ctx, unsigned long _dev) { @@ -12310,6 +12323,7 @@ BTF_SET8_END(bpf_kfunc_check_set_sock_ops) BTF_SET8_START(bpf_kfunc_check_set_hisock) BTF_ID_FLAGS(func, bpf_set_ingress_dst) +BTF_ID_FLAGS(func, bpf_get_skb_ethhdr) BTF_ID_FLAGS(func, bpf_set_ingress_dev) BTF_ID_FLAGS(func, bpf_set_egress_dev) BTF_ID_FLAGS(func, bpf_skb_change_dev) -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add bpf_handle_ingress_ptype and bpf_handle_egress_ptype kfuncs to handle the ingress or egress ptype logic. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- kernel/bpf/verifier.c | 12 ++++++++++-- net/core/filter.c | 42 ++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 31 +------------------------------ 3 files changed, 53 insertions(+), 32 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4e0d6c2cb3ed..84d49d6ad825 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10816,6 +10816,8 @@ enum special_kfunc_type { KF_bpf_set_ingress_dev, KF_bpf_set_egress_dev, KF_bpf_get_skb_ethhdr, + KF_bpf_handle_ingress_ptype, + KF_bpf_handle_egress_ptype, #endif }; @@ -10842,6 +10844,8 @@ BTF_ID(func, bpf_set_ingress_dst) BTF_ID(func, bpf_set_ingress_dev) BTF_ID(func, bpf_set_egress_dev) BTF_ID(func, bpf_get_skb_ethhdr) +BTF_ID(func, bpf_handle_ingress_ptype) +BTF_ID(func, bpf_handle_egress_ptype) #endif BTF_SET_END(special_kfunc_set) @@ -10870,6 +10874,8 @@ BTF_ID(func, bpf_set_ingress_dst) BTF_ID(func, bpf_set_ingress_dev) BTF_ID(func, bpf_set_egress_dev) BTF_ID(func, bpf_get_skb_ethhdr) +BTF_ID(func, bpf_handle_ingress_ptype) +BTF_ID(func, bpf_handle_egress_ptype) #endif static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) @@ -11852,11 +11858,13 @@ static int check_atype_kfunc_compatibility(struct bpf_verifier_env *env, u32 fun #ifdef CONFIG_HISOCK if ((func_id == special_kfunc_list[KF_bpf_set_ingress_dst] || func_id == special_kfunc_list[KF_bpf_set_ingress_dev] || - func_id == special_kfunc_list[KF_bpf_get_skb_ethhdr]) && + func_id == special_kfunc_list[KF_bpf_get_skb_ethhdr] || + func_id == special_kfunc_list[KF_bpf_handle_ingress_ptype]) && env->prog->expected_attach_type != BPF_HISOCK_INGRESS) return -EACCES; - if (func_id == special_kfunc_list[KF_bpf_set_egress_dev] && + if ((func_id == 
special_kfunc_list[KF_bpf_set_egress_dev] || + func_id == special_kfunc_list[KF_bpf_handle_egress_ptype]) && env->prog->expected_attach_type != BPF_HISOCK_EGRESS) return -EACCES; #endif diff --git a/net/core/filter.c b/net/core/filter.c index 3530e5bde7bd..b2f72c492d26 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -12283,6 +12283,46 @@ bpf_set_egress_dev(struct __sk_buff *skb_ctx, unsigned long _dev) skb->dev = dev; return 0; } + +__bpf_kfunc void bpf_handle_ingress_ptype(struct __sk_buff *skb_ctx) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct list_head *ptype_list = &ptype_all; + struct packet_type *ptype; + + rcu_read_lock(); +again: + list_for_each_entry_rcu(ptype, ptype_list, list) { + if (likely(!skb_orphan_frags_rx(skb, GFP_ATOMIC))) { + refcount_inc(&skb->users); + ptype->func(skb, skb->dev, ptype, skb->dev); + } + } + + if (ptype_list == &ptype_all) { + ptype_list = &skb->dev->ptype_all; + goto again; + } + + rcu_read_unlock(); +} + +__bpf_kfunc void bpf_handle_egress_ptype(struct __sk_buff *skb_ctx) +{ + struct sk_buff *skb = (struct sk_buff *)skb_ctx; + struct net_device *dev, *orig_dev = skb->dev; + + rcu_read_lock(); + dev = skb_dst_dev_rcu(skb); + skb->dev = dev; + skb->protocol = htons(ETH_P_IP); + + if (dev_nit_active(skb->dev)) + dev_queue_xmit_nit(skb, skb->dev); + + skb->dev = orig_dev; + rcu_read_unlock(); +} #endif __diag_pop(); @@ -12327,6 +12367,8 @@ BTF_ID_FLAGS(func, bpf_get_skb_ethhdr) BTF_ID_FLAGS(func, bpf_set_ingress_dev) BTF_ID_FLAGS(func, bpf_set_egress_dev) BTF_ID_FLAGS(func, bpf_skb_change_dev) +BTF_ID_FLAGS(func, bpf_handle_ingress_ptype) +BTF_ID_FLAGS(func, bpf_handle_egress_ptype) BTF_SET8_END(bpf_kfunc_check_set_hisock) #endif diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a03449ba9740..83e283206d24 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -463,35 +463,6 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4) } #ifdef CONFIG_HISOCK -static int 
hisock_egress_redirect_xmit(struct sk_buff *skb) -{ - struct net_device *dev = skb->dev; - struct netdev_queue *txq; - bool free_skb = true; - int cpu, rc; - - rcu_read_lock_bh(); - - txq = netdev_core_pick_tx(dev, skb, NULL); - cpu = smp_processor_id(); - HARD_TX_LOCK(dev, txq, cpu); - if (!netif_xmit_stopped(txq)) { - rc = netdev_start_xmit(skb, dev, txq, 0); - if (dev_xmit_complete(rc)) - free_skb = false; - } - HARD_TX_UNLOCK(dev, txq); - - rcu_read_unlock_bh(); - - if (free_skb) { - rc = -ENETDOWN; - kfree_skb(skb); - } - - return rc; -} - static int do_hisock_egress_redirect(struct net *net, struct sock *sk, struct sk_buff *skb) { struct iphdr *iph; @@ -501,7 +472,7 @@ static int do_hisock_egress_redirect(struct net *net, struct sock *sk, struct sk skb->dev = skb_dst(skb)->dev; if (skb_mac_header_was_set(skb)) - return hisock_egress_redirect_xmit(skb); + return dev_queue_xmit(skb); iph = ip_hdr(skb); iph_set_totlen(iph, skb->len); -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Apply BPF_HISOCK_INGRESS to hisock ingress. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- samples/bpf/hisock/bpf.c | 133 +++++++++++++++++++------------- samples/bpf/hisock/hisock_cmd.c | 28 +++---- 2 files changed, 89 insertions(+), 72 deletions(-) diff --git a/samples/bpf/hisock/bpf.c b/samples/bpf/hisock/bpf.c index 46159c96cb18..e6a70bad5245 100644 --- a/samples/bpf/hisock/bpf.c +++ b/samples/bpf/hisock/bpf.c @@ -4,8 +4,9 @@ * * Description: End-to-End HiSock Redirect sample. */ +#define KBUILD_MODNAME "foo" #include <linux/if_vlan.h> -#include <net/dst.h> +#include <linux/filter.h> #include <uapi/linux/in.h> #include <uapi/linux/if_ether.h> @@ -15,14 +16,12 @@ #include <bpf/bpf_endian.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> #define IP_MF 0x2000 #define IP_OFFSET 0x1FFF #define CSUM_SHIFT_BITS 16 -#define SOCKOPS_SUCC 1 -#define SOCKOPS_FAIL 0 - #define PORT_LOCAL 1 #define PORT_REMOTE 2 @@ -38,10 +37,11 @@ struct sock_tuple { }; struct sock_value { - struct dst_entry *ingress_dst; + unsigned long sk; + unsigned long egress_dev; + unsigned long ingress_dev; struct ethhdr ingress_eth; bool eth_updated; - u32 ingress_ifindex; }; struct { @@ -58,9 +58,12 @@ struct { __uint(max_entries, 128); } speed_port SEC(".maps"); -struct dst_entry *bpf_skops_get_ingress_dst(struct bpf_sock_ops *skops) __ksym; -int bpf_xdp_set_ingress_dst(struct xdp_md *xdp, void *dst) __ksym; -int bpf_skb_change_dev(struct __sk_buff *skb, u32 ifindex) __ksym; +int bpf_set_ingress_dst(struct __sk_buff *skb, unsigned long sk) __ksym; +int bpf_get_skb_ethhdr(struct __sk_buff *skb, struct ethhdr *peth, int size) __ksym; +int bpf_set_ingress_dev(struct __sk_buff *skb, unsigned long dev) __ksym; +int bpf_set_egress_dev(struct __sk_buff *skb, unsigned long dev) __ksym; +void bpf_handle_ingress_ptype(struct __sk_buff *skb) __ksym; +void 
bpf_handle_egress_ptype(struct __sk_buff *skb) __ksym; static inline bool is_speed_flow(u32 local, u32 remote) { @@ -77,29 +80,46 @@ static inline bool is_speed_flow(u32 local, u32 remote) return false; } +static inline unsigned long parse_ingress_dev(struct bpf_sock_ops *skops) +{ + struct sk_buff *skb; + struct net_device *dev; + + skb = BPF_CORE_READ((struct bpf_sock_ops_kern *)skops, skb); + dev = BPF_CORE_READ(skb, dev); + + return (unsigned long)dev; +} + +static inline unsigned long parse_egress_dev(struct __sk_buff *skb) +{ + struct net_device *dev; + + dev = BPF_CORE_READ((struct sk_buff *)skb, dev); + + return (unsigned long)dev; +} + SEC("hisock_sockops") int hisock_sockops_prog(struct bpf_sock_ops *skops) { struct sock_tuple key = { 0 }; struct sock_value val = { 0 }; - struct dst_entry *dst; if (!is_speed_flow(skops->local_port, bpf_ntohl(skops->remote_port))) - return SOCKOPS_SUCC; + return 1; switch (skops->op) { case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: - dst = bpf_skops_get_ingress_dst(skops); - if (!dst) - return SOCKOPS_FAIL; - key.saddr = skops->remote_ip4; key.sport = bpf_ntohl(skops->remote_port); key.daddr = skops->local_ip4; key.dport = skops->local_port; - val.ingress_dst = dst; + val.sk = (unsigned long)skops->sk; + val.ingress_dev = parse_ingress_dev(skops); + bpf_map_update_elem(&connmap, &key, &val, BPF_ANY); bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); @@ -124,43 +144,39 @@ int hisock_sockops_prog(struct bpf_sock_ops *skops) break; } - return SOCKOPS_SUCC; + return 1; } SEC("hisock_ingress") -int hisock_ingress_prog(struct xdp_md *ctx) +int hisock_ingress_prog(struct __sk_buff *skb) { - void *data_end = (void *)(long)ctx->data_end; - void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; struct sock_tuple key = { 0 }; struct sock_value *val; - struct ethhdr *ehdr; struct tcphdr *thdr; struct iphdr *ihdr; + 
struct ethhdr ehdr; - ehdr = (struct ethhdr *)data; - if (ehdr + 1 > data_end) - return XDP_PASS; - - if (ehdr->h_proto != bpf_htons(ETH_P_IP)) - return XDP_PASS; + if (skb->protocol != bpf_htons(ETH_P_IP)) + return HISOCK_PASS; - ihdr = (struct iphdr *)(ehdr + 1); + ihdr = (struct iphdr *)data; if (ihdr + 1 > data_end) - return XDP_PASS; + return HISOCK_PASS; if (ihdr->ihl != 5 || ihdr->protocol != IPPROTO_TCP) - return XDP_PASS; + return HISOCK_PASS; if (ihdr->frag_off & bpf_htons(IP_MF | IP_OFFSET)) - return XDP_PASS; + return HISOCK_PASS; thdr = (struct tcphdr *)(ihdr + 1); if (thdr + 1 > data_end) - return XDP_PASS; + return HISOCK_PASS; if (thdr->syn || thdr->fin || thdr->rst) - return XDP_PASS; + return HISOCK_PASS; key.saddr = ihdr->saddr; key.sport = bpf_ntohs(thdr->source); @@ -169,22 +185,25 @@ int hisock_ingress_prog(struct xdp_md *ctx) val = bpf_map_lookup_elem(&connmap, &key); if (!val) - return XDP_PASS; + return HISOCK_PASS; - if (unlikely(!val->eth_updated)) { - memcpy(val->ingress_eth.h_source, ehdr->h_dest, ETH_ALEN); - memcpy(val->ingress_eth.h_dest, ehdr->h_source, ETH_ALEN); - val->ingress_eth.h_proto = ehdr->h_proto; - val->eth_updated = true; + if (!val->eth_updated) { + if (!(bpf_get_skb_ethhdr(skb, &ehdr, sizeof(ehdr)))) { + memcpy(val->ingress_eth.h_source, ehdr.h_dest, ETH_ALEN); + memcpy(val->ingress_eth.h_dest, ehdr.h_source, ETH_ALEN); + val->ingress_eth.h_proto = ehdr.h_proto; + val->eth_updated = true; + } } - if (unlikely(!val->ingress_ifindex)) - val->ingress_ifindex = ctx->ingress_ifindex; + if (!val->egress_dev) + val->egress_dev = parse_egress_dev(skb); - if (likely(val->ingress_dst)) - bpf_xdp_set_ingress_dst(ctx, val->ingress_dst); + bpf_set_ingress_dev(skb, val->ingress_dev); + bpf_handle_ingress_ptype(skb); + bpf_set_ingress_dst(skb, val->sk); - return XDP_HISOCK_REDIRECT; + return HISOCK_REDIRECT; } static inline void ipv4_csum(struct iphdr *ihdr) @@ -207,8 +226,19 @@ int hisock_egress_prog(struct __sk_buff *skb) struct 
sock_tuple key = { 0 }; struct sock_value *val; struct ethhdr *ehdr; + struct tcphdr *thdr; struct iphdr *ihdr; - int ret; + + ihdr = (struct iphdr *)data; + if (ihdr + 1 > data_end) + return HISOCK_PASS; + + thdr = (struct tcphdr *)(ihdr + 1); + if (thdr + 1 > data_end) + return HISOCK_PASS; + + if (thdr->syn || thdr->fin || thdr->rst) + return HISOCK_PASS; key.saddr = skb->remote_ip4; key.sport = bpf_ntohl(skb->remote_port); @@ -219,18 +249,13 @@ int hisock_egress_prog(struct __sk_buff *skb) if (!val) return HISOCK_PASS; - if (unlikely(!val->eth_updated)) + if (!val->eth_updated) goto redirect; - ihdr = (struct iphdr *)data; - if (ihdr + 1 > data_end) - return HISOCK_PASS; - ihdr->tot_len = bpf_htons(skb->len); ipv4_csum(ihdr); - ret = bpf_skb_change_head(skb, ETH_HLEN, 0); - if (ret < 0) + if (bpf_skb_change_head(skb, ETH_HLEN, 0) < 0) goto redirect; data = (void *)(long)skb->data; @@ -241,9 +266,9 @@ int hisock_egress_prog(struct __sk_buff *skb) return HISOCK_DROP; memcpy(ehdr, &val->ingress_eth, ETH_HLEN); + bpf_handle_egress_ptype(skb); redirect: - if (likely(val->ingress_ifindex)) - bpf_skb_change_dev(skb, val->ingress_ifindex); + bpf_set_egress_dev(skb, val->egress_dev); return HISOCK_REDIRECT; } diff --git a/samples/bpf/hisock/hisock_cmd.c b/samples/bpf/hisock/hisock_cmd.c index 6b64c008b6c7..ae114ccb6c24 100644 --- a/samples/bpf/hisock/hisock_cmd.c +++ b/samples/bpf/hisock/hisock_cmd.c @@ -48,9 +48,8 @@ struct hisock_prog_info { const char *prog_name; enum bpf_prog_type prog_type; enum bpf_attach_type attach_type; - int attach_flag; + bool is_dev_attach; int prog_fd; - bool is_xdp; }; static struct hisock_prog_info prog_infos[] = { @@ -58,22 +57,17 @@ static struct hisock_prog_info prog_infos[] = { .prog_name = "hisock_sockops_prog", .prog_type = BPF_PROG_TYPE_SOCK_OPS, .attach_type = BPF_CGROUP_SOCK_OPS, - .attach_flag = 0, - .is_xdp = false, }, { .prog_name = "hisock_ingress_prog", - .prog_type = BPF_PROG_TYPE_XDP, - .attach_type = BPF_XDP, - 
.attach_flag = XDP_FLAGS_SKB_MODE, - .is_xdp = true, + .prog_type = BPF_PROG_TYPE_HISOCK, + .attach_type = BPF_HISOCK_INGRESS, + .is_dev_attach = true, }, { .prog_name = "hisock_egress_prog", .prog_type = BPF_PROG_TYPE_HISOCK, .attach_type = BPF_HISOCK_EGRESS, - .attach_flag = 0, - .is_xdp = false, }, }; @@ -208,10 +202,9 @@ static int detach_progs(void) for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { info = &prog_infos[i]; - if (info->is_xdp) { + if (info->is_dev_attach) { for (j = 0; j < hisock.if_num; j++) { - if (bpf_xdp_detach(hisock.ifindex[j], - info->attach_flag, NULL)) { + if (bpf_prog_detach(hisock.ifindex[j], info->attach_type)) { fprintf(stderr, "ERROR: failed to detach prog %s\n", info->prog_name); @@ -244,17 +237,16 @@ static int attach_progs(void) for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { info = &prog_infos[i]; - if (info->is_xdp) { + if (info->is_dev_attach) { for (j = 0; j < hisock.if_num; j++) { - if (bpf_xdp_attach(hisock.ifindex[j], info->prog_fd, - info->attach_flag, NULL)) + if (bpf_prog_attach(info->prog_fd, hisock.ifindex[j], + info->attach_type, 0)) goto fail; } continue; } - if (bpf_prog_attach(info->prog_fd, cgrp_fd, info->attach_type, - info->attach_flag)) + if (bpf_prog_attach(info->prog_fd, cgrp_fd, info->attach_type, 0)) goto fail; } -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Only deploy hisock in server. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- samples/bpf/hisock/bpf.c | 16 +++-------- samples/bpf/hisock/hisock_cmd.c | 50 +++++++++++++++------------------ 2 files changed, 27 insertions(+), 39 deletions(-) diff --git a/samples/bpf/hisock/bpf.c b/samples/bpf/hisock/bpf.c index e6a70bad5245..03c6014aed43 100644 --- a/samples/bpf/hisock/bpf.c +++ b/samples/bpf/hisock/bpf.c @@ -22,9 +22,6 @@ #define IP_OFFSET 0x1FFF #define CSUM_SHIFT_BITS 16 -#define PORT_LOCAL 1 -#define PORT_REMOTE 2 - #define MAX_NUMA 8 #define MAX_CONN_NUMA 4096 #define MAX_CONN (MAX_CONN_NUMA * MAX_NUMA * 2) @@ -65,16 +62,12 @@ int bpf_set_egress_dev(struct __sk_buff *skb, unsigned long dev) __ksym; void bpf_handle_ingress_ptype(struct __sk_buff *skb) __ksym; void bpf_handle_egress_ptype(struct __sk_buff *skb) __ksym; -static inline bool is_speed_flow(u32 local, u32 remote) +static inline bool is_speed_flow(u16 port) { u8 *val; - val = bpf_map_lookup_elem(&speed_port, &local); - if (val && *val == PORT_LOCAL) - return true; - - val = bpf_map_lookup_elem(&speed_port, &remote); - if (val && *val == PORT_REMOTE) + val = bpf_map_lookup_elem(&speed_port, &port); + if (val && *val == 1) return true; return false; @@ -106,12 +99,11 @@ int hisock_sockops_prog(struct bpf_sock_ops *skops) struct sock_tuple key = { 0 }; struct sock_value val = { 0 }; - if (!is_speed_flow(skops->local_port, bpf_ntohl(skops->remote_port))) + if (!is_speed_flow(skops->local_port)) return 1; switch (skops->op) { case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: - case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: key.saddr = skops->remote_ip4; key.sport = bpf_ntohl(skops->remote_port); key.daddr = skops->local_ip4; diff --git a/samples/bpf/hisock/hisock_cmd.c b/samples/bpf/hisock/hisock_cmd.c index ae114ccb6c24..c490ef8f86f8 100644 --- a/samples/bpf/hisock/hisock_cmd.c +++ 
b/samples/bpf/hisock/hisock_cmd.c @@ -29,15 +29,12 @@ #include <bpf/libbpf.h> #define DEF_BPF_PATH "bpf.o" -#define PORT_LOCAL 1 -#define PORT_REMOTE 2 #define MAX_IF_NUM 8 struct { __u32 ifindex[MAX_IF_NUM]; int if_num; - char *local_port; - char *remote_port; + char *port; char *cgrp_path; char *bpf_path; bool unload; @@ -125,10 +122,11 @@ static int find_progs(struct bpf_object *obj) return 0; } -static int parse_port_range(const char *port_str, __u8 status, int map_fd) +static int parse_port_range(const char *port_str, int map_fd) { char *str = strdup(port_str); char *token, *rest = str; + __u8 val = 1; __u16 port; while ((token = strtok_r(rest, ",", &rest))) { @@ -144,18 +142,27 @@ static int parse_port_range(const char *port_str, __u8 status, int map_fd) return -1; } - for (port = start; port <= end; port++) - bpf_map_update_elem(map_fd, &port, &status, BPF_ANY); + for (port = start; port <= end; port++) { + if (bpf_map_update_elem(map_fd, &port, &val, BPF_ANY) < 0) { + fprintf(stderr, "ERROR: failed to update port range\n"); + return -1; + } + } - printf("Speed port range %u-%u:%u\n", start, end, status); + printf("Speed port range: %u-%u\n", start, end); } else { port = atoi(token); if (port == 0 || port > 65535) { fprintf(stderr, "Invalid port: %s\n", token); return -1; } - bpf_map_update_elem(map_fd, &port, &status, BPF_ANY); - printf("Speed port %u:%u\n", port, status); + + if (bpf_map_update_elem(map_fd, &port, &val, BPF_ANY) < 0) { + fprintf(stderr, "ERROR: failed to update port\n"); + return -1; + } + + printf("Speed port: %u\n", port); } } @@ -173,15 +180,8 @@ static int set_speed_port(struct bpf_object *obj) return -1; } - if (hisock.local_port && - parse_port_range(hisock.local_port, PORT_LOCAL, map_fd)) { - fprintf(stderr, "ERROR: failed to update local port\n"); - return -1; - } - - if (hisock.remote_port && - parse_port_range(hisock.remote_port, PORT_REMOTE, map_fd)) { - fprintf(stderr, "ERROR: failed to update remote port\n"); + if (hisock.port 
&& parse_port_range(hisock.port, map_fd)) { + fprintf(stderr, "ERROR: failed to update port\n"); return -1; } @@ -310,7 +310,7 @@ static void do_help(void) { fprintf(stderr, "Load: hisock_cmd [-f BPF_FILE] [-c CGRP_PATH] " - "[-p LOCAL_PORT] [-r REMOTE_PORT] [-i INTERFACE]\n" + "[-p PORT] [-i INTERFACE]\n" "Unload: hisock_cmd -u [-c CGRP_PATH] [-i INTERFACE]\n"); } @@ -322,7 +322,7 @@ static int parse_args(int argc, char **argv) hisock.bpf_path = DEF_BPF_PATH; hisock.if_num = 0; - while ((opt = getopt(argc, argv, "f:c:p:r:i:uh")) != -1) { + while ((opt = getopt(argc, argv, "f:c:p:i:uh")) != -1) { switch (opt) { case 'f': hisock.bpf_path = optarg; @@ -331,10 +331,7 @@ static int parse_args(int argc, char **argv) hisock.cgrp_path = optarg; break; case 'p': - hisock.local_port = optarg; - break; - case 'r': - hisock.remote_port = optarg; + hisock.port = optarg; break; case 'i': ifname = optarg; @@ -356,8 +353,7 @@ static int parse_args(int argc, char **argv) if (hisock.cgrp_path == NULL || hisock.if_num == 0 || (!hisock.unload && - hisock.local_port == NULL && - hisock.remote_port == NULL)) { + hisock.port == NULL)) { do_help(); return -1; } -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add multi port parse to hisock_cmd. Now we can execute the following command to add multi port: hisock_cmd -p 5201 -p 5302 -p 5400-5411 Signed-off-by: Pu Lehui <pulehui@huawei.com> --- samples/bpf/hisock/hisock_cmd.c | 45 +++++++++++++++++---------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/samples/bpf/hisock/hisock_cmd.c b/samples/bpf/hisock/hisock_cmd.c index c490ef8f86f8..d8db6a3ce415 100644 --- a/samples/bpf/hisock/hisock_cmd.c +++ b/samples/bpf/hisock/hisock_cmd.c @@ -30,15 +30,16 @@ #define DEF_BPF_PATH "bpf.o" #define MAX_IF_NUM 8 +#define MAX_PORT_NUM 8 struct { __u32 ifindex[MAX_IF_NUM]; int if_num; - char *port; + char *port[MAX_PORT_NUM]; + int port_num; char *cgrp_path; char *bpf_path; bool unload; - bool help; } hisock; struct hisock_prog_info { @@ -172,7 +173,7 @@ static int parse_port_range(const char *port_str, int map_fd) static int set_speed_port(struct bpf_object *obj) { - int map_fd; + int map_fd, i; map_fd = bpf_object__find_map_fd_by_name(obj, "speed_port"); if (map_fd < 0) { @@ -180,9 +181,11 @@ static int set_speed_port(struct bpf_object *obj) return -1; } - if (hisock.port && parse_port_range(hisock.port, map_fd)) { - fprintf(stderr, "ERROR: failed to update port\n"); - return -1; + for (i = 0; i < hisock.port_num; i++) { + if (hisock.port[i] && parse_port_range(hisock.port[i], map_fd)) { + fprintf(stderr, "ERROR: failed to update port\n"); + return -1; + } } return 0; @@ -316,11 +319,9 @@ static void do_help(void) static int parse_args(int argc, char **argv) { - char *ifname; int opt; hisock.bpf_path = DEF_BPF_PATH; - hisock.if_num = 0; while ((opt = getopt(argc, argv, "f:c:p:i:uh")) != -1) { switch (opt) { @@ -331,29 +332,34 @@ static int parse_args(int argc, char **argv) hisock.cgrp_path = optarg; break; case 'p': - hisock.port = optarg; + hisock.port[hisock.port_num] = optarg; + 
hisock.port_num++; break; case 'i': - ifname = optarg; - hisock.ifindex[hisock.if_num] = if_nametoindex(ifname); + hisock.ifindex[hisock.if_num] = if_nametoindex(optarg); hisock.if_num++; break; case 'u': hisock.unload = true; break; case 'h': - hisock.help = true; - break; + do_help(); + exit(0); default: fprintf(stderr, "ERROR: unknown option %c\n", opt); return -1; } } - if (hisock.cgrp_path == NULL || - hisock.if_num == 0 || - (!hisock.unload && - hisock.port == NULL)) { + if (hisock.unload && + (hisock.cgrp_path == NULL || hisock.if_num == 0)) { + do_help(); + return -1; + } + + if (!hisock.unload && + (hisock.cgrp_path == NULL || hisock.if_num == 0 || + hisock.port_num == 0)) { do_help(); return -1; } @@ -368,11 +374,6 @@ int main(int argc, char **argv) return -1; } - if (hisock.help) { - do_help(); - return 0; - } - if (hisock.unload) { if (detach_progs()) { fprintf(stderr, "ERROR: failed to detach progs\n"); -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add target comm parse to hisock_cmd. Now we can execute the following command to speed up specific tasks: hisock_cmd -C iperf3 Signed-off-by: Pu Lehui <pulehui@huawei.com> --- samples/bpf/hisock/bpf.c | 30 +++++++++++++++++++++ samples/bpf/hisock/hisock_cmd.c | 48 ++++++++++++++++++++++++++++++--- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/samples/bpf/hisock/bpf.c b/samples/bpf/hisock/bpf.c index 03c6014aed43..154abd62dd0d 100644 --- a/samples/bpf/hisock/bpf.c +++ b/samples/bpf/hisock/bpf.c @@ -26,6 +26,8 @@ #define MAX_CONN_NUMA 4096 #define MAX_CONN (MAX_CONN_NUMA * MAX_NUMA * 2) +#define MAX_COMM_NUM 8 + struct sock_tuple { u32 saddr; u32 daddr; @@ -55,6 +57,13 @@ struct { __uint(max_entries, 128); } speed_port SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(char[TASK_COMM_LEN])); + __uint(value_size, sizeof(u8)); + __uint(max_entries, MAX_COMM_NUM); +} target_comm SEC(".maps"); + int bpf_set_ingress_dst(struct __sk_buff *skb, unsigned long sk) __ksym; int bpf_get_skb_ethhdr(struct __sk_buff *skb, struct ethhdr *peth, int size) __ksym; int bpf_set_ingress_dev(struct __sk_buff *skb, unsigned long dev) __ksym; @@ -93,12 +102,33 @@ static inline unsigned long parse_egress_dev(struct __sk_buff *skb) return (unsigned long)dev; } +static void handle_listen_cb(struct bpf_sock_ops *skops) +{ + char comm[TASK_COMM_LEN] = { 0 }; + u8 *comm_val; + + bpf_get_current_comm(comm, sizeof(comm)); + + comm_val = bpf_map_lookup_elem(&target_comm, comm); + if (comm_val && *comm_val == 1) { + u16 key = skops->local_port; + u8 val = 1; + + bpf_map_update_elem(&speed_port, &key, &val, BPF_ANY); + } +} + SEC("hisock_sockops") int hisock_sockops_prog(struct bpf_sock_ops *skops) { struct sock_tuple key = { 0 }; struct sock_value val = { 0 }; + if (skops->op == BPF_SOCK_OPS_TCP_LISTEN_CB) { + 
handle_listen_cb(skops); + return 1; + } + if (!is_speed_flow(skops->local_port)) return 1; diff --git a/samples/bpf/hisock/hisock_cmd.c b/samples/bpf/hisock/hisock_cmd.c index d8db6a3ce415..97afb8b02562 100644 --- a/samples/bpf/hisock/hisock_cmd.c +++ b/samples/bpf/hisock/hisock_cmd.c @@ -28,15 +28,22 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#ifndef TASK_COMM_LEN +#define TASK_COMM_LEN 16 +#endif + #define DEF_BPF_PATH "bpf.o" #define MAX_IF_NUM 8 #define MAX_PORT_NUM 8 +#define MAX_COMM_NUM 8 struct { __u32 ifindex[MAX_IF_NUM]; int if_num; char *port[MAX_PORT_NUM]; int port_num; + char *comm[MAX_COMM_NUM]; + int comm_num; char *cgrp_path; char *bpf_path; bool unload; @@ -191,6 +198,31 @@ static int set_speed_port(struct bpf_object *obj) return 0; } +static int set_target_comm(struct bpf_object *obj) +{ + int map_fd, i; + + map_fd = bpf_object__find_map_fd_by_name(obj, "target_comm"); + if (map_fd < 0) { + fprintf(stderr, "ERROR: failed to find map fd\n"); + return -1; + } + + for (i = 0; i < hisock.comm_num; i++) { + char key[TASK_COMM_LEN] = { 0 }; + __u8 val = 1; + + strncpy(key, hisock.comm[i], sizeof(key) - 1); + if (bpf_map_update_elem(map_fd, &key, &val, BPF_ANY) < 0) { + fprintf(stderr, "ERROR: failed to update comm\n"); + return -1; + } + printf("Target comm: %s\n", key); + } + + return 0; +} + static int detach_progs(void) { struct hisock_prog_info *info; @@ -299,6 +331,12 @@ static int do_hisock(void) return -1; } + if (set_target_comm(obj)) { + fprintf(stderr, "ERROR: failed to set target comm\n"); + bpf_object__close(obj); + return -1; + } + if (attach_progs()) { fprintf(stderr, "ERROR: failed to attach progs\n"); bpf_object__close(obj); @@ -313,7 +351,7 @@ static void do_help(void) { fprintf(stderr, "Load: hisock_cmd [-f BPF_FILE] [-c CGRP_PATH] " - "[-p PORT] [-i INTERFACE]\n" + "[-p PORT] [-C COMM] [-i INTERFACE]\n" "Unload: hisock_cmd -u [-c CGRP_PATH] [-i INTERFACE]\n"); } @@ -323,7 +361,7 @@ static int parse_args(int argc, char **argv) 
hisock.bpf_path = DEF_BPF_PATH; - while ((opt = getopt(argc, argv, "f:c:p:i:uh")) != -1) { + while ((opt = getopt(argc, argv, "f:c:p:i:C:uh")) != -1) { switch (opt) { case 'f': hisock.bpf_path = optarg; @@ -339,6 +377,10 @@ static int parse_args(int argc, char **argv) hisock.ifindex[hisock.if_num] = if_nametoindex(optarg); hisock.if_num++; break; + case 'C': + hisock.comm[hisock.comm_num] = optarg; + hisock.comm_num++; + break; case 'u': hisock.unload = true; break; @@ -359,7 +401,7 @@ static int parse_args(int argc, char **argv) if (!hisock.unload && (hisock.cgrp_path == NULL || hisock.if_num == 0 || - hisock.port_num == 0)) { + (hisock.port_num == 0 && hisock.comm_num == 0))) { do_help(); return -1; } -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add ipv4-mapped ipv6 addr support for hisock. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- samples/bpf/hisock/bpf.c | 123 ++++++++++++++++++++++++++++----------- 1 file changed, 89 insertions(+), 34 deletions(-) diff --git a/samples/bpf/hisock/bpf.c b/samples/bpf/hisock/bpf.c index 154abd62dd0d..774f3d0223d9 100644 --- a/samples/bpf/hisock/bpf.c +++ b/samples/bpf/hisock/bpf.c @@ -82,6 +82,12 @@ static inline bool is_speed_flow(u16 port) return false; } +static inline bool is_ipv6_addr_mapped(u32 *addr6) +{ + return addr6[0] == 0 && addr6[1] == 0 && + addr6[2] == bpf_htonl(0x0000ffff); +} + static inline unsigned long parse_ingress_dev(struct bpf_sock_ops *skops) { struct sk_buff *skb; @@ -118,52 +124,101 @@ static void handle_listen_cb(struct bpf_sock_ops *skops) } } -SEC("hisock_sockops") -int hisock_sockops_prog(struct bpf_sock_ops *skops) +static void +handle_remote_estd_cb(struct bpf_sock_ops *skops, u32 *laddr, u32 *raddr) { struct sock_tuple key = { 0 }; struct sock_value val = { 0 }; - if (skops->op == BPF_SOCK_OPS_TCP_LISTEN_CB) { - handle_listen_cb(skops); - return 1; - } + key.saddr = *raddr; + key.daddr = *laddr; + key.sport = bpf_ntohl(skops->remote_port); + key.dport = skops->local_port; - if (!is_speed_flow(skops->local_port)) - return 1; + val.sk = (unsigned long)skops->sk; + val.ingress_dev = parse_ingress_dev(skops); + + bpf_map_update_elem(&connmap, &key, &val, BPF_ANY); - switch (skops->op) { - case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: - key.saddr = skops->remote_ip4; - key.sport = bpf_ntohl(skops->remote_port); - key.daddr = skops->local_ip4; - key.dport = skops->local_port; + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); +} - val.sk = (unsigned long)skops->sk; - val.ingress_dev = parse_ingress_dev(skops); +static inline void handle_passive_estd_inet_cb(struct bpf_sock_ops *skops) +{ + 
handle_remote_estd_cb(skops, &skops->local_ip4, &skops->remote_ip4); +} - bpf_map_update_elem(&connmap, &key, &val, BPF_ANY); +static inline void handle_passive_estd_inet6_cb(struct bpf_sock_ops *skops) +{ + handle_remote_estd_cb(skops, &skops->local_ip6[3], &skops->remote_ip6[3]); +} - bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); - break; - case BPF_SOCK_OPS_STATE_CB: - if (skops->args[1] != BPF_TCP_CLOSE_WAIT && - skops->args[1] != BPF_TCP_FIN_WAIT1 && - skops->args[1] != BPF_TCP_CLOSE) - break; +static void +handle_terminate_cb(struct bpf_sock_ops *skops, u32 *laddr, u32 *raddr) +{ + struct sock_tuple key = { 0 }; + + if (skops->args[1] != BPF_TCP_CLOSE_WAIT && + skops->args[1] != BPF_TCP_FIN_WAIT1 && + skops->args[1] != BPF_TCP_CLOSE) + return; + + key.saddr = *raddr; + key.daddr = *laddr; + key.sport = bpf_ntohl(skops->remote_port); + key.dport = skops->local_port; + + bpf_map_delete_elem(&connmap, &key); - key.saddr = skops->remote_ip4; - key.sport = bpf_ntohl(skops->remote_port); - key.daddr = skops->local_ip4; - key.dport = skops->local_port; + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_STATE_CB_FLAG); +} + +static inline void handle_terminate_inet_cb(struct bpf_sock_ops *skops) +{ + handle_terminate_cb(skops, &skops->local_ip4, &skops->remote_ip4); +} - bpf_map_delete_elem(&connmap, &key); +static inline void handle_terminate_inet6_cb(struct bpf_sock_ops *skops) +{ + handle_terminate_cb(skops, &skops->local_ip6[3], &skops->remote_ip6[3]); +} + +SEC("hisock_sockops") +int hisock_sockops_prog(struct bpf_sock_ops *skops) +{ + if (skops->op == BPF_SOCK_OPS_TCP_LISTEN_CB) { + handle_listen_cb(skops); + return 1; + } - bpf_sock_ops_cb_flags_set(skops, - skops->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_STATE_CB_FLAG); - break; - default: - break; + if (skops->family == AF_INET) { + switch (skops->op) { + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + if (!is_speed_flow(skops->local_port)) + break; + 
handle_passive_estd_inet_cb(skops); + break; + case BPF_SOCK_OPS_STATE_CB: + handle_terminate_inet_cb(skops); + break; + default: + break; + } + } else if (skops->family == AF_INET6 && + is_ipv6_addr_mapped(skops->local_ip6)) { + switch (skops->op) { + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + if (!is_speed_flow(skops->local_port)) + break; + handle_passive_estd_inet6_cb(skops); + break; + case BPF_SOCK_OPS_STATE_CB: + handle_terminate_inet6_cb(skops); + break; + default: + break; + } } return 1; -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add local connect support for hisock. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- net/core/filter.c | 4 ++ samples/bpf/hisock/bpf.c | 107 +++++++++++++++++++++++++++++++- samples/bpf/hisock/hisock_cmd.c | 49 ++++++++++++++- 3 files changed, 156 insertions(+), 4 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index b2f72c492d26..5a8ccbb3c619 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8446,6 +8446,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sock_ops_reserve_hdr_opt_proto; case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; +#ifdef CONFIG_HISOCK + case BPF_FUNC_sk_release: + return &bpf_sk_release_proto; +#endif #endif /* CONFIG_INET */ #if IS_ENABLED(CONFIG_NETACC_BPF) case BPF_FUNC_get_current_comm: diff --git a/samples/bpf/hisock/bpf.c b/samples/bpf/hisock/bpf.c index 774f3d0223d9..0b7a2784b59b 100644 --- a/samples/bpf/hisock/bpf.c +++ b/samples/bpf/hisock/bpf.c @@ -50,6 +50,13 @@ struct { __uint(max_entries, MAX_CONN); } connmap SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(key_size, sizeof(struct sock_tuple)); + __uint(value_size, sizeof(int)); + __uint(max_entries, MAX_CONN); +} local_connmap SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(u16)); @@ -82,6 +89,28 @@ static inline bool is_speed_flow(u16 port) return false; } +static inline bool +is_local_conn(struct bpf_sock_ops *skops, u32 *laddr, u32 *raddr) +{ + struct sock_tuple key = { 0 }; + struct bpf_sock *sk; + + if (*laddr == *raddr) + return true; + + key.saddr = *laddr; + key.daddr = *raddr; + key.sport = skops->local_port; + key.dport = bpf_ntohl(skops->remote_port); + + sk = bpf_map_lookup_elem(&local_connmap, &key); + if (!sk) + return false; + + bpf_sk_release(sk); + return true; +} + static inline bool is_ipv6_addr_mapped(u32 
*addr6) { return addr6[0] == 0 && addr6[1] == 0 && @@ -124,6 +153,19 @@ static void handle_listen_cb(struct bpf_sock_ops *skops) } } +static void +handle_local_estd_cb(struct bpf_sock_ops *skops, u32 *laddr, u32 *raddr) +{ + struct sock_tuple key = { 0 }; + + key.saddr = *raddr; + key.daddr = *laddr; + key.sport = bpf_ntohl(skops->remote_port); + key.dport = skops->local_port; + + bpf_sock_hash_update(skops, &local_connmap, &key, BPF_NOEXIST); +} + static void handle_remote_estd_cb(struct bpf_sock_ops *skops, u32 *laddr, u32 *raddr) { @@ -145,12 +187,28 @@ handle_remote_estd_cb(struct bpf_sock_ops *skops, u32 *laddr, u32 *raddr) static inline void handle_passive_estd_inet_cb(struct bpf_sock_ops *skops) { - handle_remote_estd_cb(skops, &skops->local_ip4, &skops->remote_ip4); + if (is_local_conn(skops, &skops->local_ip4, &skops->remote_ip4)) + handle_local_estd_cb(skops, &skops->local_ip4, &skops->remote_ip4); + else + handle_remote_estd_cb(skops, &skops->local_ip4, &skops->remote_ip4); } static inline void handle_passive_estd_inet6_cb(struct bpf_sock_ops *skops) { - handle_remote_estd_cb(skops, &skops->local_ip6[3], &skops->remote_ip6[3]); + if (is_local_conn(skops, &skops->local_ip6[3], &skops->remote_ip6[3])) + handle_local_estd_cb(skops, &skops->local_ip6[3], &skops->remote_ip6[3]); + else + handle_remote_estd_cb(skops, &skops->local_ip6[3], &skops->remote_ip6[3]); +} + +static inline void handle_active_estd_inet_cb(struct bpf_sock_ops *skops) +{ + handle_local_estd_cb(skops, &skops->local_ip4, &skops->remote_ip4); +} + +static inline void handle_active_estd_inet6_cb(struct bpf_sock_ops *skops) +{ + handle_local_estd_cb(skops, &skops->local_ip6[3], &skops->remote_ip6[3]); } static void @@ -199,6 +257,11 @@ int hisock_sockops_prog(struct bpf_sock_ops *skops) break; handle_passive_estd_inet_cb(skops); break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + if (!is_speed_flow(bpf_ntohl(skops->remote_port))) + break; + handle_active_estd_inet_cb(skops); + break; case 
BPF_SOCK_OPS_STATE_CB: handle_terminate_inet_cb(skops); break; @@ -213,6 +276,11 @@ int hisock_sockops_prog(struct bpf_sock_ops *skops) break; handle_passive_estd_inet6_cb(skops); break; + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + if (!is_speed_flow(bpf_ntohl(skops->remote_port))) + break; + handle_active_estd_inet6_cb(skops); + break; case BPF_SOCK_OPS_STATE_CB: handle_terminate_inet6_cb(skops); break; @@ -224,6 +292,41 @@ int hisock_sockops_prog(struct bpf_sock_ops *skops) return 1; } +static void +msg_redirect_cb(struct sk_msg_md *msg, u32 *laddr, u32 *raddr) +{ + struct sock_tuple key = { 0 }; + + key.daddr = *raddr; + key.saddr = *laddr; + key.dport = bpf_ntohl(msg->remote_port); + key.sport = msg->local_port; + + bpf_msg_redirect_hash(msg, &local_connmap, &key, BPF_F_INGRESS); +} + +static inline void msg_redirect_inet_cb(struct sk_msg_md *msg) +{ + msg_redirect_cb(msg, &msg->local_ip4, &msg->remote_ip4); +} + +static inline void msg_redirect_inet6_cb(struct sk_msg_md *msg) +{ + msg_redirect_cb(msg, &msg->local_ip6[3], &msg->remote_ip6[3]); +} + +SEC("hisock_skmsg") +int hisock_skmsg_prog(struct sk_msg_md *msg) +{ + if (msg->family == AF_INET) + msg_redirect_inet_cb(msg); + else if (msg->family == AF_INET6 && + is_ipv6_addr_mapped(msg->local_ip6)) + msg_redirect_inet6_cb(msg); + + return SK_PASS; +} + SEC("hisock_ingress") int hisock_ingress_prog(struct __sk_buff *skb) { diff --git a/samples/bpf/hisock/hisock_cmd.c b/samples/bpf/hisock/hisock_cmd.c index 97afb8b02562..7d8902129592 100644 --- a/samples/bpf/hisock/hisock_cmd.c +++ b/samples/bpf/hisock/hisock_cmd.c @@ -32,6 +32,7 @@ #define TASK_COMM_LEN 16 #endif +#define HISOCK_BPFFS "/sys/fs/bpf/hisock" #define DEF_BPF_PATH "bpf.o" #define MAX_IF_NUM 8 #define MAX_PORT_NUM 8 @@ -51,9 +52,11 @@ struct { struct hisock_prog_info { const char *prog_name; + const char *pin_map; enum bpf_prog_type prog_type; enum bpf_attach_type attach_type; bool is_dev_attach; + bool is_skmsg; int prog_fd; }; @@ -63,6 +66,13 @@ 
static struct hisock_prog_info prog_infos[] = { .prog_type = BPF_PROG_TYPE_SOCK_OPS, .attach_type = BPF_CGROUP_SOCK_OPS, }, + { + .prog_name = "hisock_skmsg_prog", + .prog_type = BPF_PROG_TYPE_SK_MSG, + .attach_type = BPF_SK_MSG_VERDICT, + .pin_map = "local_connmap", + .is_skmsg = true, + }, { .prog_name = "hisock_ingress_prog", .prog_type = BPF_PROG_TYPE_HISOCK, @@ -249,6 +259,16 @@ static int detach_progs(void) continue; } + if (info->is_skmsg) { + char pin_path[64]; + + snprintf(pin_path, sizeof(pin_path), "%s/%s", + HISOCK_BPFFS, info->pin_map); + + unlink(pin_path); + continue; + } + if (bpf_prog_detach(cgrp_fd, info->attach_type)) { fprintf(stderr, "ERROR: failed to detach prog %s\n", info->prog_name); err_cnt++; @@ -259,7 +279,7 @@ static int detach_progs(void) return -err_cnt; } -static int attach_progs(void) +static int attach_progs(struct bpf_object *obj) { struct hisock_prog_info *info; int i, j, cgrp_fd; @@ -281,6 +301,31 @@ static int attach_progs(void) continue; } + if (info->is_skmsg) { + struct bpf_map *map; + char pin_path[64]; + + map = bpf_object__find_map_by_name(obj, info->pin_map); + if (!map) { + fprintf(stderr, "ERROR: failed to find pin map\n"); + goto fail; + } + + snprintf(pin_path, sizeof(pin_path), "%s/%s", + HISOCK_BPFFS, info->pin_map); + + if (bpf_map__pin(map, pin_path)) { + fprintf(stderr, "ERROR: failed to pin map\n"); + goto fail; + } + + if (bpf_prog_attach(info->prog_fd, bpf_map__fd(map), + info->attach_type, 0)) + goto fail; + + continue; + } + if (bpf_prog_attach(info->prog_fd, cgrp_fd, info->attach_type, 0)) goto fail; } @@ -337,7 +382,7 @@ static int do_hisock(void) return -1; } - if (attach_progs()) { + if (attach_progs(obj)) { fprintf(stderr, "ERROR: failed to attach progs\n"); bpf_object__close(obj); return -1; -- 2.34.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Deprecate hisock unused kfuncs and orig ingress logic. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/net/xdp.h | 5 -- include/uapi/linux/bpf.h | 1 - net/core/dev.c | 18 ++----- net/core/filter.c | 88 ---------------------------------- tools/include/uapi/linux/bpf.h | 1 - 5 files changed, 3 insertions(+), 110 deletions(-) diff --git a/include/net/xdp.h b/include/net/xdp.h index 4ca0a42e55c6..31698ef493b3 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -150,11 +150,6 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, xdp->data_meta = meta_valid ? data : data + 1; } -struct hisock_xdp_buff { - struct xdp_buff xdp; - struct sk_buff *skb; -}; - /* Reserve memory area at end-of data area. * * This macro reserves tailroom in the XDP buffer by limiting the diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a513dbb6d520..5aca37b62f79 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6319,7 +6319,6 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, - XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook diff --git a/net/core/dev.c b/net/core/dev.c index 8fdc2e3ec300..dc1c65d2839d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5053,9 +5053,6 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, case XDP_REDIRECT: case XDP_TX: case XDP_PASS: -#ifdef CONFIG_HISOCK - case XDP_HISOCK_REDIRECT: -#endif break; default: bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act); @@ -5165,31 +5162,22 @@ static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) { if (xdp_prog) { - struct hisock_xdp_buff hxdp; - struct xdp_buff *xdp = &hxdp.xdp; + struct xdp_buff xdp; u32 act; int err; - hxdp.skb = skb; - act = netif_receive_generic_xdp(skb, xdp, xdp_prog); + act =
netif_receive_generic_xdp(skb, &xdp, xdp_prog); if (act != XDP_PASS) { switch (act) { case XDP_REDIRECT: err = xdp_do_generic_redirect(skb->dev, skb, - xdp, xdp_prog); + &xdp, xdp_prog); if (err) goto out_redir; break; case XDP_TX: generic_xdp_tx(skb, xdp_prog); break; -#ifdef CONFIG_HISOCK - case XDP_HISOCK_REDIRECT: - err = do_hisock_ingress_redirect(skb); - if (err == -EOPNOTSUPP) - return XDP_PASS; - break; -#endif } return XDP_DROP; } diff --git a/net/core/filter.c b/net/core/filter.c index 5a8ccbb3c619..6726632743a8 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -12151,45 +12151,6 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, } #ifdef CONFIG_HISOCK -__bpf_kfunc struct dst_entry * -bpf_skops_get_ingress_dst(struct bpf_sock_ops *skops_ctx) -{ - struct bpf_sock_ops_kern *skops = (struct bpf_sock_ops_kern *)skops_ctx; - struct sock *sk = skops->sk; - struct dst_entry *dst; - - WARN_ON_ONCE(!rcu_read_lock_held()); - - if (!sk || !sk_fullsock(sk)) - return NULL; - - dst = rcu_dereference(sk->sk_rx_dst); - if (dst) - dst = dst_check(dst, 0); - - return dst; -} - -__bpf_kfunc int bpf_xdp_set_ingress_dst(struct xdp_md *xdp_ctx, void *dst__ign) -{ - struct xdp_buff *xdp = (struct xdp_buff *)xdp_ctx; - struct hisock_xdp_buff *hxdp = (struct hisock_xdp_buff *)xdp; - struct dst_entry *_dst = (struct dst_entry *)dst__ign; - - if (!hxdp->skb) - return -EOPNOTSUPP; - - if (!_dst || !virt_addr_valid(_dst)) - return -EFAULT; - - /* same as skb_valid_dst */ - if (_dst->flags & DST_METADATA) - return -EINVAL; - - skb_dst_set_noref(hxdp->skb, _dst); - return 0; -} - __bpf_kfunc int bpf_set_ingress_dst(struct __sk_buff *skb_ctx, unsigned long _sk) { struct sk_buff *skb = (struct sk_buff *)skb_ctx; @@ -12213,40 +12174,6 @@ __bpf_kfunc int bpf_set_ingress_dst(struct __sk_buff *skb_ctx, unsigned long _sk return 0; } -__bpf_kfunc int bpf_xdp_change_dev(struct xdp_md *xdp_ctx, u32 ifindex) -{ - struct xdp_buff *xdp = (struct xdp_buff 
*)xdp_ctx; - struct hisock_xdp_buff *hxdp = (void *)xdp; - struct net_device *dev; - - WARN_ON_ONCE(!rcu_read_lock_held()); - - if (!hxdp->skb) - return -EOPNOTSUPP; - - dev = dev_get_by_index_rcu(&init_net, ifindex); - if (!dev) - return -ENODEV; - - hxdp->skb->dev = dev; - return 0; -} - -__bpf_kfunc int bpf_skb_change_dev(struct __sk_buff *skb_ctx, u32 ifindex) -{ - struct sk_buff *skb = (struct sk_buff *)skb_ctx; - struct net_device *dev; - - WARN_ON_ONCE(!rcu_read_lock_held()); - - dev = dev_get_by_index_rcu(&init_net, ifindex); - if (!dev) - return -ENODEV; - - skb->dev = dev; - return 0; -} - __bpf_kfunc int bpf_get_skb_ethhdr(struct __sk_buff *skb_ctx, struct ethhdr *peth, int size__sz) { @@ -12350,10 +12277,6 @@ BTF_SET8_END(bpf_kfunc_check_set_skb) BTF_SET8_START(bpf_kfunc_check_set_xdp) BTF_ID_FLAGS(func, bpf_dynptr_from_xdp) -#ifdef CONFIG_HISOCK -BTF_ID_FLAGS(func, bpf_xdp_set_ingress_dst) -BTF_ID_FLAGS(func, bpf_xdp_change_dev) -#endif BTF_SET8_END(bpf_kfunc_check_set_xdp) BTF_SET8_START(bpf_kfunc_check_set_sock_addr) @@ -12361,16 +12284,11 @@ BTF_ID_FLAGS(func, bpf_sock_addr_set_sun_path) BTF_SET8_END(bpf_kfunc_check_set_sock_addr) #ifdef CONFIG_HISOCK -BTF_SET8_START(bpf_kfunc_check_set_sock_ops) -BTF_ID_FLAGS(func, bpf_skops_get_ingress_dst, KF_RET_NULL) -BTF_SET8_END(bpf_kfunc_check_set_sock_ops) - BTF_SET8_START(bpf_kfunc_check_set_hisock) BTF_ID_FLAGS(func, bpf_set_ingress_dst) BTF_ID_FLAGS(func, bpf_get_skb_ethhdr) BTF_ID_FLAGS(func, bpf_set_ingress_dev) BTF_ID_FLAGS(func, bpf_set_egress_dev) -BTF_ID_FLAGS(func, bpf_skb_change_dev) BTF_ID_FLAGS(func, bpf_handle_ingress_ptype) BTF_ID_FLAGS(func, bpf_handle_egress_ptype) BTF_SET8_END(bpf_kfunc_check_set_hisock) @@ -12392,11 +12310,6 @@ static const struct btf_kfunc_id_set bpf_kfunc_set_sock_addr = { }; #ifdef CONFIG_HISOCK -static const struct btf_kfunc_id_set bpf_kfunc_set_sock_ops = { - .owner = THIS_MODULE, - .set = &bpf_kfunc_check_set_sock_ops, -}; - static const struct btf_kfunc_id_set 
bpf_kfunc_set_hisock = { .owner = THIS_MODULE, .set = &bpf_kfunc_check_set_hisock, @@ -12419,7 +12332,6 @@ static int __init bpf_kfunc_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_NETFILTER, &bpf_kfunc_set_skb); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); #ifdef CONFIG_HISOCK - ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SOCK_OPS, &bpf_kfunc_set_sock_ops); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_HISOCK, &bpf_kfunc_set_hisock); #endif return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4036c80105a2..337fc55cd500 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6322,7 +6322,6 @@ enum xdp_action { XDP_PASS, XDP_TX, XDP_REDIRECT, - XDP_HISOCK_REDIRECT = 100, }; /* user accessible metadata for XDP packet hook -- 2.34.1
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://atomgit.com/openeuler/kernel/merge_requests/20888 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/BZC... Feedback: The patch(es) that you sent to the kernel@openeuler.org mailing list have been successfully converted to a pull request! Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/20888 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/BZC...
participants (2)
-
patchwork bot -
Pu Lehui