hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add BPF_HISOCK_INGRESS attach type, which will replace the XDP_HISOCK_REDIRECT action to redirect target receive skb to TCP. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/linux/filter.h | 24 ++++++++++++ include/linux/netdevice.h | 4 ++ include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 33 +++++++++++++--- net/Kconfig | 1 + net/core/dev.c | 38 ++++++++++++++++--- net/core/filter.c | 69 +++++++++++++++++++++++++++++++--- tools/include/uapi/linux/bpf.h | 1 + tools/lib/bpf/libbpf.c | 4 +- 9 files changed, 157 insertions(+), 18 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 27349660428a..53b43bd6b02d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1610,4 +1610,28 @@ static inline void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off, voi } #endif /* CONFIG_NET */ +#ifdef CONFIG_HISOCK +DECLARE_STATIC_KEY_FALSE(hisock_ingress_key); + +int hisock_ingress_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int hisock_ingress_prog_detach(const union bpf_attr *attr); + +static inline int hisock_ingress_bpf_run(struct bpf_prog *prog, struct sk_buff *skb) +{ + void *saved_data_end; + int ret; + + if (unlikely(!prog)) + return HISOCK_PASS; + + rcu_read_lock(); + bpf_compute_and_save_data_end(skb, &saved_data_end); + ret = bpf_prog_run_pin_on_cpu(prog, skb); + bpf_restore_data_end(skb, saved_data_end); + rcu_read_unlock(); + + return ret; +} +#endif + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a68c371a5d7a..fd043ed1a9d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2446,7 +2446,11 @@ struct net_device { struct devlink_port *devlink_port; +#ifdef CONFIG_HISOCK + KABI_USE(1, struct bpf_prog __rcu *hisock_ingress) +#else KABI_RESERVE(1) +#endif KABI_RESERVE(2) KABI_RESERVE(3) 
KABI_RESERVE(4) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index cf088c780ab1..a513dbb6d520 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1064,6 +1064,7 @@ enum bpf_attach_type { BPF_SCHED, #ifndef __GENKSYMS__ BPF_HISOCK_EGRESS, + BPF_HISOCK_INGRESS, #endif __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6b28f4001758..042f44a27941 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2536,6 +2536,16 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, if (expected_attach_type == BPF_NETFILTER) return 0; return -EINVAL; +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + switch (expected_attach_type) { + case BPF_HISOCK_EGRESS: + case BPF_HISOCK_INGRESS: + return 0; + default: + return -EINVAL; + } +#endif case BPF_PROG_TYPE_SYSCALL: case BPF_PROG_TYPE_EXT: if (expected_attach_type) @@ -3835,6 +3845,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) return BPF_PROG_TYPE_CGROUP_SOCKOPT; #ifdef CONFIG_HISOCK case BPF_HISOCK_EGRESS: + case BPF_HISOCK_INGRESS: return BPF_PROG_TYPE_HISOCK; #endif case BPF_TRACE_ITER: @@ -3995,9 +4006,6 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: -#ifdef CONFIG_HISOCK - case BPF_PROG_TYPE_HISOCK: -#endif if (ptype == BPF_PROG_TYPE_LSM && prog->expected_attach_type != BPF_LSM_CGROUP) ret = -EINVAL; @@ -4007,6 +4015,14 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_SCHED_CLS: ret = tcx_prog_attach(attr, prog); break; +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + if (attr->attach_type == BPF_HISOCK_EGRESS) + ret = cgroup_bpf_prog_attach(attr, ptype, prog); + else if (attr->attach_type == BPF_HISOCK_INGRESS) + ret = hisock_ingress_prog_attach(attr, prog); + break; +#endif default: ret = -EINVAL; } @@ -4063,14 +4079,19 @@ static int bpf_prog_detach(const union bpf_attr *attr) case 
BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_LSM: -#ifdef CONFIG_HISOCK - case BPF_PROG_TYPE_HISOCK: -#endif ret = cgroup_bpf_prog_detach(attr, ptype); break; case BPF_PROG_TYPE_SCHED_CLS: ret = tcx_prog_detach(attr, prog); break; +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + if (attr->attach_type == BPF_HISOCK_EGRESS) + ret = cgroup_bpf_prog_detach(attr, ptype); + else if (attr->attach_type == BPF_HISOCK_INGRESS) + ret = hisock_ingress_prog_detach(attr); + break; +#endif default: ret = -EINVAL; } diff --git a/net/Kconfig b/net/Kconfig index 31e8a650bb7b..f738a37a68d0 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -360,6 +360,7 @@ config BPF_STREAM_PARSER config HISOCK bool "enable HiSock Redirect Framework" depends on INET + depends on ARM64 depends on CGROUP_BPF depends on BPF_SYSCALL default n diff --git a/net/core/dev.c b/net/core/dev.c index 5dbe8b7f2aa0..8fdc2e3ec300 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5102,7 +5102,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) } #ifdef CONFIG_HISOCK -static int generic_xdp_hisock_redirect(struct sk_buff *skb) +static int do_hisock_ingress_redirect(struct sk_buff *skb) { const struct iphdr *iph; u32 len; @@ -5126,7 +5126,7 @@ static int generic_xdp_hisock_redirect(struct sk_buff *skb) if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto free_skb; - len = ntohs(iph->tot_len); + len = iph_totlen(skb, iph); if (skb->len < len || len < (iph->ihl * 4)) goto free_skb; @@ -5136,9 +5136,10 @@ static int generic_xdp_hisock_redirect(struct sk_buff *skb) iph = ip_hdr(skb); skb->transport_header = skb->network_header + iph->ihl * 4; - skb_orphan(skb); + if (!skb_sk_is_prefetched(skb)) + skb_orphan(skb); - if (!skb_valid_dst(skb)) { + if (unlikely(!skb_valid_dst(skb))) { if (ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos, skb->dev)) goto free_skb; @@ -5155,7 +5156,7 @@ static int generic_xdp_hisock_redirect(struct sk_buff *skb) free_skb: 
kfree_skb(skb); out: - return -EFAULT; + return NET_RX_DROP; } #endif @@ -5184,7 +5185,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) break; #ifdef CONFIG_HISOCK case XDP_HISOCK_REDIRECT: - err = generic_xdp_hisock_redirect(skb); + err = do_hisock_ingress_redirect(skb); if (err == -EOPNOTSUPP) return XDP_PASS; break; @@ -5563,6 +5564,31 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, } skip_taps: +#ifdef CONFIG_HISOCK + if (static_branch_unlikely(&hisock_ingress_key)) { + int act; + + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + + act = hisock_ingress_bpf_run(rcu_dereference(skb->dev->hisock_ingress), skb); + switch (act) { + case HISOCK_PASS: + break; + case HISOCK_REDIRECT: + ret = do_hisock_ingress_redirect(skb); + if (ret != -EOPNOTSUPP) + goto out; + break; + case HISOCK_DROP: + default: + ret = NET_RX_DROP; + goto out; + } + } +#endif #ifdef CONFIG_NET_INGRESS if (static_branch_unlikely(&ingress_needed_key)) { bool another = false; diff --git a/net/core/filter.c b/net/core/filter.c index 00037445a6c6..acf1d574c3cb 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8792,12 +8792,25 @@ static bool hisock_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - switch (off) { - case bpf_ctx_range(struct __sk_buff, tc_classid): - case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, tstamp): - case bpf_ctx_range(struct __sk_buff, wire_len): + if (type == BPF_WRITE) return false; + + if (prog->expected_attach_type == BPF_HISOCK_EGRESS) { + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): + return false; + } + } else if (prog->expected_attach_type == BPF_HISOCK_INGRESS) { + switch (off) { + case bpf_ctx_range_till(struct 
__sk_buff, mark, queue_mapping): + case bpf_ctx_range(struct __sk_buff, priority): + case bpf_ctx_range_till(struct __sk_buff, tc_index, tc_classid): + case bpf_ctx_range_till(struct __sk_buff, napi_id, gso_size): + return false; + } } switch (off) { @@ -11274,6 +11287,52 @@ const struct bpf_prog_ops flow_dissector_prog_ops = { .test_run = bpf_prog_test_run_flow_dissector, }; +#ifdef CONFIG_HISOCK +DEFINE_STATIC_KEY_FALSE(hisock_ingress_key); + +static int hisock_ingress_prog_install(const union bpf_attr *attr, struct bpf_prog *new) +{ + struct net *net = current->nsproxy->net_ns; + struct net_device *dev; + struct bpf_prog *old; + int ret = 0; + + if (attr->attach_type != BPF_HISOCK_INGRESS) + return -EINVAL; + + rtnl_lock(); + dev = __dev_get_by_index(net, attr->target_fd); + if (!dev) { + ret = -ENODEV; + goto out; + } + + old = rtnl_dereference(dev->hisock_ingress); + rcu_assign_pointer(dev->hisock_ingress, new); + + if (new && !old) + static_branch_inc(&hisock_ingress_key); + else if (!new && old) + static_branch_dec(&hisock_ingress_key); + + if (old) + bpf_prog_put(old); +out: + rtnl_unlock(); + return ret; +} + +int hisock_ingress_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return hisock_ingress_prog_install(attr, prog); +} + +int hisock_ingress_prog_detach(const union bpf_attr *attr) +{ + return hisock_ingress_prog_install(attr, NULL); +} +#endif + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 5e631ae65920..4036c80105a2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1064,6 +1064,7 @@ enum bpf_attach_type { BPF_SCHED, #ifndef __GENKSYMS__ BPF_HISOCK_EGRESS, + BPF_HISOCK_INGRESS, #endif __MAX_BPF_ATTACH_TYPE }; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 5f268feb3075..5cda26728481 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -124,6 +124,7 @@ static const char 
* const attach_type_name[] = { [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", [BPF_SCHED] = "sched", [BPF_HISOCK_EGRESS] = "hisock_egress", + [BPF_HISOCK_INGRESS] = "hisock_ingress", }; static const char * const link_type_name[] = { @@ -8879,7 +8880,8 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), SEC_DEF("sched/", SCHED, BPF_SCHED, SEC_ATTACH_BTF, attach_sched), - SEC_DEF("hisock_egress", HISOCK, BPF_HISOCK_EGRESS, SEC_ATTACHABLE_OPT), + SEC_DEF("hisock_egress", HISOCK, BPF_HISOCK_EGRESS, SEC_ATTACHABLE), + SEC_DEF("hisock_ingress", HISOCK, BPF_HISOCK_INGRESS, SEC_ATTACHABLE), }; int libbpf_register_prog_handler(const char *sec, -- 2.34.1