hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8480 -------------------------------- Add BPF_HISOCK_INGRESS attach type, which will replace the XDP_HISOCK_REDIRECT action to redirect target receive skb to TCP. Signed-off-by: Pu Lehui <pulehui@huawei.com> --- include/linux/filter.h | 24 ++++++++++++ include/linux/netdevice.h | 4 ++ include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 33 +++++++++++++--- net/Kconfig | 1 + net/core/dev.c | 36 +++++++++++++++--- net/core/filter.c | 69 +++++++++++++++++++++++++++++++--- tools/bpf/bpftool/common.c | 1 + tools/include/uapi/linux/bpf.h | 1 + tools/lib/bpf/libbpf.c | 4 +- 10 files changed, 157 insertions(+), 17 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 3c43603bfb62..5b9f8648b1ab 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1533,4 +1533,28 @@ static inline void run_gnet_bpf(enum gnet_bpf_attach_type atype, #endif +#ifdef CONFIG_HISOCK +DECLARE_STATIC_KEY_FALSE(hisock_ingress_key); + +int hisock_ingress_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog); +int hisock_ingress_prog_detach(const union bpf_attr *attr); + +static inline int hisock_ingress_bpf_run(struct bpf_prog *prog, struct sk_buff *skb) +{ + void *saved_data_end; + int ret; + + if (unlikely(!prog)) + return HISOCK_PASS; + + rcu_read_lock(); + bpf_compute_and_save_data_end(skb, &saved_data_end); + ret = bpf_prog_run_pin_on_cpu(prog, skb); + bpf_restore_data_end(skb, saved_data_end); + rcu_read_unlock(); + + return ret; +} +#endif + #endif /* __LINUX_FILTER_H__ */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 598f96f82874..5fd82c8e2b89 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2266,7 +2266,11 @@ struct net_device { /* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; +#ifdef CONFIG_HISOCK + KABI_USE(1, struct bpf_prog __rcu *hisock_ingress) +#else KABI_RESERVE(1) 
+#endif KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a83e3fac7e01..faf056fccf13 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -252,6 +252,7 @@ enum bpf_attach_type { BPF_GNET_RCV_NIC_NODE, BPF_GNET_SEND_NIC_NODE, BPF_HISOCK_EGRESS, + BPF_HISOCK_INGRESS, #endif __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 72ba7f768280..8b54354c0ce0 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2089,6 +2089,16 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, if (expected_attach_type == BPF_SK_LOOKUP) return 0; return -EINVAL; +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + switch (expected_attach_type) { + case BPF_HISOCK_EGRESS: + case BPF_HISOCK_INGRESS: + return 0; + default: + return -EINVAL; + } +#endif case BPF_PROG_TYPE_EXT: if (expected_attach_type) return -EINVAL; @@ -3013,6 +3023,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) return BPF_PROG_TYPE_CGROUP_SOCKOPT; #ifdef CONFIG_HISOCK case BPF_HISOCK_EGRESS: + case BPF_HISOCK_INGRESS: return BPF_PROG_TYPE_HISOCK; #endif case BPF_TRACE_ITER: @@ -3111,15 +3122,20 @@ static int bpf_prog_attach(const union bpf_attr *attr) case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: -#ifdef CONFIG_HISOCK - case BPF_PROG_TYPE_HISOCK: -#endif ret = cgroup_bpf_prog_attach(attr, ptype, prog); break; #ifdef CONFIG_BPF_NET_GLOBAL_PROG case BPF_PROG_TYPE_NET_GLOBAL: ret = gnet_bpf_prog_attach(attr, ptype, prog); break; +#endif +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + if (attr->attach_type == BPF_HISOCK_EGRESS) + ret = cgroup_bpf_prog_attach(attr, ptype, prog); + else if (attr->attach_type == BPF_HISOCK_INGRESS) + ret = hisock_ingress_prog_attach(attr, prog); + break; #endif default: ret = -EINVAL; @@ -3156,13 +3172,18 @@ static int bpf_prog_detach(const union bpf_attr *attr) case 
BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: -#ifdef CONFIG_HISOCK - case BPF_PROG_TYPE_HISOCK: -#endif return cgroup_bpf_prog_detach(attr, ptype); #ifdef CONFIG_BPF_NET_GLOBAL_PROG case BPF_PROG_TYPE_NET_GLOBAL: return gnet_bpf_prog_detach(attr, ptype); +#endif +#ifdef CONFIG_HISOCK + case BPF_PROG_TYPE_HISOCK: + if (attr->attach_type == BPF_HISOCK_EGRESS) + return cgroup_bpf_prog_detach(attr, ptype); + else if (attr->attach_type == BPF_HISOCK_INGRESS) + return hisock_ingress_prog_detach(attr); + return -EINVAL; #endif default: return -EINVAL; diff --git a/net/Kconfig b/net/Kconfig index d6d925d0712f..1e7f8a5b9008 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -332,6 +332,7 @@ config EULER_SOCKETMAP config HISOCK bool "enable HiSock Redirect Framework" depends on INET + depends on ARM64 depends on CGROUP_BPF depends on BPF_SYSCALL default n diff --git a/net/core/dev.c b/net/core/dev.c index 372bab73b8bf..27cb74f9ea32 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5019,7 +5019,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) } #ifdef CONFIG_HISOCK -static int generic_xdp_hisock_redirect(struct sk_buff *skb) +static int do_hisock_ingress_redirect(struct sk_buff *skb) { const struct iphdr *iph; u32 len; @@ -5053,9 +5053,10 @@ static int generic_xdp_hisock_redirect(struct sk_buff *skb) iph = ip_hdr(skb); skb->transport_header = skb->network_header + iph->ihl * 4; - skb_orphan(skb); + if (!skb_sk_is_prefetched(skb)) + skb_orphan(skb); - if (!skb_valid_dst(skb)) { + if (unlikely(!skb_valid_dst(skb))) { if (ip_route_input_noref(skb, iph->daddr, iph->saddr, iph->tos, skb->dev)) goto free_skb; @@ -5072,7 +5073,7 @@ static int generic_xdp_hisock_redirect(struct sk_buff *skb) free_skb: kfree_skb(skb); out: - return -EFAULT; + return NET_RX_DROP; } #endif @@ -5101,7 +5102,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) break; #ifdef CONFIG_HISOCK case XDP_HISOCK_REDIRECT: - 
err = generic_xdp_hisock_redirect(skb); + err = do_hisock_ingress_redirect(skb); if (err == -EOPNOTSUPP) return XDP_PASS; break; @@ -5540,6 +5541,31 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, } skip_taps: +#ifdef CONFIG_HISOCK + if (static_branch_unlikely(&hisock_ingress_key)) { + int act; + + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + + act = hisock_ingress_bpf_run(rcu_dereference(skb->dev->hisock_ingress), skb); + switch (act) { + case HISOCK_PASS: + break; + case HISOCK_REDIRECT: + ret = do_hisock_ingress_redirect(skb); + if (ret != -EOPNOTSUPP) + goto out; + break; + case HISOCK_DROP: + default: + ret = NET_RX_DROP; + goto out; + } + } +#endif #ifdef CONFIG_NET_INGRESS if (static_branch_unlikely(&ingress_needed_key)) { bool another = false; diff --git a/net/core/filter.c b/net/core/filter.c index adacca9ee505..cec7eace2d5c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7984,12 +7984,25 @@ static bool hisock_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { - switch (off) { - case bpf_ctx_range(struct __sk_buff, tc_classid): - case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range(struct __sk_buff, tstamp): - case bpf_ctx_range(struct __sk_buff, wire_len): + if (type == BPF_WRITE) return false; + + if (prog->expected_attach_type == BPF_HISOCK_EGRESS) { + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + case bpf_ctx_range(struct __sk_buff, tstamp): + case bpf_ctx_range(struct __sk_buff, wire_len): + return false; + } + } else if (prog->expected_attach_type == BPF_HISOCK_INGRESS) { + switch (off) { + case bpf_ctx_range_till(struct __sk_buff, mark, queue_mapping): + case bpf_ctx_range(struct __sk_buff, priority): + case bpf_ctx_range_till(struct __sk_buff, tc_index, tc_classid): + case bpf_ctx_range_till(struct __sk_buff, napi_id, gso_size): + 
return false; + } } switch (off) { @@ -10237,6 +10250,52 @@ const struct bpf_prog_ops flow_dissector_prog_ops = { .test_run = bpf_prog_test_run_flow_dissector, }; +#ifdef CONFIG_HISOCK +DEFINE_STATIC_KEY_FALSE(hisock_ingress_key); + +static int hisock_ingress_prog_install(const union bpf_attr *attr, struct bpf_prog *new) +{ + struct net *net = current->nsproxy->net_ns; + struct net_device *dev; + struct bpf_prog *old; + int ret = 0; + + if (attr->attach_type != BPF_HISOCK_INGRESS) + return -EINVAL; + + rtnl_lock(); + dev = __dev_get_by_index(net, attr->target_fd); + if (!dev) { + ret = -ENODEV; + goto out; + } + + old = rtnl_dereference(dev->hisock_ingress); + rcu_assign_pointer(dev->hisock_ingress, new); + + if (new && !old) + static_branch_inc(&hisock_ingress_key); + else if (!new && old) + static_branch_dec(&hisock_ingress_key); + + if (old) + bpf_prog_put(old); +out: + rtnl_unlock(); + return ret; +} + +int hisock_ingress_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog) +{ + return hisock_ingress_prog_install(attr, prog); +} + +int hisock_ingress_prog_detach(const union bpf_attr *attr) +{ + return hisock_ingress_prog_install(attr, NULL); +} +#endif + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index c16dc2ba847e..52c9e84c05af 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -68,6 +68,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { [BPF_SK_LOOKUP] = "sk_lookup", [BPF_SCHED] = "sched", [BPF_HISOCK_EGRESS] = "hisock_egress", + [BPF_HISOCK_INGRESS] = "hisock_ingress", }; void p_err(const char *fmt, ...) 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index cab6526158fd..3501e5461a0a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -252,6 +252,7 @@ enum bpf_attach_type { BPF_GNET_RCV_NIC_NODE, BPF_GNET_SEND_NIC_NODE, BPF_HISOCK_EGRESS, + BPF_HISOCK_INGRESS, #endif __MAX_BPF_ATTACH_TYPE }; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3eaf5c353008..3293f87e3a65 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8502,8 +8502,10 @@ static const struct bpf_sec_def section_defs[] = { BPF_GNET_RCV_NIC_NODE), BPF_EAPROG_SEC("gnet/send_nic_node", BPF_PROG_TYPE_NET_GLOBAL, BPF_GNET_SEND_NIC_NODE), - BPF_APROG_SEC("hisock_egress", BPF_PROG_TYPE_HISOCK, + BPF_EAPROG_SEC("hisock_egress", BPF_PROG_TYPE_HISOCK, BPF_HISOCK_EGRESS), + BPF_EAPROG_SEC("hisock_ingress", BPF_PROG_TYPE_HISOCK, + BPF_HISOCK_INGRESS), }; #undef BPF_PROG_SEC_IMPL -- 2.34.1