From: Liu Jian liujian56@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8KU3B
CVE: NA
--------------------------------
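Add two new option names, BPF_SO_ORIGINAL_DST (800) and BPF_SO_REPLY_SRC (801), so that BPF sock_ops programs can query, through the getsockopt helper at level SOL_IP, the original destination address and the reply-direction source address recorded by conntrack for a socket. Only IPv4 (TCP and SCTP sockets) is supported for now.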
Signed-off-by: Wang Yufen wangyufen@huawei.com
Signed-off-by: Zhengchao Shao shaozhengchao@huawei.com
---
 include/net/netfilter/nf_conntrack.h | 6 +++
 include/uapi/linux/netfilter_ipv4.h | 2 +
 net/core/filter.c | 39 +++++++++++++++
 net/netfilter/nf_conntrack_proto.c | 71 ++++++++++++++++++++++++++++
 4 files changed, 118 insertions(+)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 4085765c3370..bb7fd376a8c5 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -377,4 +377,10 @@ int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb, #define MODULE_ALIAS_NFCT_HELPER(helper) \ MODULE_ALIAS("nfct-helper-" helper)
+#if IS_ENABLED(CONFIG_NETACC_TERRACE) +typedef int (*bpf_getorigdst_opt_func)(struct sock *sk, int optname, + void *optval, int *optlen, int dir); +extern bpf_getorigdst_opt_func bpf_getorigdst_opt; +#endif + #endif /* _NF_CONNTRACK_H */ diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h index 155e77d6a42d..00e78cc2782b 100644 --- a/include/uapi/linux/netfilter_ipv4.h +++ b/include/uapi/linux/netfilter_ipv4.h @@ -50,6 +50,8 @@ enum nf_ip_hook_priorities { /* 2.2 firewalling (+ masq) went from 64 through 76 */ /* 2.4 firewalling went 64 through 67. */ #define SO_ORIGINAL_DST 80 +#define BPF_SO_ORIGINAL_DST 800 +#define BPF_SO_REPLY_SRC 801
#endif /* _UAPI__LINUX_IP_NETFILTER_H */ diff --git a/net/core/filter.c b/net/core/filter.c index 3332aaac79a9..b5c9dad4fc6b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5577,6 +5577,12 @@ static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock, }
#if IS_ENABLED(CONFIG_NETACC_TERRACE) +#include <net/netfilter/nf_conntrack.h> +#include <linux/netfilter_ipv4.h> + +bpf_getorigdst_opt_func bpf_getorigdst_opt; +EXPORT_SYMBOL(bpf_getorigdst_opt); + static int bpf_sock_ops_get_uid_gid(struct bpf_sock_ops_kern *bpf_sock, char *optval, int optlen) { @@ -5595,6 +5601,36 @@ static int bpf_sock_ops_get_uid_gid(struct bpf_sock_ops_kern *bpf_sock,
return sizeof(u64); } + +static int bpf_sk_original_addr(struct bpf_sock_ops_kern *bpf_sock, + int optname, char *optval, int optlen) +{ + struct sock *sk = bpf_sock->sk; + int ret = -EINVAL; + + if (!sk_fullsock(sk)) + goto err_clear; + + if (!bpf_getorigdst_opt) + goto err_clear; + +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + if (optname == BPF_SO_ORIGINAL_DST) + ret = bpf_getorigdst_opt(sk, optname, optval, &optlen, + IP_CT_DIR_ORIGINAL); + else + ret = bpf_getorigdst_opt(sk, optname, optval, &optlen, + IP_CT_DIR_REPLY); + if (ret < 0) + goto err_clear; + return ret; +#endif + +err_clear: + memset(optval, 0, optlen); + return ret; +} + #endif
BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, @@ -5624,6 +5660,9 @@ BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, #if IS_ENABLED(CONFIG_NETACC_TERRACE) if (IS_ENABLED(CONFIG_INET) && optname == SK_BPF_GID_UID) return bpf_sock_ops_get_uid_gid(bpf_sock, optval, optlen); + else if (level == SOL_IP && (optname == BPF_SO_ORIGINAL_DST || + optname == BPF_SO_REPLY_SRC)) + return bpf_sk_original_addr(bpf_sock, optname, optval, optlen); #endif
return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index c928ff63b10e..37f1ae00497a 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -311,6 +311,69 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -ENOENT; }
+#if IS_ENABLED(CONFIG_NETACC_TERRACE) +static int +bpf_getorigdst_impl(struct sock *sk, int optval, void *user, int *len, int dir) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + + memset(&tuple, 0, sizeof(tuple)); + + tuple.src.u3.ip = inet->inet_rcv_saddr; + tuple.src.u.tcp.port = inet->inet_sport; + tuple.dst.u3.ip = inet->inet_daddr; + tuple.dst.u.tcp.port = inet->inet_dport; + tuple.src.l3num = PF_INET; + tuple.dst.protonum = sk->sk_protocol; + + /* We only do TCP and SCTP at the moment: is there a better way? */ + if (tuple.dst.protonum != IPPROTO_TCP && + tuple.dst.protonum != IPPROTO_SCTP) { + pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); + return -ENOPROTOOPT; + } + + if ((unsigned int)*len < sizeof(struct sockaddr_in)) { + pr_debug("SO_ORIGINAL_DST: len %d not %zu\n", + *len, sizeof(struct sockaddr_in)); + return -EINVAL; + } + + h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); + if (h) { + struct sockaddr_in sin; + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + + sin.sin_family = AF_INET; + if (dir == IP_CT_DIR_REPLY) { + sin.sin_port = ct->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.u.tcp.port; + sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.u3.ip; + } else { + sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.u.tcp.port; + sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.u3.ip; + } + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); + + pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", + &sin.sin_addr.s_addr, ntohs(sin.sin_port)); + nf_ct_put(ct); + + memcpy(user, &sin, sizeof(sin)); + return 0; + } + pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", + &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), + &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; +} +#endif + static struct nf_sockopt_ops so_getorigdst = { .pf = PF_INET, .get_optmin = SO_ORIGINAL_DST, @@ -655,6 +718,10 @@ int 
nf_conntrack_proto_init(void) goto cleanup_sockopt; #endif
+#if IS_ENABLED(CONFIG_NETACC_TERRACE) + bpf_getorigdst_opt = bpf_getorigdst_impl; +#endif + return ret;
#if IS_ENABLED(CONFIG_IPV6) @@ -666,6 +733,10 @@ int nf_conntrack_proto_init(void)
void nf_conntrack_proto_fini(void) { +#if IS_ENABLED(CONFIG_NETACC_TERRACE) + bpf_getorigdst_opt = NULL; +#endif + nf_unregister_sockopt(&so_getorigdst); #if IS_ENABLED(CONFIG_IPV6) nf_unregister_sockopt(&so_getorigdst6);