add redis sockmap sample code.
Liu Jian (4): cgroup: make cgroup_bpf_prog_attach work when cgroup2 is not mounted net: let sockops can use bpf_get_current_comm() net: add local_skb parameter to identify local tcp connection tools: add sample sockmap code for redis
include/linux/cgroup.h | 1 + include/linux/filter.h | 3 + include/linux/skbuff.h | 5 + include/uapi/linux/bpf.h | 1 + kernel/bpf/cgroup.c | 8 +- kernel/cgroup/cgroup.c | 24 +++ net/Kconfig | 5 + net/core/filter.c | 18 ++ net/ipv4/tcp_input.c | 6 +- net/ipv4/tcp_output.c | 9 + tools/include/uapi/linux/bpf.h | 1 + tools/netacc/Makefile | 24 +++ tools/netacc/bpf_sockmap.h | 167 +++++++++++++++++++ tools/netacc/net-acc | 35 ++++ tools/netacc/netacc.c | 296 +++++++++++++++++++++++++++++++++ tools/netacc/netaccsockmap.c | 160 ++++++++++++++++++ 16 files changed, 758 insertions(+), 5 deletions(-) create mode 100644 tools/netacc/Makefile create mode 100644 tools/netacc/bpf_sockmap.h create mode 100755 tools/netacc/net-acc create mode 100644 tools/netacc/netacc.c create mode 100644 tools/netacc/netaccsockmap.c
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3438 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Q...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3438 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Q...
From: Liu Jian liujian56@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8J90J
----------------------------------------------------
BPF_PROG_TYPE_CGROUP* BPF programs are associated with cgroup2. With this change, if cgroup2 is not mounted, the BPF program is associated with cgrp_dfl_root.cgrp by default.
A cgroup v1 mount point can then be used as the attach target, for example: bpftool cgroup attach /sys/fs/cgroup/cpu sock_ops pinned /sys/fs/bpf/xxx
Signed-off-by: Liu Jian liujian56@huawei.com Signed-off-by: Zhengchao Shao shaozhengchao@huawei.com --- include/linux/cgroup.h | 1 + kernel/bpf/cgroup.c | 8 ++++---- kernel/cgroup/cgroup.c | 24 ++++++++++++++++++++++++ net/Kconfig | 5 +++++ 4 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b307013b9c6c..8b37ad6ddb46 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -107,6 +107,7 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
struct cgroup *cgroup_get_from_path(const char *path); struct cgroup *cgroup_get_from_fd(int fd); +struct cgroup *cgroup_get_from_fd_v2(int fd); struct cgroup *cgroup_v1v2_get_from_fd(int fd);
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 03b3d4492980..8591c1da3a21 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1134,7 +1134,7 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr, struct cgroup *cgrp; int ret;
- cgrp = cgroup_get_from_fd(attr->target_fd); + cgrp = cgroup_get_from_fd_v2(attr->target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp);
@@ -1162,7 +1162,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) struct cgroup *cgrp; int ret;
- cgrp = cgroup_get_from_fd(attr->target_fd); + cgrp = cgroup_get_from_fd_v2(attr->target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp);
@@ -1281,7 +1281,7 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (attr->link_create.flags) return -EINVAL;
- cgrp = cgroup_get_from_fd(attr->link_create.target_fd); + cgrp = cgroup_get_from_fd_v2(attr->link_create.target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp);
@@ -1321,7 +1321,7 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, struct cgroup *cgrp; int ret;
- cgrp = cgroup_get_from_fd(attr->query.target_fd); + cgrp = cgroup_get_from_fd_v2(attr->query.target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 518725b57200..972a4d780a1b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6887,6 +6887,30 @@ struct cgroup *cgroup_get_from_fd(int fd) } EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
+/** + * same with cgroup_get_from_fd, only add cgrp_dfl_visible check + */ +struct cgroup *cgroup_get_from_fd_v2(int fd) +{ +#if IS_ENABLED(CONFIG_NETACC_BPF) + struct cgroup *cgrp = cgroup_v1v2_get_from_fd(fd); + + if (IS_ERR(cgrp)) + return ERR_CAST(cgrp); + + if (!cgroup_on_dfl(cgrp)) { + cgroup_put(cgrp); + + cgrp = &cgrp_dfl_root.cgrp; + cgroup_get(cgrp); + } +#else + struct cgroup *cgrp = cgroup_get_from_fd(fd); +#endif + return cgrp; +} +EXPORT_SYMBOL_GPL(cgroup_get_from_fd_v2); + static u64 power_of_ten(int power) { u64 v = 1; diff --git a/net/Kconfig b/net/Kconfig index d532ec33f1fe..1becb9b1470b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -508,4 +508,9 @@ config NETDEV_ADDR_LIST_TEST default KUNIT_ALL_TESTS depends on KUNIT
+config NETACC_BPF + bool "Network acceleration" + default y + help + Network acceleration in bpf. endif # if NET
From: Liu Jian liujian56@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8J90J
----------------------------------------------------
Allow sockops programs to call bpf_get_current_comm().
Signed-off-by: Liu Jian liujian56@huawei.com Signed-off-by: Zhengchao Shao shaozhengchao@huawei.com --- net/core/filter.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/net/core/filter.c b/net/core/filter.c index a094694899c9..d5a22975a885 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8214,6 +8214,10 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; #endif /* CONFIG_INET */ +#if IS_ENABLED(CONFIG_NETACC_BPF) + case BPF_FUNC_get_current_comm: + return &bpf_get_current_comm_proto; +#endif default: return bpf_sk_base_func_proto(func_id); }
From: Liu Jian liujian56@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8J90J
----------------------------------------------------
Add a local_skb field to struct sk_buff to identify local TCP connections. Currently, this field is consumed only in the BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB and BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB callbacks, where it lets the sockops BPF program check whether the current connection is a local connection. Updating local_skb only when the ACK packet is sent is sufficient for this feature to work.
Signed-off-by: Liu Jian liujian56@huawei.com
Conflicts: include/linux/skbuff.h include/uapi/linux/bpf.h net/core/filter.c tools/include/uapi/linux/bpf.h
Signed-off-by: Zhengchao Shao shaozhengchao@huawei.com --- include/linux/filter.h | 3 +++ include/linux/skbuff.h | 5 +++++ include/uapi/linux/bpf.h | 1 + net/core/filter.c | 14 ++++++++++++++ net/ipv4/tcp_input.c | 6 +++++- net/ipv4/tcp_output.c | 9 +++++++++ tools/include/uapi/linux/bpf.h | 1 + 7 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h index 761af6b3cf2b..95b8b5f15767 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1300,6 +1300,9 @@ struct bpf_sock_ops_kern { u8 op; u8 is_fullsock; u8 remaining_opt_len; +#if IS_ENABLED(CONFIG_NETACC_BPF) + u8 local_skb; +#endif u64 temp; /* temp and everything after is not * initialized to 0 before calling * the BPF program. New fields that diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 97bfef071255..e4524542a18f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1046,6 +1046,11 @@ struct sk_buff { u64 kcov_handle; #endif
+#if IS_ENABLED(CONFIG_NETACC_BPF) + __u8 local_skb; + __u8 pad0; + __u16 pad1; +#endif ); /* end headers group */
/* These elements must be at the end, see alloc_skb() for details. */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0448700890f7..4924f0cde1bc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6666,6 +6666,7 @@ struct bpf_sock_ops { * been written yet. */ __u64 skb_hwtstamp; + __u32 local_skb; };
/* Definitions for bpf_sock_ops_cb_flags */ diff --git a/net/core/filter.c b/net/core/filter.c index d5a22975a885..efe4254c53d9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -9054,6 +9054,11 @@ static bool sock_ops_is_valid_access(int off, int size, if (off % size != 0) return false;
+#if !(IS_ENABLED(CONFIG_NETACC_BPF)) + if (off == offsetof(struct bpf_sock_ops, local_skb)) + return false; +#endif + if (type == BPF_WRITE) { switch (off) { case offsetof(struct bpf_sock_ops, reply): @@ -10593,6 +10598,15 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, insn - jmp_on_null_skb - 1); break; } +#if IS_ENABLED(CONFIG_NETACC_BPF) + case offsetof(struct bpf_sock_ops, local_skb): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern, + local_skb), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, + local_skb)); + break; +#endif } return insn - insn_buf; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1f9d1d445fb3..fd5c13c1fbc8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -186,8 +186,12 @@ static void bpf_skops_established(struct sock *sk, int bpf_op, sock_ops.is_fullsock = 1; sock_ops.sk = sk; /* sk with TCP_REPAIR_ON does not have skb in tcp_finish_connect */ - if (skb) + if (skb) { bpf_skops_init_skb(&sock_ops, skb, tcp_hdrlen(skb)); +#if IS_ENABLED(CONFIG_NETACC_BPF) + sock_ops.local_skb = skb->local_skb; +#endif + }
BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9ccfdc825004..1917c62ad3bf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3725,6 +3725,9 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, rcu_read_unlock(); #endif
+#if IS_ENABLED(CONFIG_NETACC_BPF) + skb->local_skb = 1; +#endif bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb, synack_type, &opts);
@@ -3965,6 +3968,9 @@ int tcp_connect(struct sock *sk) if (unlikely(!buff)) return -ENOBUFS;
+#if IS_ENABLED(CONFIG_NETACC_BPF) + buff->local_skb = 1; +#endif tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); tcp_mstamp_refresh(tp); tp->retrans_stamp = tcp_time_stamp(tp); @@ -4083,6 +4089,9 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
/* Reserve space for headers and prepare control bits. */ skb_reserve(buff, MAX_TCP_HEADER); +#if IS_ENABLED(CONFIG_NETACC_BPF) + buff->local_skb = 1; +#endif tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
/* We do not want pure acks influencing TCP Small Queues or fq/pacing diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0448700890f7..4924f0cde1bc 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6666,6 +6666,7 @@ struct bpf_sock_ops { * been written yet. */ __u64 skb_hwtstamp; + __u32 local_skb; };
/* Definitions for bpf_sock_ops_cb_flags */
From: Liu Jian liujian56@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8J90J
----------------------------------------------------
add sample sockmap code for redis
Signed-off-by: Liu Jian liujian56@huawei.com Signed-off-by: Zhengchao Shao shaozhengchao@huawei.com --- tools/netacc/Makefile | 24 +++ tools/netacc/bpf_sockmap.h | 167 ++++++++++++++++++++ tools/netacc/net-acc | 35 +++++ tools/netacc/netacc.c | 296 +++++++++++++++++++++++++++++++++++ tools/netacc/netaccsockmap.c | 160 +++++++++++++++++++ 5 files changed, 682 insertions(+) create mode 100644 tools/netacc/Makefile create mode 100644 tools/netacc/bpf_sockmap.h create mode 100755 tools/netacc/net-acc create mode 100644 tools/netacc/netacc.c create mode 100644 tools/netacc/netaccsockmap.c
diff --git a/tools/netacc/Makefile b/tools/netacc/Makefile new file mode 100644 index 000000000000..20aa35d97551 --- /dev/null +++ b/tools/netacc/Makefile @@ -0,0 +1,24 @@ +# SPDX-License-Identifier: GPL-2.0 + +INSTALL ?= install +CLANG ?= clang +CC ?= gcc +BPFTOOL ?= bpftool +TOPDIR ?= ../.. +MKFLAGS = -I$(TOPDIR)/tools/lib -I$(TOPDIR)/tools/include/uapi/ +LDLIBBPF = -L$(TOPDIR)/tools/lib/bpf/ -l:libbpf.a + +all: + $(CLANG) -O2 -g -Wall -target bpf $(MKFLAGS) -c netaccsockmap.c -o netaccsockmap.o + $(BPFTOOL) gen skeleton netaccsockmap.o > netaccsockmap.skel.h + $(CC) -O2 -g -Wall $(MKFLAGS) netacc.c -o netacc $(LDLIBBPF) -lelf -lz + +clean: + rm -f netacc + rm -f netaccsockmap.skel.h + rm -f *.o + +install: + mkdir -p $(INSTALL_ROOT)/usr/sbin/tuned_acc/ + $(INSTALL) -m 755 net-acc $(INSTALL_ROOT)/usr/sbin/ + $(INSTALL) -m 755 netacc $(INSTALL_ROOT)/usr/sbin/tuned_acc/ diff --git a/tools/netacc/bpf_sockmap.h b/tools/netacc/bpf_sockmap.h new file mode 100644 index 000000000000..44dd3fee9ac3 --- /dev/null +++ b/tools/netacc/bpf_sockmap.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2023 Huawei Technologies Co., Ltd + */ + +#ifndef __BPF_SOCKMAP_H__ +#define __BPF_SOCKMAP_H__ + +#include <stddef.h> +#include <stdbool.h> +#include <linux/types.h> +#include <linux/bpf.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define LOG_DEBUG 0 +#define SOCKMAP_SIZE 100000 + +#if LOG_DEBUG +#define net_dbg bpf_printk +#define net_err bpf_printk +#else +#define net_dbg(fmt, ...) 
do {} while (0) +#define net_err bpf_printk +#endif + +/* Unless otherwise specified, change ipaddr to network byte order */ +struct sock_key { + __u32 sip4; + __u32 dip4; + __u32 sport; + __u32 dport; + __u64 netns_cookie; +} __attribute__((packed)); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __type(key, struct sock_key); + __type(value, int); + __uint(max_entries, SOCKMAP_SIZE); + __uint(map_flags, 0); +} netaccsock_map SEC(".maps"); + +struct sock_info { + __u64 redir_rx_cnt; + __u64 redir_tx_cnt; + int sk_flags; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct sock_key); + __type(value, struct sock_info); + __uint(max_entries, SOCKMAP_SIZE); + __uint(map_flags, 0); +} sockflag_map SEC(".maps"); + +/* in network byte order */ +#define IS_LOOPBACK(a) ((((__u32) (a)) & 0x000000ff) == 0x0000007f) +#define IS_NOT_LOOPBACK(a) ((((__u32) (a)) & 0x000000ff) != 0x0000007f) + +static inline void sock_key_add_netnsinfo(void *const ctx, struct sock_key *key) +{ + if (IS_NOT_LOOPBACK(key->sip4) || IS_NOT_LOOPBACK(key->dip4)) + key->netns_cookie = 0; + else + key->netns_cookie = bpf_get_netns_cookie(ctx); +} + +static inline void sock_key2peerkey(struct sock_key *key, struct sock_key *peer_key) +{ + peer_key->sip4 = key->dip4; + peer_key->sport = key->dport; + peer_key->dip4 = key->sip4; + peer_key->dport = key->sport; +} + +static inline void extract_key4_from_ops(struct bpf_sock_ops *ops, struct sock_key *key) +{ + key->dip4 = ops->remote_ip4; + key->sip4 = ops->local_ip4; + + // local_port is in host byte order + // and remote_port is in network byte order + key->sport = ops->local_port; + key->dport = bpf_ntohl(ops->remote_port); +} + +static inline void bpf_sock_ops_ipv4(struct bpf_sock_ops *skops) +{ + struct sock_key key = {}; + + extract_key4_from_ops(skops, &key); + sock_key_add_netnsinfo(skops, &key); + + bpf_sock_hash_update(skops, &netaccsock_map, &key, BPF_NOEXIST); +} + +static inline void bpf_sockmap_ipv4_insert(struct 
bpf_sock_ops *skops) +{ + if (bpf_ntohl(skops->remote_port) == 22 || skops->local_port == 22) + return; + + bpf_sock_ops_ipv4(skops); +} + +static inline void bpf_sockmap_ipv4_cleanup(struct bpf_sock_ops *skops, __u64 *cnt) +{ + struct sock_info *p_skinfo = NULL; + struct sock_key key = {}; + + extract_key4_from_ops(skops, &key); + sock_key_add_netnsinfo(skops, &key); + p_skinfo = bpf_map_lookup_elem(&sockflag_map, &key); + if (p_skinfo) { + if (cnt) + *cnt = p_skinfo->redir_tx_cnt; + bpf_map_delete_elem(&sockflag_map, &key); + } +} + +static inline void extract_key4_from_msg(struct sk_msg_md *msg, struct sock_key *key) +{ + key->sip4 = msg->local_ip4; + key->dip4 = msg->remote_ip4; + + // local_port is in host byte order + // and remote_port is in network byte order + key->sport = msg->local_port; + key->dport = bpf_ntohl(msg->remote_port); +} + +SEC("sk_msg") int netacc_redir(struct sk_msg_md *msg) +{ + struct sock_info *p_skinfo = NULL; + struct sock_info skinfo = {0}; + struct sock_key peer_key = {}; + struct sock_key key = {}; + int ret, addinfo = 0; + + extract_key4_from_msg(msg, &key); + sock_key_add_netnsinfo(msg, &key); + sock_key2peerkey(&key, &peer_key); + sock_key_add_netnsinfo(msg, &peer_key); + + p_skinfo = bpf_map_lookup_elem(&sockflag_map, &key); + if (p_skinfo != NULL && p_skinfo->sk_flags == 1) + return SK_PASS; + + if (p_skinfo == NULL) { + addinfo = 1; + p_skinfo = &skinfo; + } + + ret = bpf_msg_redirect_hash(msg, &netaccsock_map, &peer_key, BPF_F_INGRESS); + if (ret == SK_DROP) { + if (p_skinfo->sk_flags != 1) + p_skinfo->sk_flags = 1; + } + + p_skinfo->redir_tx_cnt++; + if (addinfo) + bpf_map_update_elem(&sockflag_map, &key, p_skinfo, BPF_ANY); + + return SK_PASS; +} +#endif diff --git a/tools/netacc/net-acc b/tools/netacc/net-acc new file mode 100755 index 000000000000..3b769e84168e --- /dev/null +++ b/tools/netacc/net-acc @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +function usage() { + echo "" + echo "Usage:" + echo 
" $0 [enable | disable]" + echo "" +} + +function get_cgrp_path() { + local CGRP=`mount | grep cgroup2 | head -n 1` + if [[ "$CGRP"X == "X" ]]; then + CGRP=`mount | grep cgroup | grep "net_cls,net_prio" | head -n 1` + fi + cgrp_path=`echo $CGRP | awk '{print $3}'` +} + +CMD=$1 + +get_cgrp_path +if [[ "$cgrp_path"X == "X" ]]; then + echo "Failed to obtain a valid cgroup mount point." + usage; + exit 1 +fi + +if [[ "$CMD"X == "enableX" ]]; then + /usr/sbin/tuned_acc/netacc enable ${cgrp_path} +elif [[ "$CMD"X == "disableX" ]]; then + /usr/sbin/tuned_acc/netacc disable ${cgrp_path} + exit 0 +else + usage; +fi diff --git a/tools/netacc/netacc.c b/tools/netacc/netacc.c new file mode 100644 index 000000000000..7c22490e2de4 --- /dev/null +++ b/tools/netacc/netacc.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2023 Huawei Technologies Co., Ltd + */ + +#include <argp.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/resource.h> +#include <time.h> +#include <unistd.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include <bpf/libbpf.h> +#include <bpf/bpf.h> +#include "netaccsockmap.skel.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#endif + +#define CG_PATH "/sys/fs/cgroup/unified" +#define PIN_PATH "/sys/fs/bpf/netacc/" + +static int bump_memlock_rlimit(void) +{ + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + return setrlimit(RLIMIT_MEMLOCK, &rlim_new); +} + +struct net_acc_prog_info { + const char *prog_name; + const char *pin_path; + void **prog; + int *fd; +}; + +struct net_acc_map_info { + const char *map_name; + char *pin_path; + void **map; + int *fd; +}; + +struct { + int netacc_sockops_fd; + int netacc_redir_fd; + int netaccsock_map_fd; +} net_acc_fds; + +struct { + void *netacc_sockops_obj; + void *netacc_redir_obj; + void *netaccsock_map_obj; +} net_acc_obj; + +static struct net_acc_prog_info prog_infos[] = { + { + .prog_name = 
"netacc_sockops", + .pin_path = PIN_PATH"sockops", + .prog = &net_acc_obj.netacc_sockops_obj, + .fd = &net_acc_fds.netacc_sockops_fd, + }, + { + .prog_name = "netacc_redir", + .pin_path = PIN_PATH"sk_msg", + .prog = &net_acc_obj.netacc_redir_obj, + .fd = &net_acc_fds.netacc_redir_fd, + } +}; + +static struct net_acc_map_info map_infos[] = { + { + .map_name = "netaccsock_map", + .pin_path = PIN_PATH"netaccsock_map", + .map = &net_acc_obj.netaccsock_map_obj, + .fd = &net_acc_fds.netaccsock_map_fd, + } +}; + +int cg_fd = -1; +struct netaccsockmap *skel; + +int net_acc_enabled(void) +{ + int map_fd; + + map_fd = bpf_obj_get(map_infos[0].pin_path); + if (map_fd < 0) + return 0; + + close(map_fd); + return 1; +} + +int pin_prog_map(void) +{ + int i, mapj, progj; + int err = 0; + + mapj = ARRAY_SIZE(map_infos); + for (i = 0; i < mapj; i++) { + if (*map_infos[i].map) + err = bpf_map__pin(*map_infos[i].map, map_infos[i].pin_path); + if (err) { + mapj = i; + goto err1; + } + } + + progj = ARRAY_SIZE(prog_infos); + for (i = 0; i < progj; i++) { + if (*prog_infos[i].prog) + err = bpf_program__pin(*prog_infos[i].prog, prog_infos[i].pin_path); + if (err) { + progj = i; + goto err2; + } + } + return 0; +err2: + for (i = 0; i < progj; i++) { + if (*prog_infos[i].prog) + bpf_program__unpin(*prog_infos[i].prog, prog_infos[i].pin_path); + } +err1: + for (i = 0; i < mapj; i++) { + if (*map_infos[i].map) + bpf_map__unpin(*map_infos[i].map, map_infos[i].pin_path); + } + return 1; +} + +int attach_manually(void) +{ + int err; + + err = bpf_prog_attach(bpf_program__fd(skel->progs.netacc_sockops), cg_fd, + BPF_CGROUP_SOCK_OPS, 0); + if (err) { + fprintf(stderr, "failed to attach sockops programs, %d\n", err); + return -1; + } + + err = bpf_prog_attach(bpf_program__fd(skel->progs.netacc_redir), + bpf_map__fd(skel->maps.netaccsock_map), BPF_SK_MSG_VERDICT, 0); + if (err) { + fprintf(stderr, "failed to attach msg_verdict programs, %d\n", err); + goto cleanup1; + } + + 
net_acc_obj.netacc_sockops_obj = skel->progs.netacc_sockops; + net_acc_obj.netacc_redir_obj = skel->progs.netacc_redir; + net_acc_obj.netaccsock_map_obj = skel->maps.netaccsock_map; + return 0; +cleanup1: + bpf_prog_detach2(bpf_program__fd(skel->progs.netacc_sockops), cg_fd, BPF_CGROUP_SOCK_OPS); + return -1; +} + +void detach_manually(void) +{ + int err; + + err = bpf_prog_detach2(bpf_program__fd(skel->progs.netacc_redir), + bpf_map__fd(skel->maps.netaccsock_map), BPF_SK_MSG_VERDICT); + if (err) + fprintf(stderr, "failed to detach msg_verdict programs, %d\n", err); + + err = bpf_prog_detach2(bpf_program__fd(skel->progs.netacc_sockops), cg_fd, + BPF_CGROUP_SOCK_OPS); + if (err) + fprintf(stderr, "failed to detach sockops programs, %d\n", err); +} + +int net_acc_enable(void) +{ + int err; + + if (net_acc_enabled()) + return 0; + + err = bump_memlock_rlimit(); + if (err) { + fprintf(stderr, "failed to increase rlimit: %d", err); + close(cg_fd); + return 1; + } + + skel = netaccsockmap__open(); + if (!skel) { + fprintf(stderr, "failed to open and/or load BPF object\n"); + return 1; + } + + err = netaccsockmap__load(skel); + if (err) { + fprintf(stderr, "failed to load BPF object: %d\n", err); + goto cleanup; + } + + err = netaccsockmap__attach(skel); + if (err) { + fprintf(stderr, "failed to attach BPF programs\n"); + goto cleanup; + } + + err = attach_manually(); + if (err) { + fprintf(stderr, "failed to attach BPF programs\n"); + goto cleanup; + } + + err = pin_prog_map(); + if (err) { + fprintf(stderr, "failed to pin BPF programs and maps\n"); + goto cleanup1; + } + + return 0; + +cleanup1: + detach_manually(); +cleanup: + netaccsockmap__destroy(skel); + close(cg_fd); + + return err != 0; +} + + +int net_acc_disable(void) +{ + int i, err; + + if (!net_acc_enabled()) + return 0; + + for (i = 0; i < ARRAY_SIZE(map_infos); i++) { + if (map_infos[i].fd) { + *map_infos[i].fd = bpf_obj_get(map_infos[i].pin_path); + unlink(map_infos[i].pin_path); + } + } + + for (i = 0; i 
< ARRAY_SIZE(prog_infos); i++) { + if (prog_infos[i].fd) { + *prog_infos[i].fd = bpf_obj_get(prog_infos[i].pin_path); + unlink(prog_infos[i].pin_path); + } + } + + err = bpf_prog_detach2(net_acc_fds.netacc_redir_fd, + net_acc_fds.netaccsock_map_fd, BPF_SK_MSG_VERDICT); + if (err) + fprintf(stderr, "failed to detach msg_verdict programs, %d\n", err); + err = bpf_prog_detach2(net_acc_fds.netacc_sockops_fd, cg_fd, BPF_CGROUP_SOCK_OPS); + if (err) + fprintf(stderr, "failed to detach msg_verdict programs, %d\n", err); + + close(net_acc_fds.netacc_redir_fd); + close(net_acc_fds.netacc_sockops_fd); + rmdir(PIN_PATH); + return 0; +} + +int main(int argc, char **argv) +{ + char *cgrp_path = CG_PATH; + int ret = 1; + + if (argc != 2 && argc != 3) + return 1; + + if (argc == 3) + cgrp_path = argv[2]; + + cg_fd = open(cgrp_path, O_DIRECTORY, O_RDONLY); + if (cg_fd < 0) { + fprintf(stderr, "ERROR: (%d) open cgroup path failed: %s\n", cg_fd, cgrp_path); + return 1; + } + + if (strncmp(argv[1], "enable", 6) == 0) + ret = net_acc_enable(); + else if (strncmp(argv[1], "disable", 7) == 0) + ret = net_acc_disable(); + + close(cg_fd); + return ret; +} diff --git a/tools/netacc/netaccsockmap.c b/tools/netacc/netaccsockmap.c new file mode 100644 index 000000000000..e20b2f87908f --- /dev/null +++ b/tools/netacc/netaccsockmap.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2023 Huawei Technologies Co., Ltd + */ + +#include "bpf_sockmap.h" + +#define NETACC_BIND_MAP_SIZE 100 + +#define CHECK_ACC_SOCK 1 + +struct ipaddr_port { + __u32 ip4; + __u32 port; +} __attribute__((packed)); + +#if CHECK_ACC_SOCK +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct ipaddr_port); + __type(value, int); + __uint(max_entries, NETACC_BIND_MAP_SIZE); + __uint(map_flags, 0); +} netacc_bind_map SEC(".maps"); + +static inline int __is_netacc_sock(struct ipaddr_port *key) +{ + int *pv = NULL; + + pv = bpf_map_lookup_elem(&netacc_bind_map, key); + if (pv) + return 1; + + 
return 0; +} + +static inline int is_netacc_sock(struct ipaddr_port *key1, struct ipaddr_port *key10) +{ + net_dbg("is_netacc, ip1:0x%x, port1:0x%x\n", key1->ip4, key1->port); + + if (__is_netacc_sock(key1)) + return 1; + + if (__is_netacc_sock(key10)) + return 1; + + return 0; +} + +static inline void extract_dst_ipaddrport_from_ops(struct bpf_sock_ops *skops, + struct ipaddr_port *key) +{ + if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) { + key->ip4 = skops->remote_ip4; + // remote_port is in network byte order + key->port = bpf_ntohl(skops->remote_port); + } else if (skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) { + key->ip4 = skops->local_ip4; + // local_port is in host byte order + key->port = skops->local_port; + } +} + +static inline int is_netacc_interested_tcp(struct bpf_sock_ops *skops) +{ + struct ipaddr_port key = {0}; + struct ipaddr_port key0; + + // only test server's port + extract_dst_ipaddrport_from_ops(skops, &key); + key0.ip4 = 0; + key0.port = key.port; + + if (!is_netacc_sock(&key, &key0)) + return 0; + net_dbg("this is netacc sock\n"); + + net_dbg("the sock is netacc loopback sock\n"); + return 1; +} + +static inline int update_netacc_info(struct bpf_sock_ops *skops) +{ + struct ipaddr_port key; + int value = 1; + char comm[16] = {0}; + + bpf_get_current_comm(comm, sizeof(comm)); + + if (bpf_strncmp(comm, 12, "redis-server")) + return 0; + + key.ip4 = skops->local_ip4; + key.port = skops->local_port; // host order + + bpf_map_update_elem(&netacc_bind_map, &key, &value, BPF_NOEXIST); + net_dbg("%s, update netaccinfo: sip:0x%x, sport:%d\n", comm, key.ip4, key.port); + return 1; +} + +static inline void clean_netacc_info(struct bpf_sock_ops *skops) +{ + struct ipaddr_port key; + + key.ip4 = skops->local_ip4; + key.port = skops->local_port; // host order + net_dbg("clean netaccinfo, 0x%x:%d\n", key.ip4, key.port); + bpf_map_delete_elem(&netacc_bind_map, &key); +} +#else +static inline int is_netacc_interested_tcp(struct bpf_sock_ops 
*skops) +{ + return 1; +} +static inline int update_netacc_info(struct bpf_sock_ops *skops) +{ + return 0; +} +static inline void clean_netacc_info(struct bpf_sock_ops *skops) +{} +#endif + +SEC("sockops") +int netacc_sockops(struct bpf_sock_ops *skops) +{ + switch (skops->op) { + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + if (skops->family == 2 && skops->local_skb) {// AF_INET + if (is_netacc_interested_tcp(skops)) { + net_dbg("bpf_sockops, sockmap, op:%d, sk:%p\n", + skops->op, skops->sk); + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + bpf_sockmap_ipv4_insert(skops); + } else { + bpf_sock_ops_cb_flags_set(skops, 0); + } + } + break; + case BPF_SOCK_OPS_STATE_CB: + if (skops->family == 2 && skops->args[0] == BPF_TCP_LISTEN && + skops->args[1] == BPF_TCP_CLOSE) { + clean_netacc_info(skops); + } else if (skops->family == 2 && (skops->args[1] == BPF_TCP_CLOSE || + skops->args[1] == BPF_TCP_CLOSE_WAIT || + skops->args[1] == BPF_TCP_FIN_WAIT1)) { + bpf_sockmap_ipv4_cleanup(skops, NULL); + } + break; + case BPF_SOCK_OPS_TCP_LISTEN_CB: + if (skops->family == 2 && update_netacc_info(skops)) + bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + break; + default: + break; + } + return 1; +} + +char _license[] SEC("license") = "GPL"; +int _version SEC("version") = 1;