From: Liu Jian <liujian56@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8J90J
----------------------------------------------------
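Add sample sockmap code for redis. tools/netacc contains BPF sockops and
sk_msg programs plus a loader; connections to sockets that redis-server
listens on are added to a sockhash and their messages are redirected to
the peer socket.
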
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
---
 tools/netacc/Makefile        |  24 +++
 tools/netacc/bpf_sockmap.h   | 167 ++++++++++++++++++++
 tools/netacc/net-acc         |  45 ++++++
 tools/netacc/netacc.c        | 296 +++++++++++++++++++++++++++++++++++
 tools/netacc/netaccsockmap.c | 160 +++++++++++++++++++
 5 files changed, 692 insertions(+)
 create mode 100644 tools/netacc/Makefile
 create mode 100644 tools/netacc/bpf_sockmap.h
 create mode 100755 tools/netacc/net-acc
 create mode 100644 tools/netacc/netacc.c
 create mode 100644 tools/netacc/netaccsockmap.c

diff --git a/tools/netacc/Makefile b/tools/netacc/Makefile
new file mode 100644
index 000000000000..20aa35d97551
--- /dev/null
+++ b/tools/netacc/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+
+INSTALL ?= install
+CLANG ?= clang
+CC ?= gcc
+BPFTOOL ?= bpftool
+TOPDIR ?= ../..
+MKFLAGS = -I$(TOPDIR)/tools/lib -I$(TOPDIR)/tools/include/uapi/
+LDLIBBPF = -L$(TOPDIR)/tools/lib/bpf/ -l:libbpf.a
+
+.PHONY: all clean install
+
+# Build the BPF object, generate its skeleton header, then build the loader.
+all:
+	$(CLANG) -O2 -g -Wall -target bpf $(MKFLAGS) -c netaccsockmap.c -o netaccsockmap.o
+	$(BPFTOOL) gen skeleton netaccsockmap.o > netaccsockmap.skel.h
+	$(CC) -O2 -g -Wall $(MKFLAGS) netacc.c -o netacc $(LDLIBBPF) -lelf -lz
+
+clean:
+	rm -f netacc
+	rm -f netaccsockmap.skel.h
+	rm -f *.o
+
+# INSTALL_ROOT may be set to stage the installation under another root.
+install:
+	mkdir -p $(INSTALL_ROOT)/usr/sbin/tuned_acc/
+	$(INSTALL) -m 755 net-acc $(INSTALL_ROOT)/usr/sbin/
+	$(INSTALL) -m 755 netacc $(INSTALL_ROOT)/usr/sbin/tuned_acc/
diff --git a/tools/netacc/bpf_sockmap.h b/tools/netacc/bpf_sockmap.h
new file mode 100644
index 000000000000..44dd3fee9ac3
--- /dev/null
+++ b/tools/netacc/bpf_sockmap.h
@@ -0,0 +1,191 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Huawei Technologies Co., Ltd
+ */
+
+#ifndef __BPF_SOCKMAP_H__
+#define __BPF_SOCKMAP_H__
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define LOG_DEBUG 0
+#define SOCKMAP_SIZE 100000
+
+#if LOG_DEBUG
+#define net_dbg bpf_printk
+#define net_err bpf_printk
+#else
+#define net_dbg(fmt, ...) do {} while (0)
+#define net_err bpf_printk
+#endif
+
+/*
+ * Key identifying one direction of a TCP connection. IPv4 addresses are kept
+ * in network byte order; ports are converted to host byte order.
+ */
+struct sock_key {
+	__u32 sip4;
+	__u32 dip4;
+	__u32 sport;
+	__u32 dport;
+	__u64 netns_cookie;
+} __attribute__((packed));
+
+/* Sockets that may be redirected to, keyed by their own local/remote tuple. */
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__type(key, struct sock_key);
+	__type(value, int);
+	__uint(max_entries, SOCKMAP_SIZE);
+	__uint(map_flags, 0);
+} netaccsock_map SEC(".maps");
+
+/* Per-socket bookkeeping; netacc_redir() stops redirecting once sk_flags is 1. */
+struct sock_info {
+	__u64 redir_rx_cnt;
+	__u64 redir_tx_cnt;
+	int sk_flags;
+};
+
+/* sock_info entries created by netacc_redir(), keyed like netaccsock_map. */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, struct sock_key);
+	__type(value, struct sock_info);
+	__uint(max_entries, SOCKMAP_SIZE);
+	__uint(map_flags, 0);
+} sockflag_map SEC(".maps");
+
+/* Loopback test (first octet 127) on an address in network byte order. */
+#define IS_LOOPBACK(a) ((bpf_ntohl((__u32) (a)) >> 24) == 0x0000007f)
+#define IS_NOT_LOOPBACK(a) ((bpf_ntohl((__u32) (a)) >> 24) != 0x0000007f)
+
+/*
+ * Set the netns cookie of @key only when both addresses are loopback; every
+ * other key gets cookie 0.
+ */
+static inline void sock_key_add_netnsinfo(void *const ctx, struct sock_key *key)
+{
+	if (IS_NOT_LOOPBACK(key->sip4) || IS_NOT_LOOPBACK(key->dip4))
+		key->netns_cookie = 0;
+	else
+		key->netns_cookie = bpf_get_netns_cookie(ctx);
+}
+
+/* Fill @peer_key with @key's addresses and ports swapped; the cookie is untouched. */
+static inline void sock_key2peerkey(struct sock_key *key, struct sock_key *peer_key)
+{
+	peer_key->sip4 = key->dip4;
+	peer_key->sport = key->dport;
+	peer_key->dip4 = key->sip4;
+	peer_key->dport = key->sport;
+}
+
+/* Fill the address and port fields of @key from a sock_ops context. */
+static inline void extract_key4_from_ops(struct bpf_sock_ops *ops, struct sock_key *key)
+{
+	key->dip4 = ops->remote_ip4;
+	key->sip4 = ops->local_ip4;
+
+	// local_port is in host byte order
+	// and remote_port is in network byte order
+	key->sport = ops->local_port;
+	key->dport = bpf_ntohl(ops->remote_port);
+}
+
+/* Add the socket behind @skops to netaccsock_map unless the key already exists. */
+static inline void bpf_sock_ops_ipv4(struct bpf_sock_ops *skops)
+{
+	struct sock_key key = {};
+
+	extract_key4_from_ops(skops, &key);
+	sock_key_add_netnsinfo(skops, &key);
+
+	bpf_sock_hash_update(skops, &netaccsock_map, &key, BPF_NOEXIST);
+}
+
+/* Like bpf_sock_ops_ipv4(), but skips connections whose local or remote port is 22. */
+static inline void bpf_sockmap_ipv4_insert(struct bpf_sock_ops *skops)
+{
+	if (bpf_ntohl(skops->remote_port) == 22 || skops->local_port == 22)
+		return;
+
+	bpf_sock_ops_ipv4(skops);
+}
+
+/*
+ * Delete the sockflag_map entry of the socket behind @skops. If an entry
+ * existed and @cnt is not NULL, *@cnt receives its redir_tx_cnt.
+ */
+static inline void bpf_sockmap_ipv4_cleanup(struct bpf_sock_ops *skops, __u64 *cnt)
+{
+	struct sock_info *p_skinfo = NULL;
+	struct sock_key key = {};
+
+	extract_key4_from_ops(skops, &key);
+	sock_key_add_netnsinfo(skops, &key);
+	p_skinfo = bpf_map_lookup_elem(&sockflag_map, &key);
+	if (p_skinfo) {
+		if (cnt)
+			*cnt = p_skinfo->redir_tx_cnt;
+		bpf_map_delete_elem(&sockflag_map, &key);
+	}
+}
+
+/* Fill the address and port fields of @key from an sk_msg context. */
+static inline void extract_key4_from_msg(struct sk_msg_md *msg, struct sock_key *key)
+{
+	key->sip4 = msg->local_ip4;
+	key->dip4 = msg->remote_ip4;
+
+	// local_port is in host byte order
+	// and remote_port is in network byte order
+	key->sport = msg->local_port;
+	key->dport = bpf_ntohl(msg->remote_port);
+}
+
+/*
+ * sk_msg verdict program: ask for the message to be redirected to the ingress
+ * side of the peer socket looked up in netaccsock_map. Once a redirect request
+ * returns SK_DROP the socket is flagged (sk_flags = 1) and later messages are
+ * passed without a redirect attempt. Always returns SK_PASS.
+ */
+SEC("sk_msg") int netacc_redir(struct sk_msg_md *msg)
+{
+	struct sock_info *p_skinfo = NULL;
+	struct sock_info skinfo = {0};
+	struct sock_key peer_key = {};
+	struct sock_key key = {};
+	int ret, addinfo = 0;
+
+	extract_key4_from_msg(msg, &key);
+	sock_key_add_netnsinfo(msg, &key);
+	sock_key2peerkey(&key, &peer_key);
+	sock_key_add_netnsinfo(msg, &peer_key);
+
+	p_skinfo = bpf_map_lookup_elem(&sockflag_map, &key);
+	if (p_skinfo != NULL && p_skinfo->sk_flags == 1)
+		return SK_PASS;
+
+	if (p_skinfo == NULL) {
+		addinfo = 1;
+		p_skinfo = &skinfo;
+	}
+
+	ret = bpf_msg_redirect_hash(msg, &netaccsock_map, &peer_key, BPF_F_INGRESS);
+	if (ret == SK_DROP) {
+		if (p_skinfo->sk_flags != 1)
+			p_skinfo->sk_flags = 1;
+	}
+
+	p_skinfo->redir_tx_cnt++;
+	if (addinfo)
+		bpf_map_update_elem(&sockflag_map, &key, p_skinfo, BPF_ANY);
+
+	return SK_PASS;
+}
+#endif
diff --git a/tools/netacc/net-acc b/tools/netacc/net-acc
new file mode 100755
index 000000000000..3ed428aaaf72
--- /dev/null
+++ b/tools/netacc/net-acc
@@ -0,0 +1,50 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Mount point used when no cgroup2 hierarchy is mounted yet.
+orig_cgrp_path="/sys/fs/cgroup/unified"
+# Print command-line help.
+function usage() {
+	echo ""
+	echo "Usage:"
+	echo " $0 [enable | disable]"
+	echo ""
+}
+
+# Set cgrp_path to the mount point of the first mounted cgroup2 hierarchy,
+# mounting one at ${orig_cgrp_path} first if none is mounted.
+function get_cgrp_path() {
+	local KERNEL_CGRP=`cat /proc/filesystems | grep cgroup2`
+	if [[ "$KERNEL_CGRP"X == "X" ]]; then
+		echo "kernel not support cgroupv2"
+		exit 1
+	fi
+	local CGRP=`mount | grep cgroup2 | head -n 1`
+	if [[ "$CGRP"X == "X" ]]; then
+		mount -o rw,remount /sys/fs/cgroup
+		mkdir -p ${orig_cgrp_path}
+		mount -t cgroup2 -o nosuid,nodev,noexec none ${orig_cgrp_path}
+		mount -o ro,remount /sys/fs/cgroup
+		CGRP=`mount | grep cgroup2 | head -n 1`
+	fi
+	cgrp_path=`echo $CGRP | awk '{print $3}'`
+}
+
+CMD=$1
+
+get_cgrp_path
+if [[ "$cgrp_path"X == "X" ]]; then
+	echo "Failed to obtain a valid cgroup mount point."
+	usage;
+	exit 1
+fi
+
+if [[ "$CMD"X == "enableX" ]]; then
+	/usr/sbin/tuned_acc/netacc enable "${cgrp_path}"
+elif [[ "$CMD"X == "disableX" ]]; then
+	/usr/sbin/tuned_acc/netacc disable "${cgrp_path}"
+	exit 0
+else
+	usage;
+	exit 1
+fi
diff --git a/tools/netacc/netacc.c b/tools/netacc/netacc.c
new file mode 100644
index 000000000000..7c22490e2de4
--- /dev/null
+++ b/tools/netacc/netacc.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Huawei Technologies Co., Ltd
+ */
+
+#include <argp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "netaccsockmap.skel.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+#endif
+
+#define CG_PATH "/sys/fs/cgroup/unified"
+#define PIN_PATH "/sys/fs/bpf/netacc/"
+
+/* Raise RLIMIT_MEMLOCK to unlimited; returns the result of setrlimit(). */
+static int bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur = RLIM_INFINITY,
+		.rlim_max = RLIM_INFINITY,
+	};
+
+	return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+}
+
+/* One BPF program: its pin path and where its libbpf handle and fd are kept. */
+struct net_acc_prog_info {
+	const char *prog_name;
+	const char *pin_path;
+	void **prog;
+	int *fd;
+};
+
+/* One BPF map: its pin path and where its libbpf handle and fd are kept. */
+struct net_acc_map_info {
+	const char *map_name;
+	char *pin_path;
+	void **map;
+	int *fd;
+};
+
+/* fds re-opened from the pin paths by net_acc_disable(). */
+struct {
+	int netacc_sockops_fd;
+	int netacc_redir_fd;
+	int netaccsock_map_fd;
+} net_acc_fds;
+
+/* libbpf handles recorded by attach_manually() and pinned by pin_prog_map(). */
+struct {
+	void *netacc_sockops_obj;
+	void *netacc_redir_obj;
+	void *netaccsock_map_obj;
+} net_acc_obj;
+
+static struct net_acc_prog_info prog_infos[] = {
+	{
+		.prog_name = "netacc_sockops",
+		.pin_path = PIN_PATH"sockops",
+		.prog = &net_acc_obj.netacc_sockops_obj,
+		.fd = &net_acc_fds.netacc_sockops_fd,
+	},
+	{
+		.prog_name = "netacc_redir",
+		.pin_path = PIN_PATH"sk_msg",
+		.prog = &net_acc_obj.netacc_redir_obj,
+		.fd = &net_acc_fds.netacc_redir_fd,
+	}
+};
+
+static struct net_acc_map_info map_infos[] = {
+	{
+		.map_name = "netaccsock_map",
+		.pin_path = PIN_PATH"netaccsock_map",
+		.map = &net_acc_obj.netaccsock_map_obj,
+		.fd = &net_acc_fds.netaccsock_map_fd,
+	}
+};
+
+/* cg_fd is opened and closed by main(); skel is created by net_acc_enable(). */
+int cg_fd = -1;
+struct netaccsockmap *skel;
+
+/* Return 1 if the sockhash map can be opened from its pin path, 0 otherwise. */
+int net_acc_enabled(void)
+{
+	int map_fd;
+
+	map_fd = bpf_obj_get(map_infos[0].pin_path);
+	if (map_fd < 0)
+		return 0;
+
+	close(map_fd);
+	return 1;
+}
+
+/*
+ * Pin every recorded map and program. On failure, everything pinned so far is
+ * unpinned again. Returns 0 on success and 1 on failure.
+ */
+int pin_prog_map(void)
+{
+	int i, mapj, progj;
+	int err = 0;
+
+	mapj = ARRAY_SIZE(map_infos);
+	for (i = 0; i < mapj; i++) {
+		if (*map_infos[i].map)
+			err = bpf_map__pin(*map_infos[i].map, map_infos[i].pin_path);
+		if (err) {
+			mapj = i;
+			goto err1;
+		}
+	}
+
+	progj = ARRAY_SIZE(prog_infos);
+	for (i = 0; i < progj; i++) {
+		if (*prog_infos[i].prog)
+			err = bpf_program__pin(*prog_infos[i].prog, prog_infos[i].pin_path);
+		if (err) {
+			progj = i;
+			goto err2;
+		}
+	}
+	return 0;
+err2:
+	for (i = 0; i < progj; i++) {
+		if (*prog_infos[i].prog)
+			bpf_program__unpin(*prog_infos[i].prog, prog_infos[i].pin_path);
+	}
+err1:
+	for (i = 0; i < mapj; i++) {
+		if (*map_infos[i].map)
+			bpf_map__unpin(*map_infos[i].map, map_infos[i].pin_path);
+	}
+	return 1;
+}
+
+/*
+ * Attach the sockops program to the cgroup and the sk_msg program to the
+ * sockhash map, then record the handles in net_acc_obj.
+ * Returns 0 on success and -1 on failure.
+ */
+int attach_manually(void)
+{
+	int err;
+
+	err = bpf_prog_attach(bpf_program__fd(skel->progs.netacc_sockops), cg_fd,
+			      BPF_CGROUP_SOCK_OPS, 0);
+	if (err) {
+		fprintf(stderr, "failed to attach sockops programs, %d\n", err);
+		return -1;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(skel->progs.netacc_redir),
+			      bpf_map__fd(skel->maps.netaccsock_map), BPF_SK_MSG_VERDICT, 0);
+	if (err) {
+		fprintf(stderr, "failed to attach msg_verdict programs, %d\n", err);
+		goto cleanup1;
+	}
+
+	net_acc_obj.netacc_sockops_obj = skel->progs.netacc_sockops;
+	net_acc_obj.netacc_redir_obj = skel->progs.netacc_redir;
+	net_acc_obj.netaccsock_map_obj = skel->maps.netaccsock_map;
+	return 0;
+cleanup1:
+	bpf_prog_detach2(bpf_program__fd(skel->progs.netacc_sockops), cg_fd, BPF_CGROUP_SOCK_OPS);
+	return -1;
+}
+
+/* Undo attach_manually(); failures are only reported. */
+void detach_manually(void)
+{
+	int err;
+
+	err = bpf_prog_detach2(bpf_program__fd(skel->progs.netacc_redir),
+			       bpf_map__fd(skel->maps.netaccsock_map), BPF_SK_MSG_VERDICT);
+	if (err)
+		fprintf(stderr, "failed to detach msg_verdict programs, %d\n", err);
+
+	err = bpf_prog_detach2(bpf_program__fd(skel->progs.netacc_sockops), cg_fd,
+			       BPF_CGROUP_SOCK_OPS);
+	if (err)
+		fprintf(stderr, "failed to detach sockops programs, %d\n", err);
+}
+
+/*
+ * Load, attach and pin the BPF objects. Returns 0 on success or when already
+ * enabled, 1 on failure. cg_fd is left open because main() closes it.
+ */
+int net_acc_enable(void)
+{
+	int err;
+
+	if (net_acc_enabled())
+		return 0;
+
+	err = bump_memlock_rlimit();
+	if (err) {
+		fprintf(stderr, "failed to increase rlimit: %d\n", err);
+		return 1;
+	}
+
+	skel = netaccsockmap__open();
+	if (!skel) {
+		fprintf(stderr, "failed to open and/or load BPF object\n");
+		return 1;
+	}
+
+	err = netaccsockmap__load(skel);
+	if (err) {
+		fprintf(stderr, "failed to load BPF object: %d\n", err);
+		goto cleanup;
+	}
+
+	err = netaccsockmap__attach(skel);
+	if (err) {
+		fprintf(stderr, "failed to attach BPF programs\n");
+		goto cleanup;
+	}
+
+	err = attach_manually();
+	if (err) {
+		fprintf(stderr, "failed to attach BPF programs\n");
+		goto cleanup;
+	}
+
+	err = pin_prog_map();
+	if (err) {
+		fprintf(stderr, "failed to pin BPF programs and maps\n");
+		goto cleanup1;
+	}
+
+	return 0;
+
+cleanup1:
+	detach_manually();
+cleanup:
+	netaccsockmap__destroy(skel);
+
+	return err != 0;
+}
+
+/*
+ * Unpin and detach what net_acc_enable() set up, using fds re-opened from the
+ * pin paths. Detach failures are reported; the function still returns 0.
+ */
+int net_acc_disable(void)
+{
+	int i, err;
+
+	if (!net_acc_enabled())
+		return 0;
+
+	for (i = 0; i < ARRAY_SIZE(map_infos); i++) {
+		if (map_infos[i].fd) {
+			*map_infos[i].fd = bpf_obj_get(map_infos[i].pin_path);
+			unlink(map_infos[i].pin_path);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(prog_infos); i++) {
+		if (prog_infos[i].fd) {
+			*prog_infos[i].fd = bpf_obj_get(prog_infos[i].pin_path);
+			unlink(prog_infos[i].pin_path);
+		}
+	}
+
+	err = bpf_prog_detach2(net_acc_fds.netacc_redir_fd,
+			       net_acc_fds.netaccsock_map_fd, BPF_SK_MSG_VERDICT);
+	if (err)
+		fprintf(stderr, "failed to detach msg_verdict programs, %d\n", err);
+	err = bpf_prog_detach2(net_acc_fds.netacc_sockops_fd, cg_fd, BPF_CGROUP_SOCK_OPS);
+	if (err)
+		fprintf(stderr, "failed to detach sockops programs, %d\n", err);
+
+	close(net_acc_fds.netacc_redir_fd);
+	close(net_acc_fds.netacc_sockops_fd);
+	close(net_acc_fds.netaccsock_map_fd);
+	rmdir(PIN_PATH);
+	return 0;
+}
+
+/* Usage: netacc enable|disable [cgroup2_path] */
+int main(int argc, char **argv)
+{
+	char *cgrp_path = CG_PATH;
+	int ret = 1;
+
+	if (argc != 2 && argc != 3) {
+		fprintf(stderr, "usage: netacc enable|disable [cgroup2_path]\n");
+		return 1;
+	}
+
+	if (argc == 3)
+		cgrp_path = argv[2];
+
+	cg_fd = open(cgrp_path, O_RDONLY | O_DIRECTORY);
+	if (cg_fd < 0) {
+		fprintf(stderr, "ERROR: (%d) open cgroup path failed: %s\n", cg_fd, cgrp_path);
+		return 1;
+	}
+
+	/* Compare whole words so that e.g. "enabled" is rejected. */
+	if (strcmp(argv[1], "enable") == 0)
+		ret = net_acc_enable();
+	else if (strcmp(argv[1], "disable") == 0)
+		ret = net_acc_disable();
+	else
+		fprintf(stderr, "usage: netacc enable|disable [cgroup2_path]\n");
+
+	close(cg_fd);
+	return ret;
+}
diff --git a/tools/netacc/netaccsockmap.c b/tools/netacc/netaccsockmap.c
new file mode 100644
index 000000000000..e20b2f87908f
--- /dev/null
+++ b/tools/netacc/netaccsockmap.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Huawei Technologies Co., Ltd
+ */
+
+#include "bpf_sockmap.h"
+
+#define NETACC_BIND_MAP_SIZE 100
+
+#define CHECK_ACC_SOCK 1
+
+struct ipaddr_port {
+	__u32 ip4;
+	__u32 port;
+} __attribute__((packed));
+
+#if CHECK_ACC_SOCK
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, struct ipaddr_port);
+	__type(value, int);
+	__uint(max_entries, NETACC_BIND_MAP_SIZE);
+	__uint(map_flags, 0);
+} netacc_bind_map SEC(".maps");
+
+static inline int __is_netacc_sock(struct ipaddr_port *key)
+{
+	int *pv = NULL;
+
+	pv = bpf_map_lookup_elem(&netacc_bind_map, key);
+	if (pv)
+		return 1;
+
+	return 0;
+}
+
+static inline int is_netacc_sock(struct ipaddr_port *key1, struct ipaddr_port *key10)
+{
+	net_dbg("is_netacc, ip1:0x%x, port1:0x%x\n", key1->ip4, key1->port);
+
+	if (__is_netacc_sock(key1))
+		return 1;
+
+	if (__is_netacc_sock(key10))
+		return 1;
+
+	return 0;
+}
+
+static inline void extract_dst_ipaddrport_from_ops(struct bpf_sock_ops *skops,
+						   struct ipaddr_port *key)
+{
+	if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) {
+		key->ip4 = skops->remote_ip4;
+		// remote_port is in network byte order
+		key->port = bpf_ntohl(skops->remote_port);
+	} else if (skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) {
+		key->ip4 = skops->local_ip4;
+		// local_port is in host byte order
+		key->port = skops->local_port;
+	}
+}
+
+static inline int is_netacc_interested_tcp(struct bpf_sock_ops *skops)
+{
+	struct ipaddr_port key = {0};
+	struct ipaddr_port key0;
+
+	// only test server's port
+	extract_dst_ipaddrport_from_ops(skops, &key);
+	key0.ip4 = 0;
+	key0.port = key.port;
+
+	if (!is_netacc_sock(&key, &key0))
+		return 0;
+	net_dbg("this is netacc sock\n");
+
+	net_dbg("the sock is netacc loopback sock\n");
+	return 1;
+}
+
+static inline int update_netacc_info(struct bpf_sock_ops *skops)
+{
+	struct ipaddr_port key;
+	int value = 1;
+	char comm[16] = {0};
+
+	bpf_get_current_comm(comm, sizeof(comm));
+
+	if (bpf_strncmp(comm, 12, "redis-server"))
+		return 0;
+
+	key.ip4 = skops->local_ip4;
+	key.port = skops->local_port; // host order
+
+	bpf_map_update_elem(&netacc_bind_map, &key, &value, BPF_NOEXIST);
+	net_dbg("%s, update netaccinfo: sip:0x%x, sport:%d\n", comm, key.ip4, key.port);
+	return 1;
+}
+
+/* Drop the netacc_bind_map entry keyed by the local address and port of @skops. */
+static inline void clean_netacc_info(struct bpf_sock_ops *skops)
+{
+	struct ipaddr_port bind_key;
+
+	bind_key.port = skops->local_port; /* already in host byte order */
+	bind_key.ip4 = skops->local_ip4;
+	net_dbg("clean netaccinfo, 0x%x:%d\n", bind_key.ip4, bind_key.port);
+	bpf_map_delete_elem(&netacc_bind_map, &bind_key);
+}
+#else
+static inline int is_netacc_interested_tcp(struct bpf_sock_ops *skops)
+{
+	return 1;
+}
+static inline int update_netacc_info(struct bpf_sock_ops *skops)
+{
+	return 0;
+}
+static inline void clean_netacc_info(struct bpf_sock_ops *skops)
+{}
+#endif
+
+/* sockops entry point; acts only on AF_INET (family 2) sockets and always returns 1. */
+SEC("sockops")
+int netacc_sockops(struct bpf_sock_ops *skops)
+{
+	if (skops->family != 2)
+		return 1;
+	switch (skops->op) {
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		if (!skops->local_skb)
+			break;
+		if (!is_netacc_interested_tcp(skops)) {
+			bpf_sock_ops_cb_flags_set(skops, 0);
+			break;
+		}
+		net_dbg("bpf_sockops, sockmap, op:%d, sk:%p\n", skops->op, skops->sk);
+		bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+		bpf_sockmap_ipv4_insert(skops);
+		break;
+	case BPF_SOCK_OPS_STATE_CB:
+		if (skops->args[0] == BPF_TCP_LISTEN && skops->args[1] == BPF_TCP_CLOSE)
+			clean_netacc_info(skops);
+		else if (skops->args[1] == BPF_TCP_CLOSE || skops->args[1] == BPF_TCP_CLOSE_WAIT ||
+			 skops->args[1] == BPF_TCP_FIN_WAIT1)
+			bpf_sockmap_ipv4_cleanup(skops, NULL);
+		break;
+	case BPF_SOCK_OPS_TCP_LISTEN_CB:
+		if (update_netacc_info(skops))
+			bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+		break;
+	default:
+		break;
+	}
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;