hulk inclusion
category: feature
bugzilla: NA
CVE: N/A

----------------------------------------------------

Add sample sockmap code for redis.

Signed-off-by: Liu Jian <liujian56@huawei.com>
---
 tools/netacc/Makefile       |  24 +++
 tools/netacc/bpf_sockmap.h  | 148 +++++++++++++++++++
 tools/netacc/net-acc        |  34 +++++
 tools/netacc/redis_acc.c    | 276 ++++++++++++++++++++++++++++++++++
 tools/netacc/redissockmap.c | 287 ++++++++++++++++++++++++++++++++++++
 5 files changed, 769 insertions(+)
 create mode 100644 tools/netacc/Makefile
 create mode 100644 tools/netacc/bpf_sockmap.h
 create mode 100755 tools/netacc/net-acc
 create mode 100644 tools/netacc/redis_acc.c
 create mode 100644 tools/netacc/redissockmap.c

diff --git a/tools/netacc/Makefile b/tools/netacc/Makefile
new file mode 100644
index 000000000000..bf1db37414d8
--- /dev/null
+++ b/tools/netacc/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Builds the redissockmap BPF object, generates its skeleton header with
+# bpftool and links the redis_acc loader against the in-tree static libbpf.
+
+INSTALL ?= install
+CLANG ?= clang
+CC ?= gcc
+BPFTOOL ?= bpftool
+TOPDIR = ../..
+MKFLAGS = -I$(TOPDIR)/tools/lib
+LDLIBBPF = -L$(TOPDIR)/tools/lib/bpf/ -l:libbpf.a
+
+# No target names a real file; keep stray files from shadowing them.
+.PHONY: all clean install
+
+all:
+	$(CLANG) -O2 -g -Wall -target bpf $(MKFLAGS) -c redissockmap.c -o redissockmap.o
+	$(BPFTOOL) gen skeleton redissockmap.o > redis_acc.skel.h
+	$(CC) -O2 -g -Wall $(MKFLAGS) redis_acc.c -o redis_acc $(LDLIBBPF) -lelf -lz
+
+clean:
+	rm -f redis_acc
+	rm -f redis_acc.skel.h
+	rm -f *.o
+
+install:
+	mkdir -p $(INSTALL_ROOT)/usr/sbin/tuned_acc/
+	$(INSTALL) -m 755 net-acc $(INSTALL_ROOT)/usr/sbin/
+	$(INSTALL) -m 755 redis_acc $(INSTALL_ROOT)/usr/sbin/tuned_acc/
diff --git a/tools/netacc/bpf_sockmap.h b/tools/netacc/bpf_sockmap.h
new file mode 100644
index 000000000000..8edcc6624593
--- /dev/null
+++ b/tools/netacc/bpf_sockmap.h
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Huawei Technologies Co., Ltd
+ */
+
+#ifndef __BPF_SOCKMAP_H__
+#define __BPF_SOCKMAP_H__
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define LOG_DEBUG 0
+#define SOCKMAP_SIZE 100000
+/* Connections to or from port 22 (SSH's well-known port) stay out of the sockhash. */
+#define SOCKMAP_SKIP_PORT 22
+
+/* net_dbg() compiles to nothing unless LOG_DEBUG is set; net_err() always prints. */
+#if LOG_DEBUG
+#define net_dbg bpf_printk
+#define net_err bpf_printk
+#else
+#define net_dbg(fmt, ...) do {} while (0)
+#define net_err bpf_printk
+#endif
+
+/* IPv4 4-tuple identifying one socket; ports are kept in host byte order. */
+struct sock_key {
+	__u32 sip4;
+	__u32 dip4;
+	__u32 sport;
+	__u32 dport;
+} __attribute__((packed));
+
+/* Sockets eligible for redirection, keyed by their own 4-tuple. */
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__type(key, struct sock_key);
+	__type(value, int);
+	__uint(max_entries, SOCKMAP_SIZE);
+	__uint(map_flags, 0);
+} redissock_map SEC(".maps");
+
+/* Per-socket bookkeeping; sk_flags == 1 means "stop trying to redirect". */
+struct sock_info {
+	__u64 redir_rx_cnt;
+	__u64 redir_tx_cnt;
+	int sk_flags;
+};
+
+/* sock_info per sending socket, keyed by the sender's 4-tuple. */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, struct sock_key);
+	__type(value, struct sock_info);
+	__uint(max_entries, SOCKMAP_SIZE);
+	__uint(map_flags, 0);
+} sockflag_map SEC(".maps");
+
+/* Fill @peer_key with @key's endpoints swapped, i.e. the key of the other end. */
+static inline void sock_key2peerkey(struct sock_key *key, struct sock_key *peer_key)
+{
+	peer_key->sip4 = key->dip4;
+	peer_key->sport = key->dport;
+	peer_key->dip4 = key->sip4;
+	peer_key->dport = key->sport;
+}
+
+/* Build the local socket's key from a sockops context. */
+static inline void extract_key4_from_ops(struct bpf_sock_ops *ops, struct sock_key *key)
+{
+	key->dip4 = ops->remote_ip4;
+	key->sip4 = ops->local_ip4;
+
+	// local_port is in host byte order
+	// and remote_port is in network byte order
+	key->sport = ops->local_port;
+	key->dport = bpf_ntohl(ops->remote_port);
+}
+
+/* Add the socket to redissock_map unless its key is already present. */
+static inline void bpf_sock_ops_ipv4(struct bpf_sock_ops *skops)
+{
+	struct sock_key key = {};
+
+	extract_key4_from_ops(skops, &key);
+	bpf_sock_hash_update(skops, &redissock_map, &key, BPF_NOEXIST);
+}
+
+/* Like bpf_sock_ops_ipv4(), but leaves SOCKMAP_SKIP_PORT traffic alone. */
+static inline void bpf_sockmap_ipv4_insert(struct bpf_sock_ops *skops)
+{
+	if (bpf_ntohl(skops->remote_port) == SOCKMAP_SKIP_PORT ||
+	    skops->local_port == SOCKMAP_SKIP_PORT)
+		return;
+
+	bpf_sock_ops_ipv4(skops);
+}
+
+/*
+ * Drop the socket's sockflag_map entry. When an entry existed and @cnt is
+ * non-NULL, *cnt receives its redir_tx_cnt; otherwise *cnt is left untouched.
+ */
+static inline void bpf_sockmap_ipv4_cleanup(struct bpf_sock_ops *skops, __u64 *cnt)
+{
+	struct sock_info *p_skinfo = NULL;
+	struct sock_key key = {};
+
+	extract_key4_from_ops(skops, &key);
+	p_skinfo = bpf_map_lookup_elem(&sockflag_map, &key);
+	if (p_skinfo) {
+		if (cnt)
+			*cnt = p_skinfo->redir_tx_cnt;
+		bpf_map_delete_elem(&sockflag_map, &key);
+	}
+}
+
+/* Build the sending socket's key from an sk_msg context. */
+static inline void extract_key4_from_msg(struct sk_msg_md *msg, struct sock_key *key)
+{
+	key->sip4 = msg->local_ip4;
+	key->dip4 = msg->remote_ip4;
+
+	// local_port is in host byte order
+	// and remote_port is in network byte order
+	key->sport = msg->local_port;
+	key->dport = bpf_ntohl(msg->remote_port);
+}
+
+/*
+ * sk_msg verdict: redirect each message to the ingress side of the peer
+ * socket found in redissock_map. The first failed redirect sets sk_flags so
+ * later messages from this socket skip the attempt. Always returns SK_PASS.
+ */
+SEC("sk_msg") int redis_redir(struct sk_msg_md *msg)
+{
+	struct sock_info *p_skinfo = NULL;
+	struct sock_info skinfo = {0};
+	struct sock_key peer_key = {};
+	struct sock_key key = {};
+	int ret, addinfo = 0;
+
+	extract_key4_from_msg(msg, &key);
+	sock_key2peerkey(&key, &peer_key);
+
+	p_skinfo = bpf_map_lookup_elem(&sockflag_map, &key);
+	if (p_skinfo != NULL && p_skinfo->sk_flags == 1)
+		return SK_PASS;
+
+	if (p_skinfo == NULL) {
+		/* No record yet: stage one on the stack and insert it below. */
+		addinfo = 1;
+		p_skinfo = &skinfo;
+	}
+
+	ret = bpf_msg_redirect_hash(msg, &redissock_map, &peer_key, BPF_F_INGRESS);
+	if (ret == SK_DROP) {
+		if (p_skinfo->sk_flags != 1)
+			p_skinfo->sk_flags = 1;
+	}
+
+	p_skinfo->redir_tx_cnt++;
+	if (addinfo)
+		bpf_map_update_elem(&sockflag_map, &key, p_skinfo, BPF_ANY);
+
+	return SK_PASS;
+}
+#endif
diff --git a/tools/netacc/net-acc b/tools/netacc/net-acc
new file mode 100755
index 000000000000..f3db4803ced3
--- /dev/null
+++ b/tools/netacc/net-acc
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Front end for redis_acc: prepares the cgroup2 mount and the localip
+# module, then enables or disables the redis sockmap acceleration.
+
+function usage() {
+	echo ""
+	echo "Usage:"
+	echo " $0 [enable | disable]"
+	echo ""
+}
+
+# Mount a cgroup2 hierarchy at the path redis_acc opens, but only when no
+# cgroup2 mount exists yet.
+function mount_cgp2() {
+	CGP2=`mount | grep cgroup2`
+	if [[ "$CGP2"X == "X" ]]; then
+		CGP2_PATH=/sys/fs/cgroup/tunned-acc
+		mount -o rw,remount /sys/fs/cgroup
+		mkdir -p ${CGP2_PATH}
+		mount -t cgroup2 -o nosuid,nodev,noexec none ${CGP2_PATH}
+		mount -o ro,remount /sys/fs/cgroup
+	fi
+}
+
+CMD=$1
+
+if [[ "$CMD"X == "enableX" ]]; then
+	mount_cgp2
+	modprobe localip
+	/usr/sbin/tuned_acc/redis_acc enable
+elif [[ "$CMD"X == "disableX" ]]; then
+	/usr/sbin/tuned_acc/redis_acc disable
+	rmmod localip
+	exit 0
+else
+	usage;
+	# An unknown or missing command is an error, not a success.
+	exit 1
+fi
diff --git a/tools/netacc/redis_acc.c b/tools/netacc/redis_acc.c
new file mode 100644
index 000000000000..ae81ec56ea7e
--- /dev/null
+++ b/tools/netacc/redis_acc.c
@@ -0,0 +1,327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * redis_acc - user-space loader for the redis sockmap BPF programs.
+ *
+ * "enable" loads the redissockmap skeleton, attaches the sockops program to
+ * the cgroup at CG_PATH and the sk_msg program to the sockhash map, then
+ * pins the programs and the map under PIN_PATH. "disable" reopens the pinned
+ * objects, removes the pins and detaches the programs.
+ */
+#include <argp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include "redis_acc.skel.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+#endif
+
+#define CG_PATH "/sys/fs/cgroup/tunned-acc"
+#define PIN_PATH "/sys/fs/bpf/redis/"
+
+/* Raise RLIMIT_MEMLOCK to unlimited; returns the result of setrlimit(). */
+static int bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur = RLIM_INFINITY,
+		.rlim_max = RLIM_INFINITY,
+	};
+
+	return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+}
+
+/* Pinning record for one BPF program: pin location plus handle/fd slots. */
+struct net_acc_prog_info {
+	const char *prog_name;
+	const char *pin_path;
+	void **prog;
+	int *fd;
+};
+
+/* Pinning record for one BPF map: pin location plus handle/fd slots. */
+struct net_acc_map_info {
+	const char *map_name;
+	const char *pin_path;
+	void **map;
+	int *fd;
+};
+
+/* fds of the pinned objects; filled by net_acc_disable() via bpf_obj_get(). */
+struct {
+	int redis_sockops_fd;
+	int redis_redir_fd;
+	int redissock_map_fd;
+} net_acc_fds;
+
+/* libbpf handles of the loaded objects; filled by attach_manually(). */
+struct {
+	void *redis_sockops_obj;
+	void *redis_redir_obj;
+	void *redissock_map_obj;
+} net_acc_obj;
+
+static struct net_acc_prog_info prog_infos[] = {
+	{
+		.prog_name = "redis_sockops",
+		.pin_path = PIN_PATH"sockops",
+		.prog = &net_acc_obj.redis_sockops_obj,
+		.fd = &net_acc_fds.redis_sockops_fd,
+	},
+	{
+		.prog_name = "redis_redir",
+		.pin_path = PIN_PATH"sk_msg",
+		.prog = &net_acc_obj.redis_redir_obj,
+		.fd = &net_acc_fds.redis_redir_fd,
+	}
+};
+
+static struct net_acc_map_info map_infos[] = {
+	{
+		.map_name = "redissock_map",
+		.pin_path = PIN_PATH"redissock_map",
+		.map = &net_acc_obj.redissock_map_obj,
+		.fd = &net_acc_fds.redissock_map_fd,
+	}
+};
+
+/* cgroup2 directory fd; opened and closed by main() only. */
+int cg_fd = -1;
+struct redissockmap *skel;
+
+/* Return 1 when the sockhash pin exists (acceleration already enabled), else 0. */
+int net_acc_enabled(void)
+{
+	int map_fd;
+
+	map_fd = bpf_obj_get(map_infos[0].pin_path);
+	if (map_fd < 0)
+		return 0;
+
+	close(map_fd);
+	return 1;
+}
+
+/*
+ * Pin every map, then every program, at its configured path.
+ * On failure everything pinned so far is unpinned again.
+ * Returns 0 on success, 1 on failure.
+ */
+int pin_prog_map(void)
+{
+	int i, mapj, progj;
+	int err = 0;
+
+	mapj = ARRAY_SIZE(map_infos);
+	for (i = 0; i < mapj; i++) {
+		if (*map_infos[i].map)
+			err = bpf_map__pin(*map_infos[i].map, map_infos[i].pin_path);
+		if (err) {
+			mapj = i;
+			goto err1;
+		}
+	}
+
+	progj = ARRAY_SIZE(prog_infos);
+	for (i = 0; i < progj; i++) {
+		if (*prog_infos[i].prog)
+			err = bpf_program__pin(*prog_infos[i].prog, prog_infos[i].pin_path);
+		if (err) {
+			progj = i;
+			goto err2;
+		}
+	}
+	return 0;
+err2:
+	for (i = 0; i < progj; i++) {
+		if (*prog_infos[i].prog)
+			bpf_program__unpin(*prog_infos[i].prog, prog_infos[i].pin_path);
+	}
+err1:
+	for (i = 0; i < mapj; i++) {
+		if (*map_infos[i].map)
+			bpf_map__unpin(*map_infos[i].map, map_infos[i].pin_path);
+	}
+	return 1;
+}
+
+/*
+ * Attach redis_sockops to the cgroup and redis_redir to the sockhash, and
+ * record the libbpf handles for pin_prog_map().
+ * Returns 0 on success, -1 on failure (nothing stays attached).
+ */
+int attach_manually(void)
+{
+	int err;
+
+	err = bpf_prog_attach(bpf_program__fd(skel->progs.redis_sockops), cg_fd,
+			      BPF_CGROUP_SOCK_OPS, 0);
+	if (err) {
+		fprintf(stderr, "failed to attach sockops programs\n");
+		return -1;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(skel->progs.redis_redir),
+			      bpf_map__fd(skel->maps.redissock_map), BPF_SK_MSG_VERDICT, 0);
+	if (err) {
+		fprintf(stderr, "failed to attach msg_verdict programs\n");
+		goto cleanup1;
+	}
+
+	net_acc_obj.redis_sockops_obj = skel->progs.redis_sockops;
+	net_acc_obj.redis_redir_obj = skel->progs.redis_redir;
+	net_acc_obj.redissock_map_obj = skel->maps.redissock_map;
+	return 0;
+cleanup1:
+	bpf_prog_detach2(bpf_program__fd(skel->progs.redis_sockops), cg_fd, BPF_CGROUP_SOCK_OPS);
+	return -1;
+}
+
+/* Undo attach_manually(): detach both programs through the skeleton's fds. */
+void detach_manually(void)
+{
+	bpf_prog_detach2(bpf_program__fd(skel->progs.redis_redir),
+			 bpf_map__fd(skel->maps.redissock_map), BPF_SK_MSG_VERDICT);
+	bpf_prog_detach2(bpf_program__fd(skel->progs.redis_sockops), cg_fd, BPF_CGROUP_SOCK_OPS);
+}
+
+/*
+ * Load, attach and pin the BPF programs. Does nothing when already enabled.
+ * cg_fd is owned by main() and is deliberately not closed here.
+ * Returns 0 on success, 1 on failure.
+ */
+int net_acc_enable(void)
+{
+	int err;
+
+	if (net_acc_enabled())
+		return 0;
+
+	err = bump_memlock_rlimit();
+	if (err) {
+		fprintf(stderr, "failed to increase rlimit: %d\n", err);
+		return 1;
+	}
+
+	skel = redissockmap__open();
+	if (!skel) {
+		fprintf(stderr, "failed to open and/or load BPF object\n");
+		return 1;
+	}
+
+	err = redissockmap__load(skel);
+	if (err) {
+		fprintf(stderr, "failed to load BPF object: %d\n", err);
+		goto cleanup;
+	}
+
+	err = redissockmap__attach(skel);
+	if (err) {
+		fprintf(stderr, "failed to attach BPF programs\n");
+		goto cleanup;
+	}
+
+	err = attach_manually();
+	if (err) {
+		fprintf(stderr, "failed to attach BPF programs\n");
+		goto cleanup;
+	}
+
+	err = pin_prog_map();
+	if (err) {
+		fprintf(stderr, "failed to pin BPF programs and maps\n");
+		goto cleanup1;
+	}
+
+	return 0;
+
+cleanup1:
+	detach_manually();
+cleanup:
+	redissockmap__destroy(skel);
+	skel = NULL;
+
+	return 1;
+}
+
+/* Close *fd when it holds a valid descriptor and mark the slot as unused. */
+static void net_acc_put_fd(int *fd)
+{
+	if (*fd >= 0)
+		close(*fd);
+	*fd = -1;
+}
+
+/*
+ * Tear acceleration down: reopen each pinned object, drop its pin, detach
+ * the programs and release the fds. Does nothing when not enabled.
+ * Always returns 0; the individual steps are best effort.
+ */
+int net_acc_disable(void)
+{
+	size_t i;
+
+	if (!net_acc_enabled())
+		return 0;
+
+	/* Take an fd before unlinking: detaching below needs the fds. */
+	for (i = 0; i < ARRAY_SIZE(map_infos); i++) {
+		if (map_infos[i].fd) {
+			*map_infos[i].fd = bpf_obj_get(map_infos[i].pin_path);
+			unlink(map_infos[i].pin_path);
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(prog_infos); i++) {
+		if (prog_infos[i].fd) {
+			*prog_infos[i].fd = bpf_obj_get(prog_infos[i].pin_path);
+			unlink(prog_infos[i].pin_path);
+		}
+	}
+
+	bpf_prog_detach2(net_acc_fds.redis_redir_fd,
+			 net_acc_fds.redissock_map_fd, BPF_SK_MSG_VERDICT);
+	bpf_prog_detach2(net_acc_fds.redis_sockops_fd, cg_fd, BPF_CGROUP_SOCK_OPS);
+
+	/* Release each of the three fds exactly once. */
+	net_acc_put_fd(&net_acc_fds.redis_redir_fd);
+	net_acc_put_fd(&net_acc_fds.redis_sockops_fd);
+	net_acc_put_fd(&net_acc_fds.redissock_map_fd);
+	rmdir(PIN_PATH);
+	return 0;
+}
+
+/*
+ * Usage: redis_acc enable|disable
+ * Exits 0 on success and 1 on bad usage or failure.
+ */
+int main(int argc, char **argv)
+{
+	int ret = 1;
+
+	if (argc != 2)
+		return 1;
+
+	cg_fd = open(CG_PATH, O_RDONLY | O_DIRECTORY);
+	if (cg_fd < 0) {
+		fprintf(stderr, "ERROR: (%i) open cgroup2 path failed: %s\n", cg_fd, CG_PATH);
+		return 1;
+	}
+
+	/* Exact match: a prefix test would also accept e.g. "enabled". */
+	if (strcmp(argv[1], "enable") == 0)
+		ret = net_acc_enable();
+	else if (strcmp(argv[1], "disable") == 0)
+		ret = net_acc_disable();
+
+	close(cg_fd);
+	return ret;
+}
diff --git a/tools/netacc/redissockmap.c b/tools/netacc/redissockmap.c
new file mode 100644
index 000000000000..b23df1aa3e6c
--- /dev/null
+++ b/tools/netacc/redissockmap.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Huawei Technologies Co., Ltd
+ */
+
+#include "bpf_sockmap.h"
+
+#define REDIS_BIND_MAP_SIZE 100
+#define BLOCKLIST_SIZE 1000
+
+#define ENABLE_BLOCKLIST 0
+#define SHORT_THR 10
+#define BLOCK_THR 10000
+
+struct local_ip {
+	__u32 ip4;
+};
+
+/* IPv4 address plus host-order port of one endpoint. */
+struct ipaddr_port {
+	__u32 ip4;
+	__u32 port;
+} __attribute__((packed));
+
+#if ENABLE_BLOCKLIST
+/* Per redis endpoint count of short-lived connections (see SHORT_THR). */
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__type(key, struct ipaddr_port);
+	__type(value, int);
+	__uint(max_entries, BLOCKLIST_SIZE);
+	__uint(map_flags, 0);
+} blocklist_map SEC(".maps");
+#endif
+
+/* Addresses redis-server listens on, recorded by update_redis_info(). */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, struct ipaddr_port);
+	__type(value, int);
+	__uint(max_entries, REDIS_BIND_MAP_SIZE);
+	__uint(map_flags, 0);
+} redis_bind_map SEC(".maps");
+
+/* Split a sockops context into its remote (@key1) and local (@key2) endpoint. */
+static inline void extract_ipaddrport_from_ops(struct bpf_sock_ops *skops,
+		struct ipaddr_port *key1, struct ipaddr_port *key2)
+{
+	key1->ip4 = skops->remote_ip4;
+	// remote_port is in network byte order
+	key1->port = bpf_ntohl(skops->remote_port);
+
+	key2->ip4 = skops->local_ip4;
+	// local_port is in host byte order
+	key2->port = skops->local_port;
+}
+
+/* Return 1 when @key is a recorded redis listening address, else 0. */
+static inline int __is_redis_sock(struct ipaddr_port *key)
+{
+	int *pv = NULL;
+
+	pv = bpf_map_lookup_elem(&redis_bind_map, key);
+	if (pv)
+		return 1;
+
+	return 0;
+}
+
+/* Return 1 when any of the four candidate keys is a redis listening address. */
+static inline int is_redis_sock(struct ipaddr_port *key1, struct ipaddr_port *key2,
+		struct ipaddr_port *key10, struct ipaddr_port *key20)
+{
+	net_dbg("is_redis, ip1:0x%x, port1:0x%x\n", key1->ip4, key1->port);
+	net_dbg("is_redis, ip2:0x%x, port2:0x%x\n", key2->ip4, key2->port);
+
+	if (__is_redis_sock(key1))
+		return 1;
+
+	if (__is_redis_sock(key2))
+		return 1;
+
+	if (__is_redis_sock(key10))
+		return 1;
+
+	if (__is_redis_sock(key20))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Return 1 when the remote address is on this host: either in 127.0.0.0/8 or
+ * reported as local by bpf_is_local_ipaddr(), else 0.
+ */
+static inline int is_localip_sock(struct bpf_sock_ops *skops)
+{
+	struct local_ip remoteip;
+
+	net_dbg("is_localip, ip1:0x%x, ip2:0x%x\n",
+		skops->local_ip4, skops->remote_ip4);
+
+	// skops->local_ip4 must be the local IP address
+	remoteip.ip4 = skops->remote_ip4;
+
+	/*
+	 * remote_ip4 is in network byte order: convert before testing the
+	 * first octet so the check does not depend on host endianness.
+	 */
+	if ((bpf_ntohl(remoteip.ip4) >> 24) == 0x7f)
+		return 1;
+
+	if (!bpf_is_local_ipaddr(remoteip.ip4))
+		return 0;
+
+	return 1;
+}
+
+#if ENABLE_BLOCKLIST
+/* Return 1 when @key's short-connection count exceeds BLOCK_THR, else 0. */
+static inline int __is_in_block_list(struct ipaddr_port *key)
+{
+	int *pv = NULL;
+
+	pv = bpf_map_lookup_elem(&blocklist_map, key);
+	if (pv && *pv > BLOCK_THR)
+		return 1;
+
+	return 0;
+}
+
+/* Return 1 when any of the four candidate keys is blocked. */
+static inline int is_in_block_list(struct ipaddr_port *key1, struct ipaddr_port *key2,
+		struct ipaddr_port *key10, struct ipaddr_port *key20)
+{
+	if (__is_in_block_list(key1))
+		return 1;
+	if (__is_in_block_list(key2))
+		return 1;
+	if (__is_in_block_list(key10))
+		return 1;
+	if (__is_in_block_list(key20))
+		return 1;
+
+	return 0;
+}
+
+/* Bump @block's counter, creating it at 1; it stops growing past BLOCK_THR. */
+static inline int __add_task2block_list(struct ipaddr_port *block)
+{
+	int *pv = NULL;
+	int value = 1;
+
+	pv = bpf_map_lookup_elem(&blocklist_map, block);
+	if (pv == NULL) {
+		bpf_map_update_elem(&blocklist_map, block, &value, BPF_NOEXIST);
+		return 0;
+	}
+
+	if (*pv > BLOCK_THR)
+		return 0;
+
+	*pv += 1;
+	return 0;
+}
+
+/* Charge a short-lived connection to the redis endpoint it belongs to. */
+static inline int add_task2block_list(struct bpf_sock_ops *skops)
+{
+	struct ipaddr_port block1;
+	struct ipaddr_port block2;
+
+	extract_ipaddrport_from_ops(skops, &block1, &block2);
+
+	if (__is_redis_sock(&block1))
+		return __add_task2block_list(&block1);
+
+	if (__is_redis_sock(&block2))
+		return __add_task2block_list(&block2);
+
+	block1.ip4 = 0;
+	if (__is_redis_sock(&block1))
+		return __add_task2block_list(&block1);
+
+	block2.ip4 = 0;
+	if (__is_redis_sock(&block2))
+		return __add_task2block_list(&block2);
+
+	return 0;
+}
+#else
+/* Blocklist compiled out: nothing is recorded and nothing is blocked. */
+static inline int add_task2block_list(struct bpf_sock_ops *skops)
+{
+	return 0;
+}
+
+static inline int is_in_block_list(struct ipaddr_port *key1, struct ipaddr_port *key2,
+		struct ipaddr_port *key10, struct ipaddr_port *key20)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Return 1 when the connection is local, one endpoint is a redis listening
+ * address (exact IP or IP 0 with the same port) and it is not blocked.
+ */
+static inline int is_redis_loopback_tcp(struct bpf_sock_ops *skops)
+{
+	struct ipaddr_port key10;
+	struct ipaddr_port key20;
+	struct ipaddr_port key1;
+	struct ipaddr_port key2;
+
+	if (!is_localip_sock(skops))
+		return 0;
+	net_dbg("this is localip\n");
+
+	extract_ipaddrport_from_ops(skops, &key1, &key2);
+	key10.ip4 = 0;
+	key10.port = key1.port;
+	key20.ip4 = 0;
+	key20.port = key2.port;
+
+	if (!is_redis_sock(&key1, &key2, &key10, &key20))
+		return 0;
+	net_dbg("this is redis sock\n");
+
+	if (is_in_block_list(&key1, &key2, &key10, &key20))
+		return 0;
+
+	net_dbg("the sock is redis loopback sock\n");
+	return 1;
+}
+
+/*
+ * Record the listening address in redis_bind_map when the current task's
+ * comm starts with "redis-server". Returns 1 when recorded, else 0.
+ */
+static inline int update_redis_info(struct bpf_sock_ops *skops)
+{
+	struct ipaddr_port key;
+	int value = 1;
+	char comm[16] = {0};
+
+	bpf_get_current_comm(comm, sizeof(comm));
+	if (comm[0] != 'r' || comm[1] != 'e' || comm[2] != 'd' || comm[3] != 'i' ||
+	    comm[4] != 's' || comm[5] != '-' || comm[6] != 's' || comm[7] != 'e' ||
+	    comm[8] != 'r' || comm[9] != 'v' || comm[10] != 'e' || comm[11] != 'r')
+		return 0;
+
+	key.ip4 = skops->local_ip4;
+	key.port = skops->local_port; // host order
+
+	bpf_map_update_elem(&redis_bind_map, &key, &value, BPF_NOEXIST);
+	net_dbg("%s, update redisinfo: sip:0x%x, sport:%d\n", comm, key.ip4, key.port);
+	return 1;
+}
+
+/* Forget the bind record of a listener that moved from LISTEN to CLOSE. */
+static inline void clean_redis_info(struct bpf_sock_ops *skops)
+{
+	struct ipaddr_port key;
+
+	key.ip4 = skops->local_ip4;
+	key.port = skops->local_port; // host order
+	net_dbg("clean redisinfo, 0x%x:%d\n", key.ip4, key.port);
+	bpf_map_delete_elem(&redis_bind_map, &key);
+}
+
+/*
+ * sockops entry point:
+ *  - LISTEN: remember redis-server listening addresses and ask for state
+ *    callbacks on that socket;
+ *  - ESTABLISHED: put local redis connections into the sockhash;
+ *  - state change: drop the bind record of a closing listener, or clean up
+ *    the per-connection record of a closing connection.
+ */
+SEC("sockops") int redis_sockops(struct bpf_sock_ops *skops)
+{
+	switch (skops->op) {
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		if (skops->family == 2) {// AF_INET
+			if (is_redis_loopback_tcp(skops)) {
+				net_dbg("bpf_sockops, sockmap, op:%d, sk:%p\n",
+					skops->op, skops->sk);
+				bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+				bpf_sockmap_ipv4_insert(skops);
+			} else {
+				bpf_sock_ops_cb_flags_set(skops, 0);
+			}
+		}
+		break;
+	case BPF_SOCK_OPS_STATE_CB:
+		if (skops->family == 2 && skops->args[0] == BPF_TCP_LISTEN &&
+		    skops->args[1] == BPF_TCP_CLOSE) {
+			clean_redis_info(skops);
+		} else if (skops->family == 2 && (skops->args[1] == BPF_TCP_CLOSE ||
+			   skops->args[1] == BPF_TCP_CLOSE_WAIT ||
+			   skops->args[1] == BPF_TCP_FIN_WAIT1)) {
+			__u64 tx_cnt = SHORT_THR;
+
+			bpf_sockmap_ipv4_cleanup(skops, &tx_cnt);
+			net_dbg("sockops sk:%p, state:%d, tx_cnt:%llu\n",
+				skops->sk, skops->args[1], tx_cnt);
+			if (tx_cnt < SHORT_THR)
+				add_task2block_list(skops);
+		}
+		break;
+	case BPF_SOCK_OPS_TCP_LISTEN_CB:
+		if (skops->family == 2 && update_redis_info(skops))
+			bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+		break;
+	default:
+		break;
+	}
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;