BMC is an in-kernel key-value cache implemented in BPF and proposed by paper [1]. The paper discussed BMC for memcached, obtaining at least 6x performance speedup.
This patch implements a sample BMC for Redis. Paper [1] implements BMC in XDP, bypassing the kernel network stack entirely. Since Redis is based on TCP, and it is almost impossible to fully process TCP traffic in XDP, this patch implements BMC at the top of the kernel network stack instead. (NOTE(review): this paragraph says "sockmap", but patch 3 registers a BPF_PROG_TYPE_XDP program and attaches it with bpf_set_link_xdp_fd() — please confirm and update whichever is stale.) Because the kernel network stack is not bypassed, the speedup is less significant than in [1]. In any case, this is only a sample implementation, and its performance can be optimized further.
See [2] for details on how to build samples/bpf.
Output files: samples/bpf/bmctool samples/bpf/bmc/bpf.o
Sample usage: bmctool prog load -p 6379 ./bmc/bpf.o # load bmc bpf prog and attach it # to sockets with listen port 6379
bmctool stat # dump bmc status
bmctool prog unload # detach and unload bmc prog
[1] https://www.usenix.org/conference/nsdi21/presentation/ghigoff [2] https://www.kernel.org/doc/readme/samples-bpf-README.rst
Xu Kuohai (3): bpf: Add helper bpf_update_tcp_seq to synchronize tcp seq/ack bpf: Add xdp load and store helpers samples: bpf: Add sample BMC for Redis
include/uapi/linux/bpf.h | 27 ++ net/core/filter.c | 98 +++++ samples/bpf/Makefile | 3 + samples/bpf/bmc/bpf.c | 485 +++++++++++++++++++++ samples/bpf/bmc/common.h | 21 + samples/bpf/bmc/tool.c | 763 +++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 27 ++ 7 files changed, 1424 insertions(+) create mode 100644 samples/bpf/bmc/bpf.c create mode 100644 samples/bpf/bmc/common.h create mode 100644 samples/bpf/bmc/tool.c
From: Xu Kuohai xukuohai@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: N/A
--------------------------------
In order to process TCP packet with BPF/XDP, it's necessary to synchronize seq and ack between kernel network stack and bpf prog. This patch introduces a sample helper to do the sync.
Note that, it's only used for samples, retransmission and congestion control are not supported.
Signed-off-by: He Fengqing hefengqing@huawei.com Signed-off-by: Xu Kuohai xukuohai@huawei.com Signed-off-by: Yang Jihong yangjihong@huawei.com --- include/uapi/linux/bpf.h | 11 ++++++++ net/core/filter.c | 50 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 11 ++++++++ 3 files changed, 72 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0a3e04249999..dd54acedc646 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4050,6 +4050,12 @@ union bpf_attr { * **-ENOENT** if symbol is not found. * * **-EPERM** if caller does not have permission to obtain kernel address. + * + * int bpf_update_tcp_seq(struct xdp_buff *ctx, struct bpf_sock_tuple *tuple, u32 len, u32 netns_id, u64 flags) + * Description + * Update tcp seq + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4235,6 +4241,7 @@ union bpf_attr { FN(btf_find_by_name_kind), \ FN(sys_close), \ FN(kallsyms_lookup_name), \ + FN(update_tcp_seq), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -4583,6 +4590,10 @@ struct bpf_sock_tuple { __be16 dport; } ipv6; }; + + __be32 seq; + __be32 delta; + __be32 ack_seq; };
struct bpf_xdp_sock { diff --git a/net/core/filter.c b/net/core/filter.c index 4d2290004325..3065b103f65e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6239,6 +6239,54 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = { .arg5_type = ARG_ANYTHING, };
+/* If we update tp->rcv_nxt, also update tp->bytes_received */ +static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) +{ + u32 delta = seq - tp->rcv_nxt; + + sock_owned_by_me((struct sock *)tp); + tp->bytes_received += delta; + WRITE_ONCE(tp->rcv_nxt, seq); +} + +BPF_CALL_5(bpf_xdp_update_tcp_seq, struct xdp_buff *, ctx, + struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags) +{ + struct net *caller_net = dev_net(ctx->rxq->dev); + int ifindex = ctx->rxq->dev->ifindex; + struct sock *sk; + struct tcp_sock *tp; + + sk = __bpf_sk_lookup(NULL, tuple, len, caller_net, + ifindex, IPPROTO_TCP, netns_id, + flags); + if (!sk) + return -1; + + tp = tcp_sk(sk); + tcp_rcv_nxt_update(tp, tuple->seq + tuple->delta); + + WRITE_ONCE(tp->snd_nxt, tuple->ack_seq); + WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); + WRITE_ONCE(tp->bytes_sent, tuple->ack_seq); + WRITE_ONCE(tp->bytes_acked, tuple->ack_seq); + WRITE_ONCE(tp->write_seq, tuple->ack_seq); + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_update_tcp_seq_proto = { + .func = bpf_xdp_update_tcp_seq, + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { @@ -7317,6 +7365,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_sk_lookup_udp_proto; case BPF_FUNC_sk_lookup_tcp: return &bpf_xdp_sk_lookup_tcp_proto; + case BPF_FUNC_update_tcp_seq: + return &bpf_xdp_update_tcp_seq_proto; case BPF_FUNC_sk_release: return &bpf_sk_release_proto; case BPF_FUNC_skc_lookup_tcp: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 82cfa82231b5..4f86c2187ada 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h 
@@ -4760,6 +4760,12 @@ union bpf_attr { * **-ENOENT** if symbol is not found. * * **-EPERM** if caller does not have permission to obtain kernel address. + * + * int bpf_update_tcp_seq(struct xdp_buff *ctx, struct bpf_sock_tuple *tuple, u32 len, u32 netns_id, u64 flags) + * Description + * Update tcp seq + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4945,6 +4951,7 @@ union bpf_attr { FN(btf_find_by_name_kind), \ FN(sys_close), \ FN(kallsyms_lookup_name), \ + FN(update_tcp_seq), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper @@ -5292,6 +5299,10 @@ struct bpf_sock_tuple { __be16 dport; } ipv6; }; + + __be32 seq; + __be32 delta; + __be32 ack_seq; };
struct bpf_xdp_sock {
From: Xu Kuohai xukuohai@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: N/A
--------------------------------
Add sample helpers to store bytes to an XDP buffer and to load bytes from an XDP buffer.
Signed-off-by: Xu Kuohai xukuohai@huawei.com --- include/uapi/linux/bpf.h | 16 ++++++++++++ net/core/filter.c | 48 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 16 ++++++++++++ 3 files changed, 80 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index dd54acedc646..db585d960d64 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4056,6 +4056,20 @@ union bpf_attr { * Update tcp seq * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_store_bytes(struct xdp_buff *ctx, u32 offset, const void *from, u32 len) + * Description + * store *len* bytes from address *from* into xdp buffer *ctx*, at + * *offset* + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_load_bytes(struct xdp_buff *ctx, u32 offset, void *to, u32 len) + * Description + * load *len* bytes to address *to* from xdp buffer *ctx*, at + * *offset* + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4242,6 +4256,8 @@ union bpf_attr { FN(sys_close), \ FN(kallsyms_lookup_name), \ FN(update_tcp_seq), \ + FN(xdp_store_bytes), \ + FN(xdp_load_bytes), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/net/core/filter.c b/net/core/filter.c index 3065b103f65e..2750e94b025d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6287,6 +6287,50 @@ static const struct bpf_func_proto bpf_xdp_update_tcp_seq_proto = { .arg5_type = ARG_ANYTHING, };
+BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset, + const void *, from, u32, len) +{ + if (unlikely(offset > xdp->data_end - xdp->data || + len > xdp->data_end - xdp->data - offset)) + return -EINVAL; + + memmove(xdp->data + offset, from, len); + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_store_bytes_proto = { + .func = bpf_xdp_store_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + +BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, + void *, to, u32, len) +{ + if (unlikely(offset > xdp->data_end - xdp->data || + len > xdp->data_end - xdp->data - offset)) + return -EINVAL; + + memmove(to, xdp->data + offset, len); + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_load_bytes_proto = { + .func = bpf_xdp_load_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx, struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags) { @@ -7376,6 +7420,10 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_tcp_gen_syncookie: return &bpf_tcp_gen_syncookie_proto; #endif + case BPF_FUNC_xdp_store_bytes: + return &bpf_xdp_store_bytes_proto; + case BPF_FUNC_xdp_load_bytes: + return &bpf_xdp_load_bytes_proto; default: return bpf_sk_base_func_proto(func_id); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4f86c2187ada..57b927e99092 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4766,6 +4766,20 @@ union bpf_attr { * Update tcp seq * Return * 0 on success, or a negative error in case of failure. 
+ * + * int bpf_xdp_store_bytes(struct xdp_buff *ctx, u32 offset, const void *from, u32 len) + * Description + * store *len* bytes from address *from* into xdp buffer *ctx*, at + * *offset* + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_xdp_load_bytes(struct xdp_buff *ctx, u32 offset, void *to, u32 len) + * Description + * load *len* bytes to address *to* from xdp buffer *ctx*, at + * *offset* + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4952,6 +4966,8 @@ union bpf_attr { FN(sys_close), \ FN(kallsyms_lookup_name), \ FN(update_tcp_seq), \ + FN(xdp_store_bytes), \ + FN(xdp_load_bytes), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
From: Xu Kuohai xukuohai@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: N/A
--------------------------------
BMC is an in-kernel key-value cache implemented in BPF and proposed by paper [1]. The paper discussed BMC for memcached, obtaining at least 6x performance speedup.
This patch implements a sample BMC for Redis.
See [2] for details on how to build samples/bpf.
Output files: samples/bpf/bmctool samples/bpf/bmc/bpf.o
Sample usage: bmctool prog load -p 6379 ./bmc/bpf.o # load bmc bpf prog and attach it # to sockets with listen port 6379
bmctool stat # dump bmc status
bmctool prog unload # detach and unload bmc prog
Tested with the following command:
./redis-benchmark -c 20 -r 1 -n 1000 -t get -h 192.168.4.101 -d 102
Without BMC: throughput summary: 41666.67 requests per second latency summary (msec): avg min p50 p95 p99 max 0.441 0.176 0.415 0.631 1.455 1.815
With BMC (100% HIT): throughput summary: 66666.67 requests per second latency summary (msec): avg min p50 p95 p99 max 0.223 0.096 0.215 0.311 0.743 0.759
BMC Stat: Total GET Requests: 1000 Hit GET Requests: 1000 (100.00%) Dropped GET Requests: 0 (0.00%) Total SET Requests: 1 Hit SET Requests: 1 (100.00%) Dropped SET Requests: 0 (0.00%)
[1] https://www.usenix.org/conference/nsdi21/presentation/ghigoff [2] https://www.kernel.org/doc/readme/samples-bpf-README.rst
Signed-off-by: Xu Kuohai xukuohai@huawei.com Signed-off-by: Yang Jihong yangjihong@huawei.com Signed-off-by: He Fengqing hefengqing@huawei.com (original demo) --- samples/bpf/Makefile | 3 + samples/bpf/bmc/bpf.c | 485 +++++++++++++++++++++++++ samples/bpf/bmc/common.h | 21 ++ samples/bpf/bmc/tool.c | 763 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 1272 insertions(+) create mode 100644 samples/bpf/bmc/bpf.c create mode 100644 samples/bpf/bmc/common.h create mode 100644 samples/bpf/bmc/tool.c
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 1d92e87565ad..e7c5cf5f9b8e 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -57,6 +57,7 @@ tprogs-y += hbm tprogs-y += sched_preempt tprogs-y += sched_select_core tprogs-y += sched_pick_task +tprogs-y += bmctool
# Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -117,6 +118,7 @@ hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) sched_preempt-objs := sched_preempt_user.o sched_select_core-objs := sched_select_core_user.o sched_pick_task-objs := sched_pick_task_user.o +bmctool-objs := bmc/tool.o
# Tell kbuild to always build the programs always-y := $(tprogs-y) @@ -181,6 +183,7 @@ always-y += xdpsock_kern.o always-y += sched_preempt_kern.o always-y += sched_select_core_kern.o always-y += sched_pick_task_kern.o +always-y += bmc/bpf.o
ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux diff --git a/samples/bpf/bmc/bpf.c b/samples/bpf/bmc/bpf.c new file mode 100644 index 000000000000..154bc5665446 --- /dev/null +++ b/samples/bpf/bmc/bpf.c @@ -0,0 +1,485 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2022-2022. All rights reserved. + * + * Description: BPF program to accelerate Redis. The idea is to add a kernel + * cache for Redis data. When new Redis request is received, the kernel cache + * is checked, and if the requested data is found in the cache, a Redis reply + * message is constructed and sent back directly. + */ + +#include <uapi/linux/in.h> +#include <uapi/linux/if_ether.h> +#include <uapi/linux/ip.h> +#include <uapi/linux/tcp.h> +#include <uapi/linux/bpf.h> +#include <uapi/linux/pkt_cls.h> + +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#include "common.h" + +#define BMC_MAX_REDIS_KEY_LEN 64 +#define BMC_MAX_REDIS_VALUE_LEN 128 + +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); + __uint(max_entries, 16); +} bmc_ports SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); + __uint(max_entries, 1); +} bmc_interface SEC(".maps"); + +struct redis_key { + u32 len; + /* encoded in redis format */ + u8 data[BMC_MAX_REDIS_KEY_LEN + 16]; +}; + +struct redis_value { + u32 len; + /* encoded in redis format */ + u8 data[BMC_MAX_REDIS_VALUE_LEN + 16]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(key_size, sizeof(struct redis_key)); + __uint(value_size, sizeof(struct redis_value)); + __uint(max_entries, 10000); +} bmc_storage SEC(".maps"); + +struct redis_ctx { + struct redis_key key; + struct redis_value value; + u32 offset; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + 
__uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(struct redis_ctx)); + __uint(max_entries, 1); +} ctxmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(struct redis_bmc_stat)); + __uint(max_entries, 1); +} bmc_stats SEC(".maps"); + +static inline struct redis_ctx *get_ctx(void) +{ + u32 key = 0; + + return bpf_map_lookup_elem(&ctxmap, &key); +} + +static inline struct redis_bmc_stat *get_stat(void) +{ + u32 key = 0; + + return bpf_map_lookup_elem(&bmc_stats, &key); +} + +static bool is_bmc_port(u32 port) +{ + u32 *val = bpf_map_lookup_elem(&bmc_ports, &port); + + return val != NULL && *val != 0; +} + +static inline void compute_ip_checksum(struct iphdr *ip) +{ + u32 csum = 0; + u16 *next_ip_u16 = (u16 *)ip; + + ip->check = 0; + +#pragma clang loop unroll(full) + for (int i = 0; i < (sizeof(*ip) >> 1); i++) + csum += *next_ip_u16++; + + ip->check = ~((csum & 0xffff) + (csum >> 16)); +} + +static inline void compute_tcp_checksum(struct iphdr *ip, struct tcphdr *tcp, + __u16 len, void *data_end) +{ + struct tcp_psedu_head { + __be32 saddr; + __be32 daddr; + __u8 zero; + __u8 proto; + __u16 tcplen; + }; + struct tcp_psedu_head psedu; + char *tail = NULL; + char left_over[2] = {0}; + + psedu.saddr = ip->saddr; + psedu.daddr = ip->daddr; + psedu.zero = 0; + psedu.proto = 6; + psedu.tcplen = bpf_htons(len); + + tcp->check = 0; + + u32 csum = 0; + u16 *next_u16 = (u16 *)&psedu; + unsigned int i; + +#pragma clang loop unroll(full) + for (i = 0; i < (sizeof(struct tcp_psedu_head) >> 1); i++) + csum += *next_u16++; + + next_u16 = (u16 *)tcp; + for (i = 0; i < 1024 && (i < len / 2); i++) { + if (next_u16 + 1 > data_end) + break; + csum += *next_u16++; + } + + if (len % 2 == 1) { + tail = (char *)next_u16; + if (tail < data_end) + left_over[0] = *tail; + csum += *(unsigned short *)left_over; + } + csum = (csum >> 16) + (csum & 0xffff); /* add in accumulated carries */ + csum += csum >> 
16; /* add potential last carry */ + + tcp->check = (0xffff & ~csum); +} + +#define extract_kvdata(field, size, kv_data, kv_len) \ +do { \ + kv_data = payload; \ + kv_len = 0; \ + \ + if (payload + 1 > data_end || payload[0] != '$') \ + return XDP_PASS; \ + \ + payload++; \ + if (payload < data_end && payload[0] >= '0' && payload[0] <= '9') { \ + kv_len = kv_len * 10 + (payload[0] - '0'); \ + payload++; \ + } \ + \ + if (payload < data_end && payload[0] >= '0' && payload[0] <= '9') { \ + kv_len = kv_len * 10 + (payload[0] - '0'); \ + payload++; \ + } \ + \ + if (payload < data_end && payload[0] >= '0' && payload[0] <= '9') { \ + kv_len = kv_len * 10 + (payload[0] - '0'); \ + payload++; \ + } \ + \ + if (payload < data_end && payload[0] >= '0' && payload[0] <= '9') { \ + kv_len = kv_len * 10 + (payload[0] - '0'); \ + payload++; \ + } \ + \ + if (payload + 2 > data_end || payload[0] != '\r' || payload[1] != '\n') \ + return XDP_PASS; \ + \ + payload += 2; \ + \ + if (kv_len == 0 || kv_len > size) \ + return XDP_PASS; \ + \ + payload += kv_len + 2; \ + kv_len = payload - kv_data; \ + \ + if (kv_len > sizeof(ctx->field.data)) \ + return XDP_PASS; \ + \ + bpf_xdp_load_bytes(xdp, kv_data - data, ctx->field.data, kv_len); \ + ctx->field.len = kv_len; \ +} while (0) + +#define adjust_xdp_tail(size, len) \ +do { \ + char *new_end; \ + \ + new_end = payload = (char *)thdr + thdr->doff * 4; \ + for (i = 0; i < size && i < len; i++) \ + new_end++; \ + \ + if (new_end > data_end) \ + err = bpf_xdp_adjust_tail(xdp, new_end - data_end); \ + else if (new_end < data_end) \ + err = bpf_xdp_adjust_tail(xdp, -(data_end - new_end)); \ + \ + if (err) \ + return XDP_PASS; \ +} while (0) + +#define sync_tcp_seq(len, ndrop) \ +do { \ + struct bpf_sock_tuple tuple; \ + \ + tuple.ipv4.saddr = ihdr->saddr; \ + tuple.ipv4.daddr = ihdr->daddr; \ + tuple.ipv4.sport = thdr->source; \ + tuple.ipv4.dport = thdr->dest; \ + \ + tuple.seq = __bpf_ntohl(thdr->seq); \ + tuple.delta = 
__bpf_ntohs(ihdr->tot_len) - ihlen - thlen; \ + tuple.ack_seq = __bpf_ntohs(thdr->ack_seq) + len; \ + \ + if (bpf_update_tcp_seq(xdp, &tuple, sizeof(tuple.ipv4), -1, 0)) { \ + ndrop++; \ + return XDP_DROP; \ + } \ +} while (0) + +#define build_reply_head(len) \ +do { \ + thdr->doff = 5; /* discard tcp options */ \ + port = thdr->source; \ + thdr->source = thdr->dest; \ + thdr->dest = port; \ + \ + seq = __bpf_ntohl(thdr->seq); \ + seq += __bpf_ntohs(ihdr->tot_len) - ihlen - thlen; \ + thdr->seq = thdr->ack_seq; \ + thdr->ack_seq = __bpf_ntohl(seq); \ + \ + ipaddr = ihdr->saddr; \ + ihdr->saddr = ihdr->daddr; \ + ihdr->daddr = ipaddr; \ + ihdr->tot_len = __bpf_htons(ihlen + thdr->doff * 4 + len); \ + \ + memcpy(macaddr, ehdr->h_source, ETH_ALEN); \ + memcpy(ehdr->h_source, ehdr->h_dest, ETH_ALEN); \ + memcpy(ehdr->h_dest, macaddr, ETH_ALEN); \ +} while (0) + +SEC("bmc/main") +int bmc_main(struct xdp_md *xdp) +{ + int err; + u32 klen; + u32 vlen; + unsigned int i; + unsigned int seq; + u8 macaddr[ETH_ALEN]; + __be32 ipaddr; + __le16 port; + char *data = (char *)(long)xdp->data; + char *data_end = (char *)(long)xdp->data_end; + struct ethhdr *ehdr = NULL; + struct iphdr *ihdr = NULL; + struct tcphdr *thdr = NULL; + unsigned int ihlen; + unsigned int thlen; + char *payload; + u32 offset; + int is_get = 0; + int expect_get = 0; + struct redis_ctx *ctx; + struct redis_bmc_stat *stat; + char *key_data; + char *value_data; + u32 key_len; + u32 value_len; + + ehdr = (struct ethhdr *)data; + if (ehdr + 1 > data_end) + return XDP_PASS; + + if (ehdr->h_proto != __bpf_constant_htons(ETH_P_IP)) + return XDP_PASS; + + ihdr = (struct iphdr *)(ehdr + 1); + if (ihdr + 1 > data_end) + return XDP_PASS; + + if (ihdr->ihl != 5 || ihdr->protocol != IPPROTO_TCP) + return XDP_PASS; + + ihlen = ihdr->ihl * 4; + + if (ihdr->frag_off & __bpf_htons(IP_MF | IP_OFFSET)) + return XDP_PASS; + + if (__bpf_htons(ihdr->tot_len) > ETH_DATA_LEN) + return XDP_PASS; + + thdr = (struct tcphdr *)(ihdr + 
1); + if (thdr + 1 > data_end) + return XDP_PASS; + + if (thdr->syn || thdr->fin || thdr->rst) + return XDP_PASS; + + if (!is_bmc_port(thdr->dest)) + return XDP_PASS; + + thlen = thdr->doff * 4; + payload = (void *)thdr + thlen; + + /* + * SET message format: + * "*3\r\n" // this is an array with 3 elements + * "$3\r\n" // the first element is a string with 3 characters + * "set\r\n" // the string is "set" + * "$5\r\n" // the second element is a string with 5 characters + * "key01\r\n" // the string is "key01" + * "$5\r\n" // the third element is a string with 5 characters + * "val01\r\n" // the string is "valu01" + * + * GET message format: + * "*2\r\n" // this is an array with 3 elements + * "$3\r\n" // the first element is a string with 3 characters + * "get\r\n" // the string is "get" + * "$5\r\n" // the second element is a string with 5 characters + * "key01\r\n" // the string is "key01" + */ + if (payload + 8 > data_end) + return XDP_PASS; + + if (payload[0] != '*' || (payload[1] != '2' && payload[1] != '3') || + payload[2] != '\r' || payload[3] != '\n' || payload[4] != '$' || + payload[5] != '3' || payload[6] != '\r' || payload[7] != '\n') + return XDP_PASS; + + expect_get = (payload[1] == '2'); + payload += 8; + + if (payload + 5 > data_end) + return XDP_PASS; + + switch (payload[0]) { + case 'g': + is_get = 1; + case 's': + if (payload[1] != 'e' || payload[2] != 't' || + payload[3] != '\r' || payload[4] != '\n') + return XDP_PASS; + break; + case 'G': + is_get = 1; + case 'S': + if (payload[1] != 'E' || payload[2] != 'T' || + payload[3] != '\r' || payload[4] != '\n') + return XDP_PASS; + break; + default: + return XDP_PASS; + } + + payload += 5; + + if (expect_get != is_get) + return XDP_PASS; + + ctx = get_ctx(); + if (!ctx) + return XDP_PASS; + + memset(ctx, 0, sizeof(*ctx)); + + stat = get_stat(); + if (!stat) + return XDP_PASS; + + extract_kvdata(key, BMC_MAX_REDIS_KEY_LEN, key_data, key_len); + + if (is_get) { + struct redis_value *val; + + 
stat->total_get_requests++; + + val = bpf_map_lookup_elem(&bmc_storage, &ctx->key); + if (!val || !val->len || val->len > sizeof(val->data)) + return XDP_PASS; + vlen = val->len; + + sync_tcp_seq(vlen, stat->drop_get_requests); + + build_reply_head(vlen); + + adjust_xdp_tail(BMC_MAX_REDIS_VALUE_LEN, vlen); + + data = (char *)(long)xdp->data; + data_end = (char *)(long)xdp->data_end; + + ihdr = (struct iphdr *)(data + sizeof(struct ethhdr)); + thdr = (struct tcphdr *)(ihdr + 1); + if (ihdr + 1 > data_end || thdr + 1 > data_end) + return XDP_PASS; + + offset = sizeof(*ehdr) + ihdr->ihl * 4 + thdr->doff * 4; + bpf_xdp_store_bytes(xdp, offset, val->data, vlen); + + compute_ip_checksum(ihdr); + + compute_tcp_checksum(ihdr, thdr, vlen + thdr->doff * 4, + data_end); + + stat->hit_get_requests++; + + return XDP_TX; + } else { + char reply[] = { '+', 'O', 'K', '\r', '\n'}; + + stat->total_set_requests++; + + /* make sure the stupid verifier will not reject the prog */ + payload = key_data; + for (i = 0; i < sizeof(ctx->key.data) && i < key_len; i++) + payload++; + + extract_kvdata(value, BMC_MAX_REDIS_VALUE_LEN, value_data, + value_len); + + err = bpf_map_update_elem(&bmc_storage, &ctx->key, + &ctx->value, BPF_ANY); + if (err) + return XDP_PASS; + + sync_tcp_seq(sizeof(reply), stat->drop_set_requests); + + build_reply_head(sizeof(reply)); + + adjust_xdp_tail(sizeof(reply), sizeof(reply)); + + data = (char *)(long)xdp->data; + data_end = (char *)(long)xdp->data_end; + + ihdr = (struct iphdr *)(data + sizeof(struct ethhdr)); + thdr = (struct tcphdr *)(ihdr + 1); + if (ihdr + 1 > data_end || thdr + 1 > data_end) + return XDP_PASS; + + offset = sizeof(*ehdr) + ihdr->ihl * 4 + thdr->doff * 4; + bpf_xdp_store_bytes(xdp, offset, reply, sizeof(reply)); + + compute_ip_checksum(ihdr); + + compute_tcp_checksum(ihdr, thdr, thdr->doff * 4 + sizeof(reply), + data_end); + + stat->hit_set_requests++; + + return XDP_TX; + } + + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; 
diff --git a/samples/bpf/bmc/common.h b/samples/bpf/bmc/common.h new file mode 100644 index 000000000000..51c8623ab4f8 --- /dev/null +++ b/samples/bpf/bmc/common.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) Huawei Technologies Co., Ltd. 2022-2022. All rights reserved. + * Description: common header for both user prog and bpf kernel prog + */ +#ifndef __REDIS_BMC_COMMON_H__ +#define __REDIS_BMC_COMMON_H__ + +#define REDIS_GET_PROG_INDEX 0 +#define REDIS_SET_PROG_INDEX 1 + +struct redis_bmc_stat { + __u64 total_get_requests; + __u64 hit_get_requests; + __u64 drop_get_requests; + __u64 total_set_requests; + __u64 hit_set_requests; + __u64 drop_set_requests; +}; + +#endif diff --git a/samples/bpf/bmc/tool.c b/samples/bpf/bmc/tool.c new file mode 100644 index 000000000000..f7889434bcd4 --- /dev/null +++ b/samples/bpf/bmc/tool.c @@ -0,0 +1,763 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2022-2022. All rights reserved. 
+ */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <errno.h> + +#include <linux/if_link.h> + +#include <unistd.h> +#include <net/if.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/select.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include <fcntl.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include "common.h" + +#define DEFAULT_CGROUP_PATH "/sys/fs/cgroup" +#define DEFAULT_REDIS_PORT 6379 + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +#endif + +#define IFINDEX_NUM 8 + +struct { + char *cgroup_path; + char *bpf_path; + int cgroup_fd; + int map_ports_fd; + int map_storage_fd; + int map_interface_fd; + int map_stats_fd; + int redis_xdp_main_prog_fd; + uint16_t listen_port; + unsigned int ifindex; +} bmc; + +struct bmc_prog_info { + const char *sec_name; + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + int *p_prog_fd; + int *p_attach_fd; + unsigned int attach_flags; + unsigned int is_xdp_main; + const char *pin_path; + struct bpf_program *prog; +}; + +struct bmc_map_info { + const char *map_name; + int *p_map_fd; + char *pin_path; + struct bpf_map *map; + bool is_stat_map; + bool is_interface_map; +}; + +static struct bmc_prog_info prog_infos[] = { + { + .sec_name = "bmc/main", + .prog_type = BPF_PROG_TYPE_XDP, + .p_prog_fd = &bmc.redis_xdp_main_prog_fd, + .attach_flags = XDP_FLAGS_DRV_MODE, // XDP_FLAGS_SKB_MODE + .is_xdp_main = 1, + .pin_path = "/sys/fs/bpf/bmc/prog_xdp_main" + } +}; + +static struct bmc_map_info map_infos[] = { + { + .map_name = "bmc_ports", + .p_map_fd = &bmc.map_ports_fd, + .pin_path = "/sys/fs/bpf/bmc/map_ports" + }, + { + .map_name = "bmc_storage", + .p_map_fd = &bmc.map_storage_fd, + .pin_path = "/sys/fs/bpf/bmc/map_storage" + }, + { + .map_name = "bmc_interface", + .p_map_fd = &bmc.map_interface_fd, + .pin_path = "/sys/fs/bpf/bmc/interface", + .is_interface_map = 
true, + }, + { + .map_name = "bmc_stats", + .p_map_fd = &bmc.map_stats_fd, + .pin_path = "/sys/fs/bpf/bmc/stats", + .is_stat_map = true, + }, +}; + +static int find_type_by_sec_name(const char *sec_name, + enum bpf_prog_type *p_prog_type, + enum bpf_attach_type *p_attach_type) +{ + int i; + + if (sec_name == NULL) { + fprintf(stderr, "sec_name is NULL\n"); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + if (!strcmp(prog_infos[i].sec_name, sec_name)) { + *p_prog_type = prog_infos[i].prog_type; + *p_attach_type = prog_infos[i].attach_type; + return 0; + } + } + + fprintf(stderr, "unknown prog %s\n", sec_name); + + return -1; +} + +static int set_prog_type(struct bpf_object *obj) +{ + const char *sec_name; + struct bpf_program *prog; + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + + bpf_object__for_each_program(prog, obj) { + sec_name = bpf_program__section_name(prog); + if (find_type_by_sec_name(sec_name, &prog_type, &attach_type)) + return -1; + bpf_program__set_type(prog, prog_type); + if (prog_type != BPF_PROG_TYPE_XDP) + bpf_program__set_expected_attach_type(prog, attach_type); + } + + return 0; +} + +static struct bpf_object *load_bpf_file(const char *bpf_file) +{ + int err; + char err_buf[256]; + struct bpf_object *obj; + + obj = bpf_object__open(bpf_file); + err = libbpf_get_error(obj); + if (err) { + libbpf_strerror(err, err_buf, sizeof(err_buf)); + fprintf(stderr, "unable to open bpf file %s : %s\n", bpf_file, + err_buf); + return NULL; + } + + if (set_prog_type(obj)) { + bpf_object__close(obj); + return NULL; + } + + err = bpf_object__load(obj); + if (err) { + fprintf(stderr, "load bpf object failed\n"); + bpf_object__close(obj); + return NULL; + } + + return obj; +} + +static int find_prog(struct bpf_object *obj, const char *sec_name, + struct bpf_program **p_prog, int *p_prog_fd) +{ + int fd; + struct bpf_program *prog; + + prog = bpf_object__find_program_by_title(obj, sec_name); + if (!prog) { + fprintf(stderr, 
"failed to find prog %s\n", sec_name); + return -1; + } + + fd = bpf_program__fd(prog); + if (fd < 0) { + fprintf(stderr, "failed to get fd of prog %s\n", sec_name); + return -1; + } + + *p_prog = prog; + *p_prog_fd = fd; + + return 0; +} + +static void unpin_progs(int n) +{ + int i; + + for (i = 0; i < n; i++) + bpf_program__unpin(prog_infos[i].prog, prog_infos[i].pin_path); +} + +static int find_progs(struct bpf_object *obj) +{ + int i; + struct bmc_prog_info *info; + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + + if (find_prog(obj, info->sec_name, &info->prog, info->p_prog_fd)) + goto error_find_prog; + + if (bpf_program__pin(info->prog, info->pin_path)) + goto error_find_prog; + } + + return 0; + +error_find_prog: + unpin_progs(i); + return -1; +} + +static int find_map(struct bpf_object *obj, const char *map_name, + struct bpf_map **p_map, int *p_map_fd) +{ + int fd; + struct bpf_map *map; + + map = bpf_object__find_map_by_name(obj, map_name); + if (!map) { + fprintf(stderr, "failed to find map %s\n", map_name); + return -1; + } + + fd = bpf_map__fd(map); + if (fd < 0) { + fprintf(stderr, "failed to get fd of map %s\n", map_name); + return -1; + } + + + *p_map = map; + *p_map_fd = fd; + + return 0; +} + +static void unpin_maps(int n) +{ + int i; + + for (i = 0; i < n; i++) + bpf_map__unpin(map_infos[i].map, map_infos[i].pin_path); +} + +static int find_maps(struct bpf_object *obj) +{ + int i; + __u32 key; + __u32 value; + int fd; + struct bmc_map_info *info; + + for (i = 0; i < ARRAY_SIZE(map_infos); i++) { + info = &map_infos[i]; + + if (find_map(obj, info->map_name, &info->map, info->p_map_fd)) + goto error_find_map; + + if (bpf_map__pin(info->map, info->pin_path)) { + fprintf(stderr, "failed to pin map %s to path %s\n", + info->map_name, info->pin_path); + goto error_find_map; + } + + if (info->is_interface_map) { + key = 0; + value = bmc.ifindex; + fd = bpf_map__fd(info->map); + bpf_map_update_elem(fd, &key, &value, 0); + } + 
} + + return 0; + +error_find_map: + unpin_maps(i); + return -1; +} + +static void detach_xdp_progs(unsigned int ifindex, __u32 flags) +{ + bpf_set_link_xdp_fd(ifindex, -1, flags); +} + +static void detach_progs(int n) +{ + int i; + struct bmc_prog_info *info; + + for (i = 0; i < n; i++) { + info = &prog_infos[i]; + if (info->is_xdp_main) + detach_xdp_progs(bmc.ifindex, info->attach_flags); + else if (info->prog_type != BPF_PROG_TYPE_XDP) + bpf_prog_detach(*info->p_prog_fd, info->attach_type); + } +} + +static int attach_xdp_prog(int prog_fd, __u32 flags) +{ + if (bmc.ifindex) { + if (bpf_set_link_xdp_fd(bmc.ifindex, prog_fd, flags)) { + fprintf(stderr, "failed to attach xdp prog\n"); + return -1; + } + } + return 0; +} + +static int attach_progs(struct bpf_object *obj) +{ + int i; + int err; + int prog_fd; + int attach_fd; + unsigned int flags; + enum bpf_attach_type type; + struct bmc_prog_info *info; + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + info = &prog_infos[i]; + prog_fd = *info->p_prog_fd; + flags = info->attach_flags; + + if (info->is_xdp_main) + err = attach_xdp_prog(prog_fd, flags); + else if (info->prog_type != BPF_PROG_TYPE_XDP && + info->p_attach_fd != NULL) { + attach_fd = *info->p_attach_fd; + type = info->attach_type; + err = bpf_prog_attach(prog_fd, attach_fd, type, flags); + } else + continue; + + if (err) { + fprintf(stderr, "attach prog %s failed!\n", + info->sec_name); + goto error_attach_prog; + } + } + + return 0; + +error_attach_prog: + detach_progs(i); + + return -1; +} + +static int add_bmc_port(void) +{ + int ret; + int map_fd = bmc.map_ports_fd; + uint16_t port = htons(bmc.listen_port); + uint32_t key = (uint32_t)port; + uint32_t value = 1; + + ret = bpf_map_update_elem(map_fd, &key, &value, 0); + if (ret) + fprintf(stderr, "failed to add port %u\n", port); + + return ret; +} + +static int setup_bpf(void) +{ + struct bpf_object *obj; + + bmc.cgroup_fd = open(bmc.cgroup_path, O_DIRECTORY, O_RDONLY); + if (bmc.cgroup_fd < 0) { + 
fprintf(stderr, "failed to open cgroup %s: %s\n", + bmc.cgroup_path, strerror(errno)); + return -1; + } + + obj = load_bpf_file(bmc.bpf_path); + if (!obj) + goto error_load_object; + + if (find_progs(obj)) + goto error_load_object; + + if (find_maps(obj)) + goto error_find_maps; + + if (attach_progs(obj)) + goto error_attach_progs; + + if (add_bmc_port()) + goto error_add_port; + + return 0; + +error_add_port: + detach_progs(ARRAY_SIZE(prog_infos)); +error_attach_progs: + unpin_maps(ARRAY_SIZE(map_infos)); +error_find_maps: + unpin_progs(ARRAY_SIZE(prog_infos)); +error_load_object: + bpf_object__close(obj); + close(bmc.cgroup_fd); + return -1; +} + +static int parse_load_args(int argc, char *argv[]) +{ + int opt; + int port; + const char *ifname = NULL; + + bmc.cgroup_path = DEFAULT_CGROUP_PATH; + bmc.listen_port = DEFAULT_REDIS_PORT; + bmc.ifindex = 0; + + while ((opt = getopt(argc, argv, "c:p:i:")) != -1) { + switch (opt) { + case 'c': + bmc.cgroup_path = optarg; + break; + case 'p': + port = atoi(optarg); + if (port <= 0 || port >= USHRT_MAX) { + fprintf(stderr, "invalid port: %s\n", optarg); + return -1; + } + bmc.listen_port = port; + break; + case 'i': + printf("interface: %s\n", optarg); + ifname = optarg; + bmc.ifindex = if_nametoindex(ifname); + break; + default: + fprintf(stderr, "unknown option %c\n", opt); + return -1; + } + } + + if (!bmc.ifindex) { + fprintf(stderr, "no netwrok interface found\n"); + return -1; + } + + if (optind >= argc) { + fprintf(stderr, "no bpf prog file found\n"); + return -1; + } + + bmc.bpf_path = argv[optind]; + + printf("bpf file: %s\n", bmc.bpf_path); + printf("cgroup path: %s\n", bmc.cgroup_path); + printf("listen port: %d\n", bmc.listen_port); + printf("interface: %s\n", ifname); + + return 0; +} + +struct cmd { + const char *name; + int (*func)(int argc, char *argv[]); +}; + +static int do_prog(int argc, char *argv[]); +static int do_stat(int argc, char *argv[]); + +static int do_prog_load(int argc, char *argv[]); 
+static int do_prog_unload(int argc, char *argv[]); + +static struct cmd main_cmds[] = { + { "prog", do_prog }, + { "stat", do_stat }, +}; + +static struct cmd prog_cmds[] = { + { "load", do_prog_load }, + { "unload", do_prog_unload }, +}; + +static char *elf_name; + +static int dispatch_cmd(struct cmd cmds[], int ncmd, int argc, + char *argv[], void (*help)(void)) +{ + int i; + int ret; + + if (argc <= 0) { + help(); + return -1; + } + + for (i = 0; i < ncmd; i++) { + if (!strcmp(argv[0], cmds[i].name)) { + ret = cmds[i].func(argc - 1, argv + 1); + if (ret == -2) { + help(); + ret = -1; + } + return ret; + } + } + + help(); + + return -1; +} + +static int do_prog_load(int argc, char *argv[]) +{ + if (parse_load_args(argc + 1, argv - 1) < 0) + return -2; + + if (setup_bpf()) + return -1; + + return 0; +} + +static int do_prog_unload(int argc, char *argv[]) +{ + int i; + int err; + int prog_fd; + int cgroup_fd; + int map_fd; + char *interface_map_path = NULL; + char *cgroup_path = DEFAULT_CGROUP_PATH; + __u32 ifindex; + __u32 key; + + if (argc > 1) + cgroup_path = argv[0]; + + cgroup_fd = open(cgroup_path, O_DIRECTORY, O_RDONLY); + if (cgroup_fd < 0) { + fprintf(stderr, "failed to open cgroup path: %s\n", + cgroup_path); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(map_infos); i++) { + if (map_infos[i].is_interface_map) { + interface_map_path = map_infos[i].pin_path; + break; + } + } + + if (!interface_map_path) { + fprintf(stderr, "no interface map found\n"); + return -1; + } + + map_fd = bpf_obj_get(interface_map_path); + if (map_fd < 0) { + fprintf(stderr, "failed to get map from %s\n", + interface_map_path); + return -1; + } + + key = 0; + err = bpf_map_lookup_elem(map_fd, &key, &ifindex); + close(map_fd); + if (err) { + fprintf(stderr, "lookup interface failed\n"); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(prog_infos); i++) { + if (prog_infos[i].attach_type == BPF_CGROUP_SOCK_OPS) { + prog_fd = bpf_obj_get(prog_infos[i].pin_path); + if (prog_fd >= 0) + 
bpf_prog_detach2(prog_fd, cgroup_fd, + BPF_CGROUP_SOCK_OPS); + } + + if (prog_infos[i].is_xdp_main) + detach_xdp_progs(ifindex, prog_infos[i].attach_flags); + + unlink(prog_infos[i].pin_path); + } + + for (i = 0; i < ARRAY_SIZE(map_infos); i++) + unlink(map_infos[i].pin_path); + + return 0; +} + +static void do_prog_help(void) +{ + fprintf(stderr, + "Usage: %s prog load [-c CGROUP_PATH] [-p LISTEN_PORT]" + " {-i INTERFACE} {BPF_FILE}\n" + " %s prog unload [CGROUP_PATH]\n", + elf_name, elf_name); +} + +static int do_prog(int argc, char *argv[]) +{ + return dispatch_cmd(prog_cmds, ARRAY_SIZE(prog_cmds), + argc, argv, do_prog_help); +} + +static int do_stat(int argc, char *argv[]) +{ + int i; + int fd; + int err; + int ncpu; + bool found = false; + struct bmc_map_info *info; + struct bpf_map_info map = {}; + struct redis_bmc_stat *percpu_stat; + struct redis_bmc_stat stat = {}; + __u32 len = sizeof(map); + __u32 key; + + ncpu = sysconf(_SC_NPROCESSORS_ONLN); + if (ncpu < 0) { + fprintf(stderr, "sysconf failed: %s\n", strerror(errno)); + return -1; + } + + percpu_stat = malloc(sizeof(struct redis_bmc_stat) * ncpu); + if (!percpu_stat) { + fprintf(stderr, "malloc percpu stat failed\n"); + return -1; + } + + for (i = 0; i < ARRAY_SIZE(map_infos); i++) { + info = &map_infos[i]; + if (info->is_stat_map) { + found = true; + break; + } + } + + if (!found) { + fprintf(stderr, "no stats map found\n"); + free(percpu_stat); + return -1; + } + + fd = bpf_obj_get(info->pin_path); + if (fd < 0) { + fprintf(stderr, "failed to open %s\n", + info->pin_path); + free(percpu_stat); + return -1; + } + + err = bpf_obj_get_info_by_fd(fd, &map, &len); + if (err) { + fprintf(stderr, "failed to get map info\n"); + err = -1; + goto out; + } + + if (map.type != BPF_MAP_TYPE_PERCPU_ARRAY) { + fprintf(stderr, "unexpected map type: %d\n", map.type); + err = -1; + goto out; + } + + if (map.key_size != sizeof(__u32)) { + fprintf(stderr, "unexpected map key_size: %u\n", map.key_size); + err = -1; + 
goto out; + } + + if (map.value_size != sizeof(struct redis_bmc_stat)) { + fprintf(stderr, "unexpected map key_size: %u\n", map.key_size); + err = -1; + goto out; + } + + key = 0; + err = bpf_map_lookup_elem(fd, &key, percpu_stat); + if (err) { + fprintf(stderr, "lookup cpu stat failed, cpu=%u\n", i); + err = -1; + goto out; + } + + for (int i = 0; i < ncpu; i++) { + stat.total_get_requests += percpu_stat[i].total_get_requests; + stat.hit_get_requests += percpu_stat[i].hit_get_requests; + stat.drop_get_requests += percpu_stat[i].drop_get_requests; + stat.total_set_requests += percpu_stat[i].total_set_requests; + stat.hit_set_requests += percpu_stat[i].hit_set_requests; + stat.drop_set_requests += percpu_stat[i].drop_set_requests; + } + + printf("Total GET Requests: %llu\n", stat.total_get_requests); + printf("Hit GET Requests: %llu (%.2f%%)\n", stat.hit_get_requests, + stat.total_get_requests == 0 ? 0 : + (double)stat.hit_get_requests / + (double)stat.total_get_requests * + 100); + printf("Dropped GET Requests: %llu (%.2lf%%)\n", stat.drop_get_requests, + stat.total_get_requests == 0 ? 0 : + (double)stat.drop_get_requests / + (double)stat.total_get_requests * + 100); + + printf("Total SET Requests: %llu\n", stat.total_set_requests); + printf("Hit SET Requests: %llu (%.2f%%)\n", stat.hit_set_requests, + stat.total_set_requests == 0 ? 0 : + (double)stat.hit_set_requests / + (double)stat.total_set_requests * + 100); + printf("Dropped SET Requests: %llu (%.2lf%%)\n", stat.drop_set_requests, + stat.total_set_requests == 0 ? 0 : + (double)stat.drop_set_requests / + (double)stat.total_set_requests * + 100); + +out: + close(fd); + free(percpu_stat); + + return err; +} + +static void do_main_help(void) +{ + fprintf(stderr, + "Usage: %s OBJECT { COMMAND | help }\n" + " OBJECT := { prog | stat }\n", + elf_name); +} + +int main(int argc, char *argv[]) +{ + elf_name = argv[0]; + + return dispatch_cmd(main_cmds, ARRAY_SIZE(main_cmds), + argc - 1, argv + 1, do_main_help); +}