From: Xu Kuohai <xukuohai@huawei.com>
hulk inclusion
category: feature
bugzilla: N/A
--------------------------------
In order to process TCP packets with BPF/XDP, it is necessary to keep the
TCP seq and ack numbers synchronized between the kernel network stack and
the bpf prog. This patch introduces a sample helper to do the sync.
Note that this helper is only intended for the samples; retransmission and
congestion control are not supported.
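For illustration only (not part of this patch), an XDP prog could use the
helper roughly as below. The hand-rolled wrapper and the placeholder tuple
values are assumptions, since samples typically declare out-of-tree helpers
by hand; only BPF_FUNC_update_tcp_seq and the new bpf_sock_tuple fields
come from this patch:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Out-of-tree helper: declare the wrapper by hand, using the id added
 * to __BPF_FUNC_MAPPER below.
 */
static int (*bpf_update_tcp_seq)(void *ctx, struct bpf_sock_tuple *tuple,
                                 __u32 len, __u32 netns_id, __u64 flags) =
        (void *)BPF_FUNC_update_tcp_seq;

SEC("xdp")
int sync_tcp_seq(struct xdp_md *ctx)
{
        struct bpf_sock_tuple tuple = {};

        /* Fill the 4-tuple from the parsed IP/TCP headers (omitted here),
         * then pass the seq/ack values the prog has consumed so that the
         * kernel socket is kept in sync.
         */
        tuple.seq     = 0;      /* seq of the segment handled by the prog */
        tuple.delta   = 0;      /* payload bytes consumed by the prog */
        tuple.ack_seq = 0;      /* ack the prog sent back to the peer */

        if (bpf_update_tcp_seq(ctx, &tuple, sizeof(tuple.ipv4),
                               BPF_F_CURRENT_NETNS, 0))
                return XDP_DROP;

        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";

The lookup key is the usual 4-tuple; seq, delta and ack_seq are the extra
fields added to struct bpf_sock_tuple by this patch.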
Signed-off-by: He Fengqing <hefengqing@huawei.com>
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Yang Jihong <yangjihong@huawei.com>
---
 include/uapi/linux/bpf.h       | 11 ++++++++
 net/core/filter.c              | 50 ++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 11 ++++++++
 3 files changed, 72 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d5fbbc28b6a0..42eb2d52d939 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3777,6 +3777,12 @@ union bpf_attr {
  *              to be enabled.
  *      Return
  *              1 if the sched entity belongs to a cgroup, 0 otherwise.
+ *
+ * int bpf_update_tcp_seq(struct xdp_buff *ctx, struct bpf_sock_tuple *tuple, u32 len, u32 netns_id, u64 flags)
+ *      Description
+ *              Update tcp seq
+ *      Return
+ *              0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                    \
@@ -3940,6 +3946,7 @@ union bpf_attr {
         FN(sched_entity_to_tgidpid),   \
         FN(sched_entity_to_cgrpid),    \
         FN(sched_entity_belongs_to_cgrp),      \
+        FN(update_tcp_seq),            \
         /* */

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4288,6 +4295,10 @@ struct bpf_sock_tuple {
                        __be16 dport;
                } ipv6;
        };
+
+       __be32 seq;
+       __be32 delta;
+       __be32 ack_seq;
 };

 struct bpf_xdp_sock {
diff --git a/net/core/filter.c b/net/core/filter.c
index 933fdf6e6a90..750f03e8a454 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6239,6 +6239,54 @@ static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
        .arg5_type      = ARG_ANYTHING,
 };

+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+       u32 delta = seq - tp->rcv_nxt;
+
+       sock_owned_by_me((struct sock *)tp);
+       tp->bytes_received += delta;
+       WRITE_ONCE(tp->rcv_nxt, seq);
+}
+
+BPF_CALL_5(bpf_xdp_update_tcp_seq, struct xdp_buff *, ctx,
+          struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
+{
+       struct net *caller_net = dev_net(ctx->rxq->dev);
+       int ifindex = ctx->rxq->dev->ifindex;
+       struct sock *sk;
+       struct tcp_sock *tp;
+
+       sk = __bpf_sk_lookup(NULL, tuple, len, caller_net,
+                            ifindex, IPPROTO_TCP, netns_id,
+                            flags);
+       if (!sk)
+               return -1;
+
+       tp = tcp_sk(sk);
+       tcp_rcv_nxt_update(tp, tuple->seq + tuple->delta);
+
+       WRITE_ONCE(tp->snd_nxt, tuple->ack_seq);
+       WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
+       WRITE_ONCE(tp->bytes_sent, tuple->ack_seq);
+       WRITE_ONCE(tp->bytes_acked, tuple->ack_seq);
+       WRITE_ONCE(tp->write_seq, tuple->ack_seq);
+
+       return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_update_tcp_seq_proto = {
+       .func           = bpf_xdp_update_tcp_seq,
+       .gpl_only       = false,
+       .pkt_access     = true,
+       .ret_type       = RET_INTEGER,
+       .arg1_type      = ARG_PTR_TO_CTX,
+       .arg2_type      = ARG_PTR_TO_MEM,
+       .arg3_type      = ARG_CONST_SIZE,
+       .arg4_type      = ARG_ANYTHING,
+       .arg5_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
           struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
 {
@@ -7317,6 +7365,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
                return &bpf_xdp_sk_lookup_udp_proto;
        case BPF_FUNC_sk_lookup_tcp:
                return &bpf_xdp_sk_lookup_tcp_proto;
+       case BPF_FUNC_update_tcp_seq:
+               return &bpf_xdp_update_tcp_seq_proto;
        case BPF_FUNC_sk_release:
                return &bpf_sk_release_proto;
        case BPF_FUNC_skc_lookup_tcp:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b2a0b189b797..5b4972559ea6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3777,6 +3777,12 @@ union bpf_attr {
  *              to be enabled.
  *      Return
  *              1 if the sched entity belongs to a cgroup, 0 otherwise.
+ *
+ * int bpf_update_tcp_seq(struct xdp_buff *ctx, struct bpf_sock_tuple *tuple, u32 len, u32 netns_id, u64 flags)
+ *      Description
+ *              Update tcp seq
+ *      Return
+ *              0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)          \
         FN(unspec),                    \
@@ -3940,6 +3946,7 @@ union bpf_attr {
         FN(sched_entity_to_tgidpid),   \
         FN(sched_entity_to_cgrpid),    \
         FN(sched_entity_belongs_to_cgrp),      \
+        FN(update_tcp_seq),            \
         /* */

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4287,6 +4294,10 @@ struct bpf_sock_tuple {
                        __be16 dport;
                } ipv6;
        };
+
+       __be32 seq;
+       __be32 delta;
+       __be32 ack_seq;
 };

 struct bpf_xdp_sock {