From: Wang Yufen wangyufen@huawei.com
hulk inclusion category: feature bugzilla: NA CVE: NA
-------------------------------------------------
Add new tcp COMP option to SYN and SYN-ACK when tcp COMP is enabled. connection compress payload only when both side support it.
For KABI compatible, smc_ok bit in struct tcp_options_received is reused for COMP option, so this config depends !CONFIG_SMC.
Signed-off-by: Wang Yufen wangyufen@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/tcp.h | 2 +- include/net/inet_sock.h | 3 ++- include/net/sock.h | 1 + include/net/tcp.h | 12 +++++++++ net/ipv4/Kconfig | 1 + net/ipv4/Makefile | 1 + net/ipv4/syncookies.c | 2 ++ net/ipv4/tcp.c | 4 +++ net/ipv4/tcp_comp.c | 13 ++++++++++ net/ipv4/tcp_input.c | 44 +++++++++++++++++++++++++++------ net/ipv4/tcp_minisocks.c | 3 +++ net/ipv4/tcp_output.c | 53 ++++++++++++++++++++++++++++++++++++++++ net/ipv6/syncookies.c | 2 ++ 13 files changed, 132 insertions(+), 9 deletions(-) create mode 100644 net/ipv4/tcp_comp.c
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1192f1e76015f..0ea3a31cc670e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -110,7 +110,7 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = 0; rx_opt->wscale_ok = rx_opt->snd_wscale = 0; -#if IS_ENABLED(CONFIG_SMC) +#if IS_ENABLED(CONFIG_SMC) || IS_ENABLED(CONFIG_TCP_COMP) rx_opt->smc_ok = 0; #endif } diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index e8eef85006aa5..d5f309875338b 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -91,7 +91,8 @@ struct inet_request_sock { ecn_ok : 1, acked : 1, no_srccheck: 1, - smc_ok : 1; + smc_ok : 1, + comp_ok : 1; u32 ir_mark; union { struct ip_options_rcu __rcu *ireq_opt; diff --git a/include/net/sock.h b/include/net/sock.h index 920094bb75920..2c4e0800e2dc6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -833,6 +833,7 @@ enum sock_flags { SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */ SOCK_RCU_FREE, /* wait rcu grace period in sk_destruct() */ SOCK_TXTIME, + SOCK_COMP, };
#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) diff --git a/include/net/tcp.h b/include/net/tcp.h index a9a0db9bef5ea..a6191053eff28 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -192,6 +192,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); */ #define TCPOPT_FASTOPEN_MAGIC 0xF989 #define TCPOPT_SMC_MAGIC 0xE2D4C3D9 +#define TCPOPT_COMP_MAGIC 0x7954
/* * TCP option lengths @@ -205,6 +206,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TCPOLEN_FASTOPEN_BASE 2 #define TCPOLEN_EXP_FASTOPEN_BASE 4 #define TCPOLEN_EXP_SMC_BASE 6 +#define TCPOLEN_EXP_COMP_BASE 4
/* But this is what stacks really send out. */ #define TCPOLEN_TSTAMP_ALIGNED 12 @@ -2208,4 +2210,14 @@ void clean_acked_data_disable(struct inet_connection_sock *icsk);
#endif
+#if IS_ENABLED(CONFIG_TCP_COMP) +extern struct static_key_false tcp_have_comp; +bool tcp_syn_comp_enabled(const struct tcp_sock *tp); +#else +static inline bool tcp_syn_comp_enabled(const struct tcp_sock *tp) +{ + return false; +} +#endif + #endif /* _TCP_H */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 883a638648e1f..dc4a6d1568543 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -757,6 +757,7 @@ config TCP_MD5SIG
config TCP_COMP bool "TCP: Transport Layer Compression support" + depends on !SMC ---help--- Enable kernel payload compression support for TCP protocol. This allows payload compression handling of the TCP protocol to be done in-kernel. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 7446b98661d86..cba816a2fdf5a 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -64,6 +64,7 @@ obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o +obj-$(CONFIG_TCP_COMP) += tcp_comp.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o xfrm4_protocol.o diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 1a06850ef3cc5..8b2fdfc2d9992 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -356,6 +356,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq->tfo_listener = false; if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; + if (IS_ENABLED(CONFIG_TCP_COMP)) + ireq->comp_ok = 0;
ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 769e1f683471a..14e4872462a79 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -297,6 +297,10 @@ DEFINE_STATIC_KEY_FALSE(tcp_have_smc); EXPORT_SYMBOL(tcp_have_smc); #endif
+#if IS_ENABLED(CONFIG_TCP_COMP) +DEFINE_STATIC_KEY_FALSE(tcp_have_comp); +#endif + /* * Current number of TCP sockets. */ diff --git a/net/ipv4/tcp_comp.c b/net/ipv4/tcp_comp.c new file mode 100644 index 0000000000000..e2bf4fbb4c3ff --- /dev/null +++ b/net/ipv4/tcp_comp.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * TCP compression support + * + * Copyright(c) 2021 Huawei Technologies Co., Ltd + */ + +#include <net/tcp.h> + +bool tcp_syn_comp_enabled(const struct tcp_sock *tp) +{ + return true; +} diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index effba01eed84e..a058fb936c2a1 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3779,7 +3779,7 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, foc->exp = exp_opt; }
-static void smc_parse_options(const struct tcphdr *th, +static bool smc_parse_options(const struct tcphdr *th, struct tcp_options_received *opt_rx, const unsigned char *ptr, int opsize) @@ -3788,10 +3788,31 @@ static void smc_parse_options(const struct tcphdr *th, if (static_branch_unlikely(&tcp_have_smc)) { if (th->syn && !(opsize & 1) && opsize >= TCPOLEN_EXP_SMC_BASE && - get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) + get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) { opt_rx->smc_ok = 1; + return true; + } } #endif + return false; +} + +static bool tcp_parse_comp_option(const struct tcphdr *th, + struct tcp_options_received *opt_rx, + const unsigned char *ptr, + int opsize) +{ +#if IS_ENABLED(CONFIG_TCP_COMP) + if (static_branch_unlikely(&tcp_have_comp)) { + if (th->syn && !(opsize & 1) && + opsize >= TCPOLEN_EXP_COMP_BASE && + get_unaligned_be16(ptr) == TCPOPT_COMP_MAGIC) { + opt_rx->smc_ok = 1; + return true; + } + } +#endif + return false; }
/* Look for tcp options. Normally only called on SYN and SYNACK packets. @@ -3897,15 +3918,21 @@ void tcp_parse_options(const struct net *net, */ if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE && get_unaligned_be16(ptr) == - TCPOPT_FASTOPEN_MAGIC) + TCPOPT_FASTOPEN_MAGIC) { tcp_parse_fastopen_option(opsize - TCPOLEN_EXP_FASTOPEN_BASE, ptr + 2, th->syn, foc, true); - else - smc_parse_options(th, opt_rx, ptr, - opsize); - break; + break; + }
+ if (smc_parse_options(th, opt_rx, ptr, opsize)) + break; + + if (tcp_parse_comp_option(th, opt_rx, ptr, + opsize)) + break; + + break; } ptr += opsize-2; length -= opsize; @@ -6362,6 +6389,9 @@ static void tcp_openreq_init(struct request_sock *req, #if IS_ENABLED(CONFIG_SMC) ireq->smc_ok = rx_opt->smc_ok; #endif +#if IS_ENABLED(CONFIG_TCP_COMP) + ireq->comp_ok = rx_opt->smc_ok; +#endif }
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a20b393b45016..2b3be8f0e4170 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -529,6 +529,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->rcv_ssthresh = req->rsk_rcv_wnd; newtp->rcv_wnd = req->rsk_rcv_wnd; newtp->rx_opt.wscale_ok = ireq->wscale_ok; +#if IS_ENABLED(CONFIG_TCP_COMP) + newtp->rx_opt.smc_ok = ireq->comp_ok; +#endif if (newtp->rx_opt.wscale_ok) { newtp->rx_opt.snd_wscale = ireq->snd_wscale; newtp->rx_opt.rcv_wscale = ireq->rcv_wscale; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d55ee43cd1b8f..37da027074004 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -401,6 +401,7 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_WSCALE (1 << 3) #define OPTION_FAST_OPEN_COOKIE (1 << 8) #define OPTION_SMC (1 << 9) +#define OPTION_COMP (1 << 10)
static void smc_options_write(__be32 *ptr, u16 *options) { @@ -417,6 +418,19 @@ static void smc_options_write(__be32 *ptr, u16 *options) #endif }
+static void comp_options_write(__be32 *ptr, u16 *options) +{ +#if IS_ENABLED(CONFIG_TCP_COMP) + if (static_branch_unlikely(&tcp_have_comp)) { + if (unlikely(OPTION_COMP & *options)) { + *ptr++ = htonl((TCPOPT_EXP << 24) | + (TCPOLEN_EXP_COMP_BASE << 16) | + (TCPOPT_COMP_MAGIC)); + } + } +#endif +} + struct tcp_out_options { u16 options; /* bit field of OPTION_* */ u16 mss; /* 0 to disable */ @@ -536,6 +550,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, }
smc_options_write(ptr, &options); + + comp_options_write(ptr, &options); }
static void smc_set_option(const struct tcp_sock *tp, @@ -571,6 +587,39 @@ static void smc_set_option_cond(const struct tcp_sock *tp, #endif }
+static void comp_set_option(const struct tcp_sock *tp, + struct tcp_out_options *opts, + unsigned int *remaining) +{ +#if IS_ENABLED(CONFIG_TCP_COMP) + if (static_branch_unlikely(&tcp_have_comp)) { + if (tcp_syn_comp_enabled(tp)) { + if (*remaining >= TCPOLEN_EXP_COMP_BASE) { + opts->options |= OPTION_COMP; + *remaining -= TCPOLEN_EXP_COMP_BASE; + } + } + } +#endif +} + +static void comp_set_option_cond(const struct tcp_sock *tp, + const struct inet_request_sock *ireq, + struct tcp_out_options *opts, + unsigned int *remaining) +{ +#if IS_ENABLED(CONFIG_TCP_COMP) + if (static_branch_unlikely(&tcp_have_comp)) { + if (tcp_syn_comp_enabled(tp) && ireq->comp_ok) { + if (*remaining >= TCPOLEN_EXP_COMP_BASE) { + opts->options |= OPTION_COMP; + *remaining -= TCPOLEN_EXP_COMP_BASE; + } + } + } +#endif +} + /* Compute TCP options for SYN packets. This is not the final * network wire format yet. */ @@ -639,6 +688,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
smc_set_option(tp, opts, &remaining);
+ comp_set_option(tp, opts, &remaining); + return MAX_TCP_OPTION_SPACE - remaining; }
@@ -704,6 +755,8 @@ static unsigned int tcp_synack_options(const struct sock *sk,
smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);
+ comp_set_option_cond(tcp_sk(sk), ireq, opts, &remaining); + return MAX_TCP_OPTION_SPACE - remaining; }
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ec61b67a92be0..62a6148a305b3 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -219,6 +219,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) treq->txhash = net_tx_rndhash(); if (IS_ENABLED(CONFIG_SMC)) ireq->smc_ok = 0; + if (IS_ENABLED(CONFIG_TCP_COMP)) + ireq->comp_ok = 0;
/* * We need to lookup the dst_entry to get the correct window size.