From: Paolo Abeni pabeni@redhat.com
stable inclusion from stable-v5.10.117 commit abe35bf3be51482593076d516a680d79e5fbc8e1 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5L66B
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 8b796475fd7882663a870456466a4fb315cc1bd6 ]
Currently pedit tries to ensure that the accessed skb offset is writable via skb_unclone(). The action potentially allows touching any skb bytes, so it may end-up modifying shared data.
The above causes some sporadic MPTCP self-test failures, due to this code:
tc -n $ns2 filter add dev ns2eth$i egress \ protocol ip prio 1000 \ handle 42 fw \ action pedit munge offset 148 u8 invert \ pipe csum tcp \ index 100
The above modifies a data byte outside the skb head and the skb is a cloned one, carrying a TCP output packet.
This change addresses the issue by keeping track of a rough over-estimate highest skb offset accessed by the action and ensuring such offset is really writable.
Note that this may cause performance regressions in some scenarios, but hopefully pedit is not in the critical path.
Fixes: db2c24175d14 ("act_pedit: access skb->data safely") Acked-by: Mat Martineau mathew.j.martineau@linux.intel.com Tested-by: Geliang Tang geliang.tang@suse.com Signed-off-by: Paolo Abeni pabeni@redhat.com Acked-by: Jamal Hadi Salim jhs@mojatatu.com Link: https://lore.kernel.org/r/1fcf78e6679d0a287dd61bb0f04730ce33b3255d.165219462... Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Acked-by: Xie XiuQi xiexiuqi@huawei.com --- include/net/tc_act/tc_pedit.h | 1 + net/sched/act_pedit.c | 26 ++++++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index 748cf87a4d7e..3e02709a1df6 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -14,6 +14,7 @@ struct tcf_pedit { struct tc_action common; unsigned char tcfp_nkeys; unsigned char tcfp_flags; + u32 tcfp_off_max_hint; struct tc_pedit_key *tcfp_keys; struct tcf_pedit_key_ex *tcfp_keys_ex; }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b45304446e13..90510298b32a 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -149,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *pattr; struct tcf_pedit *p; int ret = 0, err; - int ksize; + int i, ksize; u32 index;
if (!nla) { @@ -228,6 +228,18 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, p->tcfp_nkeys = parm->nkeys; } memcpy(p->tcfp_keys, parm->keys, ksize); + p->tcfp_off_max_hint = 0; + for (i = 0; i < p->tcfp_nkeys; ++i) { + u32 cur = p->tcfp_keys[i].off; + + /* The AT option can read a single byte, we can bound the actual + * value with uchar max. + */ + cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift; + + /* Each key touches 4 bytes starting from the computed offset */ + p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4); + }
p->tcfp_flags = parm->flags; goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -308,13 +320,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); + u32 max_offset; int i;
- if (skb_unclone(skb, GFP_ATOMIC)) - return p->tcf_action; - spin_lock(&p->tcf_lock);
+ max_offset = (skb_transport_header_was_set(skb) ? + skb_transport_offset(skb) : + skb_network_offset(skb)) + + p->tcfp_off_max_hint; + if (skb_ensure_writable(skb, min(skb->len, max_offset))) + goto unlock; + tcf_lastuse_update(&p->tcf_tm);
if (p->tcfp_nkeys > 0) { @@ -403,6 +420,7 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, p->tcf_qstats.overlimits++; done: bstats_update(&p->tcf_bstats, skb); +unlock: spin_unlock(&p->tcf_lock); return p->tcf_action; }