From: Pavel Skripkin <paskripkin(a)gmail.com>
stable inclusion
from stable-v4.19.215
commit fbf150b16a3635634b7dfb7f229d8fcd643c6c51
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9RDEJ
CVE: CVE-2021-47482
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 6f68cd634856f8ca93bafd623ba5357e0f648c68 upstream.
Syzbot reported ODEBUG warning in batadv_nc_mesh_free(). The problem was
in wrong error handling in batadv_mesh_init().
Before this patch batadv_mesh_init() was calling batadv_mesh_free() in case
of any batadv_*_init() calls failure. This approach may work well, when
there is some kind of indicator, which can tell which parts of batadv are
initialized; but there isn't any.
All written above lead to cleaning up uninitialized fields. Even if we hide
ODEBUG warning by initializing bat_priv->nc.work, syzbot was able to hit
GPF in batadv_nc_purge_paths(), because hash pointer in still NULL. [1]
To fix these bugs we can unwind batadv_*_init() calls one by one.
It is good approach for 2 reasons: 1) It fixes bugs on error handling
path 2) It improves the performance, since we won't call unneeded
batadv_*_free() functions.
So, this patch makes all batadv_*_init() clean up all allocated memory
before returning with an error to no call correspoing batadv_*_free()
and open-codes batadv_mesh_free() with proper order to avoid touching
uninitialized fields.
Link: https://lore.kernel.org/netdev/000000000000c87fbd05cef6bcb0@google.com/ [1]
Reported-and-tested-by: syzbot+28b0702ada0bf7381f58(a)syzkaller.appspotmail.com
Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol")
Signed-off-by: Pavel Skripkin <paskripkin(a)gmail.com>
Acked-by: Sven Eckelmann <sven(a)narfation.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/batman-adv/bridge_loop_avoidance.c | 8 +++-
net/batman-adv/main.c | 56 ++++++++++++++++++--------
net/batman-adv/network-coding.c | 4 +-
net/batman-adv/translation-table.c | 4 +-
4 files changed, 52 insertions(+), 20 deletions(-)
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 85faf25c2912..73dff16a57ff 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1569,10 +1569,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
return 0;
bat_priv->bla.claim_hash = batadv_hash_new(128);
- bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.claim_hash)
+ return -ENOMEM;
- if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash)
+ bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.backbone_hash) {
+ batadv_hash_destroy(bat_priv->bla.claim_hash);
return -ENOMEM;
+ }
batadv_hash_set_lock_class(bat_priv->bla.claim_hash,
&batadv_claim_hash_lock_class_key);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 79b8a2d8793e..bba64b9b3668 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -187,29 +187,41 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
INIT_HLIST_HEAD(&bat_priv->tp_list);
- ret = batadv_v_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
-
ret = batadv_originator_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_orig;
+ }
ret = batadv_tt_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_tt;
+ }
+
+ ret = batadv_v_mesh_init(bat_priv);
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_v;
+ }
ret = batadv_bla_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_bla;
+ }
ret = batadv_dat_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_dat;
+ }
ret = batadv_nc_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_nc;
+ }
batadv_gw_init(bat_priv);
batadv_mcast_init(bat_priv);
@@ -219,8 +231,20 @@ int batadv_mesh_init(struct net_device *soft_iface)
return 0;
-err:
- batadv_mesh_free(soft_iface);
+err_nc:
+ batadv_dat_free(bat_priv);
+err_dat:
+ batadv_bla_free(bat_priv);
+err_bla:
+ batadv_v_mesh_free(bat_priv);
+err_v:
+ batadv_tt_free(bat_priv);
+err_tt:
+ batadv_originator_free(bat_priv);
+err_orig:
+ batadv_purge_outstanding_packets(bat_priv, NULL);
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
+
return ret;
}
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 34caf129a9bf..eca1ff4224ab 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -167,8 +167,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
&batadv_nc_coding_hash_lock_class_key);
bat_priv->nc.decoding_hash = batadv_hash_new(128);
- if (!bat_priv->nc.decoding_hash)
+ if (!bat_priv->nc.decoding_hash) {
+ batadv_hash_destroy(bat_priv->nc.coding_hash);
goto err;
+ }
batadv_hash_set_lock_class(bat_priv->nc.decoding_hash,
&batadv_nc_decoding_hash_lock_class_key);
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 9fa5389ea244..2b747c068702 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -4411,8 +4411,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
return ret;
ret = batadv_tt_global_init(bat_priv);
- if (ret < 0)
+ if (ret < 0) {
+ batadv_tt_local_table_free(bat_priv);
return ret;
+ }
batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1,
batadv_tt_tvlv_unicast_handler_v1,
--
2.17.1
From: Pavel Skripkin <paskripkin(a)gmail.com>
stable inclusion
from stable-v4.19.215
commit fbf150b16a3635634b7dfb7f229d8fcd643c6c51
category: bugfix
bugzilla: NA
CVE: CVE-2021-47482
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 6f68cd634856f8ca93bafd623ba5357e0f648c68 upstream.
Syzbot reported ODEBUG warning in batadv_nc_mesh_free(). The problem was
in wrong error handling in batadv_mesh_init().
Before this patch batadv_mesh_init() was calling batadv_mesh_free() in case
of any batadv_*_init() calls failure. This approach may work well, when
there is some kind of indicator, which can tell which parts of batadv are
initialized; but there isn't any.
All written above lead to cleaning up uninitialized fields. Even if we hide
ODEBUG warning by initializing bat_priv->nc.work, syzbot was able to hit
GPF in batadv_nc_purge_paths(), because hash pointer in still NULL. [1]
To fix these bugs we can unwind batadv_*_init() calls one by one.
It is good approach for 2 reasons: 1) It fixes bugs on error handling
path 2) It improves the performance, since we won't call unneeded
batadv_*_free() functions.
So, this patch makes all batadv_*_init() clean up all allocated memory
before returning with an error to no call correspoing batadv_*_free()
and open-codes batadv_mesh_free() with proper order to avoid touching
uninitialized fields.
Link: https://lore.kernel.org/netdev/000000000000c87fbd05cef6bcb0@google.com/ [1]
Reported-and-tested-by: syzbot+28b0702ada0bf7381f58(a)syzkaller.appspotmail.com
Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol")
Signed-off-by: Pavel Skripkin <paskripkin(a)gmail.com>
Acked-by: Sven Eckelmann <sven(a)narfation.org>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
---
net/batman-adv/bridge_loop_avoidance.c | 8 +++-
net/batman-adv/main.c | 56 ++++++++++++++++++--------
net/batman-adv/network-coding.c | 4 +-
net/batman-adv/translation-table.c | 4 +-
4 files changed, 52 insertions(+), 20 deletions(-)
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 85faf25c2912..73dff16a57ff 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1569,10 +1569,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
return 0;
bat_priv->bla.claim_hash = batadv_hash_new(128);
- bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.claim_hash)
+ return -ENOMEM;
- if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash)
+ bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.backbone_hash) {
+ batadv_hash_destroy(bat_priv->bla.claim_hash);
return -ENOMEM;
+ }
batadv_hash_set_lock_class(bat_priv->bla.claim_hash,
&batadv_claim_hash_lock_class_key);
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 79b8a2d8793e..bba64b9b3668 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -187,29 +187,41 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
INIT_HLIST_HEAD(&bat_priv->tp_list);
- ret = batadv_v_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
-
ret = batadv_originator_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_orig;
+ }
ret = batadv_tt_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_tt;
+ }
+
+ ret = batadv_v_mesh_init(bat_priv);
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_v;
+ }
ret = batadv_bla_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_bla;
+ }
ret = batadv_dat_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_dat;
+ }
ret = batadv_nc_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_nc;
+ }
batadv_gw_init(bat_priv);
batadv_mcast_init(bat_priv);
@@ -219,8 +231,20 @@ int batadv_mesh_init(struct net_device *soft_iface)
return 0;
-err:
- batadv_mesh_free(soft_iface);
+err_nc:
+ batadv_dat_free(bat_priv);
+err_dat:
+ batadv_bla_free(bat_priv);
+err_bla:
+ batadv_v_mesh_free(bat_priv);
+err_v:
+ batadv_tt_free(bat_priv);
+err_tt:
+ batadv_originator_free(bat_priv);
+err_orig:
+ batadv_purge_outstanding_packets(bat_priv, NULL);
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
+
return ret;
}
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 34caf129a9bf..eca1ff4224ab 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -167,8 +167,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
&batadv_nc_coding_hash_lock_class_key);
bat_priv->nc.decoding_hash = batadv_hash_new(128);
- if (!bat_priv->nc.decoding_hash)
+ if (!bat_priv->nc.decoding_hash) {
+ batadv_hash_destroy(bat_priv->nc.coding_hash);
goto err;
+ }
batadv_hash_set_lock_class(bat_priv->nc.decoding_hash,
&batadv_nc_decoding_hash_lock_class_key);
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 9fa5389ea244..2b747c068702 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -4411,8 +4411,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
return ret;
ret = batadv_tt_global_init(bat_priv);
- if (ret < 0)
+ if (ret < 0) {
+ batadv_tt_local_table_free(bat_priv);
return ret;
+ }
batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1,
batadv_tt_tvlv_unicast_handler_v1,
--
2.17.1
From: David Chen <david.chen(a)nutanix.com>
stable inclusion
from stable-v5.10.168
commit 0a626e27f984dfbe96bd8e4fd08f20a2ede3ea23
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9R4KT
CVE: CVE-2023-52739
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 462a8e08e0e6287e5ce13187257edbf24213ed03 upstream.
When we upgraded our kernel, we started seeing some page corruption like
the following consistently:
BUG: Bad page state in process ganesha.nfsd pfn:1304ca
page:0000000022261c55 refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 pfn:0x1304ca
flags: 0x17ffffc0000000()
raw: 0017ffffc0000000 ffff8a513ffd4c98 ffffeee24b35ec08 0000000000000000
raw: 0000000000000000 0000000000000001 00000000ffffff7f 0000000000000000
page dumped because: nonzero mapcount
CPU: 0 PID: 15567 Comm: ganesha.nfsd Kdump: loaded Tainted: P B O 5.10.158-1.nutanix.20221209.el7.x86_64 #1
Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016
Call Trace:
dump_stack+0x74/0x96
bad_page.cold+0x63/0x94
check_new_page_bad+0x6d/0x80
rmqueue+0x46e/0x970
get_page_from_freelist+0xcb/0x3f0
? _cond_resched+0x19/0x40
__alloc_pages_nodemask+0x164/0x300
alloc_pages_current+0x87/0xf0
skb_page_frag_refill+0x84/0x110
...
Sometimes, it would also show up as corruption in the free list pointer
and cause crashes.
After bisecting the issue, we found the issue started from commit
e320d3012d25 ("mm/page_alloc.c: fix freeing non-compound pages"):
if (put_page_testzero(page))
free_the_page(page, order);
else if (!PageHead(page))
while (order-- > 0)
free_the_page(page + (1 << order), order);
So the problem is the check PageHead is racy because at this point we
already dropped our reference to the page. So even if we came in with
compound page, the page can already be freed and PageHead can return
false and we will end up freeing all the tail pages causing double free.
Fixes: e320d3012d25 ("mm/page_alloc.c: fix freeing non-compound pages")
Link: https://lore.kernel.org/lkml/BYAPR02MB448855960A9656EEA81141FC94D99@BYAPR02…
Cc: Andrew Morton <akpm(a)linux-foundation.org>
Cc: stable(a)vger.kernel.org
Signed-off-by: Chunwei Chen <david.chen(a)nutanix.com>
Reviewed-by: Vlastimil Babka <vbabka(a)suse.cz>
Reviewed-by: Matthew Wilcox (Oracle) <willy(a)infradead.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Ze Zuo <zuoze1(a)huawei.com>
---
mm/page_alloc.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1d946043ee637..19eb551e21d75 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4877,9 +4877,12 @@ static inline void free_the_page(struct page *page, unsigned int order)
void __free_pages(struct page *page, unsigned int order)
{
+ /* get PageHead before we drop reference */
+ int head = PageHead(page);
+
if (put_page_testzero(page))
free_the_page(page, order);
- else if (!PageHead(page))
+ else if (!head)
while (order-- > 0)
free_the_page(page + (1 << order), order);
}
--
2.25.1
From: Eric Dumazet <edumazet(a)google.com>
stable inclusion
from stable-v4.19.312
commit 93f0133b9d589cc6e865f254ad9be3e9d8133f50
category: bugfix
bugzilla: 190050
CVE: CVE-2024-35910
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
[ Upstream commit 151c9c724d05d5b0dd8acd3e11cb69ef1f2dbada ]
We had various syzbot reports about tcp timers firing after
the corresponding netns has been dismantled.
Fortunately Josef Bacik could trigger the issue more often,
and could test a patch I wrote two years ago.
When TCP sockets are closed, we call inet_csk_clear_xmit_timers()
to 'stop' the timers.
inet_csk_clear_xmit_timers() can be called from any context,
including when socket lock is held.
This is the reason it uses sk_stop_timer(), aka del_timer().
This means that ongoing timers might finish much later.
For user sockets, this is fine because each running timer
holds a reference on the socket, and the user socket holds
a reference on the netns.
For kernel sockets, we risk that the netns is freed before
timer can complete, because kernel sockets do not hold
reference on the netns.
This patch adds inet_csk_clear_xmit_timers_sync() function
that using sk_stop_timer_sync() to make sure all timers
are terminated before the kernel socket is released.
Modules using kernel sockets close them in their netns exit()
handler.
Also add sock_not_owned_by_me() helper to get LOCKDEP
support : inet_csk_clear_xmit_timers_sync() must not be called
while socket lock is held.
It is very possible we can revert in the future commit
3a58f13a881e ("net: rds: acquire refcount on TCP sockets")
which attempted to solve the issue in rds only.
(net/smc/af_smc.c and net/mptcp/subflow.c have similar code)
We probably can remove the check_net() tests from
tcp_out_of_resources() and __tcp_close() in the future.
Reported-by: Josef Bacik <josef(a)toxicpanda.com>
Closes: https://lore.kernel.org/netdev/20240314210740.GA2823176@perftesting/
Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.")
Fixes: 8a68173691f0 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket")
Link: https://lore.kernel.org/bpf/CANn89i+484ffqb93aQm1N-tjxxvb3WDKX0EbD7318RwRgs…
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Tested-by: Josef Bacik <josef(a)toxicpanda.com>
Cc: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Link: https://lore.kernel.org/r/20240322135732.1535772-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Liu Jian <liujian56(a)huawei.com>
---
include/net/inet_connection_sock.h | 1 +
include/net/sock.h | 7 +++++++
net/ipv4/inet_connection_sock.c | 14 ++++++++++++++
net/ipv4/tcp.c | 2 ++
4 files changed, 24 insertions(+)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 696f96bd26ef..eba017715a0c 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -177,6 +177,7 @@ void inet_csk_init_xmit_timers(struct sock *sk,
void (*delack_handler)(struct timer_list *),
void (*keepalive_handler)(struct timer_list *));
void inet_csk_clear_xmit_timers(struct sock *sk);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk);
static inline void inet_csk_schedule_ack(struct sock *sk)
{
diff --git a/include/net/sock.h b/include/net/sock.h
index 66e9c1060b13..5f706787ed4c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1631,6 +1631,13 @@ static inline void sock_owned_by_me(const struct sock *sk)
#endif
}
+static inline void sock_not_owned_by_me(const struct sock *sk)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks);
+#endif
+}
+
static inline bool sock_owned_by_user(const struct sock *sk)
{
sock_owned_by_me(sk);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 4c3df3b44f8e..c1f90e08761c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -568,6 +568,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* ongoing timer handlers need to acquire socket lock. */
+ sock_not_owned_by_me(sk);
+
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+
+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
+ sk_stop_timer_sync(sk, &sk->sk_timer);
+}
+
void inet_csk_delete_keepalive_timer(struct sock *sk)
{
sk_stop_timer(sk, &sk->sk_timer);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6a903135c04f..14503cfd5480 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2522,6 +2522,8 @@ void tcp_close(struct sock *sk, long timeout)
lock_sock(sk);
__tcp_close(sk, timeout);
release_sock(sk);
+ if (!sk->sk_net_refcnt)
+ inet_csk_clear_xmit_timers_sync(sk);
sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
--
2.34.1
From: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
mainline inclusion
from mainline-v6.3-rc1
commit ef6dfc4b238a435ab552207ec09e4a82b6426d31
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9NZ3E
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
Functions used with __setup() return 1 when the argument has been
successfully parsed.
Reverse the returned value so that 1 is returned when kstrtobool() is
successful (i.e. returns 0).
My understanding of these __setup() functions is that returning 1 or 0
does not change much anyway - so this is more of a cleanup than a
functional fix.
I spot it and found it spurious while looking at something else.
Even if the output is not perfect, you'll get the idea with:
$ git grep -B2 -A10 retu.*kstrtobool | grep __setup -B10
Fixes: 3aac3ebea08f ("x86/signal: Implement sigaltstack size validation")
Signed-off-by: Christophe JAILLET <christophe.jaillet(a)wanadoo.fr>
Signed-off-by: Ingo Molnar <mingo(a)kernel.org>
Link: https://lore.kernel.org/r/73882d43ebe420c9d8fb82d0560021722b243000.16737175…
Signed-off-by: liwei <liwei728(a)huawei.com>
---
arch/x86/kernel/signal.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 8c991c547305..2a508a6a1fd2 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -921,7 +921,7 @@ static bool strict_sigaltstack_size __ro_after_init = false;
static int __init strict_sas_size(char *arg)
{
- return kstrtobool(arg, &strict_sigaltstack_size);
+ return kstrtobool(arg, &strict_sigaltstack_size) == 0;
}
__setup("strict_sas_size", strict_sas_size);
--
2.25.1
From: Eric Dumazet <edumazet(a)google.com>
stable inclusion
from stable-v5.10.215
commit e3e27d2b446deb1f643758a0c4731f5c22492810
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9QG5Z
CVE: CVE-2024-35910
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
---------------------------
[ Upstream commit 151c9c724d05d5b0dd8acd3e11cb69ef1f2dbada ]
We had various syzbot reports about tcp timers firing after
the corresponding netns has been dismantled.
Fortunately Josef Bacik could trigger the issue more often,
and could test a patch I wrote two years ago.
When TCP sockets are closed, we call inet_csk_clear_xmit_timers()
to 'stop' the timers.
inet_csk_clear_xmit_timers() can be called from any context,
including when socket lock is held.
This is the reason it uses sk_stop_timer(), aka del_timer().
This means that ongoing timers might finish much later.
For user sockets, this is fine because each running timer
holds a reference on the socket, and the user socket holds
a reference on the netns.
For kernel sockets, we risk that the netns is freed before
timer can complete, because kernel sockets do not hold
reference on the netns.
This patch adds inet_csk_clear_xmit_timers_sync() function
that using sk_stop_timer_sync() to make sure all timers
are terminated before the kernel socket is released.
Modules using kernel sockets close them in their netns exit()
handler.
Also add sock_not_owned_by_me() helper to get LOCKDEP
support : inet_csk_clear_xmit_timers_sync() must not be called
while socket lock is held.
It is very possible we can revert in the future commit
3a58f13a881e ("net: rds: acquire refcount on TCP sockets")
which attempted to solve the issue in rds only.
(net/smc/af_smc.c and net/mptcp/subflow.c have similar code)
We probably can remove the check_net() tests from
tcp_out_of_resources() and __tcp_close() in the future.
Reported-by: Josef Bacik <josef(a)toxicpanda.com>
Closes: https://lore.kernel.org/netdev/20240314210740.GA2823176@perftesting/
Fixes: 26abe14379f8 ("net: Modify sk_alloc to not reference count the netns of kernel sockets.")
Fixes: 8a68173691f0 ("net: sk_clone_lock() should only do get_net() if the parent is not a kernel socket")
Link: https://lore.kernel.org/bpf/CANn89i+484ffqb93aQm1N-tjxxvb3WDKX0EbD7318RwRgs…
Signed-off-by: Eric Dumazet <edumazet(a)google.com>
Tested-by: Josef Bacik <josef(a)toxicpanda.com>
Cc: Tetsuo Handa <penguin-kernel(a)I-love.SAKURA.ne.jp>
Link: https://lore.kernel.org/r/20240322135732.1535772-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Liu Jian <liujian56(a)huawei.com>
---
include/net/inet_connection_sock.h | 1 +
include/net/sock.h | 7 +++++++
net/ipv4/inet_connection_sock.c | 14 ++++++++++++++
net/ipv4/tcp.c | 2 ++
4 files changed, 24 insertions(+)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 41764d20e334..8e62883a6f31 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -177,6 +177,7 @@ void inet_csk_init_xmit_timers(struct sock *sk,
void (*delack_handler)(struct timer_list *),
void (*keepalive_handler)(struct timer_list *));
void inet_csk_clear_xmit_timers(struct sock *sk);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk);
static inline void inet_csk_schedule_ack(struct sock *sk)
{
diff --git a/include/net/sock.h b/include/net/sock.h
index 117f1126e45a..5025cc924a8c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1693,6 +1693,13 @@ static inline void sock_owned_by_me(const struct sock *sk)
#endif
}
+static inline void sock_not_owned_by_me(const struct sock *sk)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON_ONCE(lockdep_sock_is_held(sk) && debug_locks);
+#endif
+}
+
static inline bool sock_owned_by_user(const struct sock *sk)
{
sock_owned_by_me(sk);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 7a6d2484fe8f..04dd731441b2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -576,6 +576,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* ongoing timer handlers need to acquire socket lock. */
+ sock_not_owned_by_me(sk);
+
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+
+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
+ sk_stop_timer_sync(sk, &sk->sk_timer);
+}
+
void inet_csk_delete_keepalive_timer(struct sock *sk)
{
sk_stop_timer(sk, &sk->sk_timer);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39d4767b190d..8eef91d14263 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2727,6 +2727,8 @@ void tcp_close(struct sock *sk, long timeout)
bh_unlock_sock(sk);
local_bh_enable();
release_sock(sk);
+ if (!sk->sk_net_refcnt)
+ inet_csk_clear_xmit_timers_sync(sk);
sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
--
2.34.1
From: Ido Schimmel <idosch(a)nvidia.com>
mainline inclusion
from mainline-v6.9-rc6
commit 8ca3f7a7b61393804c46f170743c3b839df13977
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9Q9FZ
CVE: CVE-2024-35853
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
The rehash delayed work migrates filters from one region to another.
This is done by iterating over all chunks (all the filters with the same
priority) in the region and in each chunk iterating over all the
filters.
If the migration fails, the code tries to migrate the filters back to
the old region. However, the rollback itself can also fail in which case
another migration will be erroneously performed. Besides the fact that
this ping pong is not a very good idea, it also creates a problem.
Each virtual chunk references two chunks: The currently used one
('vchunk->chunk') and a backup ('vchunk->chunk2'). During migration the
first holds the chunk we want to migrate filters to and the second holds
the chunk we are migrating filters from.
The code currently assumes - but does not verify - that the backup chunk
does not exist (NULL) if the currently used chunk does not reference the
target region. This assumption breaks when we are trying to rollback a
rollback, resulting in the backup chunk being overwritten and leaked
[1].
Fix by not rolling back a failed rollback and add a warning to avoid
future cases.
[1]
WARNING: CPU: 5 PID: 1063 at lib/parman.c:291 parman_destroy+0x17/0x20
Modules linked in:
CPU: 5 PID: 1063 Comm: kworker/5:11 Tainted: G W 6.9.0-rc2-custom-00784-gc6a05c468a0b #14
Hardware name: Mellanox Technologies Ltd. MSN3700/VMOD0005, BIOS 5.11 01/06/2019
Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work
RIP: 0010:parman_destroy+0x17/0x20
[...]
Call Trace:
<TASK>
mlxsw_sp_acl_atcam_region_fini+0x19/0x60
mlxsw_sp_acl_tcam_region_destroy+0x49/0xf0
mlxsw_sp_acl_tcam_vregion_rehash_work+0x1f1/0x470
process_one_work+0x151/0x370
worker_thread+0x2cb/0x3e0
kthread+0xd0/0x100
ret_from_fork+0x34/0x50
ret_from_fork_asm+0x1a/0x30
</TASK>
Fixes: 843500518509 ("mlxsw: spectrum_acl: Do rollback as another call to mlxsw_sp_acl_tcam_vchunk_migrate_all()")
Signed-off-by: Ido Schimmel <idosch(a)nvidia.com>
Tested-by: Alexander Zubkov <green(a)qrator.net>
Reviewed-by: Petr Machata <petrm(a)nvidia.com>
Signed-off-by: Petr Machata <petrm(a)nvidia.com>
Reviewed-by: Simon Horman <horms(a)kernel.org>
Link: https://lore.kernel.org/r/d5edd4f4503934186ae5cfe268503b16345b4e0f.17137971…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Zhang Changzhong <zhangchangzhong(a)huawei.com>
---
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index dbeba76..f99ed1f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -1239,6 +1239,8 @@ mlxsw_sp_acl_tcam_vchunk_migrate_start(struct mlxsw_sp *mlxsw_sp,
{
struct mlxsw_sp_acl_tcam_chunk *new_chunk;
+ WARN_ON(vchunk->chunk2);
+
new_chunk = mlxsw_sp_acl_tcam_chunk_create(mlxsw_sp, vchunk, region);
if (IS_ERR(new_chunk))
return PTR_ERR(new_chunk);
@@ -1383,6 +1385,8 @@ mlxsw_sp_acl_tcam_vregion_migrate(struct mlxsw_sp *mlxsw_sp,
err = mlxsw_sp_acl_tcam_vchunk_migrate_all(mlxsw_sp, vregion,
ctx, credits);
if (err) {
+ if (ctx->this_is_rollback)
+ return err;
/* In case migration was not successful, we need to swap
* so the original region pointer is assigned again
* to vregion->region.
--
2.9.5