From: Maxim Mikityanskiy <maximmi@nvidia.com>
mainline inclusion
from mainline-v6.0-rc1
commit 94ce3b64c62d4b628cf85cd0d9a370aca8f7e43a
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I990AF
CVE: CVE-2021-47131
--------------------------------
Currently, tls_device_down synchronizes with tls_device_resync_rx using RCU; however, the pointer to netdev is stored using WRITE_ONCE and loaded using READ_ONCE.
Although such an approach is technically correct (rcu_dereference is essentially a READ_ONCE, and rcu_assign_pointer uses WRITE_ONCE to store NULL), using the dedicated RCU helpers for pointers is preferable, as they include additional checks and may change the implementation transparently to the callers.
Mark the netdev pointer as __rcu and use the correct RCU helpers to access it. For non-concurrent access, pass the right conditions that guarantee safe access (locks taken, refcount value). Also use the correct helper in mlx5e, where even READ_ONCE was missing.
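For illustration only, a minimal sketch of the access pattern this change adopts; the type and helper names below are placeholders, not the identifiers used in the patch:

  #include <linux/netdevice.h>
  #include <linux/rcupdate.h>
  #include <linux/rwsem.h>

  /* Placeholder standing in for struct tls_context. */
  struct offload_ctx {
          struct net_device __rcu *netdev;
  };

  /* Stands in for device_offload_lock in this sketch. */
  static DECLARE_RWSEM(offload_lock);

  /* Concurrent reader: must run under rcu_read_lock(). */
  static struct net_device *offload_netdev_rcu(struct offload_ctx *ctx)
  {
          return rcu_dereference(ctx->netdev);
  }

  /* Non-concurrent reader: pass the condition that makes the access
   * safe, so lockdep/sparse can check it.
   */
  static struct net_device *offload_netdev_locked(struct offload_ctx *ctx)
  {
          return rcu_dereference_protected(ctx->netdev,
                                           lockdep_is_held(&offload_lock));
  }

  /* Writer: publish the new value (here NULL) to concurrent readers. */
  static void offload_clear_netdev(struct offload_ctx *ctx)
  {
          rcu_assign_pointer(ctx->netdev, NULL);
  }

With the pointer annotated as __rcu, sparse flags plain dereferences, and lockdep can verify the conditions passed to rcu_dereference_protected.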
The transition to RCU exposes existing issues, fixed by this commit:
1. bond_tls_device_xmit could read netdev twice, and it could become NULL the second time, after the NULL check passed (see the sketch after this list).
2. Drivers shouldn't stop processing the last packet if tls_device_down just set netdev to NULL, before tls_dev_del was called. This prevents a possible packet drop when transitioning to the fallback software mode.
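For issue 1, the general shape of the race and of the fix looks roughly like the sketch below; the context type and the send_via() helper are made up for the example and are not the actual bonding code:

  #include <linux/netdevice.h>
  #include <linux/rcupdate.h>

  /* Illustrative context; the real one comes from tls_get_ctx(skb->sk). */
  struct tls_tx_ctx {
          struct net_device __rcu *netdev;
  };

  /* Stub standing in for the real transmit path. */
  static netdev_tx_t send_via(struct net_device *dev, struct sk_buff *skb)
  {
          kfree_skb(skb);
          return NETDEV_TX_OK;
  }

  /* Racy shape: the pointer is loaded twice (xmit runs with BH disabled,
   * so rcu_dereference_bh is valid), and it may become NULL between the
   * check and the second load if tls_device_down runs in parallel.
   */
  static netdev_tx_t xmit_racy(struct tls_tx_ctx *ctx, struct sk_buff *skb)
  {
          if (rcu_dereference_bh(ctx->netdev))
                  return send_via(rcu_dereference_bh(ctx->netdev), skb);
          kfree_skb(skb);         /* drop when no device is attached */
          return NETDEV_TX_OK;
  }

  /* Fixed shape: load the pointer once into a local variable and use
   * that same copy for both the check and the transmit.
   */
  static netdev_tx_t xmit_fixed(struct tls_tx_ctx *ctx, struct sk_buff *skb)
  {
          struct net_device *tls_netdev = rcu_dereference_bh(ctx->netdev);

          if (tls_netdev)
                  return send_via(tls_netdev, skb);
          kfree_skb(skb);         /* drop when no device is attached */
          return NETDEV_TX_OK;
  }

Reading the pointer once also matches how the mlx5e change below caches it in a local tls_netdev variable before the comparison.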
Fixes: 89df6a810470 ("net/bonding: Implement TLS TX device offload")
Fixes: c55dcdd435aa ("net/tls: Fix use-after-free after the TLS device goes down and up")
Signed-off-by: Maxim Mikityanskiy <maximmi@nvidia.com>
Link: https://lore.kernel.org/r/20220810081602.1435800-1-maximmi@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Conflicts:
	drivers/net/bonding/bond_main.c
	drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
	drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
	include/net/tls.h
	net/tls/tls_device.c
	net/tls/tls_device_fallback.c
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
---
 .../mellanox/mlx5/core/en_accel/tls_rxtx.c |  4 +++-
 include/net/tls.h                          |  2 +-
 net/tls/tls_device.c                       | 24 +++++++++++++------
 net/tls/tls_device_fallback.c              |  2 +-
 4 files changed, 22 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index be137d4a91692..22acd462856c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -265,6 +265,7 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_tls_offload_context_tx *context;
+	struct net_device *tls_netdev;
 	struct tls_context *tls_ctx;
 	u32 expected_seq;
 	int datalen;
@@ -278,7 +279,8 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
 		goto out;
 
 	tls_ctx = tls_get_ctx(skb->sk);
-	if (unlikely(tls_ctx->netdev != netdev))
+	tls_netdev = rcu_dereference_bh(tls_ctx->netdev);
+	if (unlikely(tls_netdev != netdev))
 		goto out;
 
 	skb_seq = ntohl(tcp_hdr(skb)->seq);
diff --git a/include/net/tls.h b/include/net/tls.h
index b891f09bba909..4457bc67f2e66 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -207,7 +207,7 @@ struct tls_context {
 	union tls_crypto_context crypto_recv;
 
 	struct list_head list;
-	struct net_device *netdev;
+	struct net_device __rcu *netdev;
 	refcount_t refcount;
 
 	void *priv_ctx_tx;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 8d7940a817694..82d5573cd054b 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -68,6 +68,7 @@ static void tls_device_free_ctx(struct tls_context *ctx)
 static void tls_device_gc_task(struct work_struct *work)
 {
 	struct tls_context *ctx, *tmp;
+	struct net_device *netdev;
 	unsigned long flags;
 	LIST_HEAD(gc_list);
 
@@ -76,7 +77,11 @@ static void tls_device_gc_task(struct work_struct *work)
 	spin_unlock_irqrestore(&tls_device_lock, flags);
 
 	list_for_each_entry_safe(ctx, tmp, &gc_list, list) {
-		struct net_device *netdev = ctx->netdev;
+		/* Safe, because this is the destroy flow, refcount is 0, so
+		 * tls_device_down can't store this field in parallel.
+		 */
+		netdev = rcu_dereference_protected(ctx->netdev,
+						   !refcount_read(&ctx->refcount));
 
 		if (netdev && ctx->tx_conf == TLS_HW) {
 			netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
@@ -96,7 +101,7 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
 	if (sk->sk_destruct != tls_device_sk_destruct) {
 		refcount_set(&ctx->refcount, 1);
 		dev_hold(netdev);
-		ctx->netdev = netdev;
+		RCU_INIT_POINTER(ctx->netdev, netdev);
 		spin_lock_irq(&tls_device_lock);
 		list_add_tail(&ctx->list, &tls_device_list);
 		spin_unlock_irq(&tls_device_lock);
@@ -573,7 +578,7 @@ static void tls_device_resync_rx(struct tls_context *tls_ctx,
 	struct net_device *netdev;
 
 	rcu_read_lock();
-	netdev = READ_ONCE(tls_ctx->netdev);
+	netdev = rcu_dereference(tls_ctx->netdev);
 	if (netdev)
 		netdev->tlsdev_ops->tls_dev_resync_rx(netdev, sk, seq, rcd_sn);
 	rcu_read_unlock();
@@ -958,7 +963,8 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
 	struct net_device *netdev;
 
 	down_read(&device_offload_lock);
-	netdev = tls_ctx->netdev;
+	netdev = rcu_dereference_protected(tls_ctx->netdev,
+					   lockdep_is_held(&device_offload_lock));
 	if (!netdev)
 		goto out;
 
@@ -967,7 +973,7 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
 
 	if (tls_ctx->tx_conf != TLS_HW) {
 		dev_put(netdev);
-		tls_ctx->netdev = NULL;
+		rcu_assign_pointer(tls_ctx->netdev, NULL);
 	} else {
 		set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags);
 	}
@@ -988,7 +994,11 @@ static int tls_device_down(struct net_device *netdev)
 
 	spin_lock_irqsave(&tls_device_lock, flags);
 	list_for_each_entry_safe(ctx, tmp, &tls_device_list, list) {
-		if (ctx->netdev != netdev ||
+		struct net_device *ctx_netdev =
+			rcu_dereference_protected(ctx->netdev,
+						  lockdep_is_held(&device_offload_lock));
+
+		if (ctx_netdev != netdev ||
 		    !refcount_inc_not_zero(&ctx->refcount))
 			continue;
 
@@ -1006,7 +1016,7 @@ static int tls_device_down(struct net_device *netdev)
 		/* Stop the RX and TX resync.
 		 * tls_dev_resync must not be called after tls_dev_del.
 		 */
-		WRITE_ONCE(ctx->netdev, NULL);
+		rcu_assign_pointer(ctx->netdev, NULL);
 
 		/* Start skipping the RX resync logic completely. */
 		set_bit(TLS_RX_DEV_DEGRADED, &ctx->flags);
diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c
index 55f1b7ed80f37..319aaed05571f 100644
--- a/net/tls/tls_device_fallback.c
+++ b/net/tls/tls_device_fallback.c
@@ -420,7 +420,7 @@ struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
 				      struct net_device *dev,
 				      struct sk_buff *skb)
 {
-	if (dev == tls_get_ctx(sk)->netdev)
+	if (dev == rcu_dereference_bh(tls_get_ctx(sk)->netdev))
 		return skb;
 
 	return tls_sw_fallback(sk, skb);