From: Richard Palethorpe rpalethorpe@suse.com
[ Upstream commit 0ace17d56824165c7f4c68785d6b58971db954dd ]
write_wakeup can happen in parallel with close/hangup where tty->disc_data is set to NULL and the netdevice is freed thus also freeing disc_data. write_wakeup accesses disc_data so we must prevent close from freeing the netdev while write_wakeup has a non-NULL view of tty->disc_data.
We also need to make sure that accesses to disc_data are atomic. All of this can be done with RCU.
This problem was found by Syzkaller on SLCAN, but the same issue is reproducible with the SLIP line discipline using an LTP test based on the Syzkaller reproducer.
A fix which didn't use RCU was posted by Hillf Danton.
Fixes: 661f7fda21b1 ("slip: Fix deadlock in write_wakeup") Fixes: a8e83b17536a ("slcan: Port write_wakeup deadlock fix from slip") Reported-by: syzbot+017e491ae13c0068598a@syzkaller.appspotmail.com Signed-off-by: Richard Palethorpe rpalethorpe@suse.com Cc: Wolfgang Grandegger wg@grandegger.com Cc: Marc Kleine-Budde mkl@pengutronix.de Cc: "David S. Miller" davem@davemloft.net Cc: Tyler Hall tylerwhall@gmail.com Cc: linux-can@vger.kernel.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/can/slcan.c | 12 ++++++++++-- drivers/net/slip/slip.c | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index cf0769a..b2e5bca 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -343,9 +343,16 @@ static void slcan_transmit(struct work_struct *work) */ static void slcan_write_wakeup(struct tty_struct *tty) { - struct slcan *sl = tty->disc_data; + struct slcan *sl; + + rcu_read_lock(); + sl = rcu_dereference(tty->disc_data); + if (!sl) + goto out;
schedule_work(&sl->tx_work); +out: + rcu_read_unlock(); }
/* Send a can_frame to a TTY queue. */ @@ -640,10 +647,11 @@ static void slcan_close(struct tty_struct *tty) return;
spin_lock_bh(&sl->lock); - tty->disc_data = NULL; + rcu_assign_pointer(tty->disc_data, NULL); sl->tty = NULL; spin_unlock_bh(&sl->lock);
+ synchronize_rcu(); flush_work(&sl->tx_work);
/* Flush network side */ diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 77207f9..93f303e 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -452,9 +452,16 @@ static void slip_transmit(struct work_struct *work) */ static void slip_write_wakeup(struct tty_struct *tty) { - struct slip *sl = tty->disc_data; + struct slip *sl; + + rcu_read_lock(); + sl = rcu_dereference(tty->disc_data); + if (!sl) + goto out;
schedule_work(&sl->tx_work); +out: + rcu_read_unlock(); }
static void sl_tx_timeout(struct net_device *dev) @@ -882,10 +889,11 @@ static void slip_close(struct tty_struct *tty) return;
spin_lock_bh(&sl->lock); - tty->disc_data = NULL; + rcu_assign_pointer(tty->disc_data, NULL); sl->tty = NULL; spin_unlock_bh(&sl->lock);
+ synchronize_rcu(); flush_work(&sl->tx_work);
/* VSV = very important to remove timers */
From: Wenwen Wang wenwen@cs.uga.edu
[ Upstream commit fa865ba183d61c1ec8cbcab8573159c3b72b89a4 ]
In fs_open(), 'vcc' is allocated through kmalloc() and assigned to 'atm_vcc->dev_data.' In the following execution, if an error occurs, e.g., there is no more free channel, an error code EBUSY or ENOMEM will be returned. However, 'vcc' is not deallocated, leading to memory leaks. Note that, in normal cases where fs_open() returns 0, 'vcc' will be deallocated in fs_close(). But, if fs_open() fails, there is no guarantee that fs_close() will be invoked.
To fix this issue, deallocate 'vcc' before the error code is returned.
Signed-off-by: Wenwen Wang wenwen@cs.uga.edu Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/atm/firestream.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 4e46dc9..112b100 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -927,6 +927,7 @@ static int fs_open(struct atm_vcc *atm_vcc) } if (!to) { printk ("No more free channels for FS50..\n"); + kfree(vcc); return -EBUSY; } vcc->channo = dev->channo; @@ -937,6 +938,7 @@ static int fs_open(struct atm_vcc *atm_vcc) if (((DO_DIRECTION(rxtp) && dev->atm_vccs[vcc->channo])) || ( DO_DIRECTION(txtp) && test_bit (vcc->channo, dev->tx_inuse))) { printk ("Channel is in use for FS155.\n"); + kfree(vcc); return -EBUSY; } } @@ -950,6 +952,7 @@ static int fs_open(struct atm_vcc *atm_vcc) tc, sizeof (struct fs_transmit_config)); if (!tc) { fs_dprintk (FS_DEBUG_OPEN, "fs: can't alloc transmit_config.\n"); + kfree(vcc); return -ENOMEM; }
From: Eric Dumazet edumazet@google.com
[ Upstream commit 940ba14986657a50c15f694efca1beba31fa568f ]
A malicious user could use RAW sockets and fool GTP using them as standard SOCK_DGRAM UDP sockets.
BUG: KMSAN: uninit-value in udp_tunnel_encap_enable include/net/udp_tunnel.h:174 [inline] BUG: KMSAN: uninit-value in setup_udp_tunnel_sock+0x45e/0x6f0 net/ipv4/udp_tunnel.c:85 CPU: 0 PID: 11262 Comm: syz-executor613 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x1c9/0x220 lib/dump_stack.c:118 kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 udp_tunnel_encap_enable include/net/udp_tunnel.h:174 [inline] setup_udp_tunnel_sock+0x45e/0x6f0 net/ipv4/udp_tunnel.c:85 gtp_encap_enable_socket+0x37f/0x5a0 drivers/net/gtp.c:827 gtp_encap_enable drivers/net/gtp.c:844 [inline] gtp_newlink+0xfb/0x1e50 drivers/net/gtp.c:666 __rtnl_newlink net/core/rtnetlink.c:3305 [inline] rtnl_newlink+0x2973/0x3920 net/core/rtnetlink.c:3363 rtnetlink_rcv_msg+0x1153/0x1570 net/core/rtnetlink.c:5424 netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5442 netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 sock_sendmsg_nosec net/socket.c:639 [inline] sock_sendmsg net/socket.c:659 [inline] ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 ___sys_sendmsg net/socket.c:2384 [inline] __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 __do_sys_sendmsg net/socket.c:2426 [inline] __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x441359 Code: e8 ac e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff1cd0ac28 EFLAGS: 00000246 ORIG_RAX: 
000000000000002e RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441359 RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 RBP: 00000000006cb018 R08: 00000000004002c8 R09: 00000000004002c8 R10: 00000000004002c8 R11: 0000000000000246 R12: 00000000004020d0 R13: 0000000000402160 R14: 0000000000000000 R15: 0000000000000000
Uninit was created at: kmsan_save_stack_with_flags+0x3c/0x90 mm/kmsan/kmsan.c:144 kmsan_internal_alloc_meta_for_pages mm/kmsan/kmsan_shadow.c:307 [inline] kmsan_alloc_page+0x12a/0x310 mm/kmsan/kmsan_shadow.c:336 __alloc_pages_nodemask+0x57f2/0x5f60 mm/page_alloc.c:4800 alloc_pages_current+0x67d/0x990 mm/mempolicy.c:2207 alloc_pages include/linux/gfp.h:534 [inline] alloc_slab_page+0x111/0x12f0 mm/slub.c:1511 allocate_slab mm/slub.c:1656 [inline] new_slab+0x2bc/0x1130 mm/slub.c:1722 new_slab_objects mm/slub.c:2473 [inline] ___slab_alloc+0x1533/0x1f30 mm/slub.c:2624 __slab_alloc mm/slub.c:2664 [inline] slab_alloc_node mm/slub.c:2738 [inline] slab_alloc mm/slub.c:2783 [inline] kmem_cache_alloc+0xb23/0xd70 mm/slub.c:2788 sk_prot_alloc+0xf2/0x620 net/core/sock.c:1597 sk_alloc+0xf0/0xbe0 net/core/sock.c:1657 inet_create+0x7c7/0x1370 net/ipv4/af_inet.c:321 __sock_create+0x8eb/0xf00 net/socket.c:1420 sock_create net/socket.c:1471 [inline] __sys_socket+0x1a1/0x600 net/socket.c:1513 __do_sys_socket net/socket.c:1522 [inline] __se_sys_socket+0x8d/0xb0 net/socket.c:1520 __x64_sys_socket+0x4a/0x70 net/socket.c:1520 do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") Signed-off-by: Eric Dumazet edumazet@google.com Cc: Pablo Neira pablo@netfilter.org Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/gtp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 6571cac..ee08644 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -809,19 +809,21 @@ static struct sock *gtp_encap_enable_socket(int fd, int type, return NULL; }
- if (sock->sk->sk_protocol != IPPROTO_UDP) { + sk = sock->sk; + if (sk->sk_protocol != IPPROTO_UDP || + sk->sk_type != SOCK_DGRAM || + (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) { pr_debug("socket fd=%d not UDP\n", fd); sk = ERR_PTR(-EINVAL); goto out_sock; }
- lock_sock(sock->sk); - if (sock->sk->sk_user_data) { + lock_sock(sk); + if (sk->sk_user_data) { sk = ERR_PTR(-EBUSY); goto out_rel_sock; }
- sk = sock->sk; sock_hold(sk);
tuncfg.sk_user_data = gtp;
From: Yuki Taguchi tagyounit@gmail.com
[ Upstream commit 62ebaeaedee7591c257543d040677a60e35c7aec ]
After LRO/GRO is applied, SRv6 encapsulated packets have the SKB_GSO_IPXIP6 feature flag, and this flag must be removed right after the decapsulation procedure.
Currently, the SKB_GSO_IPXIP6 flag is not removed on End.D* actions, which creates an inconsistent packet state: normal TCP/IP packets carry the SKB_GSO_IPXIP6 flag. This behavior can cause an unexpected fallback to GSO when routing to netdevices that do not support SKB_GSO_IPXIP6. For example, on inter-VRF forwarding, decapsulated packets are split into small packets by GSO because VRF devices do not support TSO for packets with the SKB_GSO_IPXIP6 flag, and this degrades forwarding performance.
This patch removes encapsulation related GSO flags from the skb right after the End.D* action is applied.
Fixes: d7a669dd2f8b ("ipv6: sr: add helper functions for seg6local") Signed-off-by: Yuki Taguchi tagyounit@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv6/seg6_local.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 60325db..607709a88 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -28,6 +28,7 @@ #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/dst_cache.h> +#include <net/ip_tunnels.h> #ifdef CONFIG_IPV6_SEG6_HMAC #include <net/seg6_hmac.h> #endif @@ -135,7 +136,8 @@ static bool decap_and_validate(struct sk_buff *skb, int proto)
skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->encapsulation = 0; + if (iptunnel_pull_offloads(skb)) + return false;
return true; }
From: Florian Fainelli f.fainelli@gmail.com
[ Upstream commit 148965df1a990af98b2c84092c2a2274c7489284 ]
Before commit 7587935cfa11 ("net: bcmgenet: move NAPI initialization to ring initialization") moved the code, this used to be netif_tx_napi_add(), but we lost that small semantic change in the process, restore that.
Fixes: 7587935cfa11 ("net: bcmgenet: move NAPI initialization to ring initialization") Signed-off-by: Florian Fainelli f.fainelli@gmail.com Acked-by: Doug Berger opendmb@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index b7d7501..736a6a5 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -2166,8 +2166,8 @@ static void bcmgenet_init_tx_ring(struct bcmgenet_priv *priv, DMA_END_ADDR);
/* Initialize Tx NAPI */ - netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, - NAPI_POLL_WEIGHT); + netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, + NAPI_POLL_WEIGHT); }
/* Initialize a RDMA ring */
From: Michael Ellerman mpe@ellerman.id.au
[ Upstream commit 3546d8f1bbe992488ed91592cf6bf76e7114791a ]
The cxgb3 driver for "Chelsio T3-based gigabit and 10Gb Ethernet adapters" implements a custom ioctl as SIOCCHIOCTL/SIOCDEVPRIVATE in cxgb_extension_ioctl().
One of the subcommands of the ioctl is CHELSIO_GET_MEM, which appears to read memory directly out of the adapter and return it to userspace. It's not entirely clear what the adapter memory contains, but the assumption is that it shouldn't be accessible to all users.
So add a CAP_NET_ADMIN check to the CHELSIO_GET_MEM case. Put it after the is_offload() check, which matches two of the other subcommands in the same function which also check for is_offload() and CAP_NET_ADMIN.
Found by Ilja by code inspection, not tested as I don't have the required hardware.
Reported-by: Ilja Van Sprundel ivansprundel@ioactive.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 6be6de0..c82469a 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2449,6 +2449,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
if (!is_offload(adapter)) return -EOPNOTSUPP; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; if (!(adapter->flags & FULL_INIT_DONE)) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t)))
From: Niko Kortstrom niko.kortstrom@nokia.com
[ Upstream commit 690afc165bb314354667f67157c1a1aea7dc797a ]
Support for moving IPv4 GRE tunnels between namespaces was added in commit b57708add314 ("gre: add x-netns support"). The respective change for IPv6 tunnels, commit 22f08069e8b4 ("ip6gre: add x-netns support") did not drop NETIF_F_NETNS_LOCAL flag so moving them from one netns to another is still denied in IPv6 case. Drop NETIF_F_NETNS_LOCAL flag from ip6gre tunnels to allow moving ip6gre tunnel endpoints between network namespaces.
Signed-off-by: Niko Kortstrom niko.kortstrom@nokia.com Acked-by: Nicolas Dichtel nicolas.dichtel@6wind.com Acked-by: William Tu u9012063@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv6/ip6_gre.c | 3 --- 1 file changed, 3 deletions(-)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 1f2d002..90621d4 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1486,7 +1486,6 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) dev->mtu -= 8;
if (tunnel->parms.collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; netif_keep_dst(dev); } ip6gre_tnl_init_features(dev); @@ -1914,7 +1913,6 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free;
- dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netif_keep_dst(dev); @@ -2223,7 +2221,6 @@ static void ip6erspan_tap_setup(struct net_device *dev) dev->needs_free_netdev = true; dev->priv_destructor = ip6gre_dev_free;
- dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; netif_keep_dst(dev);
From: William Dauchy w.dauchy@criteo.com
[ Upstream commit 5311a69aaca30fa849c3cc46fb25f75727fb72d0 ]
in the same manner as commit d0f418516022 ("net, ip_tunnel: fix namespaces move"), fix namespace moving as it was broken since commit 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnel"), but for ipv6 this time; there is no reason to keep it for ip6_tunnel.
Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnel") Signed-off-by: William Dauchy w.dauchy@criteo.com Acked-by: Nicolas Dichtel nicolas.dichtel@6wind.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv6/ip6_tunnel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index e3b4237..8e70a01 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1882,10 +1882,8 @@ static int ip6_tnl_dev_init(struct net_device *dev) if (err) return err; ip6_tnl_link_config(t); - if (t->parms.collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; + if (t->parms.collect_md) netif_keep_dst(dev); - } return 0; }
From: William Dauchy w.dauchy@criteo.com
[ Upstream commit d0f418516022c32ecceaf4275423e5bd3f8743a9 ]
in the same manner as commit 690afc165bb3 ("net: ip6_gre: fix moving ip6gre between namespaces"), fix namespace moving as it was broken since commit 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata."). Indeed, the ip6_gre commit removed the local flag for collect_md condition, so there is no reason to keep it for ip_gre/ip_tunnel.
this patch will fix both ip_tunnel and ip_gre modules.
Fixes: 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: William Dauchy w.dauchy@criteo.com Acked-by: Nicolas Dichtel nicolas.dichtel@6wind.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv4/ip_tunnel.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index f03a1b6..14fd8a3 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1203,10 +1203,8 @@ int ip_tunnel_init(struct net_device *dev) iph->version = 4; iph->ihl = 5;
- if (tunnel->collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; + if (tunnel->collect_md) netif_keep_dst(dev); - } return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_init);
From: Eric Dumazet edumazet@google.com
[ Upstream commit d836f5c69d87473ff65c06a6123e5b2cf5e56f5b ]
rtnl_create_link() needs to apply dev->min_mtu and dev->max_mtu checks that we apply in do_setlink()
Otherwise malicious users can crash the kernel, for example after an integer overflow :
BUG: KASAN: use-after-free in memset include/linux/string.h:365 [inline] BUG: KASAN: use-after-free in __alloc_skb+0x37b/0x5e0 net/core/skbuff.c:238 Write of size 32 at addr ffff88819f20b9c0 by task swapper/0/0
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.5.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: <IRQ> __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:639 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x134/0x1a0 mm/kasan/generic.c:192 memset+0x24/0x40 mm/kasan/common.c:108 memset include/linux/string.h:365 [inline] __alloc_skb+0x37b/0x5e0 net/core/skbuff.c:238 alloc_skb include/linux/skbuff.h:1049 [inline] alloc_skb_with_frags+0x93/0x590 net/core/skbuff.c:5664 sock_alloc_send_pskb+0x7ad/0x920 net/core/sock.c:2242 sock_alloc_send_skb+0x32/0x40 net/core/sock.c:2259 mld_newpack+0x1d7/0x7f0 net/ipv6/mcast.c:1609 add_grhead.isra.0+0x299/0x370 net/ipv6/mcast.c:1713 add_grec+0x7db/0x10b0 net/ipv6/mcast.c:1844 mld_send_cr net/ipv6/mcast.c:1970 [inline] mld_ifc_timer_expire+0x3d3/0x950 net/ipv6/mcast.c:2477 call_timer_fn+0x1ac/0x780 kernel/time/timer.c:1404 expire_timers kernel/time/timer.c:1449 [inline] __run_timers kernel/time/timer.c:1773 [inline] __run_timers kernel/time/timer.c:1740 [inline] run_timer_softirq+0x6c3/0x1790 kernel/time/timer.c:1786 __do_softirq+0x262/0x98c kernel/softirq.c:292 invoke_softirq kernel/softirq.c:373 [inline] irq_exit+0x19b/0x1e0 kernel/softirq.c:413 exiting_irq arch/x86/include/asm/apic.h:536 [inline] smp_apic_timer_interrupt+0x1a3/0x610 arch/x86/kernel/apic/apic.c:1137 apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829 </IRQ> RIP: 0010:native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:61 Code: 98 6b ea f9 eb 8a cc cc cc cc cc cc e9 07 00 00 00 0f 00 2d 44 1c 60 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 34 1c 60 00 fb f4 <c3> cc 55 48 89 e5 41 57 41 56 41 55 41 54 53 e8 4e 5d 9a f9 e8 79 RSP: 0018:ffffffff89807ce8 EFLAGS: 
00000286 ORIG_RAX: ffffffffffffff13 RAX: 1ffffffff13266ae RBX: ffffffff8987a1c0 RCX: 0000000000000000 RDX: dffffc0000000000 RSI: 0000000000000006 RDI: ffffffff8987aa54 RBP: ffffffff89807d18 R08: ffffffff8987a1c0 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: dffffc0000000000 R13: ffffffff8a799980 R14: 0000000000000000 R15: 0000000000000000 arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:690 default_idle_call+0x84/0xb0 kernel/sched/idle.c:94 cpuidle_idle_call kernel/sched/idle.c:154 [inline] do_idle+0x3c8/0x6e0 kernel/sched/idle.c:269 cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:361 rest_init+0x23b/0x371 init/main.c:451 arch_call_rest_init+0xe/0x1b start_kernel+0x904/0x943 init/main.c:784 x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 x86_64_start_kernel+0x77/0x7b arch/x86/kernel/head64.c:471 secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:242
The buggy address belongs to the page: page:ffffea00067c82c0 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 raw: 057ffe0000000000 ffffea00067c82c8 ffffea00067c82c8 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected
Memory state around the buggy address: ffff88819f20b880: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88819f20b900: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
ffff88819f20b980: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
^ ffff88819f20ba00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88819f20ba80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking") Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 29 +++++++++++++++++++---------- net/core/rtnetlink.c | 13 +++++++++++-- 3 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ee9dbff..78d5117 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3710,6 +3710,8 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags, int dev_get_alias(const struct net_device *, char *, size_t); int dev_change_net_namespace(struct net_device *, struct net *, const char *); int __dev_set_mtu(struct net_device *, int); +int dev_validate_mtu(struct net_device *dev, int mtu, + struct netlink_ext_ack *extack); int dev_set_mtu_ext(struct net_device *dev, int mtu, struct netlink_ext_ack *extack); int dev_set_mtu(struct net_device *, int); diff --git a/net/core/dev.c b/net/core/dev.c index 73ebaca..c14091c7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7752,6 +7752,22 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu) } EXPORT_SYMBOL(__dev_set_mtu);
+int dev_validate_mtu(struct net_device *dev, int new_mtu, + struct netlink_ext_ack *extack) +{ + /* MTU must be positive, and in range */ + if (new_mtu < 0 || new_mtu < dev->min_mtu) { + NL_SET_ERR_MSG(extack, "mtu less than device minimum"); + return -EINVAL; + } + + if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { + NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); + return -EINVAL; + } + return 0; +} + /** * dev_set_mtu_ext - Change maximum transfer unit * @dev: device @@ -7768,16 +7784,9 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, if (new_mtu == dev->mtu) return 0;
- /* MTU must be positive, and in range */ - if (new_mtu < 0 || new_mtu < dev->min_mtu) { - NL_SET_ERR_MSG(extack, "mtu less than device minimum"); - return -EINVAL; - } - - if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { - NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); - return -EINVAL; - } + err = dev_validate_mtu(dev, new_mtu, extack); + if (err) + return err;
if (!netif_device_present(dev)) return -ENODEV; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dbb3c0c..f51973f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2875,8 +2875,17 @@ struct net_device *rtnl_create_link(struct net *net, dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
- if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_MTU]) { + u32 mtu = nla_get_u32(tb[IFLA_MTU]); + int err; + + err = dev_validate_mtu(dev, mtu, NULL); + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } + dev->mtu = mtu; + } if (tb[IFLA_ADDRESS]) { memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), nla_len(tb[IFLA_ADDRESS]));
From: Cong Wang xiyou.wangcong@gmail.com
[ Upstream commit 61678d28d4a45ef376f5d02a839cc37509ae9281 ]
syzbot reported an out-of-bound access in em_nbyte. As initially analyzed by Eric, this is because em_nbyte sets its own em->datalen in em_nbyte_change(), rather than the one specified by the user, but this value gets overwritten later by its caller tcf_em_validate(). We should leave em->datalen untouched to respect the ematch's own choice.
I audited all the in-tree ematch users: all of those that implement ->change() set em->datalen themselves, so we can just avoid setting it twice in this case.
Reported-and-tested-by: syzbot+5af9a90dad568aa9f611@syzkaller.appspotmail.com Reported-by: syzbot+2f07903a5b05e7f36410@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: Eric Dumazet eric.dumazet@gmail.com Signed-off-by: Cong Wang xiyou.wangcong@gmail.com Reviewed-by: Eric Dumazet edumazet@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/sched/ematch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 1331a4c..750d88d 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -267,12 +267,12 @@ static int tcf_em_validate(struct tcf_proto *tp, } em->data = (unsigned long) v; } + em->datalen = data_len; } }
em->matchid = em_hdr->matchid; em->flags = em_hdr->flags; - em->datalen = data_len; em->net = net;
err = 0;
From: Jouni Hogander jouni.hogander@unikie.com
commit b8eb718348b8fb30b5a7d0a8fce26fb3f4ac741b upstream.
kobject_init_and_add takes reference even when it fails. This has to be given up by the caller in error handling. Otherwise memory allocated by kobject_init_and_add is never freed. Originally found by Syzkaller:
BUG: memory leak unreferenced object 0xffff8880679f8b08 (size 8): comm "netdev_register", pid 269, jiffies 4294693094 (age 12.132s) hex dump (first 8 bytes): 72 78 2d 30 00 36 20 d4 rx-0.6 . backtrace: [<000000008c93818e>] __kmalloc_track_caller+0x16e/0x290 [<000000001f2e4e49>] kvasprintf+0xb1/0x140 [<000000007f313394>] kvasprintf_const+0x56/0x160 [<00000000aeca11c8>] kobject_set_name_vargs+0x5b/0x140 [<0000000073a0367c>] kobject_init_and_add+0xd8/0x170 [<0000000088838e4b>] net_rx_queue_update_kobjects+0x152/0x560 [<000000006be5f104>] netdev_register_kobject+0x210/0x380 [<00000000e31dab9d>] register_netdevice+0xa1b/0xf00 [<00000000f68b2465>] __tun_chr_ioctl+0x20d5/0x3dd0 [<000000004c50599f>] tun_chr_ioctl+0x2f/0x40 [<00000000bbd4c317>] do_vfs_ioctl+0x1c7/0x1510 [<00000000d4c59e8f>] ksys_ioctl+0x99/0xb0 [<00000000946aea81>] __x64_sys_ioctl+0x78/0xb0 [<0000000038d946e5>] do_syscall_64+0x16f/0x580 [<00000000e0aa5d8f>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<00000000285b3d1a>] 0xffffffffffffffff
Cc: David Miller davem@davemloft.net Cc: Lukas Bulwahn lukas.bulwahn@gmail.com Signed-off-by: Jouni Hogander jouni.hogander@unikie.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/core/net-sysfs.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index bf9a3b6..f8b592c 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -932,21 +932,23 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index); if (error) - return error; + goto err;
dev_hold(queue->dev);
if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); - if (error) { - kobject_put(kobj); - return error; - } + if (error) + goto err; }
kobject_uevent(kobj, KOBJ_ADD);
return error; + +err: + kobject_put(kobj); + return error; } #endif /* CONFIG_SYSFS */
@@ -1471,21 +1473,21 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, "tx-%u", index); if (error) - return error; + goto err;
dev_hold(queue->dev);
#ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); - if (error) { - kobject_put(kobj); - return error; - } + if (error) + goto err; #endif
kobject_uevent(kobj, KOBJ_ADD);
- return 0; +err: + kobject_put(kobj); + return error; } #endif /* CONFIG_SYSFS */
From: Eric Dumazet edumazet@google.com
commit 48a322b6f9965b2f1e4ce81af972f0e287b07ed0 upstream.
kobject_put() should only be called in error path.
Fixes: b8eb718348b8 ("net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject") Signed-off-by: Eric Dumazet edumazet@google.com Cc: Jouni Hogander jouni.hogander@unikie.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/core/net-sysfs.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index f8b592c..305b07b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1484,6 +1484,7 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) #endif
kobject_uevent(kobj, KOBJ_ADD); + return 0;
err: kobject_put(kobj);
From: Jouni Hogander jouni.hogander@unikie.com
commit e0b60903b434a7ee21ba8d8659f207ed84101e89 upstream.
dev_hold() must always be called in netdev_queue_add_kobject(). Otherwise the usage count drops below 0 if kobject_init_and_add() fails.
Fixes: b8eb718348b8 ("net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject") Reported-by: Hulk Robot hulkci@huawei.com Cc: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Cc: David Miller davem@davemloft.net Cc: Lukas Bulwahn lukas.bulwahn@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/core/net-sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 305b07b..4e4b103 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1469,14 +1469,17 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0;
+ /* Kobject_put later will trigger netdev_queue_release call + * which decreases dev refcount: Take that reference here + */ + dev_hold(queue->dev); + kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, "tx-%u", index); if (error) goto err;
- dev_hold(queue->dev); - #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); if (error)
From: Jouni Hogander jouni.hogander@unikie.com
commit ddd9b5e3e765d8ed5a35786a6cb00111713fe161 upstream.
dev_hold() must always be called in rx_queue_add_kobject(). Otherwise the usage count drops below 0 if kobject_init_and_add() fails.
Fixes: b8eb718348b8 ("net-sysfs: Fix reference count leak in rx|netdev_queue_add_kobject") Reported-by: syzbot syzbot+30209ea299c09d8785c9@syzkaller.appspotmail.com Cc: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Cc: David Miller davem@davemloft.net Cc: Lukas Bulwahn lukas.bulwahn@gmail.com Signed-off-by: Jouni Hogander jouni.hogander@unikie.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/core/net-sysfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4e4b103..7614a4f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -928,14 +928,17 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0;
+ /* Kobject_put later will trigger rx_queue_release call which + * decreases dev refcount: Take that reference here + */ + dev_hold(queue->dev); + kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index); if (error) goto err;
- dev_hold(queue->dev); - if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error)
From: Jouni Hogander jouni.hogander@unikie.com
[ Upstream commit cb626bf566eb4433318d35681286c494f04fedcc ]
netdev_register_kobject() calls device_initialize(). In case of error, the reference taken by device_initialize() is not given up.
Drivers are supposed to call free_netdev() in case of error. In the non-error case the last reference is given up there and the device release sequence is triggered. In the error case this reference is kept and the release sequence is never started.
Fix this by setting reg_state as NETREG_UNREGISTERED if registering fails.
This is the root cause of a couple of memory leaks reported by Syzkaller:
BUG: memory leak unreferenced object 0xffff8880675ca008 (size 256): comm "netdev_register", pid 281, jiffies 4294696663 (age 6.808s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<0000000058ca4711>] kmem_cache_alloc_trace+0x167/0x280 [<000000002340019b>] device_add+0x882/0x1750 [<000000001d588c3a>] netdev_register_kobject+0x128/0x380 [<0000000011ef5535>] register_netdevice+0xa1b/0xf00 [<000000007fcf1c99>] __tun_chr_ioctl+0x20d5/0x3dd0 [<000000006a5b7b2b>] tun_chr_ioctl+0x2f/0x40 [<00000000f30f834a>] do_vfs_ioctl+0x1c7/0x1510 [<00000000fba062ea>] ksys_ioctl+0x99/0xb0 [<00000000b1c1b8d2>] __x64_sys_ioctl+0x78/0xb0 [<00000000984cabb9>] do_syscall_64+0x16f/0x580 [<000000000bde033d>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<00000000e6ca2d9f>] 0xffffffffffffffff
BUG: memory leak unreferenced object 0xffff8880668ba588 (size 8): comm "kobject_set_nam", pid 286, jiffies 4294725297 (age 9.871s) hex dump (first 8 bytes): 6e 72 30 00 cc be df 2b nr0....+ backtrace: [<00000000a322332a>] __kmalloc_track_caller+0x16e/0x290 [<00000000236fd26b>] kstrdup+0x3e/0x70 [<00000000dd4a2815>] kstrdup_const+0x3e/0x50 [<0000000049a377fc>] kvasprintf_const+0x10e/0x160 [<00000000627fc711>] kobject_set_name_vargs+0x5b/0x140 [<0000000019eeab06>] dev_set_name+0xc0/0xf0 [<0000000069cb12bc>] netdev_register_kobject+0xc8/0x320 [<00000000f2e83732>] register_netdevice+0xa1b/0xf00 [<000000009e1f57cc>] __tun_chr_ioctl+0x20d5/0x3dd0 [<000000009c560784>] tun_chr_ioctl+0x2f/0x40 [<000000000d759e02>] do_vfs_ioctl+0x1c7/0x1510 [<00000000351d7c31>] ksys_ioctl+0x99/0xb0 [<000000008390040a>] __x64_sys_ioctl+0x78/0xb0 [<0000000052d196b7>] do_syscall_64+0x16f/0x580 [<0000000019af9236>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<00000000bc384531>] 0xffffffffffffffff
v3 -> v4: Set reg_state to NETREG_UNREGISTERED if registering fails
v2 -> v3: * Replaced BUG_ON with WARN_ON in free_netdev and netdev_release
v1 -> v2: * Relying on driver calling free_netdev rather than calling put_device directly in error path
Reported-by: syzbot+ad8ca40ecd77896d51e2@syzkaller.appspotmail.com Cc: David Miller davem@davemloft.net Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Lukas Bulwahn lukas.bulwahn@gmail.com Signed-off-by: Jouni Hogander jouni.hogander@unikie.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c index c14091c7..1c0224e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8705,8 +8705,10 @@ int register_netdevice(struct net_device *dev) goto err_uninit;
ret = netdev_register_kobject(dev); - if (ret) + if (ret) { + dev->reg_state = NETREG_UNREGISTERED; goto err_uninit; + } dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
From: James Hughes james.hughes@raspberrypi.org
[ Upstream commit ce896476c65d72b4b99fa09c2f33436b4198f034 ]
As reported by Eric Dumazet, there are still some outstanding cases where the driver does not handle TSO correctly when skbs are over a certain size. Most cases have been fixed; this patch should ensure that forwarded SKBs that are greater than MAX_SINGLE_PACKET_SIZE - TX_OVERHEAD are software segmented and handled correctly.
Signed-off-by: James Hughes james.hughes@raspberrypi.org Reviewed-by: Eric Dumazet edumazet@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/usb/lan78xx.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+)
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 7d708ae..9254888 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -31,6 +31,7 @@ #include <linux/mdio.h> #include <linux/phy.h> #include <net/ip6_checksum.h> +#include <net/vxlan.h> #include <linux/interrupt.h> #include <linux/irqdomain.h> #include <linux/irq.h> @@ -3686,6 +3687,19 @@ static void lan78xx_tx_timeout(struct net_device *net) tasklet_schedule(&dev->bh); }
+static netdev_features_t lan78xx_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + if (skb->len + TX_OVERHEAD > MAX_SINGLE_PACKET_SIZE) + features &= ~NETIF_F_GSO_MASK; + + features = vlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + + return features; +} + static const struct net_device_ops lan78xx_netdev_ops = { .ndo_open = lan78xx_open, .ndo_stop = lan78xx_stop, @@ -3699,6 +3713,7 @@ static void lan78xx_tx_timeout(struct net_device *net) .ndo_set_features = lan78xx_set_features, .ndo_vlan_rx_add_vid = lan78xx_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = lan78xx_vlan_rx_kill_vid, + .ndo_features_check = lan78xx_features_check, };
static void lan78xx_stat_monitor(struct timer_list *t)
From: Paolo Abeni pabeni@redhat.com
[ Upstream commit d39ca2590d10712f412add7a88e1dd467a7246f4 ]
This reverts commit 0d4a6608f68c7532dcbfec2ea1150c9761767d03.
Willem reported that after commit 0d4a6608f68c ("udp: do rmem bulk free even if the rx sk queue is empty") the memory allocated by an almost idle system with many UDP sockets can grow a lot.
For stable kernel keep the solution as simple as possible and revert the offending commit.
Reported-by: Willem de Bruijn willemdebruijn.kernel@gmail.com Diagnosed-by: Eric Dumazet eric.dumazet@gmail.com Fixes: 0d4a6608f68c ("udp: do rmem bulk free even if the rx sk queue is empty") Signed-off-by: Paolo Abeni pabeni@redhat.com Acked-by: Willem de Bruijn willemb@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv4/udp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 43e72e6..02c33ff 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1302,7 +1302,8 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; - if (size < (sk->sk_rcvbuf >> 2)) + if (size < (sk->sk_rcvbuf >> 2) && + !skb_queue_empty(&up->reader_queue)) return; } else { size += up->forward_deficit;
From: Wen Yang wenyang@linux.alibaba.com
[ Upstream commit 5b2f1f3070b6447b76174ea8bfb7390dc6253ebd ]
do_div() does a 64-by-32 division. Use div64_long() instead when the divisor is a long, to avoid truncation to 32 bits. As a nice side effect, this also cleans up the function a bit.
Signed-off-by: Wen Yang wenyang@linux.alibaba.com Cc: Eric Dumazet edumazet@google.com Cc: "David S. Miller" davem@davemloft.net Cc: Alexey Kuznetsov kuznet@ms2.inr.ac.ru Cc: Hideaki YOSHIFUJI yoshfuji@linux-ipv6.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Eric Dumazet edumazet@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv4/tcp_bbr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 02ff2dd..b371e665 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -680,8 +680,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) * bandwidth sample. Delivered is in packets and interval_us in uS and * ratio will be <<1 for most connections. So delivered is first scaled. */ - bw = (u64)rs->delivered * BW_UNIT; - do_div(bw, rs->interval_us); + bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us);
/* If this sample is application-limited, it is likely to have a very * low delivered count that represents application behavior rather than
From: Eric Dumazet edumazet@google.com
[ Upstream commit 2bec445f9bf35e52e395b971df48d3e1e5dc704a ]
Latest commit 853697504de0 ("tcp: Fix highest_sack and highest_sack_seq") apparently allowed syzbot to trigger various crashes in TCP stack [1]
I believe this commit only made things easier for syzbot to find its way into triggering use-after-frees. But really the bugs could lead to bad TCP behavior or plain crashes, even for non-malicious peers.
I have audited all calls to tcp_rtx_queue_unlink() and tcp_rtx_queue_unlink_and_free() and made sure tp->highest_sack would be updated if we are removing from rtx queue the skb that tp->highest_sack points to.
These updates were missing in three locations :
1) tcp_clean_rtx_queue() [This one seems quite serious, I have no idea why this was not caught earlier]
2) tcp_rtx_queue_purge() [Probably not a big deal for normal operations]
3) tcp_send_synack() [Probably not a big deal for normal operations]
[1] BUG: KASAN: use-after-free in tcp_highest_sack_seq include/net/tcp.h:1864 [inline] BUG: KASAN: use-after-free in tcp_highest_sack_seq include/net/tcp.h:1856 [inline] BUG: KASAN: use-after-free in tcp_check_sack_reordering+0x33c/0x3a0 net/ipv4/tcp_input.c:891 Read of size 4 at addr ffff8880a488d068 by task ksoftirqd/1/16
CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.5.0-rc5-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x197/0x210 lib/dump_stack.c:118 print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 kasan_report+0x12/0x20 mm/kasan/common.c:639 __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:134 tcp_highest_sack_seq include/net/tcp.h:1864 [inline] tcp_highest_sack_seq include/net/tcp.h:1856 [inline] tcp_check_sack_reordering+0x33c/0x3a0 net/ipv4/tcp_input.c:891 tcp_try_undo_partial net/ipv4/tcp_input.c:2730 [inline] tcp_fastretrans_alert+0xf74/0x23f0 net/ipv4/tcp_input.c:2847 tcp_ack+0x2577/0x5bf0 net/ipv4/tcp_input.c:3710 tcp_rcv_established+0x6dd/0x1e90 net/ipv4/tcp_input.c:5706 tcp_v4_do_rcv+0x619/0x8d0 net/ipv4/tcp_ipv4.c:1619 tcp_v4_rcv+0x307f/0x3b40 net/ipv4/tcp_ipv4.c:2001 ip_protocol_deliver_rcu+0x5a/0x880 net/ipv4/ip_input.c:204 ip_local_deliver_finish+0x23b/0x380 net/ipv4/ip_input.c:231 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_local_deliver+0x1e9/0x520 net/ipv4/ip_input.c:252 dst_input include/net/dst.h:442 [inline] ip_rcv_finish+0x1db/0x2f0 net/ipv4/ip_input.c:428 NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] ip_rcv+0xe8/0x3f0 net/ipv4/ip_input.c:538 __netif_receive_skb_one_core+0x113/0x1a0 net/core/dev.c:5148 __netif_receive_skb+0x2c/0x1d0 net/core/dev.c:5262 process_backlog+0x206/0x750 net/core/dev.c:6093 napi_poll net/core/dev.c:6530 [inline] net_rx_action+0x508/0x1120 net/core/dev.c:6598 __do_softirq+0x262/0x98c kernel/softirq.c:292 run_ksoftirqd kernel/softirq.c:603 [inline] run_ksoftirqd+0x8e/0x110 kernel/softirq.c:595 smpboot_thread_fn+0x6a3/0xa40 kernel/smpboot.c:165 kthread+0x361/0x430 kernel/kthread.c:255 ret_from_fork+0x24/0x30 
arch/x86/entry/entry_64.S:352
Allocated by task 10091: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] __kasan_kmalloc mm/kasan/common.c:513 [inline] __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486 kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:521 slab_post_alloc_hook mm/slab.h:584 [inline] slab_alloc_node mm/slab.c:3263 [inline] kmem_cache_alloc_node+0x138/0x740 mm/slab.c:3575 __alloc_skb+0xd5/0x5e0 net/core/skbuff.c:198 alloc_skb_fclone include/linux/skbuff.h:1099 [inline] sk_stream_alloc_skb net/ipv4/tcp.c:875 [inline] sk_stream_alloc_skb+0x113/0xc90 net/ipv4/tcp.c:852 tcp_sendmsg_locked+0xcf9/0x3470 net/ipv4/tcp.c:1282 tcp_sendmsg+0x30/0x50 net/ipv4/tcp.c:1432 inet_sendmsg+0x9e/0xe0 net/ipv4/af_inet.c:807 sock_sendmsg_nosec net/socket.c:652 [inline] sock_sendmsg+0xd7/0x130 net/socket.c:672 __sys_sendto+0x262/0x380 net/socket.c:1998 __do_sys_sendto net/socket.c:2010 [inline] __se_sys_sendto net/socket.c:2006 [inline] __x64_sys_sendto+0xe1/0x1a0 net/socket.c:2006 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe
Freed by task 10095: save_stack+0x23/0x90 mm/kasan/common.c:72 set_track mm/kasan/common.c:80 [inline] kasan_set_free_info mm/kasan/common.c:335 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/common.c:474 kasan_slab_free+0xe/0x10 mm/kasan/common.c:483 __cache_free mm/slab.c:3426 [inline] kmem_cache_free+0x86/0x320 mm/slab.c:3694 kfree_skbmem+0x178/0x1c0 net/core/skbuff.c:645 __kfree_skb+0x1e/0x30 net/core/skbuff.c:681 sk_eat_skb include/net/sock.h:2453 [inline] tcp_recvmsg+0x1252/0x2930 net/ipv4/tcp.c:2166 inet_recvmsg+0x136/0x610 net/ipv4/af_inet.c:838 sock_recvmsg_nosec net/socket.c:886 [inline] sock_recvmsg net/socket.c:904 [inline] sock_recvmsg+0xce/0x110 net/socket.c:900 __sys_recvfrom+0x1ff/0x350 net/socket.c:2055 __do_sys_recvfrom net/socket.c:2073 [inline] __se_sys_recvfrom net/socket.c:2069 [inline] __x64_sys_recvfrom+0xe1/0x1a0 net/socket.c:2069 do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 entry_SYSCALL_64_after_hwframe+0x49/0xbe
The buggy address belongs to the object at ffff8880a488d040 which belongs to the cache skbuff_fclone_cache of size 456 The buggy address is located 40 bytes inside of 456-byte region [ffff8880a488d040, ffff8880a488d208) The buggy address belongs to the page: page:ffffea0002922340 refcount:1 mapcount:0 mapping:ffff88821b057000 index:0x0 raw: 00fffe0000000200 ffffea00022a5788 ffffea0002624a48 ffff88821b057000 raw: 0000000000000000 ffff8880a488d040 0000000100000006 0000000000000000 page dumped because: kasan: bad access detected
Memory state around the buggy address: ffff8880a488cf00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8880a488cf80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
ffff8880a488d000: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
^ ffff8880a488d080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff8880a488d100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
Fixes: 853697504de0 ("tcp: Fix highest_sack and highest_sack_seq") Fixes: 50895b9de1d3 ("tcp: highest_sack fix") Fixes: 737ff314563c ("tcp: use sequence distance to detect reordering") Signed-off-by: Eric Dumazet edumazet@google.com Cc: Cambda Zhu cambda@linux.alibaba.com Cc: Yuchung Cheng ycheng@google.com Cc: Neal Cardwell ncardwell@google.com Acked-by: Neal Cardwell ncardwell@google.com Acked-by: Yuchung Cheng ycheng@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv4/tcp.c | 1 + net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_output.c | 1 + 3 files changed, 3 insertions(+)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index af9361e..e80eb17 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2507,6 +2507,7 @@ static void tcp_rtx_queue_purge(struct sock *sk) { struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
+ tcp_sk(sk)->highest_sack = NULL; while (p) { struct sk_buff *skb = rb_to_skb(p);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6225c87..dd7b751 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3152,6 +3152,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, tp->retransmit_skb_hint = NULL; if (unlikely(skb == tp->lost_skb_hint)) tp->lost_skb_hint = NULL; + tcp_highest_sack_replace(sk, skb, next); tcp_rtx_queue_unlink_and_free(skb, sk); }
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1cc20ed..cc4ba42 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3165,6 +3165,7 @@ int tcp_send_synack(struct sock *sk) if (!nskb) return -ENOMEM; INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor); + tcp_highest_sack_replace(sk, skb, nskb); tcp_rtx_queue_unlink_and_free(skb, sk); __skb_header_release(nskb); tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
From: Eric Dumazet edumazet@google.com
[ Upstream commit 1efba987c48629c0c64703bb4ea76ca1a3771d17 ]
If both IFF_NAPI_FRAGS mode and XDP are enabled, and the XDP program consumes the skb, we need to clear the napi.skb (or risk a use-after-free) and release the mutex (or risk a deadlock).
WARNING: lock held when returning to user space! 5.5.0-rc6-syzkaller #0 Not tainted ------------------------------------------------ syz-executor.0/455 is leaving the kernel with locks still held! 1 lock held by syz-executor.0/455: #0: ffff888098f6e748 (&tfile->napi_mutex){+.+.}, at: tun_get_user+0x1604/0x3fc0 drivers/net/tun.c:1835
Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Cc: Petar Penkov ppenkov@google.com Cc: Willem de Bruijn willemb@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/tun.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9ae854c..5589d6cd 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1899,6 +1899,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (ret != XDP_PASS) { rcu_read_unlock(); local_bh_enable(); + if (frags) { + tfile->napi.skb = NULL; + mutex_unlock(&tfile->napi_mutex); + } return total_len; } }
From: David Howells dhowells@redhat.com
commit a45ea48e2bcd92c1f678b794f488ca0bda9835b8 upstream.
The afs filesystem needs to prohibit certain characters from cell names, such as '/', as these are used to form filenames in procfs, leading to the following warning being generated:
WARNING: CPU: 0 PID: 3489 at fs/proc/generic.c:178
Fix afs_alloc_cell() to disallow nonprintable characters, '/', '@' and names that begin with a dot.
Remove the check for "@cell" as that is then redundant.
This can be tested by running:
echo add foo/.bar 1.2.3.4 >/proc/fs/afs/cells
Note that we will also need to deal with:
- Names ending in ".invalid" shouldn't be passed to the DNS.
- Names that contain non-valid domainname chars shouldn't be passed to the DNS.
- DNS replies that say "your-dns-needs-immediate-attention.<gTLD>" and replies containing A records that say 127.0.53.53 should be considered invalid. [https://www.icann.org/en/system/files/files/name-collision-mitigation-01aug1...]
but these need to be dealt with by the kafs-client DNS program rather than the kernel.
Reported-by: syzbot+b904ba7c947a37b4b291@syzkaller.appspotmail.com Cc: stable@kernel.org Signed-off-by: David Howells dhowells@redhat.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/afs/cell.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/fs/afs/cell.c b/fs/afs/cell.c index ee07162..cce0e23 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -135,8 +135,17 @@ static struct afs_cell *afs_alloc_cell(struct afs_net *net, _leave(" = -ENAMETOOLONG"); return ERR_PTR(-ENAMETOOLONG); } - if (namelen == 5 && memcmp(name, "@cell", 5) == 0) + + /* Prohibit cell names that contain unprintable chars, '/' and '@' or + * that begin with a dot. This also precludes "@cell". + */ + if (name[0] == '.') return ERR_PTR(-EINVAL); + for (i = 0; i < namelen; i++) { + char ch = name[i]; + if (!isprint(ch) || ch == '/' || ch == '@') + return ERR_PTR(-EINVAL); + }
_enter("%*.*s,%s", namelen, namelen, name, vllist);
From: Luuk Paulussen luuk.paulussen@alliedtelesis.co.nz
commit cf3ca1877574a306c0207cbf7fdf25419d9229df upstream.
reg2volt returns the voltage that matches a given register value. Converting this back the other way with volt2reg didn't return the same register value because it used truncation instead of rounding.
This meant that values read from sysfs could not be written back to sysfs to set back the same register value.
With this change, volt2reg will return the same value for every voltage previously returned by reg2volt (for the set of possible input values).
Signed-off-by: Luuk Paulussen luuk.paulussen@alliedtelesis.co.nz Link: https://lore.kernel.org/r/20191205231659.1301-1-luuk.paulussen@alliedtelesis... cc: stable@vger.kernel.org Signed-off-by: Guenter Roeck linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hwmon/adt7475.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index f4c7516..0a87c5b 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -296,9 +296,10 @@ static inline u16 volt2reg(int channel, long volt, u8 bypass_attn) long reg;
if (bypass_attn & (1 << channel)) - reg = (volt * 1024) / 2250; + reg = DIV_ROUND_CLOSEST(volt * 1024, 2250); else - reg = (volt * r[1] * 1024) / ((r[0] + r[1]) * 2250); + reg = DIV_ROUND_CLOSEST(volt * r[1] * 1024, + (r[0] + r[1]) * 2250); return clamp_val(reg, 0, 1023) & (0xff << 2); }
From: Guenter Roeck linux@roeck-us.net
commit 3bf8bdcf3bada771eb12b57f2a30caee69e8ab8d upstream.
The hwmon core uses device managed functions, tied to the hwmon parent device, for various internal memory allocations. This is problematic since hwmon device lifetime does not necessarily match its parent's device lifetime. If there is a mismatch, memory leaks will accumulate until the parent device is released.
Fix the problem by managing all memory allocations internally. The only exception is memory allocation for thermal device registration, which can be tied to the hwmon device, along with thermal device registration itself.
Fixes: d560168b5d0f ("hwmon: (core) New hwmon registration API") Cc: stable@vger.kernel.org # v4.14.x: 47c332deb8e8: hwmon: Deal with errors from the thermal subsystem Cc: stable@vger.kernel.org # v4.14.x: 74e3512731bd: hwmon: (core) Fix double-free in __hwmon_device_register() Cc: stable@vger.kernel.org # v4.9.x: 3a412d5e4a1c: hwmon: (core) Simplify sysfs attribute name allocation Cc: stable@vger.kernel.org # v4.9.x: 47c332deb8e8: hwmon: Deal with errors from the thermal subsystem Cc: stable@vger.kernel.org # v4.9.x: 74e3512731bd: hwmon: (core) Fix double-free in __hwmon_device_register() Cc: stable@vger.kernel.org # v4.9+ Cc: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Guenter Roeck linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hwmon/hwmon.c | 68 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 27 deletions(-)
diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 6b3559f..d34de21 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -51,6 +51,7 @@ struct hwmon_device_attribute {
#define to_hwmon_attr(d) \ container_of(d, struct hwmon_device_attribute, dev_attr) +#define to_dev_attr(a) container_of(a, struct device_attribute, attr)
/* * Thermal zone information @@ -58,7 +59,7 @@ struct hwmon_device_attribute { * also provides the sensor index. */ struct hwmon_thermal_data { - struct hwmon_device *hwdev; /* Reference to hwmon device */ + struct device *dev; /* Reference to hwmon device */ int index; /* sensor index */ };
@@ -95,9 +96,27 @@ static umode_t hwmon_dev_name_is_visible(struct kobject *kobj, NULL };
+static void hwmon_free_attrs(struct attribute **attrs) +{ + int i; + + for (i = 0; attrs[i]; i++) { + struct device_attribute *dattr = to_dev_attr(attrs[i]); + struct hwmon_device_attribute *hattr = to_hwmon_attr(dattr); + + kfree(hattr); + } + kfree(attrs); +} + static void hwmon_dev_release(struct device *dev) { - kfree(to_hwmon_device(dev)); + struct hwmon_device *hwdev = to_hwmon_device(dev); + + if (hwdev->group.attrs) + hwmon_free_attrs(hwdev->group.attrs); + kfree(hwdev->groups); + kfree(hwdev); }
static struct class hwmon_class = { @@ -121,11 +140,11 @@ static void hwmon_dev_release(struct device *dev) static int hwmon_thermal_get_temp(void *data, int *temp) { struct hwmon_thermal_data *tdata = data; - struct hwmon_device *hwdev = tdata->hwdev; + struct hwmon_device *hwdev = to_hwmon_device(tdata->dev); int ret; long t;
- ret = hwdev->chip->ops->read(&hwdev->dev, hwmon_temp, hwmon_temp_input, + ret = hwdev->chip->ops->read(tdata->dev, hwmon_temp, hwmon_temp_input, tdata->index, &t); if (ret < 0) return ret; @@ -139,8 +158,7 @@ static int hwmon_thermal_get_temp(void *data, int *temp) .get_temp = hwmon_thermal_get_temp, };
-static int hwmon_thermal_add_sensor(struct device *dev, - struct hwmon_device *hwdev, int index) +static int hwmon_thermal_add_sensor(struct device *dev, int index) { struct hwmon_thermal_data *tdata; struct thermal_zone_device *tzd; @@ -149,10 +167,10 @@ static int hwmon_thermal_add_sensor(struct device *dev, if (!tdata) return -ENOMEM;
- tdata->hwdev = hwdev; + tdata->dev = dev; tdata->index = index;
- tzd = devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, + tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata, &hwmon_thermal_ops); /* * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, @@ -164,8 +182,7 @@ static int hwmon_thermal_add_sensor(struct device *dev, return 0; } #else -static int hwmon_thermal_add_sensor(struct device *dev, - struct hwmon_device *hwdev, int index) +static int hwmon_thermal_add_sensor(struct device *dev, int index) { return 0; } @@ -242,8 +259,7 @@ static bool is_string_attr(enum hwmon_sensor_types type, u32 attr) (type == hwmon_fan && attr == hwmon_fan_label); }
-static struct attribute *hwmon_genattr(struct device *dev, - const void *drvdata, +static struct attribute *hwmon_genattr(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int index, @@ -271,7 +287,7 @@ static struct attribute *hwmon_genattr(struct device *dev, if ((mode & S_IWUGO) && !ops->write) return ERR_PTR(-EINVAL);
- hattr = devm_kzalloc(dev, sizeof(*hattr), GFP_KERNEL); + hattr = kzalloc(sizeof(*hattr), GFP_KERNEL); if (!hattr) return ERR_PTR(-ENOMEM);
@@ -478,8 +494,7 @@ static int hwmon_num_channel_attrs(const struct hwmon_channel_info *info) return n; }
-static int hwmon_genattrs(struct device *dev, - const void *drvdata, +static int hwmon_genattrs(const void *drvdata, struct attribute **attrs, const struct hwmon_ops *ops, const struct hwmon_channel_info *info) @@ -505,7 +520,7 @@ static int hwmon_genattrs(struct device *dev, attr_mask &= ~BIT(attr); if (attr >= template_size) return -EINVAL; - a = hwmon_genattr(dev, drvdata, info->type, attr, i, + a = hwmon_genattr(drvdata, info->type, attr, i, templates[attr], ops); if (IS_ERR(a)) { if (PTR_ERR(a) != -ENOENT) @@ -519,8 +534,7 @@ static int hwmon_genattrs(struct device *dev, }
static struct attribute ** -__hwmon_create_attrs(struct device *dev, const void *drvdata, - const struct hwmon_chip_info *chip) +__hwmon_create_attrs(const void *drvdata, const struct hwmon_chip_info *chip) { int ret, i, aindex = 0, nattrs = 0; struct attribute **attrs; @@ -531,15 +545,17 @@ static int hwmon_genattrs(struct device *dev, if (nattrs == 0) return ERR_PTR(-EINVAL);
- attrs = devm_kcalloc(dev, nattrs + 1, sizeof(*attrs), GFP_KERNEL); + attrs = kcalloc(nattrs + 1, sizeof(*attrs), GFP_KERNEL); if (!attrs) return ERR_PTR(-ENOMEM);
for (i = 0; chip->info[i]; i++) { - ret = hwmon_genattrs(dev, drvdata, &attrs[aindex], chip->ops, + ret = hwmon_genattrs(drvdata, &attrs[aindex], chip->ops, chip->info[i]); - if (ret < 0) + if (ret < 0) { + hwmon_free_attrs(attrs); return ERR_PTR(ret); + } aindex += ret; }
@@ -581,14 +597,13 @@ static int hwmon_genattrs(struct device *dev, for (i = 0; groups[i]; i++) ngroups++;
- hwdev->groups = devm_kcalloc(dev, ngroups, sizeof(*groups), - GFP_KERNEL); + hwdev->groups = kcalloc(ngroups, sizeof(*groups), GFP_KERNEL); if (!hwdev->groups) { err = -ENOMEM; goto free_hwmon; }
- attrs = __hwmon_create_attrs(dev, drvdata, chip); + attrs = __hwmon_create_attrs(drvdata, chip); if (IS_ERR(attrs)) { err = PTR_ERR(attrs); goto free_hwmon; @@ -633,8 +648,7 @@ static int hwmon_genattrs(struct device *dev, hwmon_temp_input, j)) continue; if (info[i]->config[j] & HWMON_T_INPUT) { - err = hwmon_thermal_add_sensor(dev, - hwdev, j); + err = hwmon_thermal_add_sensor(hdev, j); if (err) { device_unregister(hdev); goto ida_remove; @@ -647,7 +661,7 @@ static int hwmon_genattrs(struct device *dev, return hdev;
free_hwmon: - kfree(hwdev); + hwmon_dev_release(hdev); ida_remove: ida_simple_remove(&hwmon_ida, id); return ERR_PTR(err);
From: Alex Deucher alexander.deucher@amd.com
commit 5e89cd303e3a4505752952259b9f1ba036632544 upstream.
To account for parts of the chip that are "harvested" (disabled) due to silicon flaws, caches on some AMD GPUs must be initialized before ATS is enabled.
ATS is normally enabled by the IOMMU driver before the GPU driver loads, so this cache initialization would have to be done in a quirk, but that's too complex to be practical.
For Navi14 (device ID 0x7340), this initialization is done by the VBIOS, but apparently some boards went to production with an older VBIOS that doesn't do it. Disable ATS for those boards.
Link: https://lore.kernel.org/r/20200114205523.1054271-3-alexander.deucher@amd.com Bug: https://gitlab.freedesktop.org/drm/amd/issues/1015 See-also: d28ca864c493 ("PCI: Mark AMD Stoney Radeon R7 GPU ATS as broken") See-also: 9b44b0b09dec ("PCI: Mark AMD Stoney GPU ATS as broken") [bhelgaas: squash into one patch, simplify slightly, commit log] Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Bjorn Helgaas bhelgaas@google.com Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/pci/quirks.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 1cab79b..362d8f1 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4893,18 +4893,25 @@ static void quirk_no_ext_tags(struct pci_dev *pdev)
#ifdef CONFIG_PCI_ATS /* - * Some devices have a broken ATS implementation causing IOMMU stalls. - * Don't use ATS for those devices. + * Some devices require additional driver setup to enable ATS. Don't use + * ATS for those devices as ATS will be enabled before the driver has had a + * chance to load and configure the device. */ -static void quirk_no_ats(struct pci_dev *pdev) +static void quirk_amd_harvest_no_ats(struct pci_dev *pdev) { - pci_info(pdev, "disabling ATS (broken on this device)\n"); + if (pdev->device == 0x7340 && pdev->revision != 0xc5) + return; + + pci_info(pdev, "disabling ATS\n"); pdev->ats_cap = 0; }
/* AMD Stoney platform GPU */ -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_no_ats); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_amd_harvest_no_ats); +/* AMD Iceland dGPU */ +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats); +/* AMD Navi14 dGPU */ +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats); #endif /* CONFIG_PCI_ATS */
/* Freescale PCIe doesn't support MSI in RC mode */
From: Masami Hiramatsu mhiramat@kernel.org
commit aeed8aa3874dc15b9d82a6fe796fd7cfbb684448 upstream.
With CONFIG_PROVE_RCU_LIST, I had many suspicious RCU warnings when I ran ftracetest trigger testcases.
----- # dmesg -c > /dev/null # ./ftracetest test.d/trigger ... # dmesg | grep "RCU-list traversed" | cut -f 2 -d ] | cut -f 2 -d " " kernel/trace/trace_events_hist.c:6070 kernel/trace/trace_events_hist.c:1760 kernel/trace/trace_events_hist.c:5911 kernel/trace/trace_events_trigger.c:504 kernel/trace/trace_events_hist.c:1810 kernel/trace/trace_events_hist.c:3158 kernel/trace/trace_events_hist.c:3105 kernel/trace/trace_events_hist.c:5518 kernel/trace/trace_events_hist.c:5998 kernel/trace/trace_events_hist.c:6019 kernel/trace/trace_events_hist.c:6044 kernel/trace/trace_events_trigger.c:1500 kernel/trace/trace_events_trigger.c:1540 kernel/trace/trace_events_trigger.c:539 kernel/trace/trace_events_trigger.c:584 -----
I investigated those warnings and found that the RCU-list traversals in the event trigger and hist code didn't need to use the RCU version, because they are called only with event_mutex held.
I also checked other RCU-list traversals related to event trigger list, and found that most of them were called from event_hist_trigger_func() or hist_unregister_trigger() or register/unregister functions except for a few cases.
Replace these unneeded RCU-list traversals with the normal list traversal macro, and add lockdep_assert_held() to check that event_mutex is held.
Link: http://lkml.kernel.org/r/157680910305.11685.15110237954275915782.stgit@devno...
Cc: stable@vger.kernel.org Fixes: 30350d65ac567 ("tracing: Add variable support to hist triggers") Reviewed-by: Tom Zanussi zanussi@kernel.org Signed-off-by: Masami Hiramatsu mhiramat@kernel.org Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/trace/trace_events_hist.c | 38 +++++++++++++++++++++++++++---------- kernel/trace/trace_events_trigger.c | 20 ++++++++++++++----- 2 files changed, 43 insertions(+), 15 deletions(-)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0fb92d0..35d9cea 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1511,11 +1511,13 @@ static struct hist_field *find_var(struct hist_trigger_data *hist_data, struct event_trigger_data *test; struct hist_field *hist_field;
+ lockdep_assert_held(&event_mutex); + hist_field = find_var_field(hist_data, var_name); if (hist_field) return hist_field;
- list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { test_data = test->private_data; hist_field = find_var_field(test_data, var_name); @@ -1565,7 +1567,9 @@ static struct hist_field *find_file_var(struct trace_event_file *file, struct event_trigger_data *test; struct hist_field *hist_field;
- list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { test_data = test->private_data; hist_field = find_var_field(test_data, var_name); @@ -2828,7 +2832,9 @@ static char *find_trigger_filter(struct hist_trigger_data *hist_data, { struct event_trigger_data *test;
- list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (test->private_data == hist_data) return test->filter_str; @@ -2879,9 +2885,11 @@ static bool compatible_keys(struct hist_trigger_data *target_hist_data, struct event_trigger_data *test; unsigned int n_keys;
+ lockdep_assert_held(&event_mutex); + n_keys = target_hist_data->n_fields - target_hist_data->n_vals;
- list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { hist_data = test->private_data;
@@ -4905,7 +4913,7 @@ static int hist_show(struct seq_file *m, void *v) goto out_unlock; }
- list_for_each_entry_rcu(data, &event_file->triggers, list) { + list_for_each_entry(data, &event_file->triggers, list) { if (data->cmd_ops->trigger_type == ETT_EVENT_HIST) hist_trigger_show(m, data, n++); } @@ -5296,7 +5304,9 @@ static int hist_register_trigger(char *glob, struct event_trigger_ops *ops, if (hist_data->attrs->name && !named_data) goto new;
- list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (!hist_trigger_match(data, test, named_data, false)) continue; @@ -5380,10 +5390,12 @@ static bool have_hist_trigger_match(struct event_trigger_data *data, struct event_trigger_data *test, *named_data = NULL; bool match = false;
+ lockdep_assert_held(&event_mutex); + if (hist_data->attrs->name) named_data = find_named_trigger(hist_data->attrs->name);
- list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (hist_trigger_match(data, test, named_data, false)) { match = true; @@ -5401,10 +5413,12 @@ static bool hist_trigger_check_refs(struct event_trigger_data *data, struct hist_trigger_data *hist_data = data->private_data; struct event_trigger_data *test, *named_data = NULL;
+ lockdep_assert_held(&event_mutex); + if (hist_data->attrs->name) named_data = find_named_trigger(hist_data->attrs->name);
- list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (!hist_trigger_match(data, test, named_data, false)) continue; @@ -5426,10 +5440,12 @@ static void hist_unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test, *named_data = NULL; bool unregistered = false;
+ lockdep_assert_held(&event_mutex); + if (hist_data->attrs->name) named_data = find_named_trigger(hist_data->attrs->name);
- list_for_each_entry_rcu(test, &file->triggers, list) { + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { if (!hist_trigger_match(data, test, named_data, false)) continue; @@ -5455,7 +5471,9 @@ static bool hist_file_check_refs(struct trace_event_file *file) struct hist_trigger_data *hist_data; struct event_trigger_data *test;
- list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == ETT_EVENT_HIST) { hist_data = test->private_data; if (check_var_refs(hist_data)) diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index cd12ecb..b05d1b6 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -495,7 +495,9 @@ void update_cond_flag(struct trace_event_file *file) struct event_trigger_data *data; bool set_cond = false;
- list_for_each_entry_rcu(data, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(data, &file->triggers, list) { if (data->filter || event_command_post_trigger(data->cmd_ops) || event_command_needs_rec(data->cmd_ops)) { set_cond = true; @@ -530,7 +532,9 @@ static int register_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *test; int ret = 0;
- list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { if (test->cmd_ops->trigger_type == data->cmd_ops->trigger_type) { ret = -EEXIST; goto out; @@ -575,7 +579,9 @@ static void unregister_trigger(char *glob, struct event_trigger_ops *ops, struct event_trigger_data *data; bool unregistered = false;
- list_for_each_entry_rcu(data, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(data, &file->triggers, list) { if (data->cmd_ops->trigger_type == test->cmd_ops->trigger_type) { unregistered = true; list_del_rcu(&data->list); @@ -1490,7 +1496,9 @@ int event_enable_register_trigger(char *glob, struct event_trigger_data *test; int ret = 0;
- list_for_each_entry_rcu(test, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(test, &file->triggers, list) { test_enable_data = test->private_data; if (test_enable_data && (test->cmd_ops->trigger_type == @@ -1530,7 +1538,9 @@ void event_enable_unregister_trigger(char *glob, struct event_trigger_data *data; bool unregistered = false;
- list_for_each_entry_rcu(data, &file->triggers, list) { + lockdep_assert_held(&event_mutex); + + list_for_each_entry(data, &file->triggers, list) { enable_data = data->private_data; if (enable_data && (data->cmd_ops->trigger_type ==
From: Johan Hovold johan@kernel.org
commit ba9a103f40fc4a3ec7558ec9b0b97d4f92034249 upstream.
The driver was issuing synchronous uninterruptible control requests without using a timeout. This could lead to the driver hanging on probe due to a malfunctioning (or malicious) device until the device is physically disconnected. While sleeping in probe the driver prevents other devices connected to the same hub from being added to (or removed from) the bus.
The USB upper limit of five seconds per request should be more than enough.
Fixes: 99f83c9c9ac9 ("[PATCH] USB: add driver for Keyspan Digital Remote") Signed-off-by: Johan Hovold johan@kernel.org Reviewed-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: stable stable@vger.kernel.org # 2.6.13 Link: https://lore.kernel.org/r/20200113171715.30621-1-johan@kernel.org Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/input/misc/keyspan_remote.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/input/misc/keyspan_remote.c b/drivers/input/misc/keyspan_remote.c index a8937ce..af4db13 100644 --- a/drivers/input/misc/keyspan_remote.c +++ b/drivers/input/misc/keyspan_remote.c @@ -339,7 +339,8 @@ static int keyspan_setup(struct usb_device* dev) int retval = 0;
retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), - 0x11, 0x40, 0x5601, 0x0, NULL, 0, 0); + 0x11, 0x40, 0x5601, 0x0, NULL, 0, + USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to set bit rate due to error: %d\n", __func__, retval); @@ -347,7 +348,8 @@ static int keyspan_setup(struct usb_device* dev) }
retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), - 0x44, 0x40, 0x0, 0x0, NULL, 0, 0); + 0x44, 0x40, 0x0, 0x0, NULL, 0, + USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to set resume sensitivity due to error: %d\n", __func__, retval); @@ -355,7 +357,8 @@ static int keyspan_setup(struct usb_device* dev) }
retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), - 0x22, 0x40, 0x0, 0x0, NULL, 0, 0); + 0x22, 0x40, 0x0, 0x0, NULL, 0, + USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to turn receive on due to error: %d\n", __func__, retval);
From: Hans Verkuil hverkuil-cisco@xs4all.nl
commit 8ff771f8c8d55d95f102cf88a970e541a8bd6bcf upstream.
This reverts commit a284e11c371e446371675668d8c8120a27227339.
This causes problems (drifting cursor) with at least the F11 function that reads more than 32 bytes.
The real issue is in the F54 driver, and so this should be fixed there, and not in rmi_smbus.c.
So first revert this bad commit, then fix the real problem in F54 in another patch.
Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Reported-by: Timo Kaufmann timokau@zoho.com Fixes: a284e11c371e ("Input: synaptics-rmi4 - don't increment rmiaddr for SMBus transfers") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20200115124819.3191024-2-hverkuil-cisco@xs4all.nl Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/input/rmi4/rmi_smbus.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/input/rmi4/rmi_smbus.c b/drivers/input/rmi4/rmi_smbus.c index 4b2466c..b6ccf39c 100644 --- a/drivers/input/rmi4/rmi_smbus.c +++ b/drivers/input/rmi4/rmi_smbus.c @@ -166,6 +166,7 @@ static int rmi_smb_write_block(struct rmi_transport_dev *xport, u16 rmiaddr, /* prepare to write next block of bytes */ cur_len -= SMB_MAX_COUNT; databuff += SMB_MAX_COUNT; + rmiaddr += SMB_MAX_COUNT; } exit: mutex_unlock(&rmi_smb->page_mutex); @@ -217,6 +218,7 @@ static int rmi_smb_read_block(struct rmi_transport_dev *xport, u16 rmiaddr, /* prepare to read next block of bytes */ cur_len -= SMB_MAX_COUNT; databuff += SMB_MAX_COUNT; + rmiaddr += SMB_MAX_COUNT; }
retval = 0;
From: Alex Sverdlin alexander.sverdlin@nokia.com
commit 927d780ee371d7e121cea4fc7812f6ef2cea461c upstream.
Scenario 1, ARMv7
=================
If code in arch/arm/kernel/ftrace.c operates on the mcount() pointer, the following may be generated:
00000230 <prealloc_fixed_plts>: 230: b5f8 push {r3, r4, r5, r6, r7, lr} 232: b500 push {lr} 234: f7ff fffe bl 0 <__gnu_mcount_nc> 234: R_ARM_THM_CALL __gnu_mcount_nc 238: f240 0600 movw r6, #0 238: R_ARM_THM_MOVW_ABS_NC __gnu_mcount_nc 23c: f8d0 1180 ldr.w r1, [r0, #384] ; 0x180
FTRACE currently is not able to deal with it:
WARNING: CPU: 0 PID: 0 at .../kernel/trace/ftrace.c:1979 ftrace_bug+0x1ad/0x230() ... CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.4.116-... #1 ... [<c0314e3d>] (unwind_backtrace) from [<c03115e9>] (show_stack+0x11/0x14) [<c03115e9>] (show_stack) from [<c051a7f1>] (dump_stack+0x81/0xa8) [<c051a7f1>] (dump_stack) from [<c0321c5d>] (warn_slowpath_common+0x69/0x90) [<c0321c5d>] (warn_slowpath_common) from [<c0321cf3>] (warn_slowpath_null+0x17/0x1c) [<c0321cf3>] (warn_slowpath_null) from [<c038ee9d>] (ftrace_bug+0x1ad/0x230) [<c038ee9d>] (ftrace_bug) from [<c038f1f9>] (ftrace_process_locs+0x27d/0x444) [<c038f1f9>] (ftrace_process_locs) from [<c08915bd>] (ftrace_init+0x91/0xe8) [<c08915bd>] (ftrace_init) from [<c0885a67>] (start_kernel+0x34b/0x358) [<c0885a67>] (start_kernel) from [<00308095>] (0x308095) ---[ end trace cb88537fdc8fa200 ]--- ftrace failed to modify [<c031266c>] prealloc_fixed_plts+0x8/0x60 actual: 44:f2:e1:36 ftrace record flags: 0 (0) expected tramp: c03143e9
Scenario 2, ARMv4T
==================
ftrace: allocating 14435 entries in 43 pages ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/trace/ftrace.c:2029 ftrace_bug+0x204/0x310 CPU: 0 PID: 0 Comm: swapper Not tainted 4.19.5 #1 Hardware name: Cirrus Logic EDB9302 Evaluation Board [<c0010a24>] (unwind_backtrace) from [<c000ecb0>] (show_stack+0x20/0x2c) [<c000ecb0>] (show_stack) from [<c03c72e8>] (dump_stack+0x20/0x30) [<c03c72e8>] (dump_stack) from [<c0021c18>] (__warn+0xdc/0x104) [<c0021c18>] (__warn) from [<c0021d7c>] (warn_slowpath_null+0x4c/0x5c) [<c0021d7c>] (warn_slowpath_null) from [<c0095360>] (ftrace_bug+0x204/0x310) [<c0095360>] (ftrace_bug) from [<c04dabac>] (ftrace_init+0x3b4/0x4d4) [<c04dabac>] (ftrace_init) from [<c04cef4c>] (start_kernel+0x20c/0x410) [<c04cef4c>] (start_kernel) from [<00000000>] ( (null)) ---[ end trace 0506a2f5dae6b341 ]--- ftrace failed to modify [<c000c350>] perf_trace_sys_exit+0x5c/0xe8 actual: 1e:ff:2f:e1 Initializing ftrace call sites ftrace record flags: 0 (0) expected tramp: c000fb24
This problem has already been analyzed; refer to the link below.
Fix the above problems by allowing only selected reloc types in __mcount_loc. The list itself comes from the legacy recordmcount.pl script.
Link: https://lore.kernel.org/lkml/56961010.6000806@pengutronix.de/ Cc: stable@vger.kernel.org Fixes: ed60453fa8f8 ("ARM: 6511/1: ftrace: add ARM support for C version of recordmcount") Signed-off-by: Alexander Sverdlin alexander.sverdlin@nokia.com Acked-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- scripts/recordmcount.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+)
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 895c40e..3b0dcf3 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -39,6 +39,10 @@ #define R_AARCH64_ABS64 257 #endif
+#define R_ARM_PC24 1 +#define R_ARM_THM_CALL 10 +#define R_ARM_CALL 28 + static int fd_map; /* File descriptor for file being modified. */ static int mmap_failed; /* Boolean flag. */ static char gpfx; /* prefix for global symbol name (sometimes '_') */ @@ -414,6 +418,18 @@ static uint32_t w2nat(uint16_t const x) #define RECORD_MCOUNT_64 #include "recordmcount.h"
+static int arm_is_fake_mcount(Elf32_Rel const *rp) +{ + switch (ELF32_R_TYPE(w(rp->r_info))) { + case R_ARM_THM_CALL: + case R_ARM_CALL: + case R_ARM_PC24: + return 0; + } + + return 1; +} + /* 64-bit EM_MIPS has weird ELF64_Rela.r_info. * http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.p... * We interpret Table 29 Relocation Operation (Elf64_Rel, Elf64_Rela) [p.40] @@ -515,6 +531,7 @@ static void MIPS64_r_info(Elf64_Rel *const rp, unsigned sym, unsigned type) altmcount = "__gnu_mcount_nc"; make_nop = make_nop_arm; rel_type_nop = R_ARM_NONE; + is_fake_mcount32 = arm_is_fake_mcount; break; case EM_AARCH64: reltype = R_AARCH64_ABS64;
From: Michał Mirosław mirq-linux@rere.qmqm.pl
commit f571389c0b015e76f91c697c4c1700aba860d34f upstream.
Commit 7ad2ed1dfcbe inadvertently mixed up a quirk flag's name and broke SDR50 tuning override. Use correct NVQUIRK_ name.
Fixes: 7ad2ed1dfcbe ("mmc: tegra: enable UHS-I modes") Cc: stable@vger.kernel.org Acked-by: Adrian Hunter adrian.hunter@intel.com Reviewed-by: Thierry Reding treding@nvidia.com Tested-by: Thierry Reding treding@nvidia.com Signed-off-by: Michał Mirosław mirq-linux@rere.qmqm.pl Link: https://lore.kernel.org/r/9aff1d859935e59edd81e4939e40d6c55e0b55f6.157839038... Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/mmc/host/sdhci-tegra.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 908b23e..14d749a 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -177,7 +177,7 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_DDR50; if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR104) misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR104; - if (soc_data->nvquirks & SDHCI_MISC_CTRL_ENABLE_SDR50) + if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR50) clk_ctrl |= SDHCI_CLOCK_CTRL_SDR50_TUNING_OVERRIDE; }
From: Michał Mirosław mirq-linux@rere.qmqm.pl
commit 2a187d03352086e300daa2044051db00044cd171 upstream.
For SDHCIv3+ with programmable clock mode, minimal clock frequency is still base clock / max(divider). Minimal programmable clock frequency is always greater than minimal divided clock frequency. Without this patch, SDHCI uses out-of-spec initial frequency when multiplier is big enough:
mmc1: mmc_rescan_try_freq: trying to init card at 468750 Hz [for 480 MHz source clock divided by 1024]
The code in sdhci_calc_clk() already chooses a correct SDCLK clock mode.
Fixes: c3ed3877625f ("mmc: sdhci: add support for programmable clock mode") Cc: stable@vger.kernel.org # 4f6aa3264af4: mmc: tegra: Only advertise UHS modes if IO regulator is present Cc: stable@vger.kernel.org Signed-off-by: Michał Mirosław mirq-linux@rere.qmqm.pl Acked-by: Adrian Hunter adrian.hunter@intel.com Link: https://lore.kernel.org/r/ffb489519a446caffe7a0a05c4b9372bd52397bb.157908203... Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/mmc/host/sdhci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 369817a..5a7fd89 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -3700,11 +3700,13 @@ int sdhci_setup_host(struct sdhci_host *host) if (host->ops->get_min_clock) mmc->f_min = host->ops->get_min_clock(host); else if (host->version >= SDHCI_SPEC_300) { - if (host->clk_mul) { - mmc->f_min = (host->max_clk * host->clk_mul) / 1024; + if (host->clk_mul) max_clk = host->max_clk * host->clk_mul; - } else - mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; + /* + * Divided Clock Mode minimum clock rate is always less than + * Programmable Clock Mode minimum clock rate. + */ + mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; } else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200;
From: Stephan Gerhold stephan@gerhold.net
commit 996d5d5f89a558a3608a46e73ccd1b99f1b1d058 upstream.
Setting the vibrator enable_mask is not implemented correctly:
For regmap_update_bits(map, reg, mask, val) we pass in either regs->enable_mask or 0 (= no-op) as mask and "val" as value. But "val" actually refers to the vibrator voltage control register, which has nothing to do with the enable_mask.
So we usually end up doing nothing when we really wanted to enable the vibrator.
We want to set or clear the enable_mask (to enable/disable the vibrator). Therefore, change the call to always modify the enable_mask and set the bits only if we want to enable the vibrator.
Fixes: d4c7c5c96c92 ("Input: pm8xxx-vib - handle separate enable register") Signed-off-by: Stephan Gerhold stephan@gerhold.net Link: https://lore.kernel.org/r/20200114183442.45720-1-stephan@gerhold.net Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/input/misc/pm8xxx-vibrator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/input/misc/pm8xxx-vibrator.c b/drivers/input/misc/pm8xxx-vibrator.c index 7dd1c1f..27b3db1 100644 --- a/drivers/input/misc/pm8xxx-vibrator.c +++ b/drivers/input/misc/pm8xxx-vibrator.c @@ -98,7 +98,7 @@ static int pm8xxx_vib_set(struct pm8xxx_vib *vib, bool on)
if (regs->enable_mask) rc = regmap_update_bits(vib->regmap, regs->enable_addr, - on ? regs->enable_mask : 0, val); + regs->enable_mask, on ? ~0 : 0);
return rc; }
From: Johan Hovold johan@kernel.org
commit 6b32391ed675827f8425a414abbc6fbd54ea54fe upstream.
Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface.
This in turn could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on.
Fixes: bdb5c57f209c ("Input: add sur40 driver for Samsung SUR40 (aka MS Surface 2.0/Pixelsense)") Signed-off-by: Johan Hovold johan@kernel.org Acked-by: Vladis Dronov vdronov@redhat.com Link: https://lore.kernel.org/r/20191210113737.4016-8-johan@kernel.org Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/input/touchscreen/sur40.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c index 894843a..caa3aca 100644 --- a/drivers/input/touchscreen/sur40.c +++ b/drivers/input/touchscreen/sur40.c @@ -657,7 +657,7 @@ static int sur40_probe(struct usb_interface *interface, int error;
/* Check if we really have the right interface. */ - iface_desc = &interface->altsetting[0]; + iface_desc = interface->cur_altsetting; if (iface_desc->desc.bInterfaceClass != 0xFF) return -ENODEV;
From: Johan Hovold johan@kernel.org
commit a8eeb74df5a6bdb214b2b581b14782c5f5a0cf83 upstream.
The driver was checking the number of endpoints of the first alternate setting instead of the current one, something which could lead to the driver binding to an invalid interface.
This in turn could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on.
Fixes: 162f98dea487 ("Input: gtco - fix crash on detecting device without endpoints") Signed-off-by: Johan Hovold johan@kernel.org Acked-by: Vladis Dronov vdronov@redhat.com Link: https://lore.kernel.org/r/20191210113737.4016-5-johan@kernel.org Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/input/tablet/gtco.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index 3503122..799c94d 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -875,18 +875,14 @@ static int gtco_probe(struct usb_interface *usbinterface, }
/* Sanity check that a device has an endpoint */ - if (usbinterface->altsetting[0].desc.bNumEndpoints < 1) { + if (usbinterface->cur_altsetting->desc.bNumEndpoints < 1) { dev_err(&usbinterface->dev, "Invalid number of endpoints\n"); error = -EINVAL; goto err_free_urb; }
- /* - * The endpoint is always altsetting 0, we know this since we know - * this device only has one interrupt endpoint - */ - endpoint = &usbinterface->altsetting[0].endpoint[0].desc; + endpoint = &usbinterface->cur_altsetting->endpoint[0].desc;
/* Some debug */ dev_dbg(&usbinterface->dev, "gtco # interfaces: %d\n", usbinterface->num_altsetting); @@ -973,7 +969,7 @@ static int gtco_probe(struct usb_interface *usbinterface, input_dev->dev.parent = &usbinterface->dev;
/* Setup the URB, it will be posted later on open of input device */ - endpoint = &usbinterface->altsetting[0].endpoint[0].desc; + endpoint = &usbinterface->cur_altsetting->endpoint[0].desc;
usb_fill_int_urb(gtco->urbinfo, udev,
From: Johan Hovold johan@kernel.org
commit 3111491fca4f01764e0c158c5e0f7ced808eef51 upstream.
The driver was checking the number of endpoints of the first alternate setting instead of the current one, something which could lead to the driver binding to an invalid interface.
This in turn could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on.
Fixes: 8e20cf2bce12 ("Input: aiptek - fix crash on detecting device without endpoints") Signed-off-by: Johan Hovold johan@kernel.org Acked-by: Vladis Dronov vdronov@redhat.com Link: https://lore.kernel.org/r/20191210113737.4016-3-johan@kernel.org Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/input/tablet/aiptek.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index c82cd50..dc2ad1c 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -1815,14 +1815,14 @@ static ssize_t show_firmwareCode(struct device *dev, struct device_attribute *at input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0);
/* Verify that a device really has an endpoint */ - if (intf->altsetting[0].desc.bNumEndpoints < 1) { + if (intf->cur_altsetting->desc.bNumEndpoints < 1) { dev_err(&intf->dev, "interface has %d endpoints, but must have minimum 1\n", - intf->altsetting[0].desc.bNumEndpoints); + intf->cur_altsetting->desc.bNumEndpoints); err = -EINVAL; goto fail3; } - endpoint = &intf->altsetting[0].endpoint[0].desc; + endpoint = &intf->cur_altsetting->endpoint[0].desc;
/* Go set up our URB, which is called when the tablet receives * input.
From: Johan Hovold johan@kernel.org
commit bcfcb7f9b480dd0be8f0df2df17340ca92a03b98 upstream.
The driver was checking the number of endpoints of the first alternate setting instead of the current one, something which could be used by a malicious device (or USB descriptor fuzzer) to trigger a NULL-pointer dereference.
Fixes: 1afca2b66aac ("Input: add Pegasus Notetaker tablet driver") Signed-off-by: Johan Hovold johan@kernel.org Acked-by: Martin Kepplinger martink@posteo.de Acked-by: Vladis Dronov vdronov@redhat.com Link: https://lore.kernel.org/r/20191210113737.4016-2-johan@kernel.org Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/input/tablet/pegasus_notetaker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/input/tablet/pegasus_notetaker.c b/drivers/input/tablet/pegasus_notetaker.c index ffd03cf..570cdae 100644 --- a/drivers/input/tablet/pegasus_notetaker.c +++ b/drivers/input/tablet/pegasus_notetaker.c @@ -274,7 +274,7 @@ static int pegasus_probe(struct usb_interface *intf, return -ENODEV;
/* Sanity check that the device has an endpoint */ - if (intf->altsetting[0].desc.bNumEndpoints < 1) { + if (intf->cur_altsetting->desc.bNumEndpoints < 1) { dev_err(&intf->dev, "Invalid number of endpoints\n"); return -EINVAL; }
From: Chuhong Yuan hslester96@gmail.com
commit 97e24b095348a15ec08c476423c3b3b939186ad7 upstream.
The driver misses a check for devm_thermal_zone_of_sensor_register(). Add a check to fix it.
Fixes: e28d0c9cd381 ("input: convert sun4i-ts to use devm_thermal_zone_of_sensor_register") Signed-off-by: Chuhong Yuan hslester96@gmail.com Signed-off-by: Dmitry Torokhov dmitry.torokhov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/input/touchscreen/sun4i-ts.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c index d2e14d9..ab44eb03 100644 --- a/drivers/input/touchscreen/sun4i-ts.c +++ b/drivers/input/touchscreen/sun4i-ts.c @@ -246,6 +246,7 @@ static int sun4i_ts_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; struct device *hwmon; + struct thermal_zone_device *thermal; int error; u32 reg; bool ts_attached; @@ -365,7 +366,10 @@ static int sun4i_ts_probe(struct platform_device *pdev) if (IS_ERR(hwmon)) return PTR_ERR(hwmon);
- devm_thermal_zone_of_sensor_register(ts->dev, 0, ts, &sun4i_ts_tz_ops); + thermal = devm_thermal_zone_of_sensor_register(ts->dev, 0, ts, + &sun4i_ts_tz_ops); + if (IS_ERR(thermal)) + return PTR_ERR(thermal);
writel(TEMP_IRQ_EN(1), ts->base + TP_INT_FIFOC);
From: Florian Westphal fw@strlen.de
commit 7eaecf7963c1c8f62d62c6a8e7c439b0e7f2d365 upstream.
syzbot reports yet another NULL dereference crash caused by a missing test for the presence of the attribute.
Reported-by: syzbot+cf23983d697c26c34f60@syzkaller.appspotmail.com Fixes: b96af92d6eaf9fadd ("netfilter: nf_tables: implement Passive OS fingerprint module in nft_osf") Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/netfilter/nft_osf.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index df4e3e0..a003533 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -47,6 +47,9 @@ static int nft_osf_init(const struct nft_ctx *ctx, struct nft_osf *priv = nft_expr_priv(expr); int err;
+ if (!tb[NFTA_OSF_DREG]) + return -EINVAL; + priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]); err = nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN);
From: Gilles Buloz gilles.buloz@kontron.com
commit 7713e62c8623c54dac88d1fa724aa487a38c3efb upstream.
in0 thresholds are written to the in2 thresholds registers, in2 thresholds to the in3 thresholds registers, in3 thresholds to the in4 thresholds registers, and in4 thresholds to the in0 thresholds registers.
Signed-off-by: Gilles Buloz gilles.buloz@kontron.com Link: https://lore.kernel.org/r/5de0f509.rc0oEvPOMjbfPW1w%gilles.buloz@kontron.com Fixes: 3434f3783580 ("hwmon: Driver for Nuvoton NCT7802Y") Signed-off-by: Guenter Roeck linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hwmon/nct7802.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 38ffbdb..779ec8f 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -32,8 +32,8 @@ static const u8 REG_VOLTAGE[5] = { 0x09, 0x0a, 0x0c, 0x0d, 0x0e };
static const u8 REG_VOLTAGE_LIMIT_LSB[2][5] = { - { 0x40, 0x00, 0x42, 0x44, 0x46 }, - { 0x3f, 0x00, 0x41, 0x43, 0x45 }, + { 0x46, 0x00, 0x40, 0x42, 0x44 }, + { 0x45, 0x00, 0x3f, 0x41, 0x43 }, };
static const u8 REG_VOLTAGE_LIMIT_MSB[5] = { 0x48, 0x00, 0x47, 0x47, 0x48 };
From: Bart Van Assche bvanassche@acm.org
commit 04060db41178c7c244f2c7dcd913e7fd331de915 upstream.
iscsit_close_connection() calls isert_wait_conn(). Due to commit e9d3009cb936 both functions call target_wait_for_sess_cmds() although that last function should be called only once. Fix this by removing the target_wait_for_sess_cmds() call from isert_wait_conn() and by only calling isert_wait_conn() after target_wait_for_sess_cmds().
Fixes: e9d3009cb936 ("scsi: target: iscsi: Wait for all commands to finish before freeing a session"). Link: https://lore.kernel.org/r/20200116044737.19507-1-bvanassche@acm.org Reported-by: Rahul Kundu rahul.kundu@chelsio.com Signed-off-by: Bart Van Assche bvanassche@acm.org Tested-by: Mike Marciniszyn mike.marciniszyn@intel.com Acked-by: Sagi Grimberg sagi@grimberg.me Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/infiniband/ulp/isert/ib_isert.c | 12 ------------ drivers/target/iscsi/iscsi_target.c | 6 +++--- 2 files changed, 3 insertions(+), 15 deletions(-)
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index f39670c..9899f7e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2584,17 +2584,6 @@ static void isert_release_work(struct work_struct *work) } }
-static void -isert_wait4cmds(struct iscsi_conn *conn) -{ - isert_info("iscsi_conn %p\n", conn); - - if (conn->sess) { - target_sess_cmd_list_set_waiting(conn->sess->se_sess); - target_wait_for_sess_cmds(conn->sess->se_sess); - } -} - /** * isert_put_unsol_pending_cmds() - Drop commands waiting for * unsolicitate dataout @@ -2642,7 +2631,6 @@ static void isert_wait_conn(struct iscsi_conn *conn)
ib_drain_qp(isert_conn->qp); isert_put_unsol_pending_cmds(conn); - isert_wait4cmds(conn); isert_wait4logout(isert_conn);
queue_work(isert_release_wq, &isert_conn->release_work); diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 317d0f3..14bd54d 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -4123,9 +4123,6 @@ int iscsit_close_connection( iscsit_stop_nopin_response_timer(conn); iscsit_stop_nopin_timer(conn);
- if (conn->conn_transport->iscsit_wait_conn) - conn->conn_transport->iscsit_wait_conn(conn); - /* * During Connection recovery drop unacknowledged out of order * commands for this connection, and prepare the other commands @@ -4211,6 +4208,9 @@ int iscsit_close_connection( target_sess_cmd_list_set_waiting(sess->se_sess); target_wait_for_sess_cmds(sess->se_sess);
+ if (conn->conn_transport->iscsit_wait_conn) + conn->conn_transport->iscsit_wait_conn(conn); + ahash_request_free(conn->conn_tx_hash); if (conn->conn_rx_hash) { struct crypto_ahash *tfm;
From: Changbin Du changbin.du@gmail.com
commit d0695e2351102affd8efae83989056bc4b275917 upstream.
Like commit 0566e40ce7 ("tracing: initcall: Ordered comparison of function pointers"), this patch fixes another remaining ordered comparison of function pointers, this one in xen.h, found by clang-9.
In file included from arch/x86/xen/trace.c:21: In file included from ./include/trace/events/xen.h:475: In file included from ./include/trace/define_trace.h:102: In file included from ./include/trace/trace_events.h:473: ./include/trace/events/xen.h:69:7: warning: ordered comparison of function \ pointers ('xen_mc_callback_fn_t' (aka 'void (*)(void *)') and 'xen_mc_callback_fn_t') [-Wordered-compare-function-pointers] __field(xen_mc_callback_fn_t, fn) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ./include/trace/trace_events.h:421:29: note: expanded from macro '__field' ^ ./include/trace/trace_events.h:407:6: note: expanded from macro '__field_ext' is_signed_type(type), filter_type); \ ^ ./include/linux/trace_events.h:554:44: note: expanded from macro 'is_signed_type' ^
Fixes: c796f213a6934 ("xen/trace: add multicall tracing") Signed-off-by: Changbin Du changbin.du@gmail.com Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/trace/events/xen.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index fdcf88b..bb76c03 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -66,7 +66,11 @@ TP_PROTO(xen_mc_callback_fn_t fn, void *data), TP_ARGS(fn, data), TP_STRUCT__entry( - __field(xen_mc_callback_fn_t, fn) + /* + * Use field_struct to avoid is_signed_type() + * comparison of a function pointer. + */ + __field_struct(xen_mc_callback_fn_t, fn) __field(void *, data) ), TP_fast_assign(
From: Al Viro viro@zeniv.linux.org.uk
commit d0cb50185ae942b03c4327be322055d622dc79f6 upstream.
may_create_in_sticky() call is done when we already have dropped the reference to dir.
Fixes: 30aba6656f61e (namei: allow restricted O_CREAT of FIFOs and regular files) Signed-off-by: Al Viro viro@zeniv.linux.org.uk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/namei.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c index 6448cfb..1dd68b3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1009,7 +1009,8 @@ static int may_linkat(struct path *link) * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory * should be allowed, or not, on files that already * exist. - * @dir: the sticky parent directory + * @dir_mode: mode bits of directory + * @dir_uid: owner of directory * @inode: the inode of the file to open * * Block an O_CREAT open of a FIFO (or a regular file) when: @@ -1025,18 +1026,18 @@ static int may_linkat(struct path *link) * * Returns 0 if the open is allowed, -ve on error. */ -static int may_create_in_sticky(struct dentry * const dir, +static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid, struct inode * const inode) { if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) || (!sysctl_protected_regular && S_ISREG(inode->i_mode)) || - likely(!(dir->d_inode->i_mode & S_ISVTX)) || - uid_eq(inode->i_uid, dir->d_inode->i_uid) || + likely(!(dir_mode & S_ISVTX)) || + uid_eq(inode->i_uid, dir_uid) || uid_eq(current_fsuid(), inode->i_uid)) return 0;
- if (likely(dir->d_inode->i_mode & 0002) || - (dir->d_inode->i_mode & 0020 && + if (likely(dir_mode & 0002) || + (dir_mode & 0020 && ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) || (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) { return -EACCES; @@ -3265,6 +3266,8 @@ static int do_last(struct nameidata *nd, struct file *file, const struct open_flags *op) { struct dentry *dir = nd->path.dentry; + kuid_t dir_uid = dir->d_inode->i_uid; + umode_t dir_mode = dir->d_inode->i_mode; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; bool got_write = false; @@ -3400,7 +3403,7 @@ static int do_last(struct nameidata *nd, error = -EISDIR; if (d_is_dir(nd->path.dentry)) goto out; - error = may_create_in_sticky(dir, + error = may_create_in_sticky(dir_mode, dir_uid, d_backing_inode(nd->path.dentry)); if (unlikely(error)) goto out;
From: Finn Thain fthain@telegraphics.com.au
commit 865ad2f2201dc18685ba2686f13217f8b3a9c52c upstream.
The netif_stop_queue() call in sonic_send_packet() races with the netif_wake_queue() call in sonic_interrupt(). This causes issues like "NETDEV WATCHDOG: eth0 (macsonic): transmit queue 0 timed out". Fix this by disabling interrupts when accessing tx_skb[] and next_tx. Update a comment to clarify the synchronization properties.
Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 49 +++++++++++++++++++++++++----------- drivers/net/ethernet/natsemi/sonic.h | 1 + 2 files changed, 36 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 5f1875f..b4ed6b0 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -63,6 +63,8 @@ static int sonic_open(struct net_device *dev)
netif_dbg(lp, ifup, dev, "%s: initializing sonic driver\n", __func__);
+ spin_lock_init(&lp->lock); + for (i = 0; i < SONIC_NUM_RRS; i++) { struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); if (skb == NULL) { @@ -205,8 +207,6 @@ static void sonic_tx_timeout(struct net_device *dev) * wake the tx queue * Concurrently with all of this, the SONIC is potentially writing to * the status flags of the TDs. - * Until some mutual exclusion is added, this code will not work with SMP. However, - * MIPS Jazz machines and m68k Macs were all uni-processor machines. */
static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) @@ -214,7 +214,8 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); dma_addr_t laddr; int length; - int entry = lp->next_tx; + int entry; + unsigned long flags;
netif_dbg(lp, tx_queued, dev, "%s: skb=%p\n", __func__, skb);
@@ -236,6 +237,10 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; }
+ spin_lock_irqsave(&lp->lock, flags); + + entry = lp->next_tx; + sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0); /* clear status */ sonic_tda_put(dev, entry, SONIC_TD_FRAG_COUNT, 1); /* single fragment */ sonic_tda_put(dev, entry, SONIC_TD_PKTSIZE, length); /* length of packet */ @@ -245,10 +250,6 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) sonic_tda_put(dev, entry, SONIC_TD_LINK, sonic_tda_get(dev, entry, SONIC_TD_LINK) | SONIC_EOL);
- /* - * Must set tx_skb[entry] only after clearing status, and - * before clearing EOL and before stopping queue - */ wmb(); lp->tx_len[entry] = length; lp->tx_laddr[entry] = laddr; @@ -271,6 +272,8 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev)
SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP);
+ spin_unlock_irqrestore(&lp->lock, flags); + return NETDEV_TX_OK; }
@@ -283,9 +286,21 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) struct net_device *dev = dev_id; struct sonic_local *lp = netdev_priv(dev); int status; + unsigned long flags; + + /* The lock has two purposes. Firstly, it synchronizes sonic_interrupt() + * with sonic_send_packet() so that the two functions can share state. + * Secondly, it makes sonic_interrupt() re-entrant, as that is required + * by macsonic which must use two IRQs with different priority levels. + */ + spin_lock_irqsave(&lp->lock, flags); + + status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; + if (!status) { + spin_unlock_irqrestore(&lp->lock, flags);
- if (!(status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)) return IRQ_NONE; + }
do { if (status & SONIC_INT_PKTRX) { @@ -299,11 +314,12 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) int td_status; int freed_some = 0;
- /* At this point, cur_tx is the index of a TD that is one of: - * unallocated/freed (status set & tx_skb[entry] clear) - * allocated and sent (status set & tx_skb[entry] set ) - * allocated and not yet sent (status clear & tx_skb[entry] set ) - * still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear) + /* The state of a Transmit Descriptor may be inferred + * from { tx_skb[entry], td_status } as follows. + * { clear, clear } => the TD has never been used + * { set, clear } => the TD was handed to SONIC + * { set, set } => the TD was handed back + * { clear, set } => the TD is available for re-use */
netif_dbg(lp, intr, dev, "%s: tx done\n", __func__); @@ -405,7 +421,12 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) /* load CAM done */ if (status & SONIC_INT_LCD) SONIC_WRITE(SONIC_ISR, SONIC_INT_LCD); /* clear the interrupt */ - } while((status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)); + + status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; + } while (status); + + spin_unlock_irqrestore(&lp->lock, flags); + return IRQ_HANDLED; }
diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 2b27f70..f950686 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -322,6 +322,7 @@ struct sonic_local { int msg_enable; struct device *device; /* generic device */ struct net_device_stats stats; + spinlock_t lock; };
#define TX_TIMEOUT (3 * HZ)
From: Finn Thain fthain@telegraphics.com.au
commit 5fedabf5a70be26b19d7520f09f12a62274317c6 upstream.
The chip can change a packet's descriptor status flags at any time. However, an active interrupt flag gets cleared rather late. This allows a race condition that could theoretically lose an interrupt. Fix this by clearing asserted interrupt flags immediately.
Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index b4ed6b0..201569a 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -303,10 +303,11 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) }
do { + SONIC_WRITE(SONIC_ISR, status); /* clear the interrupt(s) */ + if (status & SONIC_INT_PKTRX) { netif_dbg(lp, intr, dev, "%s: packet rx\n", __func__); sonic_rx(dev); /* got packet(s) */ - SONIC_WRITE(SONIC_ISR, SONIC_INT_PKTRX); /* clear the interrupt */ }
if (status & SONIC_INT_TXDN) { @@ -361,7 +362,6 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) if (freed_some || lp->tx_skb[entry] == NULL) netif_wake_queue(dev); /* The ring is no longer full */ lp->cur_tx = entry; - SONIC_WRITE(SONIC_ISR, SONIC_INT_TXDN); /* clear the interrupt */ }
/* @@ -371,42 +371,31 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n", __func__); lp->stats.rx_fifo_errors++; - SONIC_WRITE(SONIC_ISR, SONIC_INT_RFO); /* clear the interrupt */ } if (status & SONIC_INT_RDE) { netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n", __func__); lp->stats.rx_dropped++; - SONIC_WRITE(SONIC_ISR, SONIC_INT_RDE); /* clear the interrupt */ } if (status & SONIC_INT_RBAE) { netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n", __func__); lp->stats.rx_dropped++; - SONIC_WRITE(SONIC_ISR, SONIC_INT_RBAE); /* clear the interrupt */ }
/* counter overruns; all counters are 16bit wide */ - if (status & SONIC_INT_FAE) { + if (status & SONIC_INT_FAE) lp->stats.rx_frame_errors += 65536; - SONIC_WRITE(SONIC_ISR, SONIC_INT_FAE); /* clear the interrupt */ - } - if (status & SONIC_INT_CRC) { + if (status & SONIC_INT_CRC) lp->stats.rx_crc_errors += 65536; - SONIC_WRITE(SONIC_ISR, SONIC_INT_CRC); /* clear the interrupt */ - } - if (status & SONIC_INT_MP) { + if (status & SONIC_INT_MP) lp->stats.rx_missed_errors += 65536; - SONIC_WRITE(SONIC_ISR, SONIC_INT_MP); /* clear the interrupt */ - }
/* transmit error */ - if (status & SONIC_INT_TXER) { + if (status & SONIC_INT_TXER) if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n", __func__); - SONIC_WRITE(SONIC_ISR, SONIC_INT_TXER); /* clear the interrupt */ - }
/* bus retry */ if (status & SONIC_INT_BR) { @@ -415,13 +404,8 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) /* ... to help debug DMA problems causing endless interrupts. */ /* Bounce the eth interface to turn on the interrupt again. */ SONIC_WRITE(SONIC_IMR, 0); - SONIC_WRITE(SONIC_ISR, SONIC_INT_BR); /* clear the interrupt */ }
- /* load CAM done */ - if (status & SONIC_INT_LCD) - SONIC_WRITE(SONIC_ISR, SONIC_INT_LCD); /* clear the interrupt */ - status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; } while (status);
From: Finn Thain fthain@telegraphics.com.au
commit e3885f576196ddfc670b3d53e745de96ffcb49ab upstream.
The driver accesses descriptor memory which is simultaneously accessed by the chip, so the compiler must not be allowed to re-order CPU accesses. sonic_buf_get() used 'volatile' to prevent that. sonic_buf_put() should have done so too but was overlooked.
Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index f950686..fb160df 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -345,30 +345,30 @@ struct sonic_local { as far as we can tell. */ /* OpenBSD calls this "SWO". I'd like to think that sonic_buf_put() is a much better name. */ -static inline void sonic_buf_put(void* base, int bitmode, +static inline void sonic_buf_put(u16 *base, int bitmode, int offset, __u16 val) { if (bitmode) #ifdef __BIG_ENDIAN - ((__u16 *) base + (offset*2))[1] = val; + __raw_writew(val, base + (offset * 2) + 1); #else - ((__u16 *) base + (offset*2))[0] = val; + __raw_writew(val, base + (offset * 2) + 0); #endif else - ((__u16 *) base)[offset] = val; + __raw_writew(val, base + (offset * 1) + 0); }
-static inline __u16 sonic_buf_get(void* base, int bitmode, +static inline __u16 sonic_buf_get(u16 *base, int bitmode, int offset) { if (bitmode) #ifdef __BIG_ENDIAN - return ((volatile __u16 *) base + (offset*2))[1]; + return __raw_readw(base + (offset * 2) + 1); #else - return ((volatile __u16 *) base + (offset*2))[0]; + return __raw_readw(base + (offset * 2) + 0); #endif else - return ((volatile __u16 *) base)[offset]; + return __raw_readw(base + (offset * 1) + 0); }
/* Inlines that you should actually use for reading/writing DMA buffers */
From: Finn Thain fthain@telegraphics.com.au
commit 427db97df1ee721c20bdc9a66db8a9e1da719855 upstream.
The tx_aborted_errors statistic should count packets flagged with EXD, EXC, FU, or BCM bits because those bits denote an aborted transmission. That corresponds to the bitmask 0x0446, not 0x0642. Use macros for these constants to avoid mistakes. Better to leave out FIFO Underruns (FU) as there's a separate counter for that purpose.
Don't lump all these errors in with the general tx_errors counter as that's used for tx timeout events.
On the rx side, don't count RDE and RBAE interrupts as dropped packets. These interrupts don't indicate a lost packet, just a lack of resources. When a lack of resources results in a lost packet, this gets reported in the rx_missed_errors counter (along with RFO events).
Don't double-count rx_frame_errors and rx_crc_errors.
Don't use the general rx_errors counter for events that already have special counters.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 21 +++++++-------------- drivers/net/ethernet/natsemi/sonic.h | 1 + 2 files changed, 8 insertions(+), 14 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 201569a..ee3696c 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -329,18 +329,19 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) if ((td_status = sonic_tda_get(dev, entry, SONIC_TD_STATUS)) == 0) break;
- if (td_status & 0x0001) { + if (td_status & SONIC_TCR_PTX) { lp->stats.tx_packets++; lp->stats.tx_bytes += sonic_tda_get(dev, entry, SONIC_TD_PKTSIZE); } else { - lp->stats.tx_errors++; - if (td_status & 0x0642) + if (td_status & (SONIC_TCR_EXD | + SONIC_TCR_EXC | SONIC_TCR_BCM)) lp->stats.tx_aborted_errors++; - if (td_status & 0x0180) + if (td_status & + (SONIC_TCR_NCRS | SONIC_TCR_CRLS)) lp->stats.tx_carrier_errors++; - if (td_status & 0x0020) + if (td_status & SONIC_TCR_OWC) lp->stats.tx_window_errors++; - if (td_status & 0x0004) + if (td_status & SONIC_TCR_FU) lp->stats.tx_fifo_errors++; }
@@ -370,17 +371,14 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) if (status & SONIC_INT_RFO) { netif_dbg(lp, rx_err, dev, "%s: rx fifo overrun\n", __func__); - lp->stats.rx_fifo_errors++; } if (status & SONIC_INT_RDE) { netif_dbg(lp, rx_err, dev, "%s: rx descriptors exhausted\n", __func__); - lp->stats.rx_dropped++; } if (status & SONIC_INT_RBAE) { netif_dbg(lp, rx_err, dev, "%s: rx buffer area exceeded\n", __func__); - lp->stats.rx_dropped++; }
/* counter overruns; all counters are 16bit wide */ @@ -472,11 +470,6 @@ static void sonic_rx(struct net_device *dev) sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, bufadr_h); } else { /* This should only happen, if we enable accepting broken packets. */ - lp->stats.rx_errors++; - if (status & SONIC_RCR_FAER) - lp->stats.rx_frame_errors++; - if (status & SONIC_RCR_CRCR) - lp->stats.rx_crc_errors++; } if (status & SONIC_RCR_LPKT) { /* diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index fb160df..9e4ff8d 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -175,6 +175,7 @@ #define SONIC_TCR_NCRS 0x0100 #define SONIC_TCR_CRLS 0x0080 #define SONIC_TCR_EXC 0x0040 +#define SONIC_TCR_OWC 0x0020 #define SONIC_TCR_PMB 0x0008 #define SONIC_TCR_FU 0x0004 #define SONIC_TCR_BCM 0x0002
From: Finn Thain fthain@telegraphics.com.au
commit 9e311820f67e740f4fb8dcb82b4c4b5b05bdd1a5 upstream.
The SONIC can sometimes advance its rx buffer pointer (RRP register) without advancing its rx descriptor pointer (CRDA register). As a result the index of the current rx descriptor may not equal that of the current rx buffer. The driver mistakenly assumes that they are always equal. This assumption leads to incorrect packet lengths and possible packet duplication. Avoid this by calling a new function to locate the buffer corresponding to a given descriptor.
Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 35 ++++++++++++++++++++++++++++++----- drivers/net/ethernet/natsemi/sonic.h | 5 +++-- 2 files changed, 33 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index ee3696c..ce5fd05 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -412,6 +412,21 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) return IRQ_HANDLED; }
+/* Return the array index corresponding to a given Receive Buffer pointer. */ +static int index_from_addr(struct sonic_local *lp, dma_addr_t addr, + unsigned int last) +{ + unsigned int i = last; + + do { + i = (i + 1) & SONIC_RRS_MASK; + if (addr == lp->rx_laddr[i]) + return i; + } while (i != last); + + return -ENOENT; +} + /* * We have a good packet(s), pass it/them up the network stack. */ @@ -431,6 +446,16 @@ static void sonic_rx(struct net_device *dev)
status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); if (status & SONIC_RCR_PRX) { + u32 addr = (sonic_rda_get(dev, entry, + SONIC_RD_PKTPTR_H) << 16) | + sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_L); + int i = index_from_addr(lp, addr, entry); + + if (i < 0) { + WARN_ONCE(1, "failed to find buffer!\n"); + break; + } + /* Malloc up new buffer. */ new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); if (new_skb == NULL) { @@ -452,7 +477,7 @@ static void sonic_rx(struct net_device *dev)
/* now we have a new skb to replace it, pass the used one up the stack */ dma_unmap_single(lp->device, lp->rx_laddr[entry], SONIC_RBSIZE, DMA_FROM_DEVICE); - used_skb = lp->rx_skb[entry]; + used_skb = lp->rx_skb[i]; pkt_len = sonic_rda_get(dev, entry, SONIC_RD_PKTLEN); skb_trim(used_skb, pkt_len); used_skb->protocol = eth_type_trans(used_skb, dev); @@ -461,13 +486,13 @@ static void sonic_rx(struct net_device *dev) lp->stats.rx_bytes += pkt_len;
/* and insert the new skb */ - lp->rx_laddr[entry] = new_laddr; - lp->rx_skb[entry] = new_skb; + lp->rx_laddr[i] = new_laddr; + lp->rx_skb[i] = new_skb;
bufadr_l = (unsigned long)new_laddr & 0xffff; bufadr_h = (unsigned long)new_laddr >> 16; - sonic_rra_put(dev, entry, SONIC_RR_BUFADR_L, bufadr_l); - sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, bufadr_h); + sonic_rra_put(dev, i, SONIC_RR_BUFADR_L, bufadr_l); + sonic_rra_put(dev, i, SONIC_RR_BUFADR_H, bufadr_h); } else { /* This should only happen, if we enable accepting broken packets. */ } diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 9e4ff8d..e6d47e4 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -275,8 +275,9 @@ #define SONIC_NUM_RDS SONIC_NUM_RRS /* number of receive descriptors */ #define SONIC_NUM_TDS 16 /* number of transmit descriptors */
-#define SONIC_RDS_MASK (SONIC_NUM_RDS-1) -#define SONIC_TDS_MASK (SONIC_NUM_TDS-1) +#define SONIC_RRS_MASK (SONIC_NUM_RRS - 1) +#define SONIC_RDS_MASK (SONIC_NUM_RDS - 1) +#define SONIC_TDS_MASK (SONIC_NUM_TDS - 1)
#define SONIC_RBSIZE 1520 /* size of one resource buffer */
From: Finn Thain fthain@telegraphics.com.au
commit eaabfd19b2c787bbe88dc32424b9a43d67293422 upstream.
The while loop in sonic_rx() traverses the rx descriptor ring. It stops when it reaches a descriptor that the SONIC has not used. Each iteration advances the EOL flag so the SONIC can keep using more descriptors. Therefore, the while loop has no definite termination condition.
The algorithm described in the National Semiconductor literature is quite different. It consumes descriptors up to the one with its EOL flag set (which will also have its "in use" flag set). All freed descriptors are then returned to the ring at once, by adjusting the EOL flags (and link pointers).
Adopt the algorithm from the datasheet as it's simpler, terminates quickly and avoids a lot of pointless descriptor EOL flag changes.
Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index ce5fd05..0c62ef5 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -435,6 +435,7 @@ static void sonic_rx(struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); int status; int entry = lp->cur_rx; + int prev_entry = lp->eol_rx;
while (sonic_rda_get(dev, entry, SONIC_RD_IN_USE) == 0) { struct sk_buff *used_skb; @@ -515,13 +516,21 @@ static void sonic_rx(struct net_device *dev) /* * give back the descriptor */ - sonic_rda_put(dev, entry, SONIC_RD_LINK, - sonic_rda_get(dev, entry, SONIC_RD_LINK) | SONIC_EOL); sonic_rda_put(dev, entry, SONIC_RD_IN_USE, 1); - sonic_rda_put(dev, lp->eol_rx, SONIC_RD_LINK, - sonic_rda_get(dev, lp->eol_rx, SONIC_RD_LINK) & ~SONIC_EOL); - lp->eol_rx = entry; - lp->cur_rx = entry = (entry + 1) & SONIC_RDS_MASK; + + prev_entry = entry; + entry = (entry + 1) & SONIC_RDS_MASK; + } + + lp->cur_rx = entry; + + if (prev_entry != lp->eol_rx) { + /* Advance the EOL flag to put descriptors back into service */ + sonic_rda_put(dev, prev_entry, SONIC_RD_LINK, SONIC_EOL | + sonic_rda_get(dev, prev_entry, SONIC_RD_LINK)); + sonic_rda_put(dev, lp->eol_rx, SONIC_RD_LINK, ~SONIC_EOL & + sonic_rda_get(dev, lp->eol_rx, SONIC_RD_LINK)); + lp->eol_rx = prev_entry; } /* * If any worth-while packets have been received, netif_rx()
From: Finn Thain fthain@telegraphics.com.au
commit 94b166349503957079ef5e7d6f667f157aea014a upstream.
After sonic_tx_timeout() calls sonic_init(), it can happen that sonic_rx() will subsequently encounter a receive descriptor with no flags set. Remove the comment that says that this can't happen.
When giving a receive descriptor to the SONIC, clear the descriptor status field. That way, any rx descriptor with flags set can only be a newly received packet.
Don't process a descriptor without the LPKT bit set. The buffer is still in use by the SONIC.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 0c62ef5..bc15f97 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -433,7 +433,6 @@ static int index_from_addr(struct sonic_local *lp, dma_addr_t addr, static void sonic_rx(struct net_device *dev) { struct sonic_local *lp = netdev_priv(dev); - int status; int entry = lp->cur_rx; int prev_entry = lp->eol_rx;
@@ -444,9 +443,10 @@ static void sonic_rx(struct net_device *dev) u16 bufadr_l; u16 bufadr_h; int pkt_len; + u16 status = sonic_rda_get(dev, entry, SONIC_RD_STATUS);
- status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); - if (status & SONIC_RCR_PRX) { + /* If the RD has LPKT set, the chip has finished with the RB */ + if ((status & SONIC_RCR_PRX) && (status & SONIC_RCR_LPKT)) { u32 addr = (sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_H) << 16) | sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_L); @@ -494,10 +494,6 @@ static void sonic_rx(struct net_device *dev) bufadr_h = (unsigned long)new_laddr >> 16; sonic_rra_put(dev, i, SONIC_RR_BUFADR_L, bufadr_l); sonic_rra_put(dev, i, SONIC_RR_BUFADR_H, bufadr_h); - } else { - /* This should only happen, if we enable accepting broken packets. */ - } - if (status & SONIC_RCR_LPKT) { /* * this was the last packet out of the current receive buffer * give the buffer back to the SONIC @@ -510,12 +506,11 @@ static void sonic_rx(struct net_device *dev) __func__); SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */ } - } else - printk(KERN_ERR "%s: rx desc without RCR_LPKT. Shouldn't happen !?\n", - dev->name); + } /* * give back the descriptor */ + sonic_rda_put(dev, entry, SONIC_RD_STATUS, 0); sonic_rda_put(dev, entry, SONIC_RD_IN_USE, 1);
prev_entry = entry;
From: Finn Thain fthain@telegraphics.com.au
commit 89ba879e95582d3bba55081e45b5409e883312ca upstream.
As soon as the driver is finished with a receive buffer it allocs a new one and overwrites the corresponding RRA entry with a new buffer pointer.
Problem is, the buffer pointer is split across two word-sized registers. It can't be updated in one atomic store. So this operation races with the chip while it stores received packets and advances its RRP register. This could result in memory corruption by a DMA write.
Avoid this problem by adding buffers only at the location given by the RWP register, in accordance with the National Semiconductor datasheet.
Re-factor this code into separate functions to calculate a RRA pointer and to update the RWP.
Fixes: efcce839360f ("[PATCH] macsonic/jazzsonic network drivers update") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 150 +++++++++++++++++++++-------------- drivers/net/ethernet/natsemi/sonic.h | 18 ++++- 2 files changed, 105 insertions(+), 63 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index bc15f97..883b464 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -427,6 +427,59 @@ static int index_from_addr(struct sonic_local *lp, dma_addr_t addr, return -ENOENT; }
+/* Allocate and map a new skb to be used as a receive buffer. */ +static bool sonic_alloc_rb(struct net_device *dev, struct sonic_local *lp, + struct sk_buff **new_skb, dma_addr_t *new_addr) +{ + *new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); + if (!*new_skb) + return false; + + if (SONIC_BUS_SCALE(lp->dma_bitmode) == 2) + skb_reserve(*new_skb, 2); + + *new_addr = dma_map_single(lp->device, skb_put(*new_skb, SONIC_RBSIZE), + SONIC_RBSIZE, DMA_FROM_DEVICE); + if (!*new_addr) { + dev_kfree_skb(*new_skb); + *new_skb = NULL; + return false; + } + + return true; +} + +/* Place a new receive resource in the Receive Resource Area and update RWP. */ +static void sonic_update_rra(struct net_device *dev, struct sonic_local *lp, + dma_addr_t old_addr, dma_addr_t new_addr) +{ + unsigned int entry = sonic_rr_entry(dev, SONIC_READ(SONIC_RWP)); + unsigned int end = sonic_rr_entry(dev, SONIC_READ(SONIC_RRP)); + u32 buf; + + /* The resources in the range [RRP, RWP) belong to the SONIC. This loop + * scans the other resources in the RRA, those in the range [RWP, RRP). + */ + do { + buf = (sonic_rra_get(dev, entry, SONIC_RR_BUFADR_H) << 16) | + sonic_rra_get(dev, entry, SONIC_RR_BUFADR_L); + + if (buf == old_addr) + break; + + entry = (entry + 1) & SONIC_RRS_MASK; + } while (entry != end); + + WARN_ONCE(buf != old_addr, "failed to find resource!\n"); + + sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, new_addr >> 16); + sonic_rra_put(dev, entry, SONIC_RR_BUFADR_L, new_addr & 0xffff); + + entry = (entry + 1) & SONIC_RRS_MASK; + + SONIC_WRITE(SONIC_RWP, sonic_rr_addr(dev, entry)); +} + /* * We have a good packet(s), pass it/them up the network stack. */ @@ -435,18 +488,15 @@ static void sonic_rx(struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); int entry = lp->cur_rx; int prev_entry = lp->eol_rx; + bool rbe = false;
while (sonic_rda_get(dev, entry, SONIC_RD_IN_USE) == 0) { - struct sk_buff *used_skb; - struct sk_buff *new_skb; - dma_addr_t new_laddr; - u16 bufadr_l; - u16 bufadr_h; - int pkt_len; u16 status = sonic_rda_get(dev, entry, SONIC_RD_STATUS);
/* If the RD has LPKT set, the chip has finished with the RB */ if ((status & SONIC_RCR_PRX) && (status & SONIC_RCR_LPKT)) { + struct sk_buff *new_skb; + dma_addr_t new_laddr; u32 addr = (sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_H) << 16) | sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_L); @@ -457,55 +507,35 @@ static void sonic_rx(struct net_device *dev) break; }
- /* Malloc up new buffer. */ - new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); - if (new_skb == NULL) { + if (sonic_alloc_rb(dev, lp, &new_skb, &new_laddr)) { + struct sk_buff *used_skb = lp->rx_skb[i]; + int pkt_len; + + /* Pass the used buffer up the stack */ + dma_unmap_single(lp->device, addr, SONIC_RBSIZE, + DMA_FROM_DEVICE); + + pkt_len = sonic_rda_get(dev, entry, + SONIC_RD_PKTLEN); + skb_trim(used_skb, pkt_len); + used_skb->protocol = eth_type_trans(used_skb, + dev); + netif_rx(used_skb); + lp->stats.rx_packets++; + lp->stats.rx_bytes += pkt_len; + + lp->rx_skb[i] = new_skb; + lp->rx_laddr[i] = new_laddr; + } else { + /* Failed to obtain a new buffer so re-use it */ + new_laddr = addr; lp->stats.rx_dropped++; - break; } - /* provide 16 byte IP header alignment unless DMA requires otherwise */ - if(SONIC_BUS_SCALE(lp->dma_bitmode) == 2) - skb_reserve(new_skb, 2); - - new_laddr = dma_map_single(lp->device, skb_put(new_skb, SONIC_RBSIZE), - SONIC_RBSIZE, DMA_FROM_DEVICE); - if (!new_laddr) { - dev_kfree_skb(new_skb); - printk(KERN_ERR "%s: Failed to map rx buffer, dropping packet.\n", dev->name); - lp->stats.rx_dropped++; - break; - } - - /* now we have a new skb to replace it, pass the used one up the stack */ - dma_unmap_single(lp->device, lp->rx_laddr[entry], SONIC_RBSIZE, DMA_FROM_DEVICE); - used_skb = lp->rx_skb[i]; - pkt_len = sonic_rda_get(dev, entry, SONIC_RD_PKTLEN); - skb_trim(used_skb, pkt_len); - used_skb->protocol = eth_type_trans(used_skb, dev); - netif_rx(used_skb); - lp->stats.rx_packets++; - lp->stats.rx_bytes += pkt_len; - - /* and insert the new skb */ - lp->rx_laddr[i] = new_laddr; - lp->rx_skb[i] = new_skb; - - bufadr_l = (unsigned long)new_laddr & 0xffff; - bufadr_h = (unsigned long)new_laddr >> 16; - sonic_rra_put(dev, i, SONIC_RR_BUFADR_L, bufadr_l); - sonic_rra_put(dev, i, SONIC_RR_BUFADR_H, bufadr_h); - /* - * this was the last packet out of the current receive buffer - * give the buffer back to the SONIC + /* If RBE is already 
asserted when RWP advances then + * it's safe to clear RBE after processing this packet. */ - lp->cur_rwp += SIZEOF_SONIC_RR * SONIC_BUS_SCALE(lp->dma_bitmode); - if (lp->cur_rwp >= lp->rra_end) lp->cur_rwp = lp->rra_laddr & 0xffff; - SONIC_WRITE(SONIC_RWP, lp->cur_rwp); - if (SONIC_READ(SONIC_ISR) & SONIC_INT_RBE) { - netif_dbg(lp, rx_err, dev, "%s: rx buffer exhausted\n", - __func__); - SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* clear the flag */ - } + rbe = rbe || SONIC_READ(SONIC_ISR) & SONIC_INT_RBE; + sonic_update_rra(dev, lp, addr, new_laddr); } /* * give back the descriptor @@ -527,6 +557,9 @@ static void sonic_rx(struct net_device *dev) sonic_rda_get(dev, lp->eol_rx, SONIC_RD_LINK)); lp->eol_rx = prev_entry; } + + if (rbe) + SONIC_WRITE(SONIC_ISR, SONIC_INT_RBE); /* * If any worth-while packets have been received, netif_rx() * has done a mark_bh(NET_BH) for us and will work on them @@ -641,15 +674,10 @@ static int sonic_init(struct net_device *dev) }
/* initialize all RRA registers */ - lp->rra_end = (lp->rra_laddr + SONIC_NUM_RRS * SIZEOF_SONIC_RR * - SONIC_BUS_SCALE(lp->dma_bitmode)) & 0xffff; - lp->cur_rwp = (lp->rra_laddr + (SONIC_NUM_RRS - 1) * SIZEOF_SONIC_RR * - SONIC_BUS_SCALE(lp->dma_bitmode)) & 0xffff; - - SONIC_WRITE(SONIC_RSA, lp->rra_laddr & 0xffff); - SONIC_WRITE(SONIC_REA, lp->rra_end); - SONIC_WRITE(SONIC_RRP, lp->rra_laddr & 0xffff); - SONIC_WRITE(SONIC_RWP, lp->cur_rwp); + SONIC_WRITE(SONIC_RSA, sonic_rr_addr(dev, 0)); + SONIC_WRITE(SONIC_REA, sonic_rr_addr(dev, SONIC_NUM_RRS)); + SONIC_WRITE(SONIC_RRP, sonic_rr_addr(dev, 0)); + SONIC_WRITE(SONIC_RWP, sonic_rr_addr(dev, SONIC_NUM_RRS - 1)); SONIC_WRITE(SONIC_URRA, lp->rra_laddr >> 16); SONIC_WRITE(SONIC_EOBC, (SONIC_RBSIZE >> 1) - (lp->dma_bitmode ? 2 : 1));
diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index e6d47e4..cc2f7b4 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -314,8 +314,6 @@ struct sonic_local { u32 rda_laddr; /* logical DMA address of RDA */ dma_addr_t rx_laddr[SONIC_NUM_RRS]; /* logical DMA addresses of rx skbuffs */ dma_addr_t tx_laddr[SONIC_NUM_TDS]; /* logical DMA addresses of tx skbuffs */ - unsigned int rra_end; - unsigned int cur_rwp; unsigned int cur_rx; unsigned int cur_tx; /* first unacked transmit packet */ unsigned int eol_rx; @@ -450,6 +448,22 @@ static inline __u16 sonic_rra_get(struct net_device* dev, int entry, (entry * SIZEOF_SONIC_RR) + offset); }
+static inline u16 sonic_rr_addr(struct net_device *dev, int entry) +{ + struct sonic_local *lp = netdev_priv(dev); + + return lp->rra_laddr + + entry * SIZEOF_SONIC_RR * SONIC_BUS_SCALE(lp->dma_bitmode); +} + +static inline u16 sonic_rr_entry(struct net_device *dev, u16 addr) +{ + struct sonic_local *lp = netdev_priv(dev); + + return (addr - (u16)lp->rra_laddr) / (SIZEOF_SONIC_RR * + SONIC_BUS_SCALE(lp->dma_bitmode)); +} + static const char version[] = "sonic.c:v0.92 20.9.98 tsbogend@alpha.franken.de\n";
From: Finn Thain fthain@telegraphics.com.au
commit 3f4b7e6a2be982fd8820a2b54d46dd9c351db899 upstream.
Make sure the SONIC's DMA engine is idle before altering the transmit and receive descriptors. Add a helper for this as it will be needed again.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 25 +++++++++++++++++++++++++ drivers/net/ethernet/natsemi/sonic.h | 3 +++ 2 files changed, 28 insertions(+)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 883b464..e8e2a51 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -115,6 +115,24 @@ static int sonic_open(struct net_device *dev) return 0; }
+/* Wait for the SONIC to become idle. */ +static void sonic_quiesce(struct net_device *dev, u16 mask) +{ + struct sonic_local * __maybe_unused lp = netdev_priv(dev); + int i; + u16 bits; + + for (i = 0; i < 1000; ++i) { + bits = SONIC_READ(SONIC_CMD) & mask; + if (!bits) + return; + if (irqs_disabled() || in_interrupt()) + udelay(20); + else + usleep_range(100, 200); + } + WARN_ONCE(1, "command deadline expired! 0x%04x\n", bits); +}
/* * Close the SONIC device @@ -131,6 +149,9 @@ static int sonic_close(struct net_device *dev) /* * stop the SONIC, disable interrupts */ + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); + SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -170,6 +191,9 @@ static void sonic_tx_timeout(struct net_device *dev) * put the Sonic into software-reset mode and * disable all interrupts before releasing DMA buffers */ + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); + SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -657,6 +681,7 @@ static int sonic_init(struct net_device *dev) */ SONIC_WRITE(SONIC_CMD, 0); SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL);
/* * initialize the receive resource area diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index cc2f7b4..1df6d2f 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -110,6 +110,9 @@ #define SONIC_CR_TXP 0x0002 #define SONIC_CR_HTX 0x0001
+#define SONIC_CR_ALL (SONIC_CR_LCAM | SONIC_CR_RRRA | \ + SONIC_CR_RXEN | SONIC_CR_TXP) + /* * SONIC data configuration bits */
From: Finn Thain fthain@telegraphics.com.au
commit 27e0c31c5f27c1d1a1d9d135c123069f60dcf97b upstream.
There are several issues relating to command register usage during chip initialization.
Firstly, the SONIC sometimes comes out of software reset with the Start Timer bit set. This gets logged as,
macsonic macsonic eth0: sonic_init: status=24, i=101
Avoid this by giving the Stop Timer command earlier rather than later.
Secondly, the loop that waits for the Read RRA command to complete has the break condition inverted. That's why the for loop iterates until its termination condition. Call the helper for this instead.
Finally, give the Receiver Enable command after clearing interrupts, not before, to avoid the possibility of losing an interrupt.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index e8e2a51..b09d13c 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -663,7 +663,6 @@ static void sonic_multicast_list(struct net_device *dev) */ static int sonic_init(struct net_device *dev) { - unsigned int cmd; struct sonic_local *lp = netdev_priv(dev); int i;
@@ -680,7 +679,7 @@ static int sonic_init(struct net_device *dev) * enable interrupts, then completely initialize the SONIC */ SONIC_WRITE(SONIC_CMD, 0); - SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS | SONIC_CR_STP); sonic_quiesce(dev, SONIC_CR_ALL);
/* @@ -710,14 +709,7 @@ static int sonic_init(struct net_device *dev) netif_dbg(lp, ifup, dev, "%s: issuing RRRA command\n", __func__);
SONIC_WRITE(SONIC_CMD, SONIC_CR_RRRA); - i = 0; - while (i++ < 100) { - if (SONIC_READ(SONIC_CMD) & SONIC_CR_RRRA) - break; - } - - netif_dbg(lp, ifup, dev, "%s: status=%x, i=%d\n", __func__, - SONIC_READ(SONIC_CMD), i); + sonic_quiesce(dev, SONIC_CR_RRRA);
/* * Initialize the receive descriptors so that they @@ -805,15 +797,11 @@ static int sonic_init(struct net_device *dev) * enable receiver, disable loopback * and enable all interrupts */ - SONIC_WRITE(SONIC_CMD, SONIC_CR_RXEN | SONIC_CR_STP); SONIC_WRITE(SONIC_RCR, SONIC_RCR_DEFAULT); SONIC_WRITE(SONIC_TCR, SONIC_TCR_DEFAULT); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_IMR, SONIC_IMR_DEFAULT); - - cmd = SONIC_READ(SONIC_CMD); - if ((cmd & SONIC_CR_RXEN) == 0 || (cmd & SONIC_CR_STP) == 0) - printk(KERN_ERR "sonic_init: failed, status=%x\n", cmd); + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXEN);
netif_dbg(lp, ifup, dev, "%s: new status=%x\n", __func__, SONIC_READ(SONIC_CMD));
From: Finn Thain fthain@telegraphics.com.au
commit 772f66421d5aa0b9f256056f513bbc38ac132271 upstream.
Section 4.3.1 of the datasheet says,
This bit [TXP] must not be set if a Load CAM operation is in progress (LCAM is set). The SONIC will lock up if both bits are set simultaneously.
Testing has shown that the driver sometimes attempts to set LCAM while TXP is set. Avoid this by waiting for command completion before and after giving the LCAM command.
After issuing the Load CAM command, poll for !SONIC_CR_LCAM rather than SONIC_INT_LCD, because the SONIC_CR_TXP bit can't be used until !SONIC_CR_LCAM.
When in reset mode, take the opportunity to reset the CAM Enable register.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index b09d13c..a88685e 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -633,6 +633,8 @@ static void sonic_multicast_list(struct net_device *dev) (netdev_mc_count(dev) > 15)) { rcr |= SONIC_RCR_AMC; } else { + unsigned long flags; + netif_dbg(lp, ifup, dev, "%s: mc_count %d\n", __func__, netdev_mc_count(dev)); sonic_set_cam_enable(dev, 1); /* always enable our own address */ @@ -646,9 +648,14 @@ static void sonic_multicast_list(struct net_device *dev) i++; } SONIC_WRITE(SONIC_CDC, 16); - /* issue Load CAM command */ SONIC_WRITE(SONIC_CDP, lp->cda_laddr & 0xffff); + + /* LCAM and TXP commands can't be used simultaneously */ + spin_lock_irqsave(&lp->lock, flags); + sonic_quiesce(dev, SONIC_CR_TXP); SONIC_WRITE(SONIC_CMD, SONIC_CR_LCAM); + sonic_quiesce(dev, SONIC_CR_LCAM); + spin_unlock_irqrestore(&lp->lock, flags); } }
@@ -674,6 +681,9 @@ static int sonic_init(struct net_device *dev) SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST);
+ /* While in reset mode, clear CAM Enable register */ + SONIC_WRITE(SONIC_CE, 0); + /* * clear software reset flag, disable receiver, clear and * enable interrupts, then completely initialize the SONIC @@ -784,14 +794,7 @@ static int sonic_init(struct net_device *dev) * load the CAM */ SONIC_WRITE(SONIC_CMD, SONIC_CR_LCAM); - - i = 0; - while (i++ < 100) { - if (SONIC_READ(SONIC_ISR) & SONIC_INT_LCD) - break; - } - netif_dbg(lp, ifup, dev, "%s: CMD=%x, ISR=%x, i=%d\n", __func__, - SONIC_READ(SONIC_CMD), SONIC_READ(SONIC_ISR), i); + sonic_quiesce(dev, SONIC_CR_LCAM);
/* * enable receiver, disable loopback
From: Finn Thain fthain@telegraphics.com.au
commit 686f85d71d095f1d26b807e23b0f0bfd22042c45 upstream.
Section 5.5.3.2 of the datasheet says,
If FIFO Underrun, Byte Count Mismatch, Excessive Collision, or Excessive Deferral (if enabled) errors occur, transmission ceases.
In this situation, the chip asserts a TXER interrupt rather than TXDN. But the handler for the TXDN is the only way that the transmit queue gets restarted. Hence, an aborted transmission can result in a watchdog timeout.
This problem can be reproduced on a congested link, as that can result in excessive transmitter collisions. Another way to reproduce this is with a FIFO Underrun, which may be caused by DMA latency.
In the event of a TXER interrupt, prevent a watchdog timeout by restarting transmission.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Tested-by: Stan Johnson userm57@yahoo.com Signed-off-by: Finn Thain fthain@telegraphics.com.au Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/natsemi/sonic.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index a88685e..69282f3 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -414,10 +414,19 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) lp->stats.rx_missed_errors += 65536;
/* transmit error */ - if (status & SONIC_INT_TXER) - if (SONIC_READ(SONIC_TCR) & SONIC_TCR_FU) - netif_dbg(lp, tx_err, dev, "%s: tx fifo underrun\n", - __func__); + if (status & SONIC_INT_TXER) { + u16 tcr = SONIC_READ(SONIC_TCR); + + netif_dbg(lp, tx_err, dev, "%s: TXER intr, TCR %04x\n", + __func__, tcr); + + if (tcr & (SONIC_TCR_EXD | SONIC_TCR_EXC | + SONIC_TCR_FU | SONIC_TCR_BCM)) { + /* Aborted transmission. Try again. */ + netif_stop_queue(dev); + SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP); + } + }
/* bus retry */ if (status & SONIC_INT_BR) {
From: Tom Zanussi tom.zanussi@linux.intel.com
commit 656fe2ba85e81d00e4447bf77b8da2be3c47acb2 upstream.
Since every var ref for a trigger has an entry in the var_ref[] array, use that to destroy the var_refs, instead of piecemeal via the field expressions.
This allows us to avoid having to keep and treat differently separate lists for the action-related references, which future patches will remove.
Link: http://lkml.kernel.org/r/fad1a164f0e257c158e70d6eadbf6c586e04b2a2.1545161087...
Acked-by: Namhyung Kim namhyung@kernel.org Reviewed-by: Masami Hiramatsu mhiramat@kernel.org Signed-off-by: Tom Zanussi tom.zanussi@linux.intel.com Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/trace/trace_events_hist.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 35d9cea..d606c0e5 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2175,6 +2175,15 @@ static int contains_operator(char *str) return field_op; }
+static void __destroy_hist_field(struct hist_field *hist_field) +{ + kfree(hist_field->var.name); + kfree(hist_field->name); + kfree(hist_field->type); + + kfree(hist_field); +} + static void destroy_hist_field(struct hist_field *hist_field, unsigned int level) { @@ -2186,14 +2195,13 @@ static void destroy_hist_field(struct hist_field *hist_field, if (!hist_field) return;
+ if (hist_field->flags & HIST_FIELD_FL_VAR_REF) + return; /* var refs will be destroyed separately */ + for (i = 0; i < HIST_FIELD_OPERANDS_MAX; i++) destroy_hist_field(hist_field->operands[i], level + 1);
- kfree(hist_field->var.name); - kfree(hist_field->name); - kfree(hist_field->type); - - kfree(hist_field); + __destroy_hist_field(hist_field); }
static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, @@ -2320,6 +2328,12 @@ static void destroy_hist_fields(struct hist_trigger_data *hist_data) hist_data->fields[i] = NULL; } } + + for (i = 0; i < hist_data->n_var_refs; i++) { + WARN_ON(!(hist_data->var_refs[i]->flags & HIST_FIELD_FL_VAR_REF)); + __destroy_hist_field(hist_data->var_refs[i]); + hist_data->var_refs[i] = NULL; + } }
static int init_var_ref(struct hist_field *ref_field,
From: Tom Zanussi tom.zanussi@linux.intel.com
commit de40f033d4e84e843d6a12266e3869015ea9097c upstream.
Have create_var_ref() manage the hist trigger's var_ref list, rather than having similar code doing it in multiple places. This cleans up the code and makes sure var_refs are always accounted properly.
Also, document the var_ref-related functions to make their purpose clearer.
Link: http://lkml.kernel.org/r/05ddae93ff514e66fc03897d6665231892939913.1545161087...
Acked-by: Namhyung Kim namhyung@kernel.org Reviewed-by: Masami Hiramatsu mhiramat@kernel.org Signed-off-by: Tom Zanussi tom.zanussi@linux.intel.com Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/trace/trace_events_hist.c | 93 ++++++++++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 18 deletions(-)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index d606c0e5..4beac53 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1274,6 +1274,17 @@ static u64 hist_field_cpu(struct hist_field *hist_field, return cpu; }
+/** + * check_field_for_var_ref - Check if a VAR_REF field references a variable + * @hist_field: The VAR_REF field to check + * @var_data: The hist trigger that owns the variable + * @var_idx: The trigger variable identifier + * + * Check the given VAR_REF field to see whether or not it references + * the given variable associated with the given trigger. + * + * Return: The VAR_REF field if it does reference the variable, NULL if not + */ static struct hist_field * check_field_for_var_ref(struct hist_field *hist_field, struct hist_trigger_data *var_data, @@ -1324,6 +1335,18 @@ static u64 hist_field_cpu(struct hist_field *hist_field, return found; }
+/** + * find_var_ref - Check if a trigger has a reference to a trigger variable + * @hist_data: The hist trigger that might have a reference to the variable + * @var_data: The hist trigger that owns the variable + * @var_idx: The trigger variable identifier + * + * Check the list of var_refs[] on the first hist trigger to see + * whether any of them are references to the variable on the second + * trigger. + * + * Return: The VAR_REF field referencing the variable if so, NULL if not + */ static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data, struct hist_trigger_data *var_data, unsigned int var_idx) @@ -1350,6 +1373,20 @@ static struct hist_field *find_var_ref(struct hist_trigger_data *hist_data, return found; }
+/** + * find_any_var_ref - Check if there is a reference to a given trigger variable + * @hist_data: The hist trigger + * @var_idx: The trigger variable identifier + * + * Check to see whether the given variable is currently referenced by + * any other trigger. + * + * The trigger the variable is defined on is explicitly excluded - the + * assumption being that a self-reference doesn't prevent a trigger + * from being removed. + * + * Return: The VAR_REF field referencing the variable if so, NULL if not + */ static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, unsigned int var_idx) { @@ -1368,6 +1405,19 @@ static struct hist_field *find_any_var_ref(struct hist_trigger_data *hist_data, return found; }
+/** + * check_var_refs - Check if there is a reference to any of trigger's variables + * @hist_data: The hist trigger + * + * A trigger can define one or more variables. If any one of them is + * currently referenced by any other trigger, this function will + * determine that. + + * Typically used to determine whether or not a trigger can be removed + * - if there are any references to a trigger's variables, it cannot. + * + * Return: True if there is a reference to any of trigger's variables + */ static bool check_var_refs(struct hist_trigger_data *hist_data) { struct hist_field *field; @@ -2392,7 +2442,23 @@ static int init_var_ref(struct hist_field *ref_field, goto out; }
-static struct hist_field *create_var_ref(struct hist_field *var_field, +/** + * create_var_ref - Create a variable reference and attach it to trigger + * @hist_data: The trigger that will be referencing the variable + * @var_field: The VAR field to create a reference to + * @system: The optional system string + * @event_name: The optional event_name string + * + * Given a variable hist_field, create a VAR_REF hist_field that + * represents a reference to it. + * + * This function also adds the reference to the trigger that + * now references the variable. + * + * Return: The VAR_REF field if successful, NULL if not + */ +static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data, + struct hist_field *var_field, char *system, char *event_name) { unsigned long flags = HIST_FIELD_FL_VAR_REF; @@ -2404,6 +2470,9 @@ static struct hist_field *create_var_ref(struct hist_field *var_field, destroy_hist_field(ref_field, 0); return NULL; } + + hist_data->var_refs[hist_data->n_var_refs] = ref_field; + ref_field->var_ref_idx = hist_data->n_var_refs++; }
return ref_field; @@ -2477,7 +2546,8 @@ static struct hist_field *parse_var_ref(struct hist_trigger_data *hist_data,
var_field = find_event_var(hist_data, system, event_name, var_name); if (var_field) - ref_field = create_var_ref(var_field, system, event_name); + ref_field = create_var_ref(hist_data, var_field, + system, event_name);
if (!ref_field) hist_err_event("Couldn't find variable: $", @@ -2597,8 +2667,6 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, if (!s) { hist_field = parse_var_ref(hist_data, ref_system, ref_event, ref_var); if (hist_field) { - hist_data->var_refs[hist_data->n_var_refs] = hist_field; - hist_field->var_ref_idx = hist_data->n_var_refs++; if (var_name) { hist_field = create_alias(hist_data, hist_field, var_name); if (!hist_field) { @@ -3376,7 +3444,6 @@ static int onmax_create(struct hist_trigger_data *hist_data, unsigned int var_ref_idx = hist_data->n_var_refs; struct field_var *field_var; char *onmax_var_str, *param; - unsigned long flags; unsigned int i; int ret = 0;
@@ -3393,18 +3460,10 @@ static int onmax_create(struct hist_trigger_data *hist_data, return -EINVAL; }
- flags = HIST_FIELD_FL_VAR_REF; - ref_field = create_hist_field(hist_data, NULL, flags, NULL); + ref_field = create_var_ref(hist_data, var_field, NULL, NULL); if (!ref_field) return -ENOMEM;
- if (init_var_ref(ref_field, var_field, NULL, NULL)) { - destroy_hist_field(ref_field, 0); - ret = -ENOMEM; - goto out; - } - hist_data->var_refs[hist_data->n_var_refs] = ref_field; - ref_field->var_ref_idx = hist_data->n_var_refs++; data->onmax.var = ref_field;
data->fn = onmax_save; @@ -3595,9 +3654,6 @@ static void save_synth_var_ref(struct hist_trigger_data *hist_data, struct hist_field *var_ref) { hist_data->synth_var_refs[hist_data->n_synth_var_refs++] = var_ref; - - hist_data->var_refs[hist_data->n_var_refs] = var_ref; - var_ref->var_ref_idx = hist_data->n_var_refs++; }
static int check_synth_field(struct synth_event *event, @@ -3752,7 +3808,8 @@ static int onmatch_create(struct hist_trigger_data *hist_data, }
if (check_synth_field(event, hist_field, field_pos) == 0) { - var_ref = create_var_ref(hist_field, system, event_name); + var_ref = create_var_ref(hist_data, hist_field, + system, event_name); if (!var_ref) { kfree(p); ret = -ENOMEM;
From: "Steven Rostedt (VMware)" rostedt@goodmis.org
commit 8bcebc77e85f3d7536f96845a0fe94b1dddb6af0 upstream.
While working on a tool to convert SQL syntax into the histogram language of the kernel, I discovered the following bug:
# echo 'first u64 start_time u64 end_time pid_t pid u64 delta' >> synthetic_events # echo 'hist:keys=pid:start=common_timestamp' > events/sched/sched_waking/trigger # echo 'hist:keys=next_pid:delta=common_timestamp-$start,start2=$start:onmatch(sched.sched_waking).trace(first,$start2,common_timestamp,next_pid,$delta)' > events/sched/sched_switch/trigger
Would not display any histograms in the sched_switch histogram side.
But if I were to swap the location of
"delta=common_timestamp-$start" with "start2=$start"
Such that the last line had:
# echo 'hist:keys=next_pid:start2=$start,delta=common_timestamp-$start:onmatch(sched.sched_waking).trace(first,$start2,common_timestamp,next_pid,$delta)' > events/sched/sched_switch/trigger
The histogram works as expected.
What I found out is that the expressions clear out the value once it is resolved. As the variables are resolved in the order listed, when processing:
delta=common_timestamp-$start
The $start is cleared. When it gets to "start2=$start", it errors out with "unresolved symbol" (which is silent as this happens at the location of the trace), and the histogram is dropped.
When processing the histogram for variable references, instead of adding a new reference for a variable used twice, use the same reference. That way, not only is it more efficient, but the order will no longer matter in processing of the variables.
From Tom Zanussi:
"Just to clarify some more about what the problem was is that without your patch, we would have two separate references to the same variable, and during resolve_var_refs(), they'd both want to be resolved separately, so in this case, since the first reference to start wasn't part of an expression, it wouldn't get the read-once flag set, so would be read normally, and then the second reference would do the read-once read and also be read but using read-once. So everything worked and you didn't see a problem:
from: start2=$start,delta=common_timestamp-$start
In the second case, when you switched them around, the first reference would be resolved by doing the read-once, and following that the second reference would try to resolve and see that the variable had already been read, so failed as unset, which caused it to short-circuit out and not do the trigger action to generate the synthetic event:
to: delta=common_timestamp-$start,start2=$start
With your patch, we only have the single resolution which happens correctly the one time it's resolved, so this can't happen."
Link: https://lore.kernel.org/r/20200116154216.58ca08eb@gandalf.local.home
Cc: stable@vger.kernel.org Fixes: 067fe038e70f6 ("tracing: Add variable reference handling to hist triggers") Reviewed-by: Tom Zanuss zanussi@kernel.org Tested-by: Tom Zanussi zanussi@kernel.org Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/trace/trace_events_hist.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+)
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 4beac53..dbd3c97 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -49,6 +49,7 @@ struct hist_field { struct ftrace_event_field *field; unsigned long flags; hist_field_fn_t fn; + unsigned int ref; unsigned int size; unsigned int offset; unsigned int is_signed; @@ -2225,8 +2226,16 @@ static int contains_operator(char *str) return field_op; }
+static void get_hist_field(struct hist_field *hist_field) +{ + hist_field->ref++; +} + static void __destroy_hist_field(struct hist_field *hist_field) { + if (--hist_field->ref > 1) + return; + kfree(hist_field->var.name); kfree(hist_field->name); kfree(hist_field->type); @@ -2268,6 +2277,8 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, if (!hist_field) return NULL;
+ hist_field->ref = 1; + hist_field->hist_data = hist_data;
if (flags & HIST_FIELD_FL_EXPR || flags & HIST_FIELD_FL_ALIAS) @@ -2463,6 +2474,17 @@ static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data, { unsigned long flags = HIST_FIELD_FL_VAR_REF; struct hist_field *ref_field; + int i; + + /* Check if the variable already exists */ + for (i = 0; i < hist_data->n_var_refs; i++) { + ref_field = hist_data->var_refs[i]; + if (ref_field->var.idx == var_field->var.idx && + ref_field->var.hist_data == var_field->hist_data) { + get_hist_field(ref_field); + return ref_field; + } + }
ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL); if (ref_field) {
From: Masato Suzuki masato.suzuki@wdc.com
ZBC/ZAC report zones command may return fewer bytes than requested if the number of matching zones for the report request is small. However, unlike read or write commands, the remainder of incomplete report zones commands cannot be automatically requested by the block layer: the start sector of the next report cannot be known, and the report reply may not be 512B aligned for SAS drives (a report zone reply size is always a multiple of 64B). The regular request completion code executing bio_advance() and restart of the command remainder part currently causes invalid zone descriptor data to be reported to the caller if the report zone size is smaller than 512B (a case that can happen easily for a report of the last zones of a SAS drive for example).
Since blkdev_report_zones() handles report zone command processing in a loop until completion (no more zones are being reported), we can safely avoid that the block layer performs an incorrect bio_advance() call and restart of the remainder of incomplete report zone BIOs. To do so, always indicate a full completion of REQ_OP_ZONE_REPORT by setting good_bytes to the request buffer size and by setting the command resid to 0. This does not affect the post processing of the report zone reply done by sd_zbc_complete() since the reply header indicates the number of zones reported.
Fixes: 89d947561077 ("sd: Implement support for ZBC devices") Cc: stable@vger.kernel.org # 4.19 Cc: stable@vger.kernel.org # 4.14 Signed-off-by: Masato Suzuki masato.suzuki@wdc.com Reviewed-by: Damien Le Moal damien.lemoal@wdc.com Acked-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/scsi/sd.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 710d7b0..1229a98 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1974,9 +1974,13 @@ static int sd_done(struct scsi_cmnd *SCpnt) } break; case REQ_OP_ZONE_REPORT: + /* To avoid that the block layer performs an incorrect + * bio_advance() call and restart of the remainder of + * incomplete report zone BIOs, always indicate a full + * completion of REQ_OP_ZONE_REPORT. + */ if (!result) { - good_bytes = scsi_bufflen(SCpnt) - - scsi_get_resid(SCpnt); + good_bytes = scsi_bufflen(SCpnt); scsi_set_resid(SCpnt, 0); } else { good_bytes = 0;
From: Ard Biesheuvel ard.biesheuvel@linaro.org
commit 504582e8e40b90b8f8c58783e2d1e4f6a2b71a3a upstream.
Commit 79c65d179a40e145 ("crypto: cbc - Convert to skcipher") updated the generic CBC template wrapper from a blkcipher to a skcipher algo, to get away from the deprecated blkcipher interface. However, as a side effect, drivers that instantiate CBC transforms using the blkcipher as a fallback no longer work, since skciphers can wrap blkciphers but not the other way around. This broke the geode-aes driver.
So let's fix it by moving to the sync skcipher interface when allocating the fallback. At the same time, align with the generic API for ECB and CBC by rejecting inputs that are not a multiple of the AES block size.
Fixes: 79c65d179a40e145 ("crypto: cbc - Convert to skcipher") Cc: stable@vger.kernel.org # v4.20+ ONLY Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Signed-off-by: Florian Bezdeka florian@bezdeka.de Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Florian Bezdeka florian@bezdeka.de Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/crypto/geode-aes.c | 57 +++++++++++++++++++++++++++------------------- drivers/crypto/geode-aes.h | 2 +- 2 files changed, 35 insertions(+), 24 deletions(-)
diff --git a/drivers/crypto/geode-aes.c b/drivers/crypto/geode-aes.c index eb2a0a7..d670f700 100644 --- a/drivers/crypto/geode-aes.c +++ b/drivers/crypto/geode-aes.c @@ -14,6 +14,7 @@ #include <linux/spinlock.h> #include <crypto/algapi.h> #include <crypto/aes.h> +#include <crypto/skcipher.h>
#include <linux/io.h> #include <linux/delay.h> @@ -170,13 +171,15 @@ static int geode_setkey_blk(struct crypto_tfm *tfm, const u8 *key, /* * The requested key size is not supported by HW, do a fallback */ - op->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - op->fallback.blk->base.crt_flags |= (tfm->crt_flags & CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(op->fallback.blk, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(op->fallback.blk, + tfm->crt_flags & CRYPTO_TFM_REQ_MASK);
- ret = crypto_blkcipher_setkey(op->fallback.blk, key, len); + ret = crypto_skcipher_setkey(op->fallback.blk, key, len); if (ret) { tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= (op->fallback.blk->base.crt_flags & CRYPTO_TFM_RES_MASK); + tfm->crt_flags |= crypto_skcipher_get_flags(op->fallback.blk) & + CRYPTO_TFM_RES_MASK; } return ret; } @@ -185,33 +188,28 @@ static int fallback_blk_dec(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - unsigned int ret; - struct crypto_blkcipher *tfm; struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm); + SKCIPHER_REQUEST_ON_STACK(req, op->fallback.blk);
- tfm = desc->tfm; - desc->tfm = op->fallback.blk; - - ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); + skcipher_request_set_tfm(req, op->fallback.blk); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
- desc->tfm = tfm; - return ret; + return crypto_skcipher_decrypt(req); } + static int fallback_blk_enc(struct blkcipher_desc *desc, struct scatterlist *dst, struct scatterlist *src, unsigned int nbytes) { - unsigned int ret; - struct crypto_blkcipher *tfm; struct geode_aes_op *op = crypto_blkcipher_ctx(desc->tfm); + SKCIPHER_REQUEST_ON_STACK(req, op->fallback.blk);
- tfm = desc->tfm; - desc->tfm = op->fallback.blk; - - ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); + skcipher_request_set_tfm(req, op->fallback.blk); + skcipher_request_set_callback(req, 0, NULL, NULL); + skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
- desc->tfm = tfm; - return ret; + return crypto_skcipher_encrypt(req); }
static void @@ -311,6 +309,9 @@ static void fallback_exit_cip(struct crypto_tfm *tfm) struct blkcipher_walk walk; int err, ret;
+ if (nbytes % AES_BLOCK_SIZE) + return -EINVAL; + if (unlikely(op->keylen != AES_KEYSIZE_128)) return fallback_blk_dec(desc, dst, src, nbytes);
@@ -343,6 +344,9 @@ static void fallback_exit_cip(struct crypto_tfm *tfm) struct blkcipher_walk walk; int err, ret;
+ if (nbytes % AES_BLOCK_SIZE) + return -EINVAL; + if (unlikely(op->keylen != AES_KEYSIZE_128)) return fallback_blk_enc(desc, dst, src, nbytes);
@@ -370,8 +374,9 @@ static int fallback_init_blk(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); struct geode_aes_op *op = crypto_tfm_ctx(tfm);
- op->fallback.blk = crypto_alloc_blkcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + op->fallback.blk = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(op->fallback.blk)) { printk(KERN_ERR "Error allocating fallback algo %s\n", name); @@ -385,7 +390,7 @@ static void fallback_exit_blk(struct crypto_tfm *tfm) { struct geode_aes_op *op = crypto_tfm_ctx(tfm);
- crypto_free_blkcipher(op->fallback.blk); + crypto_free_skcipher(op->fallback.blk); op->fallback.blk = NULL; }
@@ -424,6 +429,9 @@ static void fallback_exit_blk(struct crypto_tfm *tfm) struct blkcipher_walk walk; int err, ret;
+ if (nbytes % AES_BLOCK_SIZE) + return -EINVAL; + if (unlikely(op->keylen != AES_KEYSIZE_128)) return fallback_blk_dec(desc, dst, src, nbytes);
@@ -454,6 +462,9 @@ static void fallback_exit_blk(struct crypto_tfm *tfm) struct blkcipher_walk walk; int err, ret;
+ if (nbytes % AES_BLOCK_SIZE) + return -EINVAL; + if (unlikely(op->keylen != AES_KEYSIZE_128)) return fallback_blk_enc(desc, dst, src, nbytes);
diff --git a/drivers/crypto/geode-aes.h b/drivers/crypto/geode-aes.h index f442ca9..c5763a04 100644 --- a/drivers/crypto/geode-aes.h +++ b/drivers/crypto/geode-aes.h @@ -64,7 +64,7 @@ struct geode_aes_op { u8 *iv;
union { - struct crypto_blkcipher *blk; + struct crypto_skcipher *blk; struct crypto_cipher *cip; } fallback; u32 keylen;
From: Suzuki K Poulose suzuki.poulose@arm.com
commit 730766bae3280a25d40ea76a53dc6342e84e6513 upstream.
During a perf session we try to allocate buffers on the "node" associated with the CPU the event is bound to. If it is not bound to a CPU, we use the current CPU node, using smp_processor_id(). However this is unsafe in a pre-emptible context and could generate the splats as below :
BUG: using smp_processor_id() in preemptible [00000000] code: perf/2544
Use NUMA_NO_NODE hint instead of using the current node for events not bound to CPUs.
Fixes: 2997aa4063d97fdb39 ("coresight: etb10: implementing AUX API") Cc: Mathieu Poirier mathieu.poirier@linaro.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com Cc: stable stable@vger.kernel.org # 4.6+ Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org Link: https://lore.kernel.org/r/20190620221237.3536-5-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hwtracing/coresight/coresight-etb10.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index 0dad862..6cf28b0 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -275,9 +275,7 @@ static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu, int node; struct cs_buffers *buf;
- if (cpu == -1) - cpu = smp_processor_id(); - node = cpu_to_node(cpu); + node = (cpu == -1) ? NUMA_NO_NODE : cpu_to_node(cpu);
buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node); if (!buf)
From: Suzuki K Poulose suzuki.poulose@arm.com
commit 024c1fd9dbcc1d8a847f1311f999d35783921b7f upstream.
During a perf session we try to allocate buffers on the "node" associated with the CPU the event is bound to. If it is not bound to a CPU, we use the current CPU node, using smp_processor_id(). However this is unsafe in a pre-emptible context and could generate the splats as below :
BUG: using smp_processor_id() in preemptible [00000000] code: perf/2544 caller is tmc_alloc_etf_buffer+0x5c/0x60 CPU: 2 PID: 2544 Comm: perf Not tainted 5.1.0-rc6-147786-g116841e #344 Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Feb 1 2019 Call trace: dump_backtrace+0x0/0x150 show_stack+0x14/0x20 dump_stack+0x9c/0xc4 debug_smp_processor_id+0x10c/0x110 tmc_alloc_etf_buffer+0x5c/0x60 etm_setup_aux+0x1c4/0x230 rb_alloc_aux+0x1b8/0x2b8 perf_mmap+0x35c/0x478 mmap_region+0x34c/0x4f0 do_mmap+0x2d8/0x418 vm_mmap_pgoff+0xd0/0xf8 ksys_mmap_pgoff+0x88/0xf8 __arm64_sys_mmap+0x28/0x38 el0_svc_handler+0xd8/0x138 el0_svc+0x8/0xc
Use NUMA_NO_NODE hint instead of using the current node for events not bound to CPUs.
Fixes: 2e499bbc1a929ac ("coresight: tmc: implementing TMC-ETF AUX space API") Cc: Mathieu Poirier mathieu.poirier@linaro.org Signed-off-by: Suzuki K Poulose suzuki.poulose@arm.com Cc: stable stable@vger.kernel.org # 4.7+ Signed-off-by: Mathieu Poirier mathieu.poirier@linaro.org Link: https://lore.kernel.org/r/20190620221237.3536-4-mathieu.poirier@linaro.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hwtracing/coresight/coresight-tmc-etf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index e310613..e90af39 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -304,9 +304,7 @@ static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu, int node; struct cs_buffers *buf;
- if (cpu == -1) - cpu = smp_processor_id(); - node = cpu_to_node(cpu); + node = (cpu == -1) ? NUMA_NO_NODE : cpu_to_node(cpu);
/* Allocate memory structure for interaction with Perf */ buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node);
From: Wen Huang huangwenabc@gmail.com
commit e5e884b42639c74b5b57dc277909915c0aefc8bb upstream.
add_ie_rates() copies rates without checking the length in the bss descriptor from a remote AP. When a victim connects to a remote attacker, this may trigger a buffer overflow. lbs_ibss_join_existing() copies rates without checking the length in the bss descriptor from a remote IBSS node. When a victim connects to a remote attacker, this may trigger a buffer overflow. Fix them by putting the length check before performing the copy.
This fix addresses CVE-2019-14896 and CVE-2019-14897. This also fixes the build warning about mixed declarations and code.
Reported-by: kbuild test robot lkp@intel.com Signed-off-by: Wen Huang huangwenabc@gmail.com Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/wireless/marvell/libertas/cfg.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 57edfad..c9401c1 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -273,6 +273,10 @@ static int lbs_add_supported_rates_tlv(u8 *tlv) int hw, ap, ap_max = ie[1]; u8 hw_rate;
+ if (ap_max > MAX_RATES) { + lbs_deb_assoc("invalid rates\n"); + return tlv; + } /* Advance past IE header */ ie += 2;
@@ -1717,6 +1721,9 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, struct cmd_ds_802_11_ad_hoc_join cmd; u8 preamble = RADIO_PREAMBLE_SHORT; int ret = 0; + int hw, i; + u8 rates_max; + u8 *rates;
/* TODO: set preamble based on scan result */ ret = lbs_set_radio(priv, preamble, 1); @@ -1775,9 +1782,12 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, if (!rates_eid) { lbs_add_rates(cmd.bss.rates); } else { - int hw, i; - u8 rates_max = rates_eid[1]; - u8 *rates = cmd.bss.rates; + rates_max = rates_eid[1]; + if (rates_max > MAX_RATES) { + lbs_deb_join("invalid rates"); + goto out; + } + rates = cmd.bss.rates; for (hw = 0; hw < ARRAY_SIZE(lbs_rates); hw++) { u8 hw_rate = lbs_rates[hw].bitrate / 5; for (i = 0; i < rates_max; i++) {
From: Hans Verkuil hverkuil-cisco@xs4all.nl
commit ee8951e56c0f960b9621636603a822811cef3158 upstream.
v4l2_vbi_format, v4l2_sliced_vbi_format and v4l2_sdr_format have a reserved array at the end that should be zeroed by drivers as per the V4L2 spec. Older drivers often do not do this, so just handle this in the core.
Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Mauro Carvalho Chehab mchehab@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/media/v4l2-core/v4l2-ioctl.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 7675b64..f75d892 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1548,12 +1548,12 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_s_fmt_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_sliced_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_s_fmt_sliced_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_vid_out)) @@ -1576,22 +1576,22 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_s_fmt_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_sliced_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_s_fmt_sliced_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SDR_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_sdr_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_s_fmt_sdr_cap(file, fh, arg); case V4L2_BUF_TYPE_SDR_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_sdr_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_s_fmt_sdr_out(file, fh, arg); case V4L2_BUF_TYPE_META_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_meta_cap)) @@ -1635,12 +1635,12 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + 
CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_try_fmt_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_sliced_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_try_fmt_sliced_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_vid_out)) @@ -1663,22 +1663,22 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_try_fmt_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_sliced_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_try_fmt_sliced_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SDR_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_sdr_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_try_fmt_sdr_cap(file, fh, arg); case V4L2_BUF_TYPE_SDR_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_sdr_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_try_fmt_sdr_out(file, fh, arg); case V4L2_BUF_TYPE_META_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_meta_cap))
From: Bo Wu wubo40@huawei.com
commit bba340c79bfe3644829db5c852fdfa9e33837d6d upstream.
In iscsi_if_rx func, after receiving one request through iscsi_if_recv_msg func, iscsi_if_send_reply will be called to try to reply to the request in a do-while loop. If the iscsi_if_send_reply function keeps returning -EAGAIN, a deadlock will occur.
For example, if a client only sends messages without ever calling recvmsg(), it will result in a watchdog soft lockup. The details are given as follows:
sock_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ISCSI); retval = bind(sock_fd, (struct sock addr*) & src_addr, sizeof(src_addr); while (1) { state_msg = sendmsg(sock_fd, &msg, 0); //Note: recvmsg(sock_fd, &msg, 0) is not processed here. } close(sock_fd);
watchdog: BUG: soft lockup - CPU#7 stuck for 22s! [netlink_test:253305] Sample time: 4000897528 ns(HZ: 250) Sample stat: curr: user: 675503481560, nice: 321724050, sys: 448689506750, idle: 4654054240530, iowait: 40885550700, irq: 14161174020, softirq: 8104324140, st: 0 deta: user: 0, nice: 0, sys: 3998210100, idle: 0, iowait: 0, irq: 1547170, softirq: 242870, st: 0 Sample softirq: TIMER: 992 SCHED: 8 Sample irqstat: irq 2: delta 1003, curr: 3103802, arch_timer CPU: 7 PID: 253305 Comm: netlink_test Kdump: loaded Tainted: G OE Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 pstate: 40400005 (nZcv daif +PAN -UAO) pc : __alloc_skb+0x104/0x1b0 lr : __alloc_skb+0x9c/0x1b0 sp : ffff000033603a30 x29: ffff000033603a30 x28: 00000000000002dd x27: ffff800b34ced810 x26: ffff800ba7569f00 x25: 00000000ffffffff x24: 0000000000000000 x23: ffff800f7c43f600 x22: 0000000000480020 x21: ffff0000091d9000 x20: ffff800b34eff200 x19: ffff800ba7569f00 x18: 0000000000000000 x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 x14: 0001000101000100 x13: 0000000101010000 x12: 0101000001010100 x11: 0001010101010001 x10: 00000000000002dd x9 : ffff000033603d58 x8 : ffff800b34eff400 x7 : ffff800ba7569200 x6 : ffff800b34eff400 x5 : 0000000000000000 x4 : 00000000ffffffff x3 : 0000000000000000 x2 : 0000000000000001 x1 : ffff800b34eff2c0 x0 : 0000000000000300 Call trace: __alloc_skb+0x104/0x1b0 iscsi_if_rx+0x144/0x12bc [scsi_transport_iscsi] netlink_unicast+0x1e0/0x258 netlink_sendmsg+0x310/0x378 sock_sendmsg+0x4c/0x70 sock_write_iter+0x90/0xf0 __vfs_write+0x11c/0x190 vfs_write+0xac/0x1c0 ksys_write+0x6c/0xd8 __arm64_sys_write+0x24/0x30 el0_svc_common+0x78/0x130 el0_svc_handler+0x38/0x78 el0_svc+0x8/0xc
Link: https://lore.kernel.org/r/EDBAAA0BBBA2AC4E9C8B6B81DEEE1D6915E3D4D2@dggeml505... Signed-off-by: Bo Wu wubo40@huawei.com Reviewed-by: Zhiqiang Liu liuzhiqiang26@huawei.com Reviewed-by: Lee Duncan lduncan@suse.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 4d0fc6b..4c4781e 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -37,6 +37,8 @@
#define ISCSI_TRANSPORT_VERSION "2.0-870"
+#define ISCSI_SEND_MAX_ALLOWED 10 + static int dbg_session; module_param_named(debug_session, dbg_session, int, S_IRUGO | S_IWUSR); @@ -3680,6 +3682,7 @@ static int iscsi_logout_flashnode_sid(struct iscsi_transport *transport, struct nlmsghdr *nlh; struct iscsi_uevent *ev; uint32_t group; + int retries = ISCSI_SEND_MAX_ALLOWED;
nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) || @@ -3710,6 +3713,10 @@ static int iscsi_logout_flashnode_sid(struct iscsi_transport *transport, break; err = iscsi_if_send_reply(portid, nlh->nlmsg_type, ev, sizeof(*ev)); + if (err == -EAGAIN && --retries < 0) { + printk(KERN_WARNING "Send reply failed, error %d\n", err); + break; + } } while (err < 0 && err != -ECONNREFUSED && err != -ESRCH); skb_pull(skb, rlen); }
From: Kadlecsik József kadlec@blackhole.kfki.hu
commit 32c72165dbd0e246e69d16a3ad348a4851afd415 upstream.
The bitmap allocation did not use full unsigned long sizes when calculating the required size and that was triggered by KASAN as slab-out-of-bounds read in several places. The patch fixes all of them.
Reported-by: syzbot+fabca5cbf5e54f3fe2de@syzkaller.appspotmail.com Reported-by: syzbot+827ced406c9a1d9570ed@syzkaller.appspotmail.com Reported-by: syzbot+190d63957b22ef673ea5@syzkaller.appspotmail.com Reported-by: syzbot+dfccdb2bdb4a12ad425e@syzkaller.appspotmail.com Reported-by: syzbot+df0d0f5895ef1f41a65b@syzkaller.appspotmail.com Reported-by: syzbot+b08bd19bb37513357fd4@syzkaller.appspotmail.com Reported-by: syzbot+53cdd0ec0bbabd53370a@syzkaller.appspotmail.com Signed-off-by: Jozsef Kadlecsik kadlec@netfilter.org Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/netfilter/ipset/ip_set.h | 7 ------- net/netfilter/ipset/ip_set_bitmap_gen.h | 2 +- net/netfilter/ipset/ip_set_bitmap_ip.c | 6 +++--- net/netfilter/ipset/ip_set_bitmap_ipmac.c | 6 +++--- net/netfilter/ipset/ip_set_bitmap_port.c | 6 +++--- 5 files changed, 10 insertions(+), 17 deletions(-)
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 1d100ef..7e39049 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -451,13 +451,6 @@ static inline int nla_put_ipaddr6(struct sk_buff *skb, int type, sizeof(*addr)); }
-/* Calculate the bytes required to store the inclusive range of a-b */ -static inline int -bitmap_bytes(u32 a, u32 b) -{ - return 4 * ((((b - a + 8) / 8) + 3) / 4); -} - #include <linux/netfilter/ipset/ip_set_timeout.h> #include <linux/netfilter/ipset/ip_set_comment.h> #include <linux/netfilter/ipset/ip_set_counter.h> diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index af480ff..37f6806 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -79,7 +79,7 @@
if (set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); - memset(map->members, 0, map->memsize); + bitmap_zero(map->members, map->elements); set->elements = 0; set->ext_size = 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 488d6d0..e325707 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -40,7 +40,7 @@
/* Type structure */ struct bitmap_ip { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -223,7 +223,7 @@ struct bitmap_ip_elem { u32 first_ip, u32 last_ip, u32 elements, u32 hosts, u8 netmask) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_ip = first_ip; @@ -313,7 +313,7 @@ struct bitmap_ip_elem { if (!map) return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ip; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 794e0335..9669cac 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -46,7 +46,7 @@ enum {
/* Type structure */ struct bitmap_ipmac { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -303,7 +303,7 @@ struct bitmap_ipmac_elem { init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_ip = first_ip; @@ -364,7 +364,7 @@ struct bitmap_ipmac_elem { if (!map) return -ENOMEM;
- map->memsize = bitmap_bytes(0, elements - 1); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ipmac; if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index b561ca8..ae09f2a 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -34,7 +34,7 @@
/* Type structure */ struct bitmap_port { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u16 first_port; /* host byte order, included in range */ u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -208,7 +208,7 @@ struct bitmap_port_elem { init_map_port(struct ip_set *set, struct bitmap_port *map, u16 first_port, u16 last_port) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_port = first_port; @@ -248,7 +248,7 @@ struct bitmap_port_elem { return -ENOMEM;
map->elements = elements; - map->memsize = bitmap_bytes(0, map->elements); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_port; if (!init_map_port(set, map, first_port, last_port)) { kfree(map);
From: Pablo Neira Ayuso pablo@netfilter.org
commit 826035498ec14b77b62a44f0cb6b94d45530db6f upstream.
This new helper function validates that unknown family and chain type coming from userspace do not trigger an out-of-bound array access. Bail out in case __nft_chain_type_get() returns NULL from nft_chain_parse_hook().
Fixes: 9370761c56b6 ("netfilter: nf_tables: convert built-in tables/chains to chain types") Reported-by: syzbot+156a04714799b1d480bc@syzkaller.appspotmail.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/netfilter/nf_tables_api.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7f0d3ff..5881f66 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -472,14 +472,27 @@ static inline u64 nf_tables_alloc_handle(struct nft_table *table) static const struct nft_chain_type *chain_type[NFPROTO_NUMPROTO][NFT_CHAIN_T_MAX];
static const struct nft_chain_type * +__nft_chain_type_get(u8 family, enum nft_chain_types type) +{ + if (family >= NFPROTO_NUMPROTO || + type >= NFT_CHAIN_T_MAX) + return NULL; + + return chain_type[family][type]; +} + +static const struct nft_chain_type * __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family) { + const struct nft_chain_type *type; int i;
for (i = 0; i < NFT_CHAIN_T_MAX; i++) { - if (chain_type[family][i] != NULL && - !nla_strcmp(nla, chain_type[family][i]->name)) - return chain_type[family][i]; + type = __nft_chain_type_get(family, i); + if (!type) + continue; + if (!nla_strcmp(nla, type->name)) + return type; } return NULL; } @@ -1050,11 +1063,8 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
void nft_register_chain_type(const struct nft_chain_type *ctype) { - if (WARN_ON(ctype->family >= NFPROTO_NUMPROTO)) - return; - nfnl_lock(NFNL_SUBSYS_NFTABLES); - if (WARN_ON(chain_type[ctype->family][ctype->type] != NULL)) { + if (WARN_ON(__nft_chain_type_get(ctype->family, ctype->type))) { nfnl_unlock(NFNL_SUBSYS_NFTABLES); return; } @@ -1511,7 +1521,10 @@ static int nft_chain_parse_hook(struct net *net, hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY]));
- type = chain_type[family][NFT_CHAIN_T_DEFAULT]; + type = __nft_chain_type_get(family, NFT_CHAIN_T_DEFAULT); + if (!type) + return -EOPNOTSUPP; + if (nla[NFTA_CHAIN_TYPE]) { type = nf_tables_chain_type_lookup(net, nla[NFTA_CHAIN_TYPE], family, autoload);
From: Martin Schiller ms@dev.tdt.de
commit e21dba7a4df4d93da237da65a096084b4f2e87b4 upstream.
This patch fixes 2 issues in x25_connect():
1. It makes absolutely no sense to reset the neighbour and the connection state after a (successful) nonblocking call of x25_connect. This prevents any connection from being established, since the response (call accept) cannot be processed.
2. Any further calls to x25_connect() while a call is pending should simply return, instead of creating new Call Requests (on different logical channels).
This patch should also fix the "KASAN: null-ptr-deref Write in x25_connect" and "BUG: unable to handle kernel NULL pointer dereference in x25_connect" bugs reported by syzbot.
Signed-off-by: Martin Schiller ms@dev.tdt.de Reported-by: syzbot+429c200ffc8772bfe070@syzkaller.appspotmail.com Reported-by: syzbot+eec0c87f31a7c3b66f7b@syzkaller.appspotmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/x25/af_x25.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 20a51139..bd1cbbf 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -765,6 +765,10 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_state == TCP_ESTABLISHED) goto out;
+ rc = -EALREADY; /* Do nothing if call is already in progress */ + if (sk->sk_state == TCP_SYN_SENT) + goto out; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED;
@@ -811,7 +815,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, /* Now the loop */ rc = -EINPROGRESS; if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) - goto out_put_neigh; + goto out;
rc = x25_wait_for_connection_establishment(sk); if (rc)
From: Wei Yang richard.weiyang@gmail.com
commit 83af658898cb292a32d8b6cd9b51266d7cfc4b6a upstream.
pgdat_resize_lock is used to protect pgdat's memory region information like: node_start_pfn, node_present_pages, etc. While in function sparse_add/remove_one_section(), pgdat_resize_lock is used to protect initialization/release of one mem_section. This looks not proper.
These code paths are currently protected by mem_hotplug_lock, but should there ever be any reason for locking at the sparse layer, a dedicated lock should be introduced.
Following is the current call trace of sparse_add/remove_one_section()
mem_hotplug_begin() arch_add_memory() add_pages() __add_pages() __add_section() sparse_add_one_section() mem_hotplug_done()
mem_hotplug_begin() arch_remove_memory() __remove_pages() __remove_section() sparse_remove_one_section() mem_hotplug_done()
The comment above the pgdat_resize_lock also mentions "Holding this will also guarantee that any pfn_valid() stays that way.", which is true with the current implementation and false after this patch. But the current implementation doesn't meet this comment. There aren't any pfn walkers that take the lock, so this looks like a relic from the past. This patch also removes this comment.
[richard.weiyang@gmail.com: v4] Link: http://lkml.kernel.org/r/20181204085657.20472-1-richard.weiyang@gmail.com [mhocko@suse.com: changelog suggestion] Link: http://lkml.kernel.org/r/20181128091243.19249-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang richard.weiyang@gmail.com Reviewed-by: David Hildenbrand david@redhat.com Acked-by: Michal Hocko mhocko@suse.com Cc: Dave Hansen dave.hansen@intel.com Cc: Oscar Salvador osalvador@suse.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/mmzone.h | 3 +-- mm/sparse.c | 9 +-------- 2 files changed, 2 insertions(+), 10 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 390dece..9c6181b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -641,8 +641,7 @@ struct zonelist { #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * Must be held any time you expect node_start_pfn, node_present_pages - * or node_spanned_pages stay constant. Holding this will also - * guarantee that any pfn_valid() stays that way. + * or node_spanned_pages stay constant. * * pgdat_resize_lock() and pgdat_resize_unlock() are provided to * manipulate node_size_lock without checking for CONFIG_MEMORY_HOTPLUG diff --git a/mm/sparse.c b/mm/sparse.c index 504c3ea..0599e7f 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -678,7 +678,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, struct mem_section *ms; struct page *memmap; unsigned long *usemap; - unsigned long flags; int ret;
/* @@ -698,8 +697,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, return -ENOMEM; }
- pgdat_resize_lock(pgdat, &flags); - ms = __pfn_to_section(start_pfn); if (ms->section_mem_map & SECTION_MARKED_PRESENT) { ret = -EEXIST; @@ -718,7 +715,6 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, sparse_init_one_section(ms, section_nr, memmap, usemap);
out: - pgdat_resize_unlock(pgdat, &flags); if (ret < 0) { kfree(usemap); __kfree_section_memmap(memmap, altmap); @@ -780,10 +776,8 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset, struct vmem_altmap *altmap) { struct page *memmap = NULL; - unsigned long *usemap = NULL, flags; - struct pglist_data *pgdat = zone->zone_pgdat; + unsigned long *usemap = NULL;
- pgdat_resize_lock(pgdat, &flags); if (ms->section_mem_map) { usemap = ms->pageblock_flags; memmap = sparse_decode_mem_map(ms->section_mem_map, @@ -791,7 +785,6 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, ms->section_mem_map = 0; ms->pageblock_flags = NULL; } - pgdat_resize_unlock(pgdat, &flags);
clear_hwpoisoned_pages(memmap + map_offset, PAGES_PER_SECTION - map_offset);
From: Wei Yang richard.weiyang@gmail.com
commit 4e0d2e7ef14d9e1c900dac909db45263822b824f upstream.
Since the information needed in sparse_add_one_section() is node id to allocate proper memory, it is not necessary to pass its pgdat.
This patch changes the prototype of sparse_add_one_section() to pass the node id directly. This is intended to avoid the misleading impression that sparse_add_one_section() would touch pgdat.
Link: http://lkml.kernel.org/r/20181204085657.20472-2-richard.weiyang@gmail.com Signed-off-by: Wei Yang richard.weiyang@gmail.com Reviewed-by: David Hildenbrand david@redhat.com Acked-by: Michal Hocko mhocko@suse.com Cc: Dave Hansen dave.hansen@intel.com Cc: Oscar Salvador osalvador@suse.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/memory_hotplug.h | 4 ++-- mm/memory_hotplug.c | 2 +- mm/sparse.c | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 6f13a5a..008e528 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -335,8 +335,8 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); -extern int sparse_add_one_section(struct pglist_data *pgdat, - unsigned long start_pfn, struct vmem_altmap *altmap); +extern int sparse_add_one_section(int nid, unsigned long start_pfn, + struct vmem_altmap *altmap); extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index bfd148d..968379e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -255,7 +255,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, if (pfn_valid(phys_start_pfn)) return -EEXIST;
- ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap); + ret = sparse_add_one_section(nid, phys_start_pfn, altmap); if (ret < 0) return ret;
diff --git a/mm/sparse.c b/mm/sparse.c index 0599e7f..8550915 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -671,8 +671,8 @@ static void free_map_bootmem(struct page *memmap) * set. If this is <=0, then that means that the passed-in * map was not consumed and must be freed. */ -int __meminit sparse_add_one_section(struct pglist_data *pgdat, - unsigned long start_pfn, struct vmem_altmap *altmap) +int __meminit sparse_add_one_section(int nid, unsigned long start_pfn, + struct vmem_altmap *altmap) { unsigned long section_nr = pfn_to_section_nr(start_pfn); struct mem_section *ms; @@ -684,11 +684,11 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, * no locking for this, because it does its own * plus, it does a kmalloc */ - ret = sparse_index_init(section_nr, pgdat->node_id); + ret = sparse_index_init(section_nr, nid); if (ret < 0 && ret != -EEXIST) return ret; ret = 0; - memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap); + memmap = kmalloc_section_memmap(section_nr, nid, altmap); if (!memmap) return -ENOMEM; usemap = __kmalloc_section_usemap();
From: Wei Yang richard.weiyang@gmail.com
commit 3b6fd6ffb27c2efa003c6d4d15ca72c054b71d7c upstream.
In cb5e39b8038b ("drivers: base: refactor add_memory_section() to add_memory_block()"), add_memory_block() is introduced, which is only invoked in memory_dev_init().
When combining these two loops in memory_dev_init() and add_memory_block(), they look like this:
for (i = 0; i < NR_MEM_SECTIONS; i += sections_per_block) for (j = i; (j < i + sections_per_block) && j < NR_MEM_SECTIONS; j++)
Since (i < NR_MEM_SECTIONS) is guaranteed and j stays within its own memory block, the check of (j < NR_MEM_SECTIONS) is not necessary.
This patch just removes this check.
Link: http://lkml.kernel.org/r/20181123222811.18216-1-richard.weiyang@gmail.com Signed-off-by: Wei Yang richard.weiyang@gmail.com Reviewed-by: Andrew Morton akpm@linux-foundation.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: "Rafael J. Wysocki" rafael@kernel.org Cc: Seth Jennings sjenning@redhat.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index b625642..5e49333 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -691,7 +691,7 @@ static int add_memory_block(int base_section_nr) int i, ret, section_count = 0, section_nr;
for (i = base_section_nr; - (i < base_section_nr + sections_per_block) && i < NR_MEM_SECTIONS; + i < base_section_nr + sections_per_block; i++) { if (!present_section_nr(i)) continue;
From: Oscar Salvador osalvador@suse.com
commit 2c2a5af6fed20cf74401c9d64319c76c5ff81309 upstream.
-- snip --
Missing unification of mm/hmm.c and kernel/memremap.c
-- snip --
Patch series "Do not touch pages in hot-remove path", v2.
This patchset aims for two things:
1) A better definition about offline and hot-remove stage 2) Solving bugs where we can access non-initialized pages during hot-remove operations [2] [3].
This is achieved by moving all page/zone handling to the offline stage, so we do not need to access pages when hot-removing memory.
[1] https://patchwork.kernel.org/cover/10691415/ [2] https://patchwork.kernel.org/patch/10547445/ [3] https://www.spinics.net/lists/linux-mm/msg161316.html
This patch (of 5):
This is a preparation for the follow-up patches. The idea of passing the nid is that it will allow us to get rid of the zone parameter afterwards.
Link: http://lkml.kernel.org/r/20181127162005.15833-2-osalvador@suse.de Signed-off-by: Oscar Salvador osalvador@suse.de Reviewed-by: David Hildenbrand david@redhat.com Reviewed-by: Pavel Tatashin pasha.tatashin@soleen.com Cc: Michal Hocko mhocko@suse.com Cc: Dan Williams dan.j.williams@intel.com Cc: Jerome Glisse jglisse@redhat.com Cc: Jonathan Cameron Jonathan.Cameron@huawei.com Cc: "Rafael J. Wysocki" rafael@kernel.org
Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/ia64/mm/init.c | 2 +- arch/powerpc/mm/mem.c | 3 ++- arch/s390/mm/init.c | 2 +- arch/sh/mm/init.c | 2 +- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/init_64.c | 3 ++- include/linux/memory_hotplug.h | 4 ++-- kernel/memremap.c | 5 ++++- mm/hmm.c | 4 +++- mm/memory_hotplug.c | 2 +- 10 files changed, 18 insertions(+), 11 deletions(-)
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 3b85c3e..b54d0ee 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -662,7 +662,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, }
#ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 9a6afd9..1b6e0ef 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -140,7 +140,8 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * }
#ifdef CONFIG_MEMORY_HOTREMOVE -int __meminit arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int __meminit arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 34bd72d..ab3e33a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -243,7 +243,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, }
#ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { /* * There is no hardware or firmware interface which could trigger a diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 7713c08..5c91bb6 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -444,7 +444,7 @@ int memory_add_physaddr_to_nid(u64 addr) #endif
#ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 979e0a0..9fa503f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -861,7 +861,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, }
#ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a3e9c6e..32066d5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1142,7 +1142,8 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, remove_pagetable(start, end, true, NULL); }
-int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +int __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 008e528..df77a75 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -109,8 +109,8 @@ static inline bool movable_node_is_enabled(void) }
#ifdef CONFIG_MEMORY_HOTREMOVE -extern int arch_remove_memory(u64 start, u64 size, - struct vmem_altmap *altmap); +extern int arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap); extern int __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); #endif /* CONFIG_MEMORY_HOTREMOVE */ diff --git a/kernel/memremap.c b/kernel/memremap.c index 7c5fb8a..2ee2e67 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -121,6 +121,7 @@ static void devm_memremap_pages_release(void *data) struct resource *res = &pgmap->res; resource_size_t align_start, align_size; unsigned long pfn; + int nid;
pgmap->kill(pgmap->ref); for_each_device_pfn(pfn, pgmap) @@ -131,13 +132,15 @@ static void devm_memremap_pages_release(void *data) align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) - align_start;
+ nid = page_to_nid(pfn_to_page(align_start >> PAGE_SHIFT)); + mem_hotplug_begin(); if (pgmap->type == MEMORY_DEVICE_PRIVATE) { pfn = align_start >> PAGE_SHIFT; __remove_pages(page_zone(pfn_to_page(pfn)), pfn, align_size >> PAGE_SHIFT, NULL); } else { - arch_remove_memory(align_start, align_size, + arch_remove_memory(nid, align_start, align_size, pgmap->altmap_valid ? &pgmap->altmap : NULL); kasan_remove_zero_shadow(__va(align_start), align_size); } diff --git a/mm/hmm.c b/mm/hmm.c index 57f0d2a..ae1f6ad 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -999,6 +999,7 @@ static void hmm_devmem_release(void *data) unsigned long start_pfn, npages; struct zone *zone; struct page *page; + int nid;
/* pages are dead and unused, undo the arch mapping */ start_pfn = (resource->start & ~(PA_SECTION_SIZE - 1)) >> PAGE_SHIFT; @@ -1006,12 +1007,13 @@ static void hmm_devmem_release(void *data)
page = pfn_to_page(start_pfn); zone = page_zone(page); + nid = page_to_nid(page);
mem_hotplug_begin(); if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) __remove_pages(zone, start_pfn, npages, NULL); else - arch_remove_memory(start_pfn << PAGE_SHIFT, + arch_remove_memory(nid, start_pfn << PAGE_SHIFT, npages << PAGE_SHIFT, NULL); mem_hotplug_done();
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 968379e..b3572a7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1901,7 +1901,7 @@ void __ref __remove_memory(int nid, u64 start, u64 size) memblock_free(start, size); memblock_remove(start, size);
- arch_remove_memory(start, size, NULL); + arch_remove_memory(nid, start, size, NULL);
try_offline_node(nid);
From: David Hildenbrand david@redhat.com
commit d9eb1417c77df7ce19abd2e41619e9dceccbdf2a upstream.
Patch series "mm/memory_hotplug: Better error handling when removing memory", v1.
Error handling when removing memory is somewhat messed up right now. Some errors result in warnings, others are completely ignored. Memory unplug code can essentially not deal with errors properly as of now. remove_memory() will never fail.
We have basically two choices: 1. Allow arch_remove_memory() and friends to fail, propagating errors via remove_memory(). Might be problematic (e.g. DIMMs consisting of multiple pieces added/removed separately). 2. Don't allow the functions to fail, handling errors in a nicer way.
It seems like most errors that can theoretically happen are really corner cases and mostly theoretical (e.g. "section not valid"). However e.g. aborting removal of sections while all callers simply continue in case of errors is not nice.
If we can guarantee that removal of memory always works (and WARN/skip in case of theoretical errors so we can figure out what is going on), we can go ahead and implement better error handling when adding memory.
E.g. via add_memory():
arch_add_memory() ret = do_stuff() if (ret) { arch_remove_memory(); goto error; }
Handling here that arch_remove_memory() might fail is basically impossible. So I suggest, let's avoid reporting errors while removing memory, warning on theoretical errors instead and continuing instead of aborting.
This patch (of 4):
__add_pages() doesn't add the memory resource, so __remove_pages() shouldn't remove it. Let's factor it out. Especially as it is a special case for memory used as system memory, added via add_memory() and friends.
We now remove the resource after removing the sections instead of doing it the other way around. I don't think this change is problematic.
add_memory() register memory resource arch_add_memory()
remove_memory arch_remove_memory() release memory resource
While at it, explain why we ignore errors and that it only happens if we remove memory in a different granularity than we added it.
[david@redhat.com: fix printk warning] Link: http://lkml.kernel.org/r/20190417120204.6997-1-david@redhat.com Link: http://lkml.kernel.org/r/20190409100148.24703-2-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Reviewed-by: Oscar Salvador osalvador@suse.de Cc: Michal Hocko mhocko@suse.com Cc: David Hildenbrand david@redhat.com Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Wei Yang richard.weiyang@gmail.com Cc: Qian Cai cai@lca.pw Cc: Arun KS arunks@codeaurora.org Cc: Mathieu Malaterre malat@debian.org Cc: Andrew Banman andrew.banman@hpe.com Cc: Andy Lutomirski luto@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Fenghua Yu fenghua.yu@intel.com Cc: Geert Uytterhoeven geert@linux-m68k.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@kernel.org Cc: Ingo Molnar mingo@redhat.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: "Kirill A. Shutemov" kirill.shutemov@linux.intel.com Cc: Martin Schwidefsky schwidefsky@de.ibm.com Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Michael Ellerman mpe@ellerman.id.au Cc: Mike Rapoport rppt@linux.ibm.com Cc: Mike Travis mike.travis@hpe.com Cc: Nicholas Piggin npiggin@gmail.com Cc: Oscar Salvador osalvador@suse.com Cc: Paul Mackerras paulus@samba.org Cc: Peter Zijlstra peterz@infradead.org Cc: "Rafael J. 
Wysocki" rafael@kernel.org Cc: Rich Felker dalias@libc.org Cc: Rob Herring robh@kernel.org Cc: Stefan Agner stefan@agner.ch Cc: Thomas Gleixner tglx@linutronix.de Cc: Tony Luck tony.luck@intel.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/memory_hotplug.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b3572a7..e015534 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -523,20 +523,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, if (is_dev_zone(zone)) { if (altmap) map_offset = vmem_altmap_offset(altmap); - } else { - resource_size_t start, size; - - start = phys_start_pfn << PAGE_SHIFT; - size = nr_pages * PAGE_SIZE; - - ret = release_mem_region_adjustable(&iomem_resource, start, - size); - if (ret) { - resource_size_t endres = start + size - 1; - - pr_warn("Unable to release resource <%pa-%pa> (%d)\n", - &start, &endres, ret); - } }
clear_zone_contiguous(zone); @@ -1868,6 +1854,26 @@ void try_offline_node(int nid) } EXPORT_SYMBOL(try_offline_node);
+static void __release_memory_resource(resource_size_t start, + resource_size_t size) +{ + int ret; + + /* + * When removing memory in the same granularity as it was added, + * this function never fails. It might only fail if resources + * have to be adjusted or split. We'll ignore the error, as + * removing of memory cannot fail. + */ + ret = release_mem_region_adjustable(&iomem_resource, start, size); + if (ret) { + resource_size_t endres = start + size - 1; + + pr_warn("Unable to release resource <%pa-%pa> (%d)\n", + &start, &endres, ret); + } +} + /** * remove_memory * @nid: the node ID @@ -1902,6 +1908,7 @@ void __ref __remove_memory(int nid, u64 start, u64 size) memblock_remove(start, size);
arch_remove_memory(nid, start, size, NULL); + __release_memory_resource(start, size);
try_offline_node(nid);
From: Baoquan He bhe@redhat.com
commit 063b8a4cee8088224bcdb79bcd08db98df16178e upstream.
The input parameter 'phys_index' of memory_block_action() is actually the section number, but not the phys_index of memory_block. This is a relic from the past when one memory block could only contain one section. Rename it to start_section_nr.
And also in remove_memory_section(), the 'node_id' and 'phys_device' arguments are not used by anyone. Remove them.
Link: http://lkml.kernel.org/r/20190329144250.14315-2-bhe@redhat.com Signed-off-by: Baoquan He bhe@redhat.com Acked-by: Michal Hocko mhocko@suse.com Reviewed-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Reviewed-by: Mukesh Ojha mojha@codeaurora.org Reviewed-by: Oscar Salvador osalvador@suse.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/memory.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 5e49333..5f861fe 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -230,13 +230,14 @@ static bool pages_correctly_probed(unsigned long start_pfn) * OK to have direct references to sparsemem variables in here. */ static int -memory_block_action(unsigned long phys_index, unsigned long action, int online_type) +memory_block_action(unsigned long start_section_nr, unsigned long action, + int online_type) { unsigned long start_pfn; unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; int ret;
- start_pfn = section_nr_to_pfn(phys_index); + start_pfn = section_nr_to_pfn(start_section_nr);
switch (action) { case MEM_ONLINE: @@ -250,7 +251,7 @@ static bool pages_correctly_probed(unsigned long start_pfn) break; default: WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " - "%ld\n", __func__, phys_index, action, action); + "%ld\n", __func__, start_section_nr, action, action); ret = -EINVAL; }
@@ -747,8 +748,7 @@ int hotplug_memory_register(int nid, struct mem_section *section) device_unregister(&memory->dev); }
-static int remove_memory_section(unsigned long node_id, - struct mem_section *section, int phys_device) +static int remove_memory_section(struct mem_section *section) { struct memory_block *mem;
@@ -780,7 +780,7 @@ int unregister_memory_section(struct mem_section *section) if (!present_section(section)) return -EINVAL;
- return remove_memory_section(0, section, 0); + return remove_memory_section(section); } #endif /* CONFIG_MEMORY_HOTREMOVE */
From: Dan Carpenter dan.carpenter@oracle.com
commit 16df1456aa858a86f398dbc7d27649eb6662b0cc upstream.
The remove_memory_block() function was renamed to remove_memory_section() in commit cc292b0b4302 ("drivers/base/memory.c: rename remove_memory_block() to remove_memory_section()"). Update the comment that still refers to the old name.
Signed-off-by: Dan Carpenter dan.carpenter@oracle.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 5f861fe..104b0e7 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -743,7 +743,7 @@ int hotplug_memory_register(int nid, struct mem_section *section) { BUG_ON(memory->dev.bus != &memory_subsys);
- /* drop the ref. we got in remove_memory_block() */ + /* drop the ref. we got in remove_memory_section() */ put_device(&memory->dev); device_unregister(&memory->dev); }
From: David Hildenbrand david@redhat.com
commit cb7b3a3685b20d3b5900ff24b2cb96d002960189 upstream.
Failures while removing memory are mostly ignored and cannot really be handled. Let's handle errors in unregister_memory_section() gracefully: warn, but continue.
Link: http://lkml.kernel.org/r/20190409100148.24703-3-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: "Rafael J. Wysocki" rafael@kernel.org Cc: Ingo Molnar mingo@kernel.org Cc: Andrew Banman andrew.banman@hpe.com Cc: Mike Travis mike.travis@hpe.com Cc: David Hildenbrand david@redhat.com Cc: Oscar Salvador osalvador@suse.de Cc: Michal Hocko mhocko@suse.com Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Qian Cai cai@lca.pw Cc: Wei Yang richard.weiyang@gmail.com Cc: Arun KS arunks@codeaurora.org Cc: Mathieu Malaterre malat@debian.org Cc: Andy Lutomirski luto@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Fenghua Yu fenghua.yu@intel.com Cc: Geert Uytterhoeven geert@linux-m68k.org Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: "Kirill A. 
Shutemov" kirill.shutemov@linux.intel.com Cc: Martin Schwidefsky schwidefsky@de.ibm.com Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Michael Ellerman mpe@ellerman.id.au Cc: Mike Rapoport rppt@linux.ibm.com Cc: Nicholas Piggin npiggin@gmail.com Cc: Oscar Salvador osalvador@suse.com Cc: Paul Mackerras paulus@samba.org Cc: Peter Zijlstra peterz@infradead.org Cc: Rich Felker dalias@libc.org Cc: Rob Herring robh@kernel.org Cc: Stefan Agner stefan@agner.ch Cc: Thomas Gleixner tglx@linutronix.de Cc: Tony Luck tony.luck@intel.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/memory.c | 16 +++++----------- include/linux/memory.h | 2 +- mm/memory_hotplug.c | 4 +--- 3 files changed, 7 insertions(+), 15 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 104b0e7..e76a0d8 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -743,15 +743,18 @@ int hotplug_memory_register(int nid, struct mem_section *section) { BUG_ON(memory->dev.bus != &memory_subsys);
- /* drop the ref. we got in remove_memory_section() */ + /* drop the ref. we got via find_memory_block() */ put_device(&memory->dev); device_unregister(&memory->dev); }
-static int remove_memory_section(struct mem_section *section) +void unregister_memory_section(struct mem_section *section) { struct memory_block *mem;
+ if (WARN_ON_ONCE(!present_section(section))) + return; + mutex_lock(&mem_sysfs_mutex);
/* @@ -772,15 +775,6 @@ static int remove_memory_section(struct mem_section *section)
out_unlock: mutex_unlock(&mem_sysfs_mutex); - return 0; -} - -int unregister_memory_section(struct mem_section *section) -{ - if (!present_section(section)) - return -EINVAL; - - return remove_memory_section(section); } #endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/include/linux/memory.h b/include/linux/memory.h index a6ddefc..e1dc1bb 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -113,7 +113,7 @@ static inline int memory_isolate_notify(unsigned long val, void *v) extern void unregister_memory_isolate_notifier(struct notifier_block *nb); int hotplug_memory_register(int nid, struct mem_section *section); #ifdef CONFIG_MEMORY_HOTREMOVE -extern int unregister_memory_section(struct mem_section *); +extern void unregister_memory_section(struct mem_section *); #endif extern int memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index e015534..2f15c3e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -488,9 +488,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, if (!valid_section(ms)) return ret;
- ret = unregister_memory_section(ms); - if (ret) - return ret; + unregister_memory_section(ms);
scn_nr = __section_nr(ms); start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
From: David Hildenbrand david@redhat.com
commit 9d1d887d785b4fe0590bd3c5e71acaa3908044e2 upstream.
Let's just warn in case a section is not valid instead of failing to remove somewhere in the middle of the process, returning an error that will be mostly ignored by callers.
Link: http://lkml.kernel.org/r/20190409100148.24703-4-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Reviewed-by: Oscar Salvador osalvador@suse.de Cc: Michal Hocko mhocko@suse.com Cc: David Hildenbrand david@redhat.com Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Qian Cai cai@lca.pw Cc: Wei Yang richard.weiyang@gmail.com Cc: Arun KS arunks@codeaurora.org Cc: Mathieu Malaterre malat@debian.org Cc: Andrew Banman andrew.banman@hpe.com Cc: Andy Lutomirski luto@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Fenghua Yu fenghua.yu@intel.com Cc: Geert Uytterhoeven geert@linux-m68k.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@kernel.org Cc: Ingo Molnar mingo@redhat.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: "Kirill A. Shutemov" kirill.shutemov@linux.intel.com Cc: Martin Schwidefsky schwidefsky@de.ibm.com Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Michael Ellerman mpe@ellerman.id.au Cc: Mike Rapoport rppt@linux.ibm.com Cc: Mike Travis mike.travis@hpe.com Cc: Nicholas Piggin npiggin@gmail.com Cc: Oscar Salvador osalvador@suse.com Cc: Paul Mackerras paulus@samba.org Cc: Peter Zijlstra peterz@infradead.org Cc: "Rafael J. 
Wysocki" rafael@kernel.org Cc: Rich Felker dalias@libc.org Cc: Rob Herring robh@kernel.org Cc: Stefan Agner stefan@agner.ch Cc: Thomas Gleixner tglx@linutronix.de Cc: Tony Luck tony.luck@intel.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/memory_hotplug.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 2f15c3e..edfcecc 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -478,15 +478,15 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) pgdat_resize_unlock(zone->zone_pgdat, &flags); }
-static int __remove_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset, struct vmem_altmap *altmap) +static void __remove_section(struct zone *zone, struct mem_section *ms, + unsigned long map_offset, + struct vmem_altmap *altmap) { unsigned long start_pfn; int scn_nr; - int ret = -EINVAL;
- if (!valid_section(ms)) - return ret; + if (WARN_ON_ONCE(!valid_section(ms))) + return;
unregister_memory_section(ms);
@@ -495,7 +495,6 @@ static int __remove_section(struct zone *zone, struct mem_section *ms, __remove_zone(zone, start_pfn);
sparse_remove_one_section(zone, ms, map_offset, altmap); - return 0; }
/** @@ -515,7 +514,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, { unsigned long i; unsigned long map_offset = 0; - int sections_to_remove, ret = 0; + int sections_to_remove;
/* In the ZONE_DEVICE case device driver owns the memory region */ if (is_dev_zone(zone)) { @@ -536,16 +535,13 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
cond_resched(); - ret = __remove_section(zone, __pfn_to_section(pfn), map_offset, - altmap); + __remove_section(zone, __pfn_to_section(pfn), map_offset, + altmap); map_offset = 0; - if (ret) - break; }
set_zone_contiguous(zone); - - return ret; + return 0; } #endif /* CONFIG_MEMORY_HOTREMOVE */
From: "Aneesh Kumar K.V" aneesh.kumar@linux.ibm.com
commit 26ad26718dfaa7cf49d106d212ebf2370076c253 upstream.
This patch fixes the section mismatch warnings below.
WARNING: vmlinux.o(.text+0x2d1f44): Section mismatch in reference from the function devm_memremap_pages_release() to the function .meminit.text:arch_remove_memory() WARNING: vmlinux.o(.text+0x2d265c): Section mismatch in reference from the function devm_memremap_pages() to the function .meminit.text:arch_add_memory()
Signed-off-by: Aneesh Kumar K.V aneesh.kumar@linux.ibm.com Signed-off-by: Michael Ellerman mpe@ellerman.id.au Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/powerpc/mm/mem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1b6e0ef..625d785 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -118,8 +118,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end) return -ENODEV; }
-int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, - bool want_memblock) +int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, + bool want_memblock) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; @@ -140,8 +140,8 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap * }
#ifdef CONFIG_MEMORY_HOTREMOVE -int __meminit arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +int __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
From: David Hildenbrand david@redhat.com
commit ac5c94264580f498e484c854031d0226b3c1038f upstream.
-- snip --
Minor conflict in arch/powerpc/mm/mem.c
-- snip --
All callers of arch_remove_memory() ignore errors. And we should really try to remove any errors from the memory removal path. No more errors are reported from __remove_pages(). BUG() in s390x code in case arch_remove_memory() is triggered. We may implement that properly later. WARN in case powerpc code failed to remove the section mapping, which is better than ignoring the error completely right now.
Link: http://lkml.kernel.org/r/20190409100148.24703-5-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Cc: Tony Luck tony.luck@intel.com Cc: Fenghua Yu fenghua.yu@intel.com Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Paul Mackerras paulus@samba.org Cc: Michael Ellerman mpe@ellerman.id.au Cc: Martin Schwidefsky schwidefsky@de.ibm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Cc: Rich Felker dalias@libc.org Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Andy Lutomirski luto@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Ingo Molnar mingo@redhat.com Cc: Borislav Petkov bp@alien8.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Michal Hocko mhocko@suse.com Cc: Mike Rapoport rppt@linux.ibm.com Cc: Oscar Salvador osalvador@suse.com Cc: "Kirill A. Shutemov" kirill.shutemov@linux.intel.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Stefan Agner stefan@agner.ch Cc: Nicholas Piggin npiggin@gmail.com Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Arun KS arunks@codeaurora.org Cc: Geert Uytterhoeven geert@linux-m68k.org Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Rob Herring robh@kernel.org Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Wei Yang richard.weiyang@gmail.com Cc: Qian Cai cai@lca.pw Cc: Mathieu Malaterre malat@debian.org Cc: Andrew Banman andrew.banman@hpe.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Ingo Molnar mingo@kernel.org Cc: Mike Travis mike.travis@hpe.com Cc: Oscar Salvador osalvador@suse.de Cc: "Rafael J. 
Wysocki" rafael@kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/ia64/mm/init.c | 11 +++-------- arch/powerpc/mm/mem.c | 9 +++------ arch/s390/mm/init.c | 5 +++-- arch/sh/mm/init.c | 11 +++-------- arch/x86/mm/init_32.c | 5 +++-- arch/x86/mm/init_64.c | 10 +++------- include/linux/memory_hotplug.h | 8 ++++---- mm/memory_hotplug.c | 5 ++--- 8 files changed, 24 insertions(+), 40 deletions(-)
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b54d0ee..950a9e0 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -662,20 +662,15 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, }
#ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; - int ret;
zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages, altmap); - if (ret) - pr_warn("%s: Problem encountered in __remove_pages() as" - " ret=%d\n", __func__, ret); - - return ret; + __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 625d785..ab79f28 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -140,7 +140,7 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altm }
#ifdef CONFIG_MEMORY_HOTREMOVE -int __ref arch_remove_memory(int nid, u64 start, u64 size, +void __ref arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; @@ -156,14 +156,13 @@ int __ref arch_remove_memory(int nid, u64 start, u64 size, if (altmap) page += vmem_altmap_offset(altmap);
- ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); - if (ret) - return ret; + __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
/* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); flush_inval_dcache_range(start, start + size); ret = remove_section_mapping(start, start + size); + WARN_ON_ONCE(ret);
/* Ensure all vmalloc mappings are flushed in case they also * hit that section of memory @@ -171,8 +170,6 @@ int __ref arch_remove_memory(int nid, u64 start, u64 size, vm_unmap_aliases();
resize_hpt_for_hotplug(memblock_phys_mem_size()); - - return ret; } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ab3e33a..fede690 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -243,14 +243,15 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, }
#ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { /* * There is no hardware or firmware interface which could trigger a * hot memory remove on s390. So there is nothing that needs to be * implemented. */ - return -EBUSY; + BUG(); } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 5c91bb6..59ae5d7 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -444,20 +444,15 @@ int memory_add_physaddr_to_nid(u64 addr) #endif
#ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; - int ret;
zone = page_zone(pfn_to_page(start_pfn)); - ret = __remove_pages(zone, start_pfn, nr_pages, altmap); - if (unlikely(ret)) - pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, - ret); - - return ret; + __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9fa503f..c6a50a0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -861,14 +861,15 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, }
#ifdef CONFIG_MEMORY_HOTREMOVE -int arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) +void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone;
zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 32066d5..b9e15f2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1142,24 +1142,20 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, remove_pagetable(start, end, true, NULL); }
-int __ref arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap) +void __ref arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct page *page = pfn_to_page(start_pfn); struct zone *zone; - int ret;
/* With altmap the first mapped page is offset from @start */ if (altmap) page += vmem_altmap_offset(altmap); zone = page_zone(page); - ret = __remove_pages(zone, start_pfn, nr_pages, altmap); - WARN_ON_ONCE(ret); + __remove_pages(zone, start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); - - return ret; } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index df77a75..04c40da 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -109,10 +109,10 @@ static inline bool movable_node_is_enabled(void) }
#ifdef CONFIG_MEMORY_HOTREMOVE -extern int arch_remove_memory(int nid, u64 start, u64 size, - struct vmem_altmap *altmap); -extern int __remove_pages(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap); +extern void arch_remove_memory(int nid, u64 start, u64 size, + struct vmem_altmap *altmap); +extern void __remove_pages(struct zone *zone, unsigned long start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap); #endif /* CONFIG_MEMORY_HOTREMOVE */
/* reasonably generic interface to expand the physical pages */ diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index edfcecc..a8f8e14 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -509,8 +509,8 @@ static void __remove_section(struct zone *zone, struct mem_section *ms, * sure that pages are marked reserved and zones are adjust properly by * calling offline_pages(). */ -int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap) +void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, + unsigned long nr_pages, struct vmem_altmap *altmap) { unsigned long i; unsigned long map_offset = 0; @@ -541,7 +541,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn, }
set_zone_contiguous(zone); - return 0; } #endif /* CONFIG_MEMORY_HOTREMOVE */
From: David Hildenbrand david@redhat.com
commit 18c86506c80f6b6b5e67d95bf0d6f7e665de5239 upstream.
Implementing arch_remove_memory() on s390 will come in handy when we want to handle errors after arch_add_memory().
Link: http://lkml.kernel.org/r/20190527111152.16324-4-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Michal Hocko mhocko@suse.com Cc: Mike Rapoport rppt@linux.vnet.ibm.com Cc: David Hildenbrand david@redhat.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Oscar Salvador osalvador@suse.com Cc: Alex Deucher alexander.deucher@amd.com Cc: Andrew Banman andrew.banman@hpe.com Cc: Andy Lutomirski luto@kernel.org Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Arun KS arunks@codeaurora.org Cc: Baoquan He bhe@redhat.com Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Catalin Marinas catalin.marinas@arm.com Cc: Chintan Pandya cpandya@codeaurora.org Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Dan Williams dan.j.williams@intel.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: "David S. Miller" davem@davemloft.net Cc: Fenghua Yu fenghua.yu@intel.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@kernel.org Cc: Jonathan Cameron Jonathan.Cameron@huawei.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Jun Yao yaojun8558363@gmail.com Cc: "Kirill A. Shutemov" kirill.shutemov@linux.intel.com Cc: Logan Gunthorpe logang@deltatee.com Cc: Mark Brown broonie@kernel.org Cc: Mark Rutland mark.rutland@arm.com Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Mathieu Malaterre malat@debian.org Cc: Michael Ellerman mpe@ellerman.id.au Cc: Mike Rapoport rppt@linux.ibm.com Cc: "mike.travis@hpe.com" mike.travis@hpe.com Cc: Nicholas Piggin npiggin@gmail.com Cc: Oscar Salvador osalvador@suse.de Cc: Paul Mackerras paulus@samba.org Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Peter Zijlstra peterz@infradead.org Cc: Qian Cai cai@lca.pw Cc: "Rafael J. 
Wysocki" rafael@kernel.org Cc: Rich Felker dalias@libc.org Cc: Rob Herring robh@kernel.org Cc: Robin Murphy robin.murphy@arm.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Tony Luck tony.luck@intel.com Cc: Wei Yang richard.weiyang@gmail.com Cc: Will Deacon will.deacon@arm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Cc: Yu Zhao yuzhao@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/s390/mm/init.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index fede690..19bf02a 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -246,12 +246,13 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { - /* - * There is no hardware or firmware interface which could trigger a - * hot memory remove on s390. So there is nothing that needs to be - * implemented. - */ - BUG(); + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + + zone = page_zone(pfn_to_page(start_pfn)); + __remove_pages(zone, start_pfn, nr_pages, altmap); + vmem_remove_mapping(start, size); } #endif #endif /* CONFIG_MEMORY_HOTPLUG */
From: David Hildenbrand david@redhat.com
commit 80ec922dbd87fd38d15719c86a94457204648aeb upstream.
-- snip --
Missing arm64 memory hot(un)plug support.
-- snip --
We want to improve error handling while adding memory by allowing arch_remove_memory() and __remove_pages() to be used even if CONFIG_MEMORY_HOTREMOVE is not set, e.g., to implement something like:
arch_add_memory()
rc = do_something();
if (rc) {
	arch_remove_memory();
}
We won't get rid of CONFIG_MEMORY_HOTREMOVE for now, as it will require quite some dependencies for memory offlining.
Link: http://lkml.kernel.org/r/20190527111152.16324-7-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Reviewed-by: Pavel Tatashin pasha.tatashin@soleen.com Cc: Tony Luck tony.luck@intel.com Cc: Fenghua Yu fenghua.yu@intel.com Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Paul Mackerras paulus@samba.org Cc: Michael Ellerman mpe@ellerman.id.au Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Cc: Rich Felker dalias@libc.org Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Andy Lutomirski luto@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Borislav Petkov bp@alien8.de Cc: "H. Peter Anvin" hpa@zytor.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: "Rafael J. Wysocki" rafael@kernel.org Cc: Michal Hocko mhocko@suse.com Cc: David Hildenbrand david@redhat.com Cc: Oscar Salvador osalvador@suse.com Cc: "Kirill A. Shutemov" kirill.shutemov@linux.intel.com Cc: Alex Deucher alexander.deucher@amd.com Cc: "David S. 
Miller" davem@davemloft.net Cc: Mark Brown broonie@kernel.org Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Nicholas Piggin npiggin@gmail.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Rob Herring robh@kernel.org Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: "mike.travis@hpe.com" mike.travis@hpe.com Cc: Andrew Banman andrew.banman@hpe.com Cc: Arun KS arunks@codeaurora.org Cc: Qian Cai cai@lca.pw Cc: Mathieu Malaterre malat@debian.org Cc: Baoquan He bhe@redhat.com Cc: Logan Gunthorpe logang@deltatee.com Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Chintan Pandya cpandya@codeaurora.org Cc: Dan Williams dan.j.williams@intel.com Cc: Ingo Molnar mingo@kernel.org Cc: Jonathan Cameron Jonathan.Cameron@huawei.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Jun Yao yaojun8558363@gmail.com Cc: Mark Rutland mark.rutland@arm.com Cc: Mike Rapoport rppt@linux.vnet.ibm.com Cc: Oscar Salvador osalvador@suse.de Cc: Robin Murphy robin.murphy@arm.com Cc: Wei Yang richard.weiyang@gmail.com Cc: Will Deacon will.deacon@arm.com Cc: Yu Zhao yuzhao@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org [yyl: remove CONFIG_MEMORY_HOTREMOVE in arch/arm64/mm/mmu.c]
Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/mm/mmu.c | 2 -- arch/ia64/mm/init.c | 2 -- arch/powerpc/mm/mem.c | 2 -- arch/s390/mm/init.c | 2 -- arch/sh/mm/init.c | 2 -- arch/x86/mm/init_32.c | 2 -- arch/x86/mm/init_64.c | 2 -- drivers/base/memory.c | 2 -- include/linux/memory.h | 2 -- include/linux/memory_hotplug.h | 2 -- mm/memory_hotplug.c | 2 -- mm/sparse.c | 6 ------ 12 files changed, 28 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 722b7b0..660fd6e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1051,7 +1051,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT, altmap, want_memblock); } -#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -1070,4 +1069,3 @@ void arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif -#endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 950a9e0..778781e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -661,7 +661,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, return ret; }
-#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -673,4 +672,3 @@ void arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif -#endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ab79f28..84c6d37 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -139,7 +139,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altm return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); }
-#ifdef CONFIG_MEMORY_HOTREMOVE void __ref arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -172,7 +171,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, resize_hpt_for_hotplug(memblock_phys_mem_size()); } #endif -#endif /* CONFIG_MEMORY_HOTPLUG */
/* * walk_memory_resource() needs to make sure there is no holes in a given diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 19bf02a..ad3f213 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -242,7 +242,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, return rc; }
-#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -254,5 +253,4 @@ void arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); vmem_remove_mapping(start, size); } -#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 59ae5d7..0da784a 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -443,7 +443,6 @@ int memory_add_physaddr_to_nid(u64 addr) EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif
-#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -454,5 +453,4 @@ void arch_remove_memory(int nid, u64 start, u64 size, zone = page_zone(pfn_to_page(start_pfn)); __remove_pages(zone, start_pfn, nr_pages, altmap); } -#endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c6a50a0..64f54f7 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -860,7 +860,6 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock); }
-#ifdef CONFIG_MEMORY_HOTREMOVE void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap) { @@ -872,7 +871,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); } #endif -#endif
int kernel_set_to_readonly __read_mostly;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b9e15f2..50df7ca 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1132,7 +1132,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, remove_pagetable(start, end, false, altmap); }
-#ifdef CONFIG_MEMORY_HOTREMOVE static void __meminit kernel_physical_mapping_remove(unsigned long start, unsigned long end) { @@ -1157,7 +1156,6 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, __remove_pages(zone, start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); } -#endif #endif /* CONFIG_MEMORY_HOTPLUG */
static struct kcore_list kcore_vsyscall; diff --git a/drivers/base/memory.c b/drivers/base/memory.c index e76a0d8..c56b326 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -737,7 +737,6 @@ int hotplug_memory_register(int nid, struct mem_section *section) return ret; }
-#ifdef CONFIG_MEMORY_HOTREMOVE static void unregister_memory(struct memory_block *memory) { @@ -776,7 +775,6 @@ void unregister_memory_section(struct mem_section *section) out_unlock: mutex_unlock(&mem_sysfs_mutex); } -#endif /* CONFIG_MEMORY_HOTREMOVE */
/* return true if the memory block is offlined, otherwise, return false */ bool is_memblock_offlined(struct memory_block *mem) diff --git a/include/linux/memory.h b/include/linux/memory.h index e1dc1bb..474c7c6 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -112,9 +112,7 @@ static inline int memory_isolate_notify(unsigned long val, void *v) extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); int hotplug_memory_register(int nid, struct mem_section *section); -#ifdef CONFIG_MEMORY_HOTREMOVE extern void unregister_memory_section(struct mem_section *); -#endif extern int memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); extern int memory_isolate_notify(unsigned long val, void *v); diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 04c40da..5ac5832 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -108,12 +108,10 @@ static inline bool movable_node_is_enabled(void) return movable_node_enabled; }
-#ifdef CONFIG_MEMORY_HOTREMOVE extern void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap); extern void __remove_pages(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); -#endif /* CONFIG_MEMORY_HOTREMOVE */
/* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index a8f8e14..361eb87 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -315,7 +315,6 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn, return err; }
-#ifdef CONFIG_MEMORY_HOTREMOVE /* find the smallest valid pfn in the range [start_pfn, end_pfn) */ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, @@ -542,7 +541,6 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
set_zone_contiguous(zone); } -#endif /* CONFIG_MEMORY_HOTREMOVE */
int set_online_page_callback(online_page_callback_t callback) { diff --git a/mm/sparse.c b/mm/sparse.c index 8550915..6bc452e 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -586,7 +586,6 @@ static void __kfree_section_memmap(struct page *memmap,
vmemmap_free(start, end, altmap); } -#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) { unsigned long start = (unsigned long)memmap; @@ -594,7 +593,6 @@ static void free_map_bootmem(struct page *memmap)
vmemmap_free(start, end, NULL); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #else static struct page *__kmalloc_section_memmap(void) { @@ -633,7 +631,6 @@ static void __kfree_section_memmap(struct page *memmap, get_order(sizeof(struct page) * PAGES_PER_SECTION)); }
-#ifdef CONFIG_MEMORY_HOTREMOVE static void free_map_bootmem(struct page *memmap) { unsigned long maps_section_nr, removing_section_nr, i; @@ -663,7 +660,6 @@ static void free_map_bootmem(struct page *memmap) put_page_bootmem(page); } } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */
/* @@ -722,7 +718,6 @@ int __meminit sparse_add_one_section(int nid, unsigned long start_pfn, return ret; }
-#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_FAILURE static void clear_hwpoisoned_pages(struct page *memmap, int nr_pages) { @@ -790,5 +785,4 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, PAGES_PER_SECTION - map_offset); free_section_usemap(memmap, usemap, altmap); } -#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTPLUG */
From: David Hildenbrand david@redhat.com
commit 1811582587c43bdf13d690d83345610d4df433bb upstream.
We'll rework hotplug_memory_register() shortly, so that it no longer takes a section as a parameter.
[cai@lca.pw: fix a compilation warning] Link: http://lkml.kernel.org/r/1559320186-28337-1-git-send-email-cai@lca.pw Link: http://lkml.kernel.org/r/20190527111152.16324-6-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Qian Cai cai@lca.pw Acked-by: Michal Hocko mhocko@suse.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: "Rafael J. Wysocki" rafael@kernel.org Cc: Alex Deucher alexander.deucher@amd.com Cc: Andrew Banman andrew.banman@hpe.com Cc: Andy Lutomirski luto@kernel.org Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Arun KS arunks@codeaurora.org Cc: Baoquan He bhe@redhat.com Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Catalin Marinas catalin.marinas@arm.com Cc: Chintan Pandya cpandya@codeaurora.org Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Dan Williams dan.j.williams@intel.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: "David S. Miller" davem@davemloft.net Cc: Fenghua Yu fenghua.yu@intel.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@kernel.org Cc: Jonathan Cameron Jonathan.Cameron@huawei.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Jun Yao yaojun8558363@gmail.com Cc: "Kirill A. 
Shutemov" kirill.shutemov@linux.intel.com Cc: Logan Gunthorpe logang@deltatee.com Cc: Mark Brown broonie@kernel.org Cc: Mark Rutland mark.rutland@arm.com Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Mathieu Malaterre malat@debian.org Cc: Michael Ellerman mpe@ellerman.id.au Cc: Mike Rapoport rppt@linux.vnet.ibm.com Cc: "mike.travis@hpe.com" mike.travis@hpe.com Cc: Nicholas Piggin npiggin@gmail.com Cc: Oscar Salvador osalvador@suse.com Cc: Oscar Salvador osalvador@suse.de Cc: Paul Mackerras paulus@samba.org Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Peter Zijlstra peterz@infradead.org Cc: Rich Felker dalias@libc.org Cc: Rob Herring robh@kernel.org Cc: Robin Murphy robin.murphy@arm.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Tony Luck tony.luck@intel.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Wei Yang richard.weiyang@gmail.com Cc: Will Deacon will.deacon@arm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Cc: Yu Zhao yuzhao@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/memory.c | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c56b326..ec40599 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -660,21 +660,18 @@ int register_memory(struct memory_block *memory) return ret; }
-static int init_memory_block(struct memory_block **memory, - struct mem_section *section, unsigned long state) +static int init_memory_block(struct memory_block **memory, int block_id, + unsigned long state) { struct memory_block *mem; unsigned long start_pfn; - int scn_nr; int ret = 0;
mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return -ENOMEM;
- scn_nr = __section_nr(section); - mem->start_section_nr = - base_memory_block_id(scn_nr) * sections_per_block; + mem->start_section_nr = block_id * sections_per_block; mem->end_section_nr = mem->start_section_nr + sections_per_block - 1; mem->state = state; start_pfn = section_nr_to_pfn(mem->start_section_nr); @@ -689,21 +686,18 @@ static int init_memory_block(struct memory_block **memory, static int add_memory_block(int base_section_nr) { struct memory_block *mem; - int i, ret, section_count = 0, section_nr; + int i, ret, section_count = 0;
for (i = base_section_nr; i < base_section_nr + sections_per_block; - i++) { - if (!present_section_nr(i)) - continue; - if (section_count == 0) - section_nr = i; - section_count++; - } + i++) + if (present_section_nr(i)) + section_count++;
if (section_count == 0) return 0; - ret = init_memory_block(&mem, __nr_to_section(section_nr), MEM_ONLINE); + ret = init_memory_block(&mem, base_memory_block_id(base_section_nr), + MEM_ONLINE); if (ret) return ret; mem->section_count = section_count; @@ -716,6 +710,7 @@ static int add_memory_block(int base_section_nr) */ int hotplug_memory_register(int nid, struct mem_section *section) { + int block_id = base_memory_block_id(__section_nr(section)); int ret = 0; struct memory_block *mem;
@@ -726,7 +721,7 @@ int hotplug_memory_register(int nid, struct mem_section *section) mem->section_count++; put_device(&mem->dev); } else { - ret = init_memory_block(&mem, section, MEM_OFFLINE); + ret = init_memory_block(&mem, block_id, MEM_OFFLINE); if (ret) goto out; mem->section_count++;
From: David Hildenbrand david@redhat.com
commit db051a0dac13db24d58470d75cee0ce7c6b031a1 upstream.
Only memory to be added to the buddy and to be onlined/offlined by user space using /sys/devices/system/memory/... needs (and should have!) memory block devices.
Factor out creation of memory block devices. Create all devices after arch_add_memory() succeeded. We can later drop the want_memblock parameter, because it is now effectively stale.
Memory can be onlined by user space only after memory block devices have been added. This implies that memory is not visible to user space at all before arch_add_memory() has succeeded.
While at it:
- use WARN_ON_ONCE instead of BUG_ON in the moved unregister_memory()
- introduce find_memory_block_by_id() to search via block id
- use find_memory_block_by_id() in init_memory_block() to catch duplicates
Link: http://lkml.kernel.org/r/20190527111152.16324-8-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Reviewed-by: Pavel Tatashin pasha.tatashin@soleen.com Acked-by: Michal Hocko mhocko@suse.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: "Rafael J. Wysocki" rafael@kernel.org Cc: David Hildenbrand david@redhat.com Cc: "mike.travis@hpe.com" mike.travis@hpe.com Cc: Ingo Molnar mingo@kernel.org Cc: Andrew Banman andrew.banman@hpe.com Cc: Oscar Salvador osalvador@suse.de Cc: Qian Cai cai@lca.pw Cc: Wei Yang richard.weiyang@gmail.com Cc: Arun KS arunks@codeaurora.org Cc: Mathieu Malaterre malat@debian.org Cc: Alex Deucher alexander.deucher@amd.com Cc: Andy Lutomirski luto@kernel.org Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Baoquan He bhe@redhat.com Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Catalin Marinas catalin.marinas@arm.com Cc: Chintan Pandya cpandya@codeaurora.org Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Dan Williams dan.j.williams@intel.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: "David S. Miller" davem@davemloft.net Cc: Fenghua Yu fenghua.yu@intel.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Jonathan Cameron Jonathan.Cameron@huawei.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Jun Yao yaojun8558363@gmail.com Cc: "Kirill A. 
Shutemov" kirill.shutemov@linux.intel.com Cc: Logan Gunthorpe logang@deltatee.com Cc: Mark Brown broonie@kernel.org Cc: Mark Rutland mark.rutland@arm.com Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Michael Ellerman mpe@ellerman.id.au Cc: Mike Rapoport rppt@linux.vnet.ibm.com Cc: Nicholas Piggin npiggin@gmail.com Cc: Oscar Salvador osalvador@suse.com Cc: Paul Mackerras paulus@samba.org Cc: Peter Zijlstra peterz@infradead.org Cc: Rich Felker dalias@libc.org Cc: Rob Herring robh@kernel.org Cc: Robin Murphy robin.murphy@arm.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Tony Luck tony.luck@intel.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will.deacon@arm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Cc: Yu Zhao yuzhao@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/memory.c | 82 +++++++++++++++++++++++++++++++++----------------- include/linux/memory.h | 2 +- mm/memory_hotplug.c | 15 ++++----- 3 files changed, 63 insertions(+), 36 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index ec40599..25ef1cf 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -39,6 +39,11 @@ static inline int base_memory_block_id(int section_nr) return section_nr / sections_per_block; }
+static inline int pfn_to_block_id(unsigned long pfn) +{ + return base_memory_block_id(pfn_to_section_nr(pfn)); +} + static int memory_subsys_online(struct device *dev); static int memory_subsys_offline(struct device *dev);
@@ -591,10 +596,9 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn) * A reference for the returned object is held and the reference for the * hinted object is released. */ -struct memory_block *find_memory_block_hinted(struct mem_section *section, - struct memory_block *hint) +static struct memory_block *find_memory_block_by_id(int block_id, + struct memory_block *hint) { - int block_id = base_memory_block_id(__section_nr(section)); struct device *hintdev = hint ? &hint->dev : NULL; struct device *dev;
@@ -606,6 +610,14 @@ struct memory_block *find_memory_block_hinted(struct mem_section *section, return to_memory_block(dev); }
+struct memory_block *find_memory_block_hinted(struct mem_section *section, + struct memory_block *hint) +{ + int block_id = base_memory_block_id(__section_nr(section)); + + return find_memory_block_by_id(block_id, hint); +} + /* * For now, we have a linear search to go find the appropriate * memory_block corresponding to a particular phys_index. If @@ -667,6 +679,11 @@ static int init_memory_block(struct memory_block **memory, int block_id, unsigned long start_pfn; int ret = 0;
+ mem = find_memory_block_by_id(block_id, NULL); + if (mem) { + put_device(&mem->dev); + return -EEXIST; + } mem = kzalloc(sizeof(*mem), GFP_KERNEL); if (!mem) return -ENOMEM; @@ -704,44 +721,53 @@ static int add_memory_block(int base_section_nr) return 0; }
+static void unregister_memory(struct memory_block *memory) +{ + if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) + return; + + /* drop the ref. we got via find_memory_block() */ + put_device(&memory->dev); + device_unregister(&memory->dev); +} + /* - * need an interface for the VM to add new memory regions, - * but without onlining it. + * Create memory block devices for the given memory area. Start and size + * have to be aligned to memory block granularity. Memory block devices + * will be initialized as offline. */ -int hotplug_memory_register(int nid, struct mem_section *section) +int create_memory_block_devices(unsigned long start, unsigned long size) { - int block_id = base_memory_block_id(__section_nr(section)); - int ret = 0; + const int start_block_id = pfn_to_block_id(PFN_DOWN(start)); + int end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); struct memory_block *mem; + unsigned long block_id; + int ret = 0;
- mutex_lock(&mem_sysfs_mutex); + if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || + !IS_ALIGNED(size, memory_block_size_bytes()))) + return -EINVAL;
- mem = find_memory_block(section); - if (mem) { - mem->section_count++; - put_device(&mem->dev); - } else { + mutex_lock(&mem_sysfs_mutex); + for (block_id = start_block_id; block_id != end_block_id; block_id++) { ret = init_memory_block(&mem, block_id, MEM_OFFLINE); if (ret) - goto out; - mem->section_count++; + break; + mem->section_count = sections_per_block; + } + if (ret) { + end_block_id = block_id; + for (block_id = start_block_id; block_id != end_block_id; + block_id++) { + mem = find_memory_block_by_id(block_id, NULL); + mem->section_count = 0; + unregister_memory(mem); + } } - -out: mutex_unlock(&mem_sysfs_mutex); return ret; }
-static void -unregister_memory(struct memory_block *memory) -{ - BUG_ON(memory->dev.bus != &memory_subsys); - - /* drop the ref. we got via find_memory_block() */ - put_device(&memory->dev); - device_unregister(&memory->dev); -} - void unregister_memory_section(struct mem_section *section) { struct memory_block *mem; diff --git a/include/linux/memory.h b/include/linux/memory.h index 474c7c6..db3e856 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -111,7 +111,7 @@ static inline int memory_isolate_notify(unsigned long val, void *v) extern void unregister_memory_notifier(struct notifier_block *nb); extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); -int hotplug_memory_register(int nid, struct mem_section *section); +int create_memory_block_devices(unsigned long start, unsigned long size); extern void unregister_memory_section(struct mem_section *); extern int memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 361eb87..4139f2e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -256,13 +256,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn, return -EEXIST;
ret = sparse_add_one_section(nid, phys_start_pfn, altmap); - if (ret < 0) - return ret; - - if (!want_memblock) - return 0; - - return hotplug_memory_register(nid, __pfn_to_section(phys_start_pfn)); + return ret < 0 ? ret : 0; }
/* @@ -1091,6 +1085,13 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online) if (ret < 0) goto error;
+ /* create memory block devices after memory was added */ + ret = create_memory_block_devices(start, size); + if (ret) { + arch_remove_memory(nid, start, size, NULL); + goto error; + } + if (new_node) { /* If sysfs file of new node can't be created, cpu on the node * can't be hot-added. There is no rollback way now.
From: David Hildenbrand david@redhat.com
commit 4c4b7f9ba9486c565aead99a198ceeef73ae81f6 upstream.
Let's factor out removing of memory block devices, which is only necessary for memory added via add_memory() and friends that created memory block devices. Remove the devices before calling arch_remove_memory().
This finishes factoring out memory block device handling from arch_add_memory() and arch_remove_memory().
Link: http://lkml.kernel.org/r/20190527111152.16324-10-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Reviewed-by: Dan Williams dan.j.williams@intel.com Acked-by: Michal Hocko mhocko@suse.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: "Rafael J. Wysocki" rafael@kernel.org Cc: David Hildenbrand david@redhat.com Cc: "mike.travis@hpe.com" mike.travis@hpe.com Cc: Andrew Banman andrew.banman@hpe.com Cc: Ingo Molnar mingo@kernel.org Cc: Alex Deucher alexander.deucher@amd.com Cc: "David S. Miller" davem@davemloft.net Cc: Mark Brown broonie@kernel.org Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Oscar Salvador osalvador@suse.de Cc: Jonathan Cameron Jonathan.Cameron@huawei.com Cc: Arun KS arunks@codeaurora.org Cc: Mathieu Malaterre malat@debian.org Cc: Andy Lutomirski luto@kernel.org Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Baoquan He bhe@redhat.com Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Catalin Marinas catalin.marinas@arm.com Cc: Chintan Pandya cpandya@codeaurora.org Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Fenghua Yu fenghua.yu@intel.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Jun Yao yaojun8558363@gmail.com Cc: "Kirill A. 
Shutemov" kirill.shutemov@linux.intel.com Cc: Logan Gunthorpe logang@deltatee.com Cc: Mark Rutland mark.rutland@arm.com Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Michael Ellerman mpe@ellerman.id.au Cc: Mike Rapoport rppt@linux.vnet.ibm.com Cc: Nicholas Piggin npiggin@gmail.com Cc: Oscar Salvador osalvador@suse.com Cc: Paul Mackerras paulus@samba.org Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Peter Zijlstra peterz@infradead.org Cc: Qian Cai cai@lca.pw Cc: Rich Felker dalias@libc.org Cc: Rob Herring robh@kernel.org Cc: Robin Murphy robin.murphy@arm.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Tony Luck tony.luck@intel.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Wei Yang richard.weiyang@gmail.com Cc: Will Deacon will.deacon@arm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Cc: Yu Zhao yuzhao@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/memory.c | 37 ++++++++++++++++++------------------- drivers/base/node.c | 11 ++++++----- include/linux/memory.h | 2 +- include/linux/node.h | 6 ++---- mm/memory_hotplug.c | 5 +++-- 5 files changed, 30 insertions(+), 31 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 25ef1cf..ce3463b 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -768,32 +768,31 @@ int create_memory_block_devices(unsigned long start, unsigned long size) return ret; }
-void unregister_memory_section(struct mem_section *section) +/* + * Remove memory block devices for the given memory area. Start and size + * have to be aligned to memory block granularity. Memory block devices + * have to be offline. + */ +void remove_memory_block_devices(unsigned long start, unsigned long size) { + const int start_block_id = pfn_to_block_id(PFN_DOWN(start)); + const int end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); struct memory_block *mem; + int block_id;
- if (WARN_ON_ONCE(!present_section(section))) + if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || + !IS_ALIGNED(size, memory_block_size_bytes()))) return;
mutex_lock(&mem_sysfs_mutex); - - /* - * Some users of the memory hotplug do not want/need memblock to - * track all sections. Skip over those. - */ - mem = find_memory_block(section); - if (!mem) - goto out_unlock; - - unregister_mem_sect_under_nodes(mem, __section_nr(section)); - - mem->section_count--; - if (mem->section_count == 0) + for (block_id = start_block_id; block_id != end_block_id; block_id++) { + mem = find_memory_block_by_id(block_id, NULL); + if (WARN_ON_ONCE(!mem)) + continue; + mem->section_count = 0; + unregister_memory_block_under_nodes(mem); unregister_memory(mem); - else - put_device(&mem->dev); - -out_unlock: + } mutex_unlock(&mem_sysfs_mutex); }
diff --git a/drivers/base/node.c b/drivers/base/node.c index e6b0060..1e351a3 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -453,9 +453,10 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) return 0; }
-/* unregister memory section under all nodes that it spans */ -int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, - unsigned long phys_index) +/* + * Unregister memory block device under all nodes that it spans. + */ +int unregister_memory_block_under_nodes(struct memory_block *mem_blk) { NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); unsigned long pfn, sect_start_pfn, sect_end_pfn; @@ -468,8 +469,8 @@ int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, return -ENOMEM; nodes_clear(*unlinked_nodes);
- sect_start_pfn = section_nr_to_pfn(phys_index); - sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; + sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); + sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { int nid;
diff --git a/include/linux/memory.h b/include/linux/memory.h index db3e856..f26a541 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -112,7 +112,7 @@ static inline int memory_isolate_notify(unsigned long val, void *v) extern int register_memory_isolate_notifier(struct notifier_block *nb); extern void unregister_memory_isolate_notifier(struct notifier_block *nb); int create_memory_block_devices(unsigned long start, unsigned long size); -extern void unregister_memory_section(struct mem_section *); +void remove_memory_block_devices(unsigned long start, unsigned long size); extern int memory_dev_init(void); extern int memory_notify(unsigned long val, void *v); extern int memory_isolate_notify(unsigned long val, void *v); diff --git a/include/linux/node.h b/include/linux/node.h index 257bb3d..9a6db43 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -72,8 +72,7 @@ static inline int register_one_node(int nid) extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); extern int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg); -extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, - unsigned long phys_index); +extern int unregister_memory_block_under_nodes(struct memory_block *mem_blk);
#ifdef CONFIG_HUGETLBFS extern void register_hugetlbfs_with_node(node_registration_func_t doregister, @@ -105,8 +104,7 @@ static inline int register_mem_sect_under_node(struct memory_block *mem_blk, { return 0; } -static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, - unsigned long phys_index) +static inline int unregister_memory_block_under_nodes(struct memory_block *mem_blk) { return 0; } diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 4139f2e..22a17e9 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -481,8 +481,6 @@ static void __remove_section(struct zone *zone, struct mem_section *ms, if (WARN_ON_ONCE(!valid_section(ms))) return;
- unregister_memory_section(ms); - scn_nr = __section_nr(ms); start_pfn = section_nr_to_pfn((unsigned long)scn_nr); __remove_zone(zone, start_pfn); @@ -1899,6 +1897,9 @@ void __ref __remove_memory(int nid, u64 start, u64 size) memblock_free(start, size); memblock_remove(start, size);
+ /* remove memory block devices before removing memory */ + remove_memory_block_devices(start, size); + arch_remove_memory(nid, start, size, NULL); __release_memory_resource(start, size);
From: David Hildenbrand david@redhat.com
commit a31b264c2b415b29660da0bc2ba291a98629ce51 upstream.
We really don't want anything during memory hotunplug to fail. We always pass a valid memory block device, so that check can go. Avoid allocating memory and eventually failing. As we are always called under lock, we can use a static piece of memory. This avoids having to put the structure onto the stack and having to guess about the stack size of callers.
Patch inspired by a patch from Oscar Salvador.
In the future, there might be no need to iterate over nodes at all. mem->nid should tell us exactly what to remove. Memory block devices with mixed nodes (added during boot) should be properly fenced off and never removed.
Link: http://lkml.kernel.org/r/20190527111152.16324-11-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Reviewed-by: Wei Yang richardw.yang@linux.intel.com Reviewed-by: Oscar Salvador osalvador@suse.de Acked-by: Michal Hocko mhocko@suse.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: "Rafael J. Wysocki" rafael@kernel.org Cc: Alex Deucher alexander.deucher@amd.com Cc: "David S. Miller" davem@davemloft.net Cc: Mark Brown broonie@kernel.org Cc: Chris Wilson chris@chris-wilson.co.uk Cc: David Hildenbrand david@redhat.com Cc: Jonathan Cameron Jonathan.Cameron@huawei.com Cc: Andrew Banman andrew.banman@hpe.com Cc: Andy Lutomirski luto@kernel.org Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Arun KS arunks@codeaurora.org Cc: Baoquan He bhe@redhat.com Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Catalin Marinas catalin.marinas@arm.com Cc: Chintan Pandya cpandya@codeaurora.org Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Dan Williams dan.j.williams@intel.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Fenghua Yu fenghua.yu@intel.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@kernel.org Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Jun Yao yaojun8558363@gmail.com Cc: "Kirill A. 
Shutemov" kirill.shutemov@linux.intel.com Cc: Logan Gunthorpe logang@deltatee.com Cc: Mark Rutland mark.rutland@arm.com Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Mathieu Malaterre malat@debian.org Cc: Michael Ellerman mpe@ellerman.id.au Cc: Mike Rapoport rppt@linux.vnet.ibm.com Cc: "mike.travis@hpe.com" mike.travis@hpe.com Cc: Nicholas Piggin npiggin@gmail.com Cc: Paul Mackerras paulus@samba.org Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Peter Zijlstra peterz@infradead.org Cc: Qian Cai cai@lca.pw Cc: Rich Felker dalias@libc.org Cc: Rob Herring robh@kernel.org Cc: Robin Murphy robin.murphy@arm.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Tony Luck tony.luck@intel.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will.deacon@arm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Cc: Yu Zhao yuzhao@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/node.c | 18 +++++------------- include/linux/node.h | 5 ++--- 2 files changed, 7 insertions(+), 16 deletions(-)
diff --git a/drivers/base/node.c b/drivers/base/node.c index 1e351a3..126213d 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -455,20 +455,14 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg)
/* * Unregister memory block device under all nodes that it spans. + * Has to be called with mem_sysfs_mutex held (due to unlinked_nodes). */ -int unregister_memory_block_under_nodes(struct memory_block *mem_blk) +void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { - NODEMASK_ALLOC(nodemask_t, unlinked_nodes, GFP_KERNEL); unsigned long pfn, sect_start_pfn, sect_end_pfn; + static nodemask_t unlinked_nodes;
- if (!mem_blk) { - NODEMASK_FREE(unlinked_nodes); - return -EFAULT; - } - if (!unlinked_nodes) - return -ENOMEM; - nodes_clear(*unlinked_nodes); - + nodes_clear(unlinked_nodes); sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { @@ -479,15 +473,13 @@ int unregister_memory_block_under_nodes(struct memory_block *mem_blk) continue; if (!node_online(nid)) continue; - if (node_test_and_set(nid, *unlinked_nodes)) + if (node_test_and_set(nid, unlinked_nodes)) continue; sysfs_remove_link(&node_devices[nid]->dev.kobj, kobject_name(&mem_blk->dev.kobj)); sysfs_remove_link(&mem_blk->dev.kobj, kobject_name(&node_devices[nid]->dev.kobj)); } - NODEMASK_FREE(unlinked_nodes); - return 0; }
int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) diff --git a/include/linux/node.h b/include/linux/node.h index 9a6db43..708939b 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -72,7 +72,7 @@ static inline int register_one_node(int nid) extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); extern int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg); -extern int unregister_memory_block_under_nodes(struct memory_block *mem_blk); +extern void unregister_memory_block_under_nodes(struct memory_block *mem_blk);
#ifdef CONFIG_HUGETLBFS extern void register_hugetlbfs_with_node(node_registration_func_t doregister, @@ -104,9 +104,8 @@ static inline int register_mem_sect_under_node(struct memory_block *mem_blk, { return 0; } -static inline int unregister_memory_block_under_nodes(struct memory_block *mem_blk) +static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { - return 0; }
static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
From: David Hildenbrand david@redhat.com
commit b9bf8d342d9b443c0d19aa57883d8ddb38d965de upstream.
The parameter is unused, so let's drop it. Memory removal paths should never care about zones. This is the job of memory offlining and will require more refactorings.
Link: http://lkml.kernel.org/r/20190527111152.16324-12-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Reviewed-by: Dan Williams dan.j.williams@intel.com Reviewed-by: Wei Yang richardw.yang@linux.intel.com Reviewed-by: Oscar Salvador osalvador@suse.de Acked-by: Michal Hocko mhocko@suse.com Cc: Alex Deucher alexander.deucher@amd.com Cc: Andrew Banman andrew.banman@hpe.com Cc: Andy Lutomirski luto@kernel.org Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Arun KS arunks@codeaurora.org Cc: Baoquan He bhe@redhat.com Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Catalin Marinas catalin.marinas@arm.com Cc: Chintan Pandya cpandya@codeaurora.org Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Chris Wilson chris@chris-wilson.co.uk Cc: Dave Hansen dave.hansen@linux.intel.com Cc: "David S. Miller" davem@davemloft.net Cc: Fenghua Yu fenghua.yu@intel.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@kernel.org Cc: Jonathan Cameron Jonathan.Cameron@huawei.com Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Jun Yao yaojun8558363@gmail.com Cc: "Kirill A. Shutemov" kirill.shutemov@linux.intel.com Cc: Logan Gunthorpe logang@deltatee.com Cc: Mark Brown broonie@kernel.org Cc: Mark Rutland mark.rutland@arm.com Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: Mathieu Malaterre malat@debian.org Cc: Michael Ellerman mpe@ellerman.id.au Cc: Mike Rapoport rppt@linux.vnet.ibm.com Cc: "mike.travis@hpe.com" mike.travis@hpe.com Cc: Nicholas Piggin npiggin@gmail.com Cc: Paul Mackerras paulus@samba.org Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Peter Zijlstra peterz@infradead.org Cc: Qian Cai cai@lca.pw Cc: "Rafael J. 
Wysocki" rafael@kernel.org Cc: Rich Felker dalias@libc.org Cc: Rob Herring robh@kernel.org Cc: Robin Murphy robin.murphy@arm.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Tony Luck tony.luck@intel.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will.deacon@arm.com Cc: Yoshinori Sato ysato@users.sourceforge.jp Cc: Yu Zhao yuzhao@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/memory_hotplug.h | 2 +- mm/memory_hotplug.c | 2 +- mm/sparse.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 5ac5832..26bda04 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -335,7 +335,7 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, extern bool is_memblock_offlined(struct memory_block *mem); extern int sparse_add_one_section(int nid, unsigned long start_pfn, struct vmem_altmap *altmap); -extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, +extern void sparse_remove_one_section(struct mem_section *ms, unsigned long map_offset, struct vmem_altmap *altmap); extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 22a17e9..d593d21 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -485,7 +485,7 @@ static void __remove_section(struct zone *zone, struct mem_section *ms, start_pfn = section_nr_to_pfn((unsigned long)scn_nr); __remove_zone(zone, start_pfn);
- sparse_remove_one_section(zone, ms, map_offset, altmap); + sparse_remove_one_section(ms, map_offset, altmap); }
/** diff --git a/mm/sparse.c b/mm/sparse.c index 6bc452e..9854aff 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -767,8 +767,8 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap, free_map_bootmem(memmap); }
-void sparse_remove_one_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset, struct vmem_altmap *altmap) +void sparse_remove_one_section(struct mem_section *ms, unsigned long map_offset, + struct vmem_altmap *altmap) { struct page *memmap = NULL; unsigned long *usemap = NULL;
From: Dan Williams dan.j.williams@intel.com
commit 96da4350000973ef9310a10d077d65bbc017f093 upstream.
-- snip --
Minor conflict, keep the altmap check.
-- snip --
The zone type check was a leftover from the cleanup that plumbed altmap through the memory hotplug path, i.e. commit da024512a1fa "mm: pass the vmem_altmap to arch_remove_memory and __remove_pages".
Link: http://lkml.kernel.org/r/156092352642.979959.6664333788149363039.stgit@dwill... Signed-off-by: Dan Williams dan.j.williams@intel.com Reviewed-by: David Hildenbrand david@redhat.com Reviewed-by: Oscar Salvador osalvador@suse.de Tested-by: Aneesh Kumar K.V aneesh.kumar@linux.ibm.com [ppc64] Cc: Michal Hocko mhocko@suse.com Cc: Logan Gunthorpe logang@deltatee.com Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Jane Chu jane.chu@oracle.com Cc: Jeff Moyer jmoyer@redhat.com Cc: Jérôme Glisse jglisse@redhat.com Cc: Jonathan Corbet corbet@lwn.net Cc: Mike Rapoport rppt@linux.ibm.com Cc: Toshi Kani toshi.kani@hpe.com Cc: Vlastimil Babka vbabka@suse.cz Cc: Wei Yang richardw.yang@linux.intel.com Cc: Jason Gunthorpe jgg@mellanox.com Cc: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/memory_hotplug.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index d593d21..0015f40 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -507,11 +507,8 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, unsigned long map_offset = 0; int sections_to_remove;
- /* In the ZONE_DEVICE case device driver owns the memory region */ - if (is_dev_zone(zone)) { - if (altmap) - map_offset = vmem_altmap_offset(altmap); - } + if (altmap) + map_offset = vmem_altmap_offset(altmap);
clear_zone_contiguous(zone);
From: David Hildenbrand david@redhat.com
commit d84f2f5a755208da3f93e17714631485cb3da11c upstream.
We don't allow offlining memory block devices that belong to multiple NUMA nodes. Therefore, such devices can never get removed. It is sufficient to process a single node when removing the memory block. No need to iterate over each and every PFN.
We already have the nid stored for each memory block. Make sure that the nid always has a sane value.
Please note that checking for node_online(nid) is not required. If we would have a memory block belonging to a node that is no longer online, then we would have a BUG in the node offlining code.
Link: http://lkml.kernel.org/r/20190719135244.15242-1-david@redhat.com Signed-off-by: David Hildenbrand david@redhat.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: "Rafael J. Wysocki" rafael@kernel.org Cc: David Hildenbrand david@redhat.com Cc: Stephen Rothwell sfr@canb.auug.org.au Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Michal Hocko mhocko@suse.com Cc: Oscar Salvador osalvador@suse.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/memory.c | 1 + drivers/base/node.c | 39 +++++++++++++++------------------------ 2 files changed, 16 insertions(+), 24 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index ce3463b..46572f3 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -693,6 +693,7 @@ static int init_memory_block(struct memory_block **memory, int block_id, mem->state = state; start_pfn = section_nr_to_pfn(mem->start_section_nr); mem->phys_device = arch_get_memory_phys_device(start_pfn); + mem->nid = NUMA_NO_NODE;
ret = register_memory(mem);
diff --git a/drivers/base/node.c b/drivers/base/node.c index 126213d..6fb55a5 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -409,8 +409,6 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) int ret, nid = *(int *)arg; unsigned long pfn, sect_start_pfn, sect_end_pfn;
- mem_blk->nid = nid; - sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); sect_end_pfn += PAGES_PER_SECTION - 1; @@ -439,6 +437,13 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) if (page_nid != nid) continue; } + + /* + * If this memory block spans multiple nodes, we only indicate + * the last processed node. + */ + mem_blk->nid = nid; + ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, &mem_blk->dev.kobj, kobject_name(&mem_blk->dev.kobj)); @@ -454,32 +459,18 @@ int register_mem_sect_under_node(struct memory_block *mem_blk, void *arg) }
/* - * Unregister memory block device under all nodes that it spans. - * Has to be called with mem_sysfs_mutex held (due to unlinked_nodes). + * Unregister a memory block device under the node it spans. Memory blocks + * with multiple nodes cannot be offlined and therefore also never be removed. */ void unregister_memory_block_under_nodes(struct memory_block *mem_blk) { - unsigned long pfn, sect_start_pfn, sect_end_pfn; - static nodemask_t unlinked_nodes; - - nodes_clear(unlinked_nodes); - sect_start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); - sect_end_pfn = section_nr_to_pfn(mem_blk->end_section_nr); - for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { - int nid; + if (mem_blk->nid == NUMA_NO_NODE) + return;
- nid = get_nid_for_pfn(pfn); - if (nid < 0) - continue; - if (!node_online(nid)) - continue; - if (node_test_and_set(nid, unlinked_nodes)) - continue; - sysfs_remove_link(&node_devices[nid]->dev.kobj, - kobject_name(&mem_blk->dev.kobj)); - sysfs_remove_link(&mem_blk->dev.kobj, - kobject_name(&node_devices[nid]->dev.kobj)); - } + sysfs_remove_link(&node_devices[mem_blk->nid]->dev.kobj, + kobject_name(&mem_blk->dev.kobj)); + sysfs_remove_link(&mem_blk->dev.kobj, + kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); }
int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn)
From: "Aneesh Kumar K.V" aneesh.kumar@linux.ibm.com
commit 77e080e7680e1e615587352f70c87b9e98126d03 upstream.
-- snip --
- Missing mm/hmm.c and kernel/memremap.c unification. -- hmm code does not need fixes (no altmap) - Missing 7cc7867fb061 ("mm/devm_memremap_pages: enable sub-section remap")
-- snip --
Patch series "mm/memory_hotplug: Shrink zones before removing memory", v6.
This series fixes the access of uninitialized memmaps when shrinking zones/nodes and when removing memory. Also, it contains all fixes for crashes that can be triggered when removing certain namespace using memunmap_pages() - ZONE_DEVICE, reported by Aneesh.
We stop trying to shrink ZONE_DEVICE, as it's buggy, fixing it would be more involved (we don't have SECTION_IS_ONLINE as an indicator), and shrinking is only of limited use (set_zone_contiguous() cannot detect the ZONE_DEVICE as contiguous).
We continue shrinking !ZONE_DEVICE zones, however, I reduced the amount of code to a minimum. Shrinking is especially necessary to keep zone->contiguous set where possible, especially, on memory unplug of DIMMs at zone boundaries.
--------------------------------------------------------------------------
Zones are now properly shrunk when offlining memory blocks or when onlining failed. This allows zones to be properly shrunk on memory unplug even if the separate memory blocks of a DIMM were onlined to different zones or re-onlined to a different zone after offlining.
Example:
:/# cat /proc/zoneinfo Node 1, zone Movable spanned 0 present 0 managed 0 :/# echo "online_movable" > /sys/devices/system/memory/memory41/state :/# echo "online_movable" > /sys/devices/system/memory/memory43/state :/# cat /proc/zoneinfo Node 1, zone Movable spanned 98304 present 65536 managed 65536 :/# echo 0 > /sys/devices/system/memory/memory43/online :/# cat /proc/zoneinfo Node 1, zone Movable spanned 32768 present 32768 managed 32768 :/# echo 0 > /sys/devices/system/memory/memory41/online :/# cat /proc/zoneinfo Node 1, zone Movable spanned 0 present 0 managed 0
This patch (of 10):
With an altmap, the memmaps falling into the reserved altmap space are not initialized and, therefore, contain a garbage NID and a garbage zone. Make sure to read the NID/zone from a memmap that was initialized.
This fixes a kernel crash that is observed when destroying a namespace:
kernel BUG at include/linux/mm.h:1107! cpu 0x1: Vector: 700 (Program Check) at [c000000274087890] pc: c0000000004b9728: memunmap_pages+0x238/0x340 lr: c0000000004b9724: memunmap_pages+0x234/0x340 ... pid = 3669, comm = ndctl kernel BUG at include/linux/mm.h:1107! devm_action_release+0x30/0x50 release_nodes+0x268/0x2d0 device_release_driver_internal+0x174/0x240 unbind_store+0x13c/0x190 drv_attr_store+0x44/0x60 sysfs_kf_write+0x70/0xa0 kernfs_fop_write+0x1ac/0x290 __vfs_write+0x3c/0x70 vfs_write+0xe4/0x200 ksys_write+0x7c/0x140 system_call+0x5c/0x68
The "page_zone(pfn_to_page(pfn))" was introduced by 69324b8f4833 ("mm, devm_memremap_pages: add MEMORY_DEVICE_PRIVATE support"), however, I think we will never have driver reserved memory with MEMORY_DEVICE_PRIVATE (no altmap AFAIKS).
[david@redhat.com: minimze code changes, rephrase description] Link: http://lkml.kernel.org/r/20191006085646.5768-2-david@redhat.com Fixes: 2c2a5af6fed2 ("mm, memory_hotplug: add nid parameter to arch_remove_memory") Signed-off-by: Aneesh Kumar K.V aneesh.kumar@linux.ibm.com Signed-off-by: David Hildenbrand david@redhat.com Cc: Dan Williams dan.j.williams@intel.com Cc: Jason Gunthorpe jgg@ziepe.ca Cc: Logan Gunthorpe logang@deltatee.com Cc: Ira Weiny ira.weiny@intel.com Cc: Damian Tometzki damian.tometzki@gmail.com Cc: Alexander Duyck alexander.h.duyck@linux.intel.com Cc: Alexander Potapenko glider@google.com Cc: Andy Lutomirski luto@kernel.org Cc: Anshuman Khandual anshuman.khandual@arm.com Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Catalin Marinas catalin.marinas@arm.com Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Fenghua Yu fenghua.yu@intel.com Cc: Gerald Schaefer gerald.schaefer@de.ibm.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Halil Pasic pasic@linux.ibm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. 
Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: Jun Yao yaojun8558363@gmail.com Cc: Mark Rutland mark.rutland@arm.com Cc: Masahiro Yamada yamada.masahiro@socionext.com Cc: "Matthew Wilcox (Oracle)" willy@infradead.org Cc: Mel Gorman mgorman@techsingularity.net Cc: Michael Ellerman mpe@ellerman.id.au Cc: Michal Hocko mhocko@suse.com Cc: Mike Rapoport rppt@linux.ibm.com Cc: Oscar Salvador osalvador@suse.de Cc: Pankaj Gupta pagupta@redhat.com Cc: Paul Mackerras paulus@samba.org Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Pavel Tatashin pavel.tatashin@microsoft.com Cc: Peter Zijlstra peterz@infradead.org Cc: Qian Cai cai@lca.pw Cc: Rich Felker dalias@libc.org Cc: Robin Murphy robin.murphy@arm.com Cc: Steve Capper steve.capper@arm.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Tom Lendacky thomas.lendacky@amd.com Cc: Tony Luck tony.luck@intel.com Cc: Vasily Gorbik gor@linux.ibm.com Cc: Vlastimil Babka vbabka@suse.cz Cc: Wei Yang richard.weiyang@gmail.com Cc: Wei Yang richardw.yang@linux.intel.com Cc: Will Deacon will@kernel.org Cc: Yoshinori Sato ysato@users.sourceforge.jp Cc: Yu Zhao yuzhao@google.com Cc: stable@vger.kernel.org [5.0+] Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/memremap.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/kernel/memremap.c b/kernel/memremap.c index 2ee2e67..1ec1f8f 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -120,6 +120,7 @@ static void devm_memremap_pages_release(void *data) struct device *dev = pgmap->dev; struct resource *res = &pgmap->res; resource_size_t align_start, align_size; + struct page *first_page; unsigned long pfn; int nid;
@@ -132,13 +133,16 @@ static void devm_memremap_pages_release(void *data) align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE) - align_start;
- nid = page_to_nid(pfn_to_page(align_start >> PAGE_SHIFT)); + /* make sure to access a memmap that was actually initialized */ + first_page = pfn_to_page(pfn_first(pgmap)); + + nid = page_to_nid(first_page);
mem_hotplug_begin(); if (pgmap->type == MEMORY_DEVICE_PRIVATE) { pfn = align_start >> PAGE_SHIFT; - __remove_pages(page_zone(pfn_to_page(pfn)), pfn, - align_size >> PAGE_SHIFT, NULL); + __remove_pages(page_zone(first_page), pfn, + align_size >> PAGE_SHIFT, NULL); } else { arch_remove_memory(nid, align_start, align_size, pgmap->altmap_valid ? &pgmap->altmap : NULL);
From: David Hildenbrand david@redhat.com
commit 2c91f8fc6c999fe10185d8ad99fda1759f662f70 upstream.
-- snip --
Only contextual issues: - Unrelated check_and_unmap_cpu_on_node() changes are missing. - Unrelated walk_memory_blocks() has not been moved/refactored yet.
-- snip --
try_offline_node() is pretty much broken right now:
- The node span is updated when onlining memory, not when adding it. We ignore memory that was never onlined. Bad.
- We touch possible garbage memmaps. The pfn_to_nid(pfn) can easily trigger a kernel panic. Bad for memory that is offline but also bad for subsection hotadd with ZONE_DEVICE, whereby the memmap of the first PFN of a section might contain garbage.
- Sections belonging to mixed nodes are not properly considered.
As memory blocks might belong to multiple nodes, we would have to walk all pageblocks (or at least subsections) within present sections. However, we don't have a way to identify whether a memmap that is not online was initialized (relevant for ZONE_DEVICE). This makes things more complicated.
Luckily, we can piggyback on the node span and the nid stored in memory blocks. Currently, the node span is grown when calling move_pfn_range_to_zone() - e.g., when onlining memory, and shrunk when removing memory, before calling try_offline_node(). Sysfs links are created via link_mem_sections(), e.g., during boot or when adding memory.
If the node still spans memory or if any memory block belongs to the nid, we don't set the node offline. As memory blocks that span multiple nodes cannot get offlined, the nid stored in memory blocks is reliable enough (for such online memory blocks, the node still spans the memory).
Introduce for_each_memory_block() to efficiently walk all memory blocks.
Note: We will soon stop shrinking the ZONE_DEVICE zone and the node span when removing ZONE_DEVICE memory to fix similar issues (access of garbage memmaps) - until we have a reliable way to identify whether these memmaps were properly initialized. This implies later, that once a node had ZONE_DEVICE memory, we won't be able to set a node offline - which should be acceptable.
Since commit f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") memory that is added is not associated with a zone/node (memmap not initialized). The introducing commit 60a5a19e7419 ("memory-hotplug: remove sysfs file of node") already missed that we could have multiple nodes for a section and that the zone/node span is updated when onlining pages, not when adding them.
I tested this by hotplugging two DIMMs to a memory-less and cpu-less NUMA node. The node is properly onlined when adding the DIMMs. When removing the DIMMs, the node is properly offlined.
Masayoshi Mizuma reported:
: Without this patch, memory hotplug fails as panic: : : BUG: kernel NULL pointer dereference, address: 0000000000000000 : ... : Call Trace: : remove_memory_block_devices+0x81/0xc0 : try_remove_memory+0xb4/0x130 : __remove_memory+0xa/0x20 : acpi_memory_device_remove+0x84/0x100 : acpi_bus_trim+0x57/0x90 : acpi_bus_trim+0x2e/0x90 : acpi_device_hotplug+0x2b2/0x4d0 : acpi_hotplug_work_fn+0x1a/0x30 : process_one_work+0x171/0x380 : worker_thread+0x49/0x3f0 : kthread+0xf8/0x130 : ret_from_fork+0x35/0x40
[david@redhat.com: v3] Link: http://lkml.kernel.org/r/20191102120221.7553-1-david@redhat.com Link: http://lkml.kernel.org/r/20191028105458.28320-1-david@redhat.com Fixes: 60a5a19e7419 ("memory-hotplug: remove sysfs file of node") Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") # visiable after d0dc12e86b319 Signed-off-by: David Hildenbrand david@redhat.com Tested-by: Masayoshi Mizuma m.mizuma@jp.fujitsu.com Cc: Tang Chen tangchen@cn.fujitsu.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: "Rafael J. Wysocki" rafael@kernel.org Cc: Keith Busch keith.busch@intel.com Cc: Jiri Olsa jolsa@kernel.org Cc: "Peter Zijlstra (Intel)" peterz@infradead.org Cc: Jani Nikula jani.nikula@intel.com Cc: Nayna Jain nayna@linux.ibm.com Cc: Michal Hocko mhocko@suse.com Cc: Oscar Salvador osalvador@suse.de Cc: Stephen Rothwell sfr@canb.auug.org.au Cc: Dan Williams dan.j.williams@intel.com Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Conflicts: mm/memory_hotplug.c [yyl: adjust context]
Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/memory.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/memory.h | 2 ++ mm/memory_hotplug.c | 43 +++++++++++++++++++++++++++---------------- 3 files changed, 65 insertions(+), 16 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 46572f3..c4a7904 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -862,3 +862,39 @@ int __init memory_dev_init(void) printk(KERN_ERR "%s() failed: %d\n", __func__, ret); return ret; } + +struct for_each_memory_block_cb_data { + walk_memory_blocks_func_t func; + void *arg; +}; + +static int for_each_memory_block_cb(struct device *dev, void *data) +{ + struct memory_block *mem = to_memory_block(dev); + struct for_each_memory_block_cb_data *cb_data = data; + + return cb_data->func(mem, cb_data->arg); +} + +/** + * for_each_memory_block - walk through all present memory blocks + * + * @arg: argument passed to func + * @func: callback for each memory block walked + * + * This function walks through all present memory blocks, calling func on + * each memory block. + * + * In case func() returns an error, walking is aborted and the error is + * returned. + */ +int for_each_memory_block(void *arg, walk_memory_blocks_func_t func) +{ + struct for_each_memory_block_cb_data cb_data = { + .func = func, + .arg = arg, + }; + + return bus_for_each_dev(&memory_subsys, NULL, &cb_data, + for_each_memory_block_cb); +} diff --git a/include/linux/memory.h b/include/linux/memory.h index f26a541..5c41136 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -119,6 +119,8 @@ static inline int memory_isolate_notify(unsigned long val, void *v) extern struct memory_block *find_memory_block_hinted(struct mem_section *, struct memory_block *); extern struct memory_block *find_memory_block(struct mem_section *); +typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *); +extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func); #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT) #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0015f40..7d8091e 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1797,6 +1797,18 @@ static int check_cpu_on_node(pg_data_t *pgdat) return 0; }
+static int check_no_memblock_for_node_cb(struct memory_block *mem, void *arg) +{ + int nid = *(int *)arg; + + /* + * If a memory block belongs to multiple nodes, the stored nid is not + * reliable. However, such blocks are always online (e.g., cannot get + * offlined) and, therefore, are still spanned by the node. + */ + return mem->nid == nid ? -EEXIST : 0; +} + /** * try_offline_node * @nid: the node ID @@ -1809,25 +1821,24 @@ static int check_cpu_on_node(pg_data_t *pgdat) void try_offline_node(int nid) { pg_data_t *pgdat = NODE_DATA(nid); - unsigned long start_pfn = pgdat->node_start_pfn; - unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; - unsigned long pfn; - - for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { - unsigned long section_nr = pfn_to_section_nr(pfn); - - if (!present_section_nr(section_nr)) - continue; + int rc;
- if (pfn_to_nid(pfn) != nid) - continue; + /* + * If the node still spans pages (especially ZONE_DEVICE), don't + * offline it. A node spans memory after move_pfn_range_to_zone(), + * e.g., after the memory block was onlined. + */ + if (pgdat->node_spanned_pages) + return;
- /* - * some memory sections of this node are not removed, and we - * can't offline node now. - */ + /* + * Especially offline memory blocks might not be spanned by the + * node. They will get spanned by the node once they get onlined. + * However, they link to the node in sysfs and can get onlined later. + */ + rc = for_each_memory_block(&nid, check_no_memblock_for_node_cb); + if (rc) return; - }
if (check_cpu_on_node(pgdat)) return;
From: David Hildenbrand david@redhat.com
commit feee6b2989165631b17ac6d4ccdbf6759254e85a upstream.
-- snip --
- Missing arm64 hot(un)plug support - Missing some vmem_altmap_offset() cleanups - Missing sub-section hotadd support - Missing unification of mm/hmm.c and kernel/memremap.c
-- snip --
We currently try to shrink a single zone when removing memory. We use the zone of the first page of the memory we are removing. If that memmap was never initialized (e.g., memory was never onlined), we will read garbage and can trigger kernel BUGs (due to a stale pointer):
BUG: unable to handle page fault for address: 000000000000353d #PF: supervisor write access in kernel mode #PF: error_code(0x0002) - not-present page PGD 0 P4D 0 Oops: 0002 [#1] SMP PTI CPU: 1 PID: 7 Comm: kworker/u8:0 Not tainted 5.3.0-rc5-next-20190820+ #317 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58e9a3f-prebuilt.qemu.4 Workqueue: kacpi_hotplug acpi_hotplug_work_fn RIP: 0010:clear_zone_contiguous+0x5/0x10 Code: 48 89 c6 48 89 c3 e8 2a fe ff ff 48 85 c0 75 cf 5b 5d c3 c6 85 fd 05 00 00 01 5b 5d c3 0f 1f 840 RSP: 0018:ffffad2400043c98 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000200000000 RCX: 0000000000000000 RDX: 0000000000200000 RSI: 0000000000140000 RDI: 0000000000002f40 RBP: 0000000140000000 R08: 0000000000000000 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000140000 R13: 0000000000140000 R14: 0000000000002f40 R15: ffff9e3e7aff3680 FS: 0000000000000000(0000) GS:ffff9e3e7bb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000353d CR3: 0000000058610000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __remove_pages+0x4b/0x640 arch_remove_memory+0x63/0x8d try_remove_memory+0xdb/0x130 __remove_memory+0xa/0x11 acpi_memory_device_remove+0x70/0x100 acpi_bus_trim+0x55/0x90 acpi_device_hotplug+0x227/0x3a0 acpi_hotplug_work_fn+0x1a/0x30 process_one_work+0x221/0x550 worker_thread+0x50/0x3b0 kthread+0x105/0x140 ret_from_fork+0x3a/0x50 Modules linked in: CR2: 000000000000353d
Instead, shrink the zones when offlining memory or when onlining failed. Introduce and use remove_pfn_range_from_zone() for that. We now properly shrink the zones, even if we have DIMMs whereby
- Some memory blocks fall into no zone (never onlined)
- Some memory blocks fall into multiple zones (offlined+re-onlined)
- Multiple memory blocks that fall into different zones
Drop the zone parameter (with a potential dubious value) from __remove_pages() and __remove_section().
Link: http://lkml.kernel.org/r/20191006085646.5768-6-david@redhat.com Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") [visible after d0dc12e86b319] Signed-off-by: David Hildenbrand david@redhat.com Reviewed-by: Oscar Salvador osalvador@suse.de Cc: Michal Hocko mhocko@suse.com Cc: "Matthew Wilcox (Oracle)" willy@infradead.org Cc: "Aneesh Kumar K.V" aneesh.kumar@linux.ibm.com Cc: Pavel Tatashin pasha.tatashin@soleen.com Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Dan Williams dan.j.williams@intel.com Cc: Logan Gunthorpe logang@deltatee.com Cc: stable@vger.kernel.org [5.0+] Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com [yyl: drop the zone parameter in arch/arm64/mm/mmu.c]
Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/mm/mmu.c | 4 +--- arch/ia64/mm/init.c | 4 +--- arch/powerpc/mm/mem.c | 11 +---------- arch/s390/mm/init.c | 4 +--- arch/sh/mm/init.c | 4 +--- arch/x86/mm/init_32.c | 4 +--- arch/x86/mm/init_64.c | 8 +------- include/linux/memory_hotplug.h | 7 +++++-- kernel/memremap.c | 3 +-- mm/hmm.c | 4 +--- mm/memory_hotplug.c | 29 ++++++++++++++--------------- 11 files changed, 28 insertions(+), 54 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 660fd6e..5673db3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1056,7 +1056,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone;
/* * FIXME: Cleanup page tables (also in arch_add_memory() in case @@ -1065,7 +1064,6 @@ void arch_remove_memory(int nid, u64 start, u64 size, * unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be * unlocked yet. */ - zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 778781e..79e5cc7 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -666,9 +666,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 84c6d37..84a012e 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -144,18 +144,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page; int ret;
- /* - * If we have an altmap then we need to skip over any reserved PFNs - * when querying the zone. - */ - page = pfn_to_page(start_pfn); - if (altmap) - page += vmem_altmap_offset(altmap); - - __remove_pages(page_zone(page), start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap);
/* Remove htab bolted mappings for this section of memory */ start = (unsigned long)__va(start); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ad3f213..e3ea658 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -247,10 +247,8 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); vmem_remove_mapping(start, size); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index 0da784a..47882be 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -448,9 +448,7 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = PFN_DOWN(start); unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 64f54f7..79b95910 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn)); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); } #endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 50df7ca..81e85a8 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1146,14 +1146,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size, { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - struct page *page = pfn_to_page(start_pfn); - struct zone *zone;
- /* With altmap the first mapped page is offset from @start */ - if (altmap) - page += vmem_altmap_offset(altmap); - zone = page_zone(page); - __remove_pages(zone, start_pfn, nr_pages, altmap); + __remove_pages(start_pfn, nr_pages, altmap); kernel_physical_mapping_remove(start, start + size); } #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 26bda04..d17d45c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -110,8 +110,8 @@ static inline bool movable_node_is_enabled(void)
extern void arch_remove_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap); -extern void __remove_pages(struct zone *zone, unsigned long start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap); +extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap);
/* reasonably generic interface to expand the physical pages */ extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, @@ -331,6 +331,9 @@ extern int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock); extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, unsigned long nr_pages, struct vmem_altmap *altmap); +extern void remove_pfn_range_from_zone(struct zone *zone, + unsigned long start_pfn, + unsigned long nr_pages); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern bool is_memblock_offlined(struct memory_block *mem); extern int sparse_add_one_section(int nid, unsigned long start_pfn, diff --git a/kernel/memremap.c b/kernel/memremap.c index 1ec1f8f..331baad 100644 --- a/kernel/memremap.c +++ b/kernel/memremap.c @@ -141,8 +141,7 @@ static void devm_memremap_pages_release(void *data) mem_hotplug_begin(); if (pgmap->type == MEMORY_DEVICE_PRIVATE) { pfn = align_start >> PAGE_SHIFT; - __remove_pages(page_zone(first_page), pfn, - align_size >> PAGE_SHIFT, NULL); + __remove_pages(pfn, align_size >> PAGE_SHIFT, NULL); } else { arch_remove_memory(nid, align_start, align_size, pgmap->altmap_valid ? &pgmap->altmap : NULL); diff --git a/mm/hmm.c b/mm/hmm.c index ae1f6ad..c482c07 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -997,7 +997,6 @@ static void hmm_devmem_release(void *data) struct hmm_devmem *devmem = data; struct resource *resource = devmem->resource; unsigned long start_pfn, npages; - struct zone *zone; struct page *page; int nid;
@@ -1006,12 +1005,11 @@ static void hmm_devmem_release(void *data) npages = ALIGN(resource_size(resource), PA_SECTION_SIZE) >> PAGE_SHIFT;
page = pfn_to_page(start_pfn); - zone = page_zone(page); nid = page_to_nid(page);
mem_hotplug_begin(); if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) - __remove_pages(zone, start_pfn, npages, NULL); + __remove_pages(start_pfn, npages, NULL); else arch_remove_memory(nid, start_pfn << PAGE_SHIFT, npages << PAGE_SHIFT, NULL); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 7d8091e..ae6e469 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -449,10 +449,11 @@ static void update_pgdat_span(struct pglist_data *pgdat) pgdat->node_spanned_pages = node_end_pfn - node_start_pfn; }
-static void __remove_zone(struct zone *zone, unsigned long start_pfn) +void __ref remove_pfn_range_from_zone(struct zone *zone, + unsigned long start_pfn, + unsigned long nr_pages) { struct pglist_data *pgdat = zone->zone_pgdat; - int nr_pages = PAGES_PER_SECTION; unsigned long flags;
#ifdef CONFIG_ZONE_DEVICE @@ -465,14 +466,17 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) return; #endif
+ clear_zone_contiguous(zone); + pgdat_resize_lock(zone->zone_pgdat, &flags); shrink_zone_span(zone, start_pfn, start_pfn + nr_pages); update_pgdat_span(pgdat); pgdat_resize_unlock(zone->zone_pgdat, &flags); + + set_zone_contiguous(zone); }
-static void __remove_section(struct zone *zone, struct mem_section *ms, - unsigned long map_offset, +static void __remove_section(struct mem_section *ms, unsigned long map_offset, struct vmem_altmap *altmap) { unsigned long start_pfn; @@ -483,14 +487,12 @@ static void __remove_section(struct zone *zone, struct mem_section *ms,
scn_nr = __section_nr(ms); start_pfn = section_nr_to_pfn((unsigned long)scn_nr); - __remove_zone(zone, start_pfn);
sparse_remove_one_section(ms, map_offset, altmap); }
/** - * __remove_pages() - remove sections of pages from a zone - * @zone: zone from which pages need to be removed + * __remove_pages() - remove sections of pages * @phys_start_pfn: starting pageframe (must be aligned to start of a section) * @nr_pages: number of pages to remove (must be multiple of section size) * @altmap: alternative device page map or %NULL if default memmap is used @@ -500,8 +502,8 @@ static void __remove_section(struct zone *zone, struct mem_section *ms, * sure that pages are marked reserved and zones are adjust properly by * calling offline_pages(). */ -void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, - unsigned long nr_pages, struct vmem_altmap *altmap) +void __remove_pages(unsigned long phys_start_pfn, unsigned long nr_pages, + struct vmem_altmap *altmap) { unsigned long i; unsigned long map_offset = 0; @@ -510,8 +512,6 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, if (altmap) map_offset = vmem_altmap_offset(altmap);
- clear_zone_contiguous(zone); - /* * We can only remove entire sections */ @@ -523,12 +523,9 @@ void __remove_pages(struct zone *zone, unsigned long phys_start_pfn, unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
cond_resched(); - __remove_section(zone, __pfn_to_section(pfn), map_offset, - altmap); + __remove_section(__pfn_to_section(pfn), map_offset, altmap); map_offset = 0; } - - set_zone_contiguous(zone); }
int set_online_page_callback(online_page_callback_t callback) @@ -898,6 +895,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ (unsigned long long) pfn << PAGE_SHIFT, (((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1); memory_notify(MEM_CANCEL_ONLINE, &arg); + remove_pfn_range_from_zone(zone, pfn, nr_pages); mem_hotplug_done(); return ret; } @@ -1693,6 +1691,7 @@ static int __ref __offline_pages(unsigned long start_pfn, writeback_set_ratelimit();
memory_notify(MEM_OFFLINE, &arg); + remove_pfn_range_from_zone(zone, start_pfn, nr_pages); mem_hotplug_done(); return 0;
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
Merge 91 patches from 4.19.100 stable branch (93 total) beside 2 already merged patches: a243850 Documentation: Document arm64 kpti control a3cf10b mm/memory_hotplug: make remove_memory() take the device_hotplug_lock
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile index a2be0c7..f1e4282 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 99 +SUBLEVEL = 100 EXTRAVERSION = NAME = "People's Front"
From: Bin Liu b-liu@ti.com
commit 09ed259fac621634d51cd986aa8d65f035662658 upstream.
VBUS should be turned off when leaving the host mode. Set GCTL_PRTCAP to device mode in teardown to de-assert DRVVBUS pin to turn off VBUS power.
Fixes: 5f94adfeed97 ("usb: dwc3: core: refactor mode initialization to its own function") Cc: stable@vger.kernel.org Signed-off-by: Bin Liu b-liu@ti.com Signed-off-by: Felipe Balbi balbi@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/usb/dwc3/core.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index f52fcbc..6666d2a 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1199,6 +1199,9 @@ static void dwc3_core_exit_mode(struct dwc3 *dwc) /* do nothing */ break; } + + /* de-assert DRVVBUS for HOST and OTG mode */ + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); }
static void dwc3_get_properties(struct dwc3 *dwc)
From: Andrey Shvetsov andrey.shvetsov@k2l.de
commit 4d1356ac12f4d5180d0df345d85ff0ee42b89c72 upstream.
If the length of the socket buffer is 0xFFFFFFFF (max size for an unsigned int), then payload_len becomes 0xFFFFFFF1 after subtracting 14 (ETH_HLEN). Then, mdp_len is set to payload_len + 16 (MDP_HDR_LEN) which overflows and results in a value of 2. These values for payload_len and mdp_len will pass current buffer size checks.
This patch checks whether the sum derived from skb->len may overflow.
The check is based on the following idea:
For any `unsigned V1, V2` and derived `unsigned SUM = V1 + V2`, `V1 + V2` overflows iff `SUM < V1`.
Reported-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Andrey Shvetsov andrey.shvetsov@k2l.de Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20200116172238.6046-1-andrey.shvetsov@microchip.co... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/staging/most/net/net.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/drivers/staging/most/net/net.c b/drivers/staging/most/net/net.c index 30d816b..ff80834 100644 --- a/drivers/staging/most/net/net.c +++ b/drivers/staging/most/net/net.c @@ -81,6 +81,11 @@ static int skb_to_mamac(const struct sk_buff *skb, struct mbo *mbo) unsigned int payload_len = skb->len - ETH_HLEN; unsigned int mdp_len = payload_len + MDP_HDR_LEN;
+ if (mdp_len < skb->len) { + pr_err("drop: too large packet! (%u)\n", skb->len); + return -EINVAL; + } + if (mbo->buffer_length < mdp_len) { pr_err("drop: too small buffer! (%d for %d)\n", mbo->buffer_length, mdp_len); @@ -128,6 +133,11 @@ static int skb_to_mep(const struct sk_buff *skb, struct mbo *mbo) u8 *buff = mbo->virt_address; unsigned int mep_len = skb->len + MEP_HDR_LEN;
+ if (mep_len < skb->len) { + pr_err("drop: too large packet! (%u)\n", skb->len); + return -EINVAL; + } + if (mbo->buffer_length < mep_len) { pr_err("drop: too small buffer! (%d for %d)\n", mbo->buffer_length, mep_len);
From: Colin Ian King colin.king@canonical.com
commit 4cc41cbce536876678b35e03c4a8a7bb72c78fa9 upstream.
Currently when the call to prism2sta_ifst fails a netdev_err error is reported, error return variable result is set to -1 but the function always returns 0 for success. Fix this by returning the error value in variable result rather than 0.
Addresses-Coverity: ("Unused value") Fixes: 00b3ed168508 ("Staging: add wlan-ng prism2 usb driver") Signed-off-by: Colin Ian King colin.king@canonical.com Cc: stable stable@vger.kernel.org Link: https://lore.kernel.org/r/20200114181604.390235-1-colin.king@canonical.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/staging/wlan-ng/prism2mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/staging/wlan-ng/prism2mgmt.c b/drivers/staging/wlan-ng/prism2mgmt.c index 7350fe5..a8860d2 100644 --- a/drivers/staging/wlan-ng/prism2mgmt.c +++ b/drivers/staging/wlan-ng/prism2mgmt.c @@ -959,7 +959,7 @@ int prism2mgmt_flashdl_state(struct wlandevice *wlandev, void *msgp) } }
- return 0; + return result; }
/*----------------------------------------------------------------
From: Malcolm Priestley tvboxspy@gmail.com
commit d971fdd3412f8342747778fb59b8803720ed82b1 upstream.
It appears that the driver still transmits in CTS protect mode even though it is not enabled in mac80211.
That is, packet types PK_TYPE_11GA and PK_TYPE_11GB both use CTS protect. The only difference between them is that GA does not use B rates.
In protect mode, use PK_TYPE_11GB if the basic rates include B rates and PK_TYPE_11GA if they do not; otherwise transmit packets as PK_TYPE_11A.
Cc: stable stable@vger.kernel.org Signed-off-by: Malcolm Priestley tvboxspy@gmail.com Link: https://lore.kernel.org/r/9c1323ff-dbb3-0eaa-43e1-9453f7390dc0@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/staging/vt6656/device.h | 2 ++ drivers/staging/vt6656/rxtx.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/staging/vt6656/device.h b/drivers/staging/vt6656/device.h index cabdda2..77e59a9 100644 --- a/drivers/staging/vt6656/device.h +++ b/drivers/staging/vt6656/device.h @@ -52,6 +52,8 @@ #define RATE_AUTO 12
#define MAX_RATE 12 +#define VNT_B_RATES (BIT(RATE_1M) | BIT(RATE_2M) |\ + BIT(RATE_5M) | BIT(RATE_11M))
/* * device specific diff --git a/drivers/staging/vt6656/rxtx.c b/drivers/staging/vt6656/rxtx.c index 9def074..7ca5b41 100644 --- a/drivers/staging/vt6656/rxtx.c +++ b/drivers/staging/vt6656/rxtx.c @@ -815,10 +815,14 @@ int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb) if (info->band == NL80211_BAND_5GHZ) { pkt_type = PK_TYPE_11A; } else { - if (tx_rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT) - pkt_type = PK_TYPE_11GB; - else - pkt_type = PK_TYPE_11GA; + if (tx_rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { + if (priv->basic_rates & VNT_B_RATES) + pkt_type = PK_TYPE_11GB; + else + pkt_type = PK_TYPE_11GA; + } else { + pkt_type = PK_TYPE_11A; + } } } else { pkt_type = PK_TYPE_11B;
From: Malcolm Priestley tvboxspy@gmail.com
commit d579c43c82f093e63639151625b2139166c730fd upstream.
It appears that the driver does not go into power save correctly: the NULL data packets are not being transmitted because it is not enabled in mac80211.
The driver needs to capture ieee80211_is_nullfunc headers and copy the duration_id to its own duration data header.
Cc: stable stable@vger.kernel.org Signed-off-by: Malcolm Priestley tvboxspy@gmail.com Link: https://lore.kernel.org/r/610971ae-555b-a6c3-61b3-444a0c1e35b4@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/staging/vt6656/main_usb.c | 1 + drivers/staging/vt6656/rxtx.c | 14 +++++--------- 2 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index ff9cf0f..36562ac 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -978,6 +978,7 @@ int vnt_init(struct vnt_private *priv) ieee80211_hw_set(priv->hw, RX_INCLUDES_FCS); ieee80211_hw_set(priv->hw, REPORTS_TX_ACK_STATUS); ieee80211_hw_set(priv->hw, SUPPORTS_PS); + ieee80211_hw_set(priv->hw, PS_NULLFUNC_STACK);
priv->hw->max_signal = 100;
diff --git a/drivers/staging/vt6656/rxtx.c b/drivers/staging/vt6656/rxtx.c index 7ca5b41..4b4f07f 100644 --- a/drivers/staging/vt6656/rxtx.c +++ b/drivers/staging/vt6656/rxtx.c @@ -278,11 +278,9 @@ static u16 vnt_rxtx_datahead_g(struct vnt_usb_send_context *tx_context, PK_TYPE_11B, &buf->b);
/* Get Duration and TimeStamp */ - if (ieee80211_is_pspoll(hdr->frame_control)) { - __le16 dur = cpu_to_le16(priv->current_aid | BIT(14) | BIT(15)); - - buf->duration_a = dur; - buf->duration_b = dur; + if (ieee80211_is_nullfunc(hdr->frame_control)) { + buf->duration_a = hdr->duration_id; + buf->duration_b = hdr->duration_id; } else { buf->duration_a = vnt_get_duration_le(priv, tx_context->pkt_type, need_ack); @@ -371,10 +369,8 @@ static u16 vnt_rxtx_datahead_ab(struct vnt_usb_send_context *tx_context, tx_context->pkt_type, &buf->ab);
/* Get Duration and TimeStampOff */ - if (ieee80211_is_pspoll(hdr->frame_control)) { - __le16 dur = cpu_to_le16(priv->current_aid | BIT(14) | BIT(15)); - - buf->duration = dur; + if (ieee80211_is_nullfunc(hdr->frame_control)) { + buf->duration = hdr->duration_id; } else { buf->duration = vnt_get_duration_le(priv, tx_context->pkt_type, need_ack);
From: Malcolm Priestley tvboxspy@gmail.com
commit 9dd631fa99dc0a0dfbd191173bf355ba30ea786a upstream.
The driver's reporting of IEEE80211_TX_STAT_ACK is not being handled correctly. The driver should only report it when the TSR_TMO flag is not set, indicating no transmission errors, and when IEEE80211_TX_CTL_NO_ACK is not being requested.
Cc: stable stable@vger.kernel.org Signed-off-by: Malcolm Priestley tvboxspy@gmail.com Link: https://lore.kernel.org/r/340f1f7f-c310-dca5-476f-abc059b9cd97@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/staging/vt6656/int.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/staging/vt6656/int.c b/drivers/staging/vt6656/int.c index 504424b..af0060c 100644 --- a/drivers/staging/vt6656/int.c +++ b/drivers/staging/vt6656/int.c @@ -97,9 +97,11 @@ static int vnt_int_report_rate(struct vnt_private *priv, u8 pkt_no, u8 tsr)
info->status.rates[0].count = tx_retry;
- if (!(tsr & (TSR_TMO | TSR_RETRYTMO))) { + if (!(tsr & TSR_TMO)) { info->status.rates[0].idx = idx; - info->flags |= IEEE80211_TX_STAT_ACK; + + if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) + info->flags |= IEEE80211_TX_STAT_ACK; }
ieee80211_tx_status_irqsafe(priv->hw, context->skb);
From: Lukas Wunner lukas@wunner.de
commit dc76697d7e933d5e299116f219c890568785ea15 upstream.
Unbinding the bcm2835aux UART driver raises the following error if the maximum number of 8250 UARTs is set to 1 (via the 8250.nr_uarts module parameter or CONFIG_SERIAL_8250_RUNTIME_UARTS):
(NULL device *): Removing wrong port: a6f80333 != fa20408b
That's because bcm2835aux_serial_probe() retrieves UART line number 1 from the devicetree and stores it in data->uart.port.line, while serial8250_register_8250_port() instead uses UART line number 0, which is stored in data->line.
On driver unbind, bcm2835aux_serial_remove() uses data->uart.port.line, which contains the wrong number. Fix it.
The issue does not occur if the maximum number of 8250 UARTs is >= 2.
Fixes: bdc5f3009580 ("serial: bcm2835: add driver for bcm2835-aux-uart") Signed-off-by: Lukas Wunner lukas@wunner.de Cc: stable@vger.kernel.org # v4.6+ Cc: Martin Sperl kernel@martin.sperl.org Reviewed-by: Nicolas Saenz Julienne nsaenzjulienne@suse.de Tested-by: Nicolas Saenz Julienne nsaenzjulienne@suse.de Link: https://lore.kernel.org/r/912ccf553c5258135c6d7e8f404a101ef320f0f4.157917522... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/tty/serial/8250/8250_bcm2835aux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/serial/8250/8250_bcm2835aux.c b/drivers/tty/serial/8250/8250_bcm2835aux.c index bd53661..3173d98 100644 --- a/drivers/tty/serial/8250/8250_bcm2835aux.c +++ b/drivers/tty/serial/8250/8250_bcm2835aux.c @@ -115,7 +115,7 @@ static int bcm2835aux_serial_remove(struct platform_device *pdev) { struct bcm2835aux_data *data = platform_get_drvdata(pdev);
- serial8250_unregister_port(data->uart.port.line); + serial8250_unregister_port(data->line); clk_disable_unprepare(data->clk);
return 0;
From: Lubomir Rintel lkundrak@v3.sk
commit ef9ffc1e5f1ac73ecd2fb3b70db2a3b2472ff2f7 upstream.
The match data does not have to be a struct device pointer, and indeed very often is not. Attempting to treat it as such easily results in a crash.
For the components that are not registered, we don't know which device is missing. Once it is there, we can use the struct component to get the device and whether it's bound or not.
Fixes: 59e73854b5fd ('component: add debugfs support') Signed-off-by: Lubomir Rintel lkundrak@v3.sk Cc: stable stable@vger.kernel.org Cc: Arnaud Pouliquen arnaud.pouliquen@st.com Link: https://lore.kernel.org/r/20191118115431.63626-1-lkundrak@v3.sk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/component.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/base/component.c b/drivers/base/component.c index e8d676f..7f7c423 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -74,11 +74,11 @@ static int component_devices_show(struct seq_file *s, void *data) seq_printf(s, "%-40s %20s\n", "device name", "status"); seq_puts(s, "-------------------------------------------------------------\n"); for (i = 0; i < match->num; i++) { - struct device *d = (struct device *)match->compare[i].data; + struct component *component = match->compare[i].component;
- seq_printf(s, "%-40s %20s\n", dev_name(d), - match->compare[i].component ? - "registered" : "not registered"); + seq_printf(s, "%-40s %20s\n", + component ? dev_name(component->dev) : "(unknown)", + component ? (component->bound ? "bound" : "not bound") : "not registered"); } mutex_unlock(&component_mutex);
From: Tomas Winkler tomas.winkler@intel.com
commit 559e575a8946a6561dfe8880de341d4ef78d5994 upstream.
Add Comet Point device IDs for Comet Lake H platforms.
Cc: stable@vger.kernel.org Signed-off-by: Tomas Winkler tomas.winkler@intel.com Link: https://lore.kernel.org/r/20200119094229.20116-1-tomas.winkler@intel.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/misc/mei/hw-me-regs.h | 4 ++++ drivers/misc/mei/pci-me.c | 2 ++ 2 files changed, 6 insertions(+)
diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 9c40424..d80372d 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -141,8 +141,12 @@
#define MEI_DEV_ID_CMP_LP 0x02e0 /* Comet Point LP */ #define MEI_DEV_ID_CMP_LP_3 0x02e4 /* Comet Point LP 3 (iTouch) */ + #define MEI_DEV_ID_CMP_V 0xA3BA /* Comet Point Lake V */
+#define MEI_DEV_ID_CMP_H 0x06e0 /* Comet Lake H */ +#define MEI_DEV_ID_CMP_H_3 0x06e4 /* Comet Lake H 3 (iTouch) */ + #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */
#define MEI_DEV_ID_TGP_LP 0xA0E0 /* Tiger Lake Point LP */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 41a10e3..3498c10b 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -108,6 +108,8 @@ {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_LP_3, MEI_ME_PCH8_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_V, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)},
From: Andy Shevchenko andriy.shevchenko@linux.intel.com
commit e825070f697abddf3b9b0a675ed0ff1884114818 upstream.
The commit 41c128cb25ce ("iio: st_gyro: Add lsm9ds0-gyro support") assumes that the gyro in LSM9DS0 is the same as others with 0xd4 WAI ID, but the datasheet tells a slightly different story, i.e. the first scale factor for the chip is 245 dps, and not 250 dps.
Correct this by introducing separate settings for LSM9DS0.
Fixes: 41c128cb25ce ("iio: st_gyro: Add lsm9ds0-gyro support") Depends-on: 45a4e4220bf4 ("iio: gyro: st_gyro: fix L3GD20H support") Cc: Leonard Crestez leonard.crestez@nxp.com Cc: Lorenzo Bianconi lorenzo.bianconi83@gmail.com Cc: Stable@vger.kernel.org Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Jonathan Cameron Jonathan.Cameron@huawei.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/iio/gyro/st_gyro_core.c | 75 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-)
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index b31064b..02f6f26 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -141,7 +141,6 @@ [2] = LSM330DLC_GYRO_DEV_NAME, [3] = L3G4IS_GYRO_DEV_NAME, [4] = LSM330_GYRO_DEV_NAME, - [5] = LSM9DS0_GYRO_DEV_NAME, }, .ch = (struct iio_chan_spec *)st_gyro_16bit_channels, .odr = { @@ -212,6 +211,80 @@ .bootime = 2, }, { + .wai = 0xd4, + .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, + .sensors_supported = { + [0] = LSM9DS0_GYRO_DEV_NAME, + }, + .ch = (struct iio_chan_spec *)st_gyro_16bit_channels, + .odr = { + .addr = 0x20, + .mask = GENMASK(7, 6), + .odr_avl = { + { .hz = 95, .value = 0x00, }, + { .hz = 190, .value = 0x01, }, + { .hz = 380, .value = 0x02, }, + { .hz = 760, .value = 0x03, }, + }, + }, + .pw = { + .addr = 0x20, + .mask = BIT(3), + .value_on = ST_SENSORS_DEFAULT_POWER_ON_VALUE, + .value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE, + }, + .enable_axis = { + .addr = ST_SENSORS_DEFAULT_AXIS_ADDR, + .mask = ST_SENSORS_DEFAULT_AXIS_MASK, + }, + .fs = { + .addr = 0x23, + .mask = GENMASK(5, 4), + .fs_avl = { + [0] = { + .num = ST_GYRO_FS_AVL_245DPS, + .value = 0x00, + .gain = IIO_DEGREE_TO_RAD(8750), + }, + [1] = { + .num = ST_GYRO_FS_AVL_500DPS, + .value = 0x01, + .gain = IIO_DEGREE_TO_RAD(17500), + }, + [2] = { + .num = ST_GYRO_FS_AVL_2000DPS, + .value = 0x02, + .gain = IIO_DEGREE_TO_RAD(70000), + }, + }, + }, + .bdu = { + .addr = 0x23, + .mask = BIT(7), + }, + .drdy_irq = { + .int2 = { + .addr = 0x22, + .mask = BIT(3), + }, + /* + * The sensor has IHL (active low) and open + * drain settings, but only for INT1 and not + * for the DRDY line on INT2. + */ + .stat_drdy = { + .addr = ST_SENSORS_DEFAULT_STAT_ADDR, + .mask = GENMASK(2, 0), + }, + }, + .sim = { + .addr = 0x23, + .value = BIT(0), + }, + .multi_read_bit = true, + .bootime = 2, + }, + { .wai = 0xd7, .wai_addr = ST_SENSORS_DEFAULT_WAI_ADDRESS, .sensors_supported = {
From: Eric Biggers ebiggers@google.com
commit bd56cea012fc2d6381e8cd3209510ce09f9de8c9 upstream.
The chelsio crypto driver is casting 'struct crypto_aead' directly to 'struct crypto_tfm', which is incorrect because the crypto_tfm isn't the first field of 'struct crypto_aead'. Consequently, the calls to crypto_tfm_set_flags() are modifying some other field in the struct.
Also, the driver is setting CRYPTO_TFM_RES_BAD_KEY_LEN in ->setauthsize(), not just in ->setkey(). This is incorrect since this flag is for bad key lengths, not for bad authentication tag lengths.
Fix these bugs by removing the broken crypto_tfm_set_flags() calls from ->setauthsize() and by fixing them in ->setkey().
Fixes: 324429d74127 ("chcr: Support for Chelsio's Crypto Hardware") Cc: stable@vger.kernel.org # v4.9+ Cc: Atul Gupta atul.gupta@chelsio.com Signed-off-by: Eric Biggers ebiggers@google.com Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/crypto/chelsio/chcr_algo.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-)
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 010bbf6..c435f89 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -3135,9 +3135,6 @@ static int chcr_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) aeadctx->mayverify = VERIFY_SW; break; default: - - crypto_tfm_set_flags((struct crypto_tfm *) tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -3162,8 +3159,6 @@ static int chcr_4106_4309_setauthsize(struct crypto_aead *tfm, aeadctx->mayverify = VERIFY_HW; break; default: - crypto_tfm_set_flags((struct crypto_tfm *)tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -3204,8 +3199,6 @@ static int chcr_ccm_setauthsize(struct crypto_aead *tfm, aeadctx->mayverify = VERIFY_HW; break; default: - crypto_tfm_set_flags((struct crypto_tfm *)tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -3230,8 +3223,7 @@ static int chcr_ccm_common_setkey(struct crypto_aead *aead, ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; } else { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); aeadctx->enckey_len = 0; return -EINVAL; } @@ -3269,8 +3261,7 @@ static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key, int error;
if (keylen < 3) { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); aeadctx->enckey_len = 0; return -EINVAL; } @@ -3320,8 +3311,7 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, } else if (keylen == AES_KEYSIZE_256) { ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; } else { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); pr_err("GCM: Invalid key length %d\n", keylen); ret = -EINVAL; goto out;
From: "Paulo Alcantara (SUSE)" pc@cjr.nz
commit 0a5a98863c9debc02387b3d23c46d187756f5e2b upstream.
__smb2_handle_cancelled_cmd() is called under a spin lock held in cifs_mid_q_entry_release(), so make its memory allocation GFP_ATOMIC.
This issue was observed when running xfstests generic/028:
[ 1722.589204] CIFS VFS: \\192.168.30.26 Cancelling wait for mid 72064 cmd: 5 [ 1722.590687] CIFS VFS: \\192.168.30.26 Cancelling wait for mid 72065 cmd: 17 [ 1722.593529] CIFS VFS: \\192.168.30.26 Cancelling wait for mid 72066 cmd: 6 [ 1723.039014] BUG: sleeping function called from invalid context at mm/slab.h:565 [ 1723.040710] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 30877, name: cifsd [ 1723.045098] CPU: 3 PID: 30877 Comm: cifsd Not tainted 5.5.0-rc4+ #313 [ 1723.046256] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-59-gc9ba527-rebuilt.opensuse.org 04/01/2014 [ 1723.048221] Call Trace: [ 1723.048689] dump_stack+0x97/0xe0 [ 1723.049268] ___might_sleep.cold+0xd1/0xe1 [ 1723.050069] kmem_cache_alloc_trace+0x204/0x2b0 [ 1723.051051] __smb2_handle_cancelled_cmd+0x40/0x140 [cifs] [ 1723.052137] smb2_handle_cancelled_mid+0xf6/0x120 [cifs] [ 1723.053247] cifs_mid_q_entry_release+0x44d/0x630 [cifs] [ 1723.054351] ? cifs_reconnect+0x26a/0x1620 [cifs] [ 1723.055325] cifs_demultiplex_thread+0xad4/0x14a0 [cifs] [ 1723.056458] ? cifs_handle_standard+0x2c0/0x2c0 [cifs] [ 1723.057365] ? kvm_sched_clock_read+0x14/0x30 [ 1723.058197] ? sched_clock+0x5/0x10 [ 1723.058838] ? sched_clock_cpu+0x18/0x110 [ 1723.059629] ? lockdep_hardirqs_on+0x17d/0x250 [ 1723.060456] kthread+0x1ab/0x200 [ 1723.061149] ? cifs_handle_standard+0x2c0/0x2c0 [cifs] [ 1723.062078] ? kthread_create_on_node+0xd0/0xd0 [ 1723.062897] ret_from_fork+0x3a/0x50
Signed-off-by: Paulo Alcantara (SUSE) pc@cjr.nz Fixes: 9150c3adbf24 ("CIFS: Close open handle after interrupted close") Cc: Stable stable@vger.kernel.org Signed-off-by: Steve French stfrench@microsoft.com Reviewed-by: Pavel Shilovsky pshilov@microsoft.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/cifs/smb2misc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 766974f..14265b4 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -750,7 +750,7 @@ struct smb2_lease_break_work { { struct close_cancelled_open *cancelled;
- cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL); + cancelled = kzalloc(sizeof(*cancelled), GFP_ATOMIC); if (!cancelled) return -ENOMEM;
From: Johan Hovold johan@kernel.org
commit 0ef332951e856efa89507cdd13ba8f4fb8d4db12 upstream.
Make sure to use the current alternate setting when verifying the storage interface descriptors to avoid submitting an URB to an invalid endpoint.
Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on.
Fixes: 36bcce430657 ("ath9k_htc: Handle storage devices") Cc: stable stable@vger.kernel.org # 2.6.39 Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/wireless/ath/ath9k/hif_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index fb649d8..dd0c323 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -1216,7 +1216,7 @@ static void ath9k_hif_usb_firmware_cb(const struct firmware *fw, void *context) static int send_eject_command(struct usb_interface *interface) { struct usb_device *udev = interface_to_usbdev(interface); - struct usb_host_interface *iface_desc = &interface->altsetting[0]; + struct usb_host_interface *iface_desc = interface->cur_altsetting; struct usb_endpoint_descriptor *endpoint; unsigned char *cmd; u8 bulk_out_ep;
From: Johan Hovold johan@kernel.org
commit 3428fbcd6e6c0850b1a8b2a12082b7b2aabb3da3 upstream.
Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface.
Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on.
Fixes: 71bb244ba2fd ("brcm80211: fmac: add USB support for bcm43235/6/8 chipsets") Cc: stable stable@vger.kernel.org # 3.4 Cc: Arend van Spriel arend@broadcom.com Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 44ead0f..6a213fe 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1357,7 +1357,7 @@ static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo) goto fail; }
- desc = &intf->altsetting[0].desc; + desc = &intf->cur_altsetting->desc; if ((desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC) || (desc->bInterfaceSubClass != 2) || (desc->bInterfaceProtocol != 0xff)) { @@ -1370,7 +1370,7 @@ static int brcmf_usb_probe_cb(struct brcmf_usbdev_info *devinfo)
num_of_eps = desc->bNumEndpoints; for (ep = 0; ep < num_of_eps; ep++) { - endpoint = &intf->altsetting[0].endpoint[ep].desc; + endpoint = &intf->cur_altsetting->endpoint[ep].desc; endpoint_num = usb_endpoint_num(endpoint); if (!usb_endpoint_xfer_bulk(endpoint)) continue;
From: Johan Hovold johan@kernel.org
commit 39a4281c312f2d226c710bc656ce380c621a2b16 upstream.
Make sure to use the current alternate setting when verifying the interface descriptors to avoid binding to an invalid interface.
Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on.
Fixes: 26f1fad29ad9 ("New driver: rtl8xxxu (mac80211)") Cc: stable stable@vger.kernel.org # 4.4 Cc: Jes Sorensen Jes.Sorensen@redhat.com Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index b2e1523..070ea0f 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -5921,7 +5921,7 @@ static int rtl8xxxu_parse_usb(struct rtl8xxxu_priv *priv, u8 dir, xtype, num; int ret = 0;
- host_interface = &interface->altsetting[0]; + host_interface = interface->cur_altsetting; interface_desc = &host_interface->desc; endpoints = interface_desc->bNumEndpoints;
From: Johan Hovold johan@kernel.org
commit 2d68bb2687abb747558b933e80845ff31570a49c upstream.
Make sure to use the current alternate setting when verifying the storage interface descriptors to avoid submitting an URB to an invalid endpoint.
Failing to do so could cause the driver to misbehave or trigger a WARN() in usb_submit_urb() that kernels with panic_on_warn set would choke on.
Fixes: a1030e92c150 ("[PATCH] zd1211rw: Convert installer CDROM device into WLAN device") Cc: stable stable@vger.kernel.org # 2.6.19 Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/wireless/zydas/zd1211rw/zd_usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index c2cda3a..0fddfb4 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -1275,7 +1275,7 @@ static void print_id(struct usb_device *udev) static int eject_installer(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - struct usb_host_interface *iface_desc = &intf->altsetting[0]; + struct usb_host_interface *iface_desc = intf->cur_altsetting; struct usb_endpoint_descriptor *endpoint; unsigned char *cmd; u8 bulk_out_ep;
From: Eric Dumazet edumazet@google.com
[ Upstream commit 55cd9f67f1e45de8517cdaab985fb8e56c0bc1d8 ]
It is possible for malicious userspace to set TCF_EM_SIMPLE bit even for matches that should not have this bit set.
This can fool two places using tcf_em_is_simple()
1) tcf_em_tree_destroy() -> memory leak of em->data if ops->destroy() is NULL
2) tcf_em_tree_dump() wrongly reports/leaks 4 low-order bytes of a kernel pointer.
BUG: memory leak unreferenced object 0xffff888121850a40 (size 32): comm "syz-executor927", pid 7193, jiffies 4294941655 (age 19.840s) hex dump (first 32 bytes): 00 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000f67036ea>] kmemleak_alloc_recursive include/linux/kmemleak.h:43 [inline] [<00000000f67036ea>] slab_post_alloc_hook mm/slab.h:586 [inline] [<00000000f67036ea>] slab_alloc mm/slab.c:3320 [inline] [<00000000f67036ea>] __do_kmalloc mm/slab.c:3654 [inline] [<00000000f67036ea>] __kmalloc_track_caller+0x165/0x300 mm/slab.c:3671 [<00000000fab0cc8e>] kmemdup+0x27/0x60 mm/util.c:127 [<00000000d9992e0a>] kmemdup include/linux/string.h:453 [inline] [<00000000d9992e0a>] em_nbyte_change+0x5b/0x90 net/sched/em_nbyte.c:32 [<000000007e04f711>] tcf_em_validate net/sched/ematch.c:241 [inline] [<000000007e04f711>] tcf_em_tree_validate net/sched/ematch.c:359 [inline] [<000000007e04f711>] tcf_em_tree_validate+0x332/0x46f net/sched/ematch.c:300 [<000000007a769204>] basic_set_parms net/sched/cls_basic.c:157 [inline] [<000000007a769204>] basic_change+0x1d7/0x5f0 net/sched/cls_basic.c:219 [<00000000e57a5997>] tc_new_tfilter+0x566/0xf70 net/sched/cls_api.c:2104 [<0000000074b68559>] rtnetlink_rcv_msg+0x3b2/0x4b0 net/core/rtnetlink.c:5415 [<00000000b7fe53fb>] netlink_rcv_skb+0x61/0x170 net/netlink/af_netlink.c:2477 [<00000000e83a40d0>] rtnetlink_rcv+0x1d/0x30 net/core/rtnetlink.c:5442 [<00000000d62ba933>] netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] [<00000000d62ba933>] netlink_unicast+0x223/0x310 net/netlink/af_netlink.c:1328 [<0000000088070f72>] netlink_sendmsg+0x2c0/0x570 net/netlink/af_netlink.c:1917 [<00000000f70b15ea>] sock_sendmsg_nosec net/socket.c:639 [inline] [<00000000f70b15ea>] sock_sendmsg+0x54/0x70 net/socket.c:659 [<00000000ef95a9be>] ____sys_sendmsg+0x2d0/0x300 net/socket.c:2330 [<00000000b650f1ab>] ___sys_sendmsg+0x8a/0xd0 net/socket.c:2384 
[<0000000055bfa74a>] __sys_sendmsg+0x80/0xf0 net/socket.c:2417 [<000000002abac183>] __do_sys_sendmsg net/socket.c:2426 [inline] [<000000002abac183>] __se_sys_sendmsg net/socket.c:2424 [inline] [<000000002abac183>] __x64_sys_sendmsg+0x23/0x30 net/socket.c:2424
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot+03c4738ed29d5d366ddf@syzkaller.appspotmail.com Cc: Cong Wang xiyou.wangcong@gmail.com Acked-by: Cong Wang xiyou.wangcong@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/sched/ematch.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 750d88d..113a133 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -242,6 +242,9 @@ static int tcf_em_validate(struct tcf_proto *tp, goto errout;
if (em->ops->change) { + err = -EINVAL; + if (em_hdr->flags & TCF_EM_SIMPLE) + goto errout; err = em->ops->change(net, data, data_len, em); if (err < 0) goto errout;
From: Cong Wang xiyou.wangcong@gmail.com
[ Upstream commit 2e24cd755552350b94a7617617c6877b8cbcb701 ]
The current implementations of ops->bind_class() are merely searching for classid and updating class in the struct tcf_result, without invoking either of cl_ops->bind_tcf() or cl_ops->unbind_tcf(). This breaks the design of them as qdiscs like cbq use them to count filters too. This is why syzbot triggered the warning in cbq_destroy_class().
In order to fix this, we have to call cl_ops->bind_tcf() and cl_ops->unbind_tcf() like the filter binding path. This patch does so by refactoring out two helper functions __tcf_bind_filter() and __tcf_unbind_filter(), which are lockless and accept a Qdisc pointer, then teaching each implementation to call them correctly.
Note, we merely pass the Qdisc pointer as an opaque pointer to each filter; they only need to pass it down to the helper functions without understanding it at all.
Fixes: 07d79fc7d94e ("net_sched: add reverse binding for tc class") Reported-and-tested-by: syzbot+0a0596220218fcb603a8@syzkaller.appspotmail.com Reported-and-tested-by: syzbot+63bdb6006961d8c917c6@syzkaller.appspotmail.com Cc: Jamal Hadi Salim jhs@mojatatu.com Cc: Jiri Pirko jiri@resnulli.us Signed-off-by: Cong Wang xiyou.wangcong@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/net/pkt_cls.h | 33 +++++++++++++++++++-------------- include/net/sch_generic.h | 3 ++- net/sched/cls_basic.c | 11 ++++++++--- net/sched/cls_bpf.c | 11 ++++++++--- net/sched/cls_flower.c | 11 ++++++++--- net/sched/cls_fw.c | 11 ++++++++--- net/sched/cls_matchall.c | 11 ++++++++--- net/sched/cls_route.c | 11 ++++++++--- net/sched/cls_rsvp.h | 11 ++++++++--- net/sched/cls_tcindex.c | 11 ++++++++--- net/sched/cls_u32.c | 11 ++++++++--- net/sched/sch_api.c | 6 ++++-- 12 files changed, 97 insertions(+), 44 deletions(-)
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 75a3f3f..c1162f2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -206,31 +206,38 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, return xchg(clp, cl); }
-static inline unsigned long -cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl) +static inline void +__tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base) { - unsigned long old_cl; + unsigned long cl;
- sch_tree_lock(q); - old_cl = __cls_set_class(clp, cl); - sch_tree_unlock(q); - return old_cl; + cl = q->ops->cl_ops->bind_tcf(q, base, r->classid); + cl = __cls_set_class(&r->class, cl); + if (cl) + q->ops->cl_ops->unbind_tcf(q, cl); }
static inline void tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base) { struct Qdisc *q = tp->chain->block->q; - unsigned long cl;
/* Check q as it is not set for shared blocks. In that case, * setting class is not supported. */ if (!q) return; - cl = q->ops->cl_ops->bind_tcf(q, base, r->classid); - cl = cls_set_class(q, &r->class, cl); - if (cl) + sch_tree_lock(q); + __tcf_bind_filter(q, r, base); + sch_tree_unlock(q); +} + +static inline void +__tcf_unbind_filter(struct Qdisc *q, struct tcf_result *r) +{ + unsigned long cl; + + if ((cl = __cls_set_class(&r->class, 0)) != 0) q->ops->cl_ops->unbind_tcf(q, cl); }
@@ -238,12 +245,10 @@ static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) { struct Qdisc *q = tp->chain->block->q; - unsigned long cl;
if (!q) return; - if ((cl = __cls_set_class(&r->class, 0)) != 0) - q->ops->cl_ops->unbind_tcf(q, cl); + __tcf_unbind_filter(q, r); }
struct tcf_exts { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c9cd508..d737a6a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -273,7 +273,8 @@ struct tcf_proto_ops { int (*reoffload)(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack); - void (*bind_class)(void *, u32, unsigned long); + void (*bind_class)(void *, u32, unsigned long, + void *, unsigned long); void * (*tmplt_create)(struct net *net, struct tcf_chain *chain, struct nlattr **tca, diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 6a5dce8..14098da 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -254,12 +254,17 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) } }
-static void basic_bind_class(void *fh, u32 classid, unsigned long cl) +static void basic_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct basic_filter *f = fh;
- if (f && f->res.classid == classid) - f->res.class = cl; + if (f && f->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &f->res, base); + else + __tcf_unbind_filter(q, &f->res); + } }
static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index fa6fe2f..5d10012 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -627,12 +627,17 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; }
-static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl) +static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl, + void *q, unsigned long base) { struct cls_bpf_prog *prog = fh;
- if (prog && prog->res.classid == classid) - prog->res.class = cl; + if (prog && prog->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &prog->res, base); + else + __tcf_unbind_filter(q, &prog->res); + } }
static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 09b3597..2241531 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1942,12 +1942,17 @@ static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv) return -EMSGSIZE; }
-static void fl_bind_class(void *fh, u32 classid, unsigned long cl) +static void fl_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct cls_fl_filter *f = fh;
- if (f && f->res.classid == classid) - f->res.class = cl; + if (f && f->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &f->res, base); + else + __tcf_unbind_filter(q, &f->res); + } }
static struct tcf_proto_ops cls_fl_ops __read_mostly = { diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 29eeeaf..cb2c626 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -432,12 +432,17 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; }
-static void fw_bind_class(void *fh, u32 classid, unsigned long cl) +static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct fw_filter *f = fh;
- if (f && f->res.classid == classid) - f->res.class = cl; + if (f && f->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &f->res, base); + else + __tcf_unbind_filter(q, &f->res); + } }
static struct tcf_proto_ops cls_fw_ops __read_mostly = { diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 621bc1d..40be745 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -310,12 +310,17 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; }
-static void mall_bind_class(void *fh, u32 classid, unsigned long cl) +static void mall_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct cls_mall_head *head = fh;
- if (head && head->res.classid == classid) - head->res.class = cl; + if (head && head->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &head->res, base); + else + __tcf_unbind_filter(q, &head->res); + } }
static struct tcf_proto_ops cls_mall_ops __read_mostly = { diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 0404aa5..37ae23d 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -645,12 +645,17 @@ static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; }
-static void route4_bind_class(void *fh, u32 classid, unsigned long cl) +static void route4_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct route4_filter *f = fh;
- if (f && f->res.classid == classid) - f->res.class = cl; + if (f && f->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &f->res, base); + else + __tcf_unbind_filter(q, &f->res); + } }
static struct tcf_proto_ops cls_route4_ops __read_mostly = { diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index e9ccf7d..6d30a29 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -736,12 +736,17 @@ static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; }
-static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl) +static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct rsvp_filter *f = fh;
- if (f && f->res.classid == classid) - f->res.class = cl; + if (f && f->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &f->res, base); + else + __tcf_unbind_filter(q, &f->res); + } }
static struct tcf_proto_ops RSVP_OPS __read_mostly = { diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 38bb882..edf2736 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -652,12 +652,17 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, return -1; }
-static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl) +static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl, + void *q, unsigned long base) { struct tcindex_filter_result *r = fh;
- if (r && r->res.classid == classid) - r->res.class = cl; + if (r && r->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &r->res, base); + else + __tcf_unbind_filter(q, &r->res); + } }
static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b2c3406..fe246e0 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1315,12 +1315,17 @@ static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb, return 0; }
-static void u32_bind_class(void *fh, u32 classid, unsigned long cl) +static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q, + unsigned long base) { struct tc_u_knode *n = fh;
- if (n && n->res.classid == classid) - n->res.class = cl; + if (n && n->res.classid == classid) { + if (cl) + __tcf_bind_filter(q, &n->res, base); + else + __tcf_unbind_filter(q, &n->res); + } }
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 84fdc48..39e319d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1803,8 +1803,9 @@ static int tclass_del_notify(struct net *net,
struct tcf_bind_args { struct tcf_walker w; - u32 classid; + unsigned long base; unsigned long cl; + u32 classid; };
static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) @@ -1815,7 +1816,7 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) struct Qdisc *q = tcf_block_q(tp->chain->block);
sch_tree_lock(q); - tp->ops->bind_class(n, a->classid, a->cl); + tp->ops->bind_class(n, a->classid, a->cl, q, a->base); sch_tree_unlock(q); } return 0; @@ -1846,6 +1847,7 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
arg.w.fn = tcf_node_bind; arg.classid = clid; + arg.base = cl; arg.cl = new_cl; tp->ops->walk(tp, &arg.w); }
From: Aaron Ma aaron.ma@canonical.com
[ Upstream commit 348b80b273fbf4ce2a307f9e38eadecf37828cad ]
Add multitouch support for LG MELF I2C touchscreen. Apply the same workaround as LG USB touchscreen.
Signed-off-by: Aaron Ma aaron.ma@canonical.com Signed-off-by: Jiri Kosina jkosina@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-multitouch.c | 3 +++ 2 files changed, 4 insertions(+)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1949d6f..ee243bf 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -717,6 +717,7 @@ #define USB_DEVICE_ID_LG_MULTITOUCH 0x0064 #define USB_DEVICE_ID_LG_MELFAS_MT 0x6007 #define I2C_DEVICE_ID_LG_8001 0x8001 +#define I2C_DEVICE_ID_LG_7010 0x7010
#define USB_VENDOR_ID_LOGITECH 0x046d #define USB_DEVICE_ID_LOGITECH_AUDIOHUB 0x0a0e diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index f9167d0..8403251 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1972,6 +1972,9 @@ static void mt_remove(struct hid_device *hdev) { .driver_data = MT_CLS_LG, HID_USB_DEVICE(USB_VENDOR_ID_LG, USB_DEVICE_ID_LG_MELFAS_MT) }, + { .driver_data = MT_CLS_LG, + HID_DEVICE(BUS_I2C, HID_GROUP_GENERIC, + USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_7010) },
/* MosArt panels */ { .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE,
From: Randy Dunlap rdunlap@infradead.org
[ Upstream commit 1928b36cfa4df1aeedf5f2644d0c33f3a1fcfd7b ]
Fix kconfig warning for arch/arc/plat-eznps/Kconfig allmodconfig:
WARNING: unmet direct dependencies detected for CLKSRC_NPS Depends on [n]: GENERIC_CLOCKEVENTS [=y] && !PHYS_ADDR_T_64BIT [=y] Selected by [y]: - ARC_PLAT_EZNPS [=y]
Signed-off-by: Randy Dunlap rdunlap@infradead.org Cc: Vineet Gupta vgupta@synopsys.com Cc: Ofer Levi oferle@mellanox.com Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Vineet Gupta vgupta@synopsys.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arc/plat-eznps/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index 8eff057..ce908e2c 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -7,7 +7,7 @@ menuconfig ARC_PLAT_EZNPS bool ""EZchip" ARC dev platform" select CPU_BIG_ENDIAN - select CLKSRC_NPS + select CLKSRC_NPS if !PHYS_ADDR_T_64BIT select EZNPS_GIC select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET help
From: Priit Laes plaes@plaes.org
[ Upstream commit c62f7cd8ed066a93a243643ebf57ca99f754388e ]
Without the quirk, joystick shows up as single controller for both first and second player pads/pins.
Signed-off-by: Priit Laes plaes@plaes.org Signed-off-by: Jiri Kosina jkosina@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hid/hid-quirks.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 57d6fe9..b9529be 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -175,6 +175,7 @@ { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD2, USB_DEVICE_ID_SMARTJOY_DUAL_PLUS), HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_QUAD_USB_JOYPAD), HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE), HID_QUIRK_MULTI_INPUT },
{ 0 } };
From: Hans de Goede hdegoede@redhat.com
[ Upstream commit 8f18eca9ebc57d6b150237033f6439242907e0ba ]
The Acer SW5-012 2-in-1 keyboard dock uses a Synaptics S91028 touchpad which is connected to an ITE 8595 USB keyboard controller chip.
This keyboard has the same quirk for its rfkill / airplane mode hotkey as other keyboards with the ITE 8595 chip, it only sends a single release event when pressed and released, it never sends a press event.
This commit adds this keyboard's USB id to the hid-ite id-table, fixing the rfkill key not working on this keyboard.
Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Jiri Kosina jkosina@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-ite.c | 3 +++ 2 files changed, 4 insertions(+)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index ee243bf..03d65b6 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1075,6 +1075,7 @@ #define USB_DEVICE_ID_SYNAPTICS_LTS2 0x1d10 #define USB_DEVICE_ID_SYNAPTICS_HD 0x0ac3 #define USB_DEVICE_ID_SYNAPTICS_QUAD_HD 0x1ac3 +#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012 0x2968 #define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710
#define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047 diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 98b059d..2ce1eb0 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -43,6 +43,9 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field, static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, + /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */ + { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices);
From: Pavel Balan admin@kryma.net
[ Upstream commit fd0913768701612fc2b8ab9c8a5c019133e8d978 ]
Add the I2C_HID_QUIRK_BAD_INPUT_SIZE quirk and apply it to the Lenovo Y720 gaming laptop I2C peripheral.
This fixes dmesg being flooded with errors visible on un-suspend in Linux Mint 19 Cinnamon.
Example of error log:
<...> [ 4.326588] i2c_hid i2c-ITE33D1:00: i2c_hid_get_input: incomplete report (2/4) [ 4.326845] i2c_hid i2c-ITE33D1:00: i2c_hid_get_input: incomplete report (2/4) [ 4.327095] i2c_hid i2c-ITE33D1:00: i2c_hid_get_input: incomplete report (2/4) [ 4.327341] i2c_hid i2c-ITE33D1:00: i2c_hid_get_input: incomplete report (2/4) [ 4.327609] i2c_hid i2c-ITE33D1:00: i2c_hid_get_input: incomplete report (2/4) <...>
Example of fixed log (debug on)
<...> [ 3731.333183] i2c_hid i2c-ITE33D1:00: input: 02 00 [ 3731.333581] i2c_hid i2c-ITE33D1:00: input: 02 00 [ 3731.333842] i2c_hid i2c-ITE33D1:00: input: 02 00 [ 3731.334107] i2c_hid i2c-ITE33D1:00: input: 02 00 [ 3731.334367] i2c_hid i2c-ITE33D1:00: input: 02 00 <...>
[jkosina@suse.cz: rebase onto more recent codebase] Signed-off-by: Pavel Balan admin@kryma.net Signed-off-by: Jiri Kosina jkosina@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hid/hid-ids.h | 1 + drivers/hid/i2c-hid/i2c-hid-core.c | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 03d65b6..f491092 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -620,6 +620,7 @@ #define USB_VENDOR_ID_ITE 0x048d #define USB_DEVICE_ID_ITE_LENOVO_YOGA 0x8386 #define USB_DEVICE_ID_ITE_LENOVO_YOGA2 0x8350 +#define I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720 0x837a #define USB_DEVICE_ID_ITE_LENOVO_YOGA900 0x8396 #define USB_DEVICE_ID_ITE8595 0x8595
diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 0a39e44..f2c8c59 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -52,6 +52,8 @@ #define I2C_HID_QUIRK_DELAY_AFTER_SLEEP BIT(3) #define I2C_HID_QUIRK_BOGUS_IRQ BIT(4) #define I2C_HID_QUIRK_RESET_ON_RESUME BIT(5) +#define I2C_HID_QUIRK_BAD_INPUT_SIZE BIT(6) +
/* flags */ #define I2C_HID_STARTED 0 @@ -185,6 +187,8 @@ struct i2c_hid { I2C_HID_QUIRK_BOGUS_IRQ }, { USB_VENDOR_ID_ALPS_JP, HID_ANY_ID, I2C_HID_QUIRK_RESET_ON_RESUME }, + { USB_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720, + I2C_HID_QUIRK_BAD_INPUT_SIZE }, { 0, 0 } };
@@ -516,9 +520,15 @@ static void i2c_hid_get_input(struct i2c_hid *ihid) }
if ((ret_size > size) || (ret_size < 2)) { - dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n", - __func__, size, ret_size); - return; + if (ihid->quirks & I2C_HID_QUIRK_BAD_INPUT_SIZE) { + ihid->inbuf[0] = size & 0xff; + ihid->inbuf[1] = size >> 8; + ret_size = size; + } else { + dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n", + __func__, size, ret_size); + return; + } }
i2c_hid_dbg(ihid, "input: %*ph\n", ret_size, ihid->inbuf);
From: Pan Zhang zhangpan26@huawei.com
[ Upstream commit 306d5acbfc66e7cccb4d8f91fc857206b8df80d1 ]
1002 if ((quirks & MT_QUIRK_IGNORE_DUPLICATES) && mt) { 1003 struct input_mt_slot *i_slot = &mt->slots[slotnum]; 1004 1005 if (input_mt_is_active(i_slot) && 1006 input_mt_is_used(mt, i_slot)) 1007 return -EAGAIN; 1008 }
We previously assumed 'mt' could be null (see line 1002).
The following situation is similar, so add the same NULL check on 'mt' there.
Signed-off-by: Pan Zhang zhangpan26@huawei.com Signed-off-by: Benjamin Tissoires benjamin.tissoires@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hid/hid-multitouch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 8403251..19dfd8a 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1007,7 +1007,7 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input, tool = MT_TOOL_DIAL; else if (unlikely(!confidence_state)) { tool = MT_TOOL_PALM; - if (!active && + if (!active && mt && input_mt_is_active(&mt->slots[slotnum])) { /* * The non-confidence was reported for
From: Bjorn Andersson bjorn.andersson@linaro.org
[ Upstream commit cd217ee6867d285ceecd610fa1006975d5c683fa ]
It's typical for the QHP PHY to take slightly above 1ms to initialize, so increase the timeout of the PHY ready check to 10ms - as already done in the downstream PCIe driver.
Signed-off-by: Bjorn Andersson bjorn.andersson@linaro.org Tested-by: Evan Green evgreen@chromium.org Tested-by: Vinod Koul vkoul@kernel.org Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Kishon Vijay Abraham I kishon@ti.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/phy/qualcomm/phy-qcom-qmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.c b/drivers/phy/qualcomm/phy-qcom-qmp.c index 4c47010..cf51592 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.c +++ b/drivers/phy/qualcomm/phy-qcom-qmp.c @@ -66,7 +66,7 @@ /* QPHY_V3_PCS_MISC_CLAMP_ENABLE register bits */ #define CLAMP_EN BIT(0) /* enables i/o clamp_n */
-#define PHY_INIT_COMPLETE_TIMEOUT 1000 +#define PHY_INIT_COMPLETE_TIMEOUT 10000 #define POWER_DOWN_DELAY_US_MIN 10 #define POWER_DOWN_DELAY_US_MAX 11
From: Tony Lindgren tony@atomide.com
[ Upstream commit 63078b6ba09e842f09df052c5728857389fddcd2 ]
The micro-USB connector on Motorola Mapphone devices can be muxed between the SoC and the mdm6600 modem. But even when used for the SoC, configuring the PHY with ID pin grounded will wake up the modem from idle state. Looks like the issue is probably caused by line glitches.
We can prevent the glitches by using a previously unknown mode of the GPIO mux to prevent the USB lines from being connected to the modem while configuring the USB PHY, and enable the USB lines after configuring the PHY.
Note that this only prevents waking up mdm6600 as regular USB A-host mode, and does not help when connected to a lapdock. The lapdock specific issue still needs to be debugged separately.
Cc: Merlijn Wajer merlijn@wizzup.org Cc: Pavel Machek pavel@ucw.cz Cc: Sebastian Reichel sre@kernel.org Acked-by: Pavel Machek pavel@ucw.cz Signed-off-by: Tony Lindgren tony@atomide.com Signed-off-by: Kishon Vijay Abraham I kishon@ti.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/phy/motorola/phy-cpcap-usb.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index 4ba3634..593c77d 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -115,7 +115,7 @@ struct cpcap_usb_ints_state { enum cpcap_gpio_mode { CPCAP_DM_DP, CPCAP_MDM_RX_TX, - CPCAP_UNKNOWN, + CPCAP_UNKNOWN_DISABLED, /* Seems to disable USB lines */ CPCAP_OTG_DM_DP, };
@@ -379,7 +379,8 @@ static int cpcap_usb_set_uart_mode(struct cpcap_phy_ddata *ddata) { int error;
- error = cpcap_usb_gpio_set_mode(ddata, CPCAP_DM_DP); + /* Disable lines to prevent glitches from waking up mdm6600 */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_UNKNOWN_DISABLED); if (error) goto out_err;
@@ -406,6 +407,11 @@ static int cpcap_usb_set_uart_mode(struct cpcap_phy_ddata *ddata) if (error) goto out_err;
+ /* Enable UART mode */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_DM_DP); + if (error) + goto out_err; + return 0;
out_err: @@ -418,7 +424,8 @@ static int cpcap_usb_set_usb_mode(struct cpcap_phy_ddata *ddata) { int error;
- error = cpcap_usb_gpio_set_mode(ddata, CPCAP_OTG_DM_DP); + /* Disable lines to prevent glitches from waking up mdm6600 */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_UNKNOWN_DISABLED); if (error) return error;
@@ -458,6 +465,11 @@ static int cpcap_usb_set_usb_mode(struct cpcap_phy_ddata *ddata) if (error) goto out_err;
+ /* Enable USB mode */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_OTG_DM_DP); + if (error) + goto out_err; + return 0;
out_err:
From: David Engraf david.engraf@sysgo.com
[ Upstream commit da9e3f4e30a53cd420cf1e6961c3b4110f0f21f0 ]
max77620_wdt uses watchdog core functions. Enable CONFIG_WATCHDOG_CORE to fix potential build errors.
Signed-off-by: David Engraf david.engraf@sysgo.com Reviewed-by: Guenter Roeck linux@roeck-us.net Link: https://lore.kernel.org/r/20191127084617.16937-1-david.engraf@sysgo.com Signed-off-by: Guenter Roeck linux@roeck-us.net Signed-off-by: Wim Van Sebroeck wim@linux-watchdog.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/watchdog/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index b165c46..709d4de 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -594,6 +594,7 @@ config MAX63XX_WATCHDOG config MAX77620_WATCHDOG tristate "Maxim Max77620 Watchdog Timer" depends on MFD_MAX77620 || COMPILE_TEST + select WATCHDOG_CORE help This is the driver for the Max77620 watchdog timer. Say 'Y' here to enable the watchdog timer support for
From: Andreas Kemnade andreas@kemnade.info
[ Upstream commit a76dfb859cd42df6e3d1910659128ffcd2fb6ba2 ]
Platform device aliases were missing so module autoloading did not work.
Signed-off-by: Andreas Kemnade andreas@kemnade.info Reviewed-by: Guenter Roeck linux@roeck-us.net Link: https://lore.kernel.org/r/20191213214802.22268-1-andreas@kemnade.info Signed-off-by: Guenter Roeck linux@roeck-us.net Signed-off-by: Wim Van Sebroeck wim@linux-watchdog.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/watchdog/rn5t618_wdt.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/watchdog/rn5t618_wdt.c b/drivers/watchdog/rn5t618_wdt.c index e60f557..d2e79cf 100644 --- a/drivers/watchdog/rn5t618_wdt.c +++ b/drivers/watchdog/rn5t618_wdt.c @@ -193,6 +193,7 @@ static int rn5t618_wdt_remove(struct platform_device *pdev)
module_platform_driver(rn5t618_wdt_driver);
+MODULE_ALIAS("platform:rn5t618-wdt"); MODULE_AUTHOR("Beniamino Galvani b.galvani@gmail.com"); MODULE_DESCRIPTION("RN5T618 watchdog driver"); MODULE_LICENSE("GPL v2");
From: "wuxu.wu" wuxu.wu@huawei.com
[ Upstream commit 19b61392c5a852b4e8a0bf35aecb969983c5932d ]
dw_spi_irq() and dw_spi_transfer_one() can be called concurrently.
I find a panic in dw_writer(): txw = *(u8 *)(dws->tx), when dw->tx==null, dw->len==4, and dw->tx_end==1.
When the tpm driver's message times out, dw_spi_irq() and dw_spi_transfer_one() may access dw_spi concurrently, so I think the dw_spi structure lacks protection.
Otherwise, dw_spi_transfer_one() sets the dw rx/tx buffers and then enables the irq; the stores to dw rx/tx and the loads of dw rx/tx by other cores handling the irq may happen out of order.
[ 1025.321302] Call trace: ... [ 1025.321319] __crash_kexec+0x98/0x148 [ 1025.321323] panic+0x17c/0x314 [ 1025.321329] die+0x29c/0x2e8 [ 1025.321334] die_kernel_fault+0x68/0x78 [ 1025.321337] __do_kernel_fault+0x90/0xb0 [ 1025.321346] do_page_fault+0x88/0x500 [ 1025.321347] do_translation_fault+0xa8/0xb8 [ 1025.321349] do_mem_abort+0x68/0x118 [ 1025.321351] el1_da+0x20/0x8c [ 1025.321362] dw_writer+0xc8/0xd0 [ 1025.321364] interrupt_transfer+0x60/0x110 [ 1025.321365] dw_spi_irq+0x48/0x70 ...
Signed-off-by: wuxu.wu wuxu.wu@huawei.com Link: https://lore.kernel.org/r/1577849981-31489-1-git-send-email-wuxu.wu@huawei.c... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/spi/spi-dw.c | 15 ++++++++++++--- drivers/spi/spi-dw.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index b3116da..c623998 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -179,9 +179,11 @@ static inline u32 rx_max(struct dw_spi *dws)
static void dw_writer(struct dw_spi *dws) { - u32 max = tx_max(dws); + u32 max; u16 txw = 0;
+ spin_lock(&dws->buf_lock); + max = tx_max(dws); while (max--) { /* Set the tx word if the transfer's original "tx" is not null */ if (dws->tx_end - dws->len) { @@ -193,13 +195,16 @@ static void dw_writer(struct dw_spi *dws) dw_write_io_reg(dws, DW_SPI_DR, txw); dws->tx += dws->n_bytes; } + spin_unlock(&dws->buf_lock); }
static void dw_reader(struct dw_spi *dws) { - u32 max = rx_max(dws); + u32 max; u16 rxw;
+ spin_lock(&dws->buf_lock); + max = rx_max(dws); while (max--) { rxw = dw_read_io_reg(dws, DW_SPI_DR); /* Care rx only if the transfer's original "rx" is not null */ @@ -211,6 +216,7 @@ static void dw_reader(struct dw_spi *dws) } dws->rx += dws->n_bytes; } + spin_unlock(&dws->buf_lock); }
static void int_error_stop(struct dw_spi *dws, const char *msg) @@ -283,18 +289,20 @@ static int dw_spi_transfer_one(struct spi_controller *master, { struct dw_spi *dws = spi_controller_get_devdata(master); struct chip_data *chip = spi_get_ctldata(spi); + unsigned long flags; u8 imask = 0; u16 txlevel = 0; u32 cr0; int ret;
dws->dma_mapped = 0; - + spin_lock_irqsave(&dws->buf_lock, flags); dws->tx = (void *)transfer->tx_buf; dws->tx_end = dws->tx + transfer->len; dws->rx = transfer->rx_buf; dws->rx_end = dws->rx + transfer->len; dws->len = transfer->len; + spin_unlock_irqrestore(&dws->buf_lock, flags);
spi_enable_chip(dws, 0);
@@ -485,6 +493,7 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws) dws->type = SSI_MOTO_SPI; dws->dma_inited = 0; dws->dma_addr = (dma_addr_t)(dws->paddr + DW_SPI_DR); + spin_lock_init(&dws->buf_lock);
spi_controller_set_devdata(master, dws);
diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h index 0168b08..20a09fe 100644 --- a/drivers/spi/spi-dw.h +++ b/drivers/spi/spi-dw.h @@ -118,6 +118,7 @@ struct dw_spi { size_t len; void *tx; void *tx_end; + spinlock_t buf_lock; void *rx; void *rx_end; int dma_mapped;
From: Fenghua Yu fenghua.yu@intel.com
[ Upstream commit f11421ba4af706cb4f5703de34fa77fba8472776 ]
Atomic operations that span cache lines are super-expensive on x86 (not just to the current processor, but also to other processes as all memory operations are blocked until the operation completes). Upcoming x86 processors have a switch to cause such operations to generate a #AC trap. It is expected that some real time systems will enable this mode in BIOS.
In preparation for this, it is necessary to fix code that may execute atomic instructions with operands that cross cachelines because the #AC trap will crash the kernel.
Since "pwol_mask" is local and never exposed to concurrency, there is no need to set bits in pwol_mask using atomic operations.
Directly operate on the byte which contains the bit instead of using __set_bit() to avoid any big endian concern due to type cast to unsigned long in __set_bit().
Suggested-by: Peter Zijlstra peterz@infradead.org Signed-off-by: Fenghua Yu fenghua.yu@intel.com Signed-off-by: Tony Luck tony.luck@intel.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/broadcom/b44.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index e445ab7..88f8d31 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1519,8 +1519,10 @@ static int b44_magic_pattern(u8 *macaddr, u8 *ppattern, u8 *pmask, int offset) int ethaddr_bytes = ETH_ALEN;
memset(ppattern + offset, 0xff, magicsync); - for (j = 0; j < magicsync; j++) - set_bit(len++, (unsigned long *) pmask); + for (j = 0; j < magicsync; j++) { + pmask[len >> 3] |= BIT(len & 7); + len++; + }
for (j = 0; j < B44_MAX_PATTERNS; j++) { if ((B44_PATTERN_SIZE - len) >= ETH_ALEN) @@ -1532,7 +1534,8 @@ static int b44_magic_pattern(u8 *macaddr, u8 *ppattern, u8 *pmask, int offset) for (k = 0; k< ethaddr_bytes; k++) { ppattern[offset + magicsync + (j * ETH_ALEN) + k] = macaddr[k]; - set_bit(len++, (unsigned long *) pmask); + pmask[len >> 3] |= BIT(len & 7); + len++; } } return len - 1;
From: Krzysztof Kozlowski krzk@kernel.org
[ Upstream commit 00c0688cecadbf7ac2f5b4cdb36d912a2d3f0cca ]
Since net_device.mem_start is unsigned long, it should not be cast to int right before casting to pointer. This fixes warning (compile testing on alpha architecture):
drivers/net/wan/sdla.c: In function ‘sdla_transmit’: drivers/net/wan/sdla.c:711:13: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
Signed-off-by: Krzysztof Kozlowski krzk@kernel.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/wan/sdla.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 57ed259..09fde60 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -711,7 +711,7 @@ static netdev_tx_t sdla_transmit(struct sk_buff *skb,
spin_lock_irqsave(&sdla_lock, flags); SDLA_WINDOW(dev, addr); - pbuf = (void *)(((int) dev->mem_start) + (addr & SDLA_ADDR_MASK)); + pbuf = (void *)(dev->mem_start + (addr & SDLA_ADDR_MASK)); __sdla_write(dev, pbuf->buf_addr, skb->data, skb->len); SDLA_WINDOW(dev, addr); pbuf->opp_flag = 1;
From: Dmitry Osipenko digetx@gmail.com
[ Upstream commit c5706c7defc79de68a115b5536376298a8fef111 ]
Driver fails to compile in a minimized kernel's configuration because of the missing dependency on GPIOLIB_IRQCHIP.
error: ‘struct gpio_chip’ has no member named ‘irq’ 44 | virq = irq_find_mapping(gpio->gpio_chip.irq.domain, offset);
Signed-off-by: Dmitry Osipenko digetx@gmail.com Link: https://lore.kernel.org/r/20200106015154.12040-1-digetx@gmail.com Signed-off-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/gpio/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index ed51221..2c34e953 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1059,6 +1059,7 @@ config GPIO_MADERA config GPIO_MAX77620 tristate "GPIO support for PMIC MAX77620 and MAX20024" depends on MFD_MAX77620 + select GPIOLIB_IRQCHIP help GPIO driver for MAX77620 and MAX20024 PMIC from Maxim Semiconductor. MAX77620 PMIC has 8 pins that can be configured as GPIOs. The
From: Arnd Bergmann arnd@arndb.de
[ Upstream commit 30780d086a83332adcd9362281201cee7c3d9d19 ]
With -O3, gcc has found an actual uninitialized variable stored into an mmio register in two instances:
drivers/atm/eni.c: In function 'discard': drivers/atm/eni.c:465:13: error: 'dma[1]' is used uninitialized in this function [-Werror=uninitialized] writel(dma[i*2+1],eni_dev->rx_dma+dma_wr*8+4); ^ drivers/atm/eni.c:465:13: error: 'dma[3]' is used uninitialized in this function [-Werror=uninitialized]
Change the code to always write zeroes instead.
Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/atm/eni.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 6470e3c..7323e92 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -372,7 +372,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb, here = (eni_vcc->descr+skip) & (eni_vcc->words-1); dma[j++] = (here << MID_DMA_COUNT_SHIFT) | (vcc->vci << MID_DMA_VCI_SHIFT) | MID_DT_JK; - j++; + dma[j++] = 0; } here = (eni_vcc->descr+size+skip) & (eni_vcc->words-1); if (!eff) size += skip; @@ -445,7 +445,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb, if (size != eff) { dma[j++] = (here << MID_DMA_COUNT_SHIFT) | (vcc->vci << MID_DMA_VCI_SHIFT) | MID_DT_JK; - j++; + dma[j++] = 0; } if (!j || j > 2*RX_DMA_BUF) { printk(KERN_CRIT DEV_LABEL "!j or j too big!!!\n");
From: Rodrigo Rivas Costa rodrigorivascosta@gmail.com
[ Upstream commit 20eee6e5af35d9586774e80b6e0b1850e7cc9899 ]
The `connected` value for wired devices was not properly initialized, it must be set to `true` upon creation, because wired devices do not generate connection events.
When a raw client (the Steam Client) uses the device, the input device is destroyed. Then, when the raw client finishes, it must be recreated. But since the `connected` variable was false this never happened.
Signed-off-by: Rodrigo Rivas Costa rodrigorivascosta@gmail.com Signed-off-by: Jiri Kosina jkosina@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hid/hid-steam.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c index 8dae0f9..6286204 100644 --- a/drivers/hid/hid-steam.c +++ b/drivers/hid/hid-steam.c @@ -768,8 +768,12 @@ static int steam_probe(struct hid_device *hdev,
if (steam->quirks & STEAM_QUIRK_WIRELESS) { hid_info(hdev, "Steam wireless receiver connected"); + /* If using a wireless adaptor ask for connection status */ + steam->connected = false; steam_request_conn_status(steam); } else { + /* A wired connection is always present */ + steam->connected = true; ret = steam_register(steam); if (ret) { hid_err(hdev,
From: Pacien TRAN-GIRARD pacien.trangirard@pacien.net
[ Upstream commit 10b65e2915b2fcc606d173e98a972850101fb4c4 ]
This patch adds a quirk disabling keyboard backlight support for the Dell Inspiron 1012 and 1018.
Those models wrongly report supporting keyboard backlight control features (through SMBIOS tokens) even though they're not equipped with a backlit keyboard. This led to broken controls being exposed through sysfs by this driver which froze the system when used.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=107651 Signed-off-by: Pacien TRAN-GIRARD pacien.trangirard@pacien.net Reviewed-by: Mario Limonciello mario.limonciello@dell.com Reviewed-by: Pali Rohár pali.rohar@gmail.com Signed-off-by: Andy Shevchenko andriy.shevchenko@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/platform/x86/dell-laptop.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+)
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index 3433986..949dbc8 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -37,6 +37,7 @@
struct quirk_entry { bool touchpad_led; + bool kbd_led_not_present; bool kbd_led_levels_off_1; bool kbd_missing_ac_tag;
@@ -77,6 +78,10 @@ static int __init dmi_matched(const struct dmi_system_id *dmi) .kbd_led_levels_off_1 = true, };
+static struct quirk_entry quirk_dell_inspiron_1012 = { + .kbd_led_not_present = true, +}; + static struct platform_driver platform_driver = { .driver = { .name = "dell-laptop", @@ -314,6 +319,24 @@ static int __init dmi_matched(const struct dmi_system_id *dmi) }, .driver_data = &quirk_dell_latitude_e6410, }, + { + .callback = dmi_matched, + .ident = "Dell Inspiron 1012", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), + }, + .driver_data = &quirk_dell_inspiron_1012, + }, + { + .callback = dmi_matched, + .ident = "Dell Inspiron 1018", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1018"), + }, + .driver_data = &quirk_dell_inspiron_1012, + }, { } };
@@ -1497,6 +1520,9 @@ static void kbd_init(void) { int ret;
+ if (quirks && quirks->kbd_led_not_present) + return; + ret = kbd_init_info(); kbd_init_tokens();
From: Slawomir Pawlowski slawomir.pawlowski@intel.com
[ Upstream commit 56b4cd4b7da9ee95778eb5c8abea49f641ebfd91 ]
Intel Visual Compute Accelerator (VCA) is a family of PCIe add-in devices exposing computational units via Non Transparent Bridges (NTB, PEX 87xx).
Similarly to MIC x200, we need to add DMA aliases to allow buffer access when IOMMU is enabled.
Add aliases to allow computational unit access to host memory. These aliases mark the whole VCA device as one IOMMU group.
All possible slot numbers (0x20) are used, since we are unable to tell what slot is used on other side. This quirk is intended for both host and computational unit sides. The VCA devices have up to five functions: four for DMA channels and one additional.
Link: https://lore.kernel.org/r/5683A335CC8BE1438C3C30C49DCC38DF637CED8E@IRSMSX102... Signed-off-by: Slawomir Pawlowski slawomir.pawlowski@intel.com Signed-off-by: Przemek Kitszel przemyslawx.kitszel@intel.com Signed-off-by: Bjorn Helgaas bhelgaas@google.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/pci/quirks.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 362d8f1..802ec3e 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3987,6 +3987,40 @@ static void quirk_mic_x200_dma_alias(struct pci_dev *pdev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2264, quirk_mic_x200_dma_alias);
/* + * Intel Visual Compute Accelerator (VCA) is a family of PCIe add-in devices + * exposing computational units via Non Transparent Bridges (NTB, PEX 87xx). + * + * Similarly to MIC x200, we need to add DMA aliases to allow buffer access + * when IOMMU is enabled. These aliases allow computational unit access to + * host memory. These aliases mark the whole VCA device as one IOMMU + * group. + * + * All possible slot numbers (0x20) are used, since we are unable to tell + * what slot is used on other side. This quirk is intended for both host + * and computational unit sides. The VCA devices have up to five functions + * (four for DMA channels and one additional). + */ +static void quirk_pex_vca_alias(struct pci_dev *pdev) +{ + const unsigned int num_pci_slots = 0x20; + unsigned int slot; + + for (slot = 0; slot < num_pci_slots; slot++) { + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x0)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x1)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x2)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x3)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x4)); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2954, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2955, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2956, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2958, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2959, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x295A, quirk_pex_vca_alias); + +/* * The IOMMU and interrupt controller on Broadcom Vulcan/Cavium ThunderX2 are * associated not at the root bus, but at a bridge below. This quirk avoids * generating invalid DMA aliases.
From: Logan Gunthorpe logang@deltatee.com
[ Upstream commit 3c124435e8dd516df4b2fc983f4415386fd6edae ]
Non-Transparent Bridge (NTB) devices (among others) may have many DMA aliases, since the hardware will send requests with different device ids depending on their origin across the bridged hardware.
See commit ad281ecf1c7d ("PCI: Add DMA alias quirk for Microsemi Switchtec NTB") for more information on this.
The AMD IOMMU IRQ remapping functionality ignores all PCI aliases for IRQs so if devices send an interrupt from one of their aliases they will be blocked on AMD hardware with the IOMMU enabled.
To fix this, ensure IRQ remapping is enabled for all aliases with MSI interrupts.
This is analogous to the functionality added to the Intel IRQ remapping code in commit 3f0c625c6ae7 ("iommu/vt-d: Allow interrupts from the entire bus for aliased devices")
Signed-off-by: Logan Gunthorpe logang@deltatee.com Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/iommu/amd_iommu.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index cd61f53..f75a6c3 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3693,7 +3693,20 @@ static void set_remap_table_entry(struct amd_iommu *iommu, u16 devid, iommu_flush_dte(iommu, devid); }
-static struct irq_remap_table *alloc_irq_table(u16 devid) +static int set_remap_table_entry_alias(struct pci_dev *pdev, u16 alias, + void *data) +{ + struct irq_remap_table *table = data; + + irq_lookup_table[alias] = table; + set_dte_irq_entry(alias, table); + + iommu_flush_dte(amd_iommu_rlookup_table[alias], alias); + + return 0; +} + +static struct irq_remap_table *alloc_irq_table(u16 devid, struct pci_dev *pdev) { struct irq_remap_table *table = NULL; struct irq_remap_table *new_table = NULL; @@ -3739,7 +3752,12 @@ static struct irq_remap_table *alloc_irq_table(u16 devid) table = new_table; new_table = NULL;
- set_remap_table_entry(iommu, devid, table); + if (pdev) + pci_for_each_dma_alias(pdev, set_remap_table_entry_alias, + table); + else + set_remap_table_entry(iommu, devid, table); + if (devid != alias) set_remap_table_entry(iommu, alias, table);
@@ -3756,7 +3774,8 @@ static struct irq_remap_table *alloc_irq_table(u16 devid) return table; }
-static int alloc_irq_index(u16 devid, int count, bool align) +static int alloc_irq_index(u16 devid, int count, bool align, + struct pci_dev *pdev) { struct irq_remap_table *table; int index, c, alignment = 1; @@ -3766,7 +3785,7 @@ static int alloc_irq_index(u16 devid, int count, bool align) if (!iommu) return -ENODEV;
- table = alloc_irq_table(devid); + table = alloc_irq_table(devid, pdev); if (!table) return -ENODEV;
@@ -4199,7 +4218,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, struct irq_remap_table *table; struct amd_iommu *iommu;
- table = alloc_irq_table(devid); + table = alloc_irq_table(devid, NULL); if (table) { if (!table->min_index) { /* @@ -4216,11 +4235,15 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, } else { index = -ENOMEM; } - } else { + } else if (info->type == X86_IRQ_ALLOC_TYPE_MSI || + info->type == X86_IRQ_ALLOC_TYPE_MSIX) { bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
- index = alloc_irq_index(devid, nr_irqs, align); + index = alloc_irq_index(devid, nr_irqs, align, info->msi_dev); + } else { + index = alloc_irq_index(devid, nr_irqs, false, NULL); } + if (index < 0) { pr_warn("Failed to allocate IRTE\n"); ret = index;
From: Ben Dooks ben.dooks@codethink.co.uk
[ Upstream commit 2079fe6ea8cbd2fb2fbadba911f1eca6c362eb9b ]
The omap_sr_pdata is not declared but is exported, so add a declaration for it to fix the following warning:
arch/arm/mach-omap2/pdata-quirks.c:609:36: warning: symbol 'omap_sr_pdata' was not declared. Should it be static?
Signed-off-by: Ben Dooks ben.dooks@codethink.co.uk Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/power/smartreflex.h | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/include/linux/power/smartreflex.h b/include/linux/power/smartreflex.h index 7b81dad..37d9b70 100644 --- a/include/linux/power/smartreflex.h +++ b/include/linux/power/smartreflex.h @@ -296,6 +296,9 @@ struct omap_sr_data { struct voltagedomain *voltdm; };
+ +extern struct omap_sr_data omap_sr_pdata[OMAP_SR_NR]; + #ifdef CONFIG_POWER_AVS_OMAP
/* Smartreflex module enable/disable interface */
From: Laura Abbott labbott@fedoraproject.org
[ Upstream commit bc3bdb12bbb3492067c8719011576370e959a2e6 ]
Steve Ellis reported incorrect block sizes and alignment offsets with a SATA enclosure. Adding a quirk to disable UAS fixes the problems.
Reported-by: Steven Ellis sellis@redhat.com Cc: Pacho Ramos pachoramos@gmail.com Signed-off-by: Laura Abbott labbott@fedoraproject.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/usb/storage/unusual_uas.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index d0bdebd..1b23741 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -87,12 +87,15 @@ USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_IGNORE_UAS),
-/* Reported-by: Takeo Nakayama javhera@gmx.com */ +/* + * Initially Reported-by: Takeo Nakayama javhera@gmx.com + * UAS Ignore Reported by Steven Ellis sellis@redhat.com + */ UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999, "JMicron", "JMS566", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_NO_REPORT_OPCODES), + US_FL_NO_REPORT_OPCODES | US_FL_IGNORE_UAS),
/* Reported-by: Hans de Goede hdegoede@redhat.com */ UNUSUAL_DEV(0x4971, 0x1012, 0x0000, 0x9999,
From: Johan Hovold johan@kernel.org
commit 92aafe77123ab478e5f5095878856ab0424910da upstream.
The driver would fail to stop the command timer in most error paths, something which specifically could lead to the timer being freed while still active on I/O errors during probe.
Fix this by making sure that each function starting the timer also stops it in all relevant error paths.
Reported-by: syzbot+1d1597a5aa3679c65b9f@syzkaller.appspotmail.com Fixes: b78e91bcfb33 ("rsi: Add new firmware loading method") Cc: stable stable@vger.kernel.org # 4.12 Cc: Prameela Rani Garnepudi prameela.j04cs@gmail.com Cc: Amitkumar Karwar amit.karwar@redpinesignals.com Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/wireless/rsi/rsi_91x_hal.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 01edf96..a7b341e 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -616,6 +616,7 @@ static int bl_cmd(struct rsi_hw *adapter, u8 cmd, u8 exp_resp, char *str) bl_start_cmd_timer(adapter, timeout); status = bl_write_cmd(adapter, cmd, exp_resp, &regout_val); if (status < 0) { + bl_stop_cmd_timer(adapter); rsi_dbg(ERR_ZONE, "%s: Command %s (%0x) writing failed..\n", __func__, str, cmd); @@ -731,10 +732,9 @@ static int ping_pong_write(struct rsi_hw *adapter, u8 cmd, u8 *addr, u32 size) }
status = bl_cmd(adapter, cmd_req, cmd_resp, str); - if (status) { - bl_stop_cmd_timer(adapter); + if (status) return status; - } + return 0; }
@@ -822,10 +822,9 @@ static int auto_fw_upgrade(struct rsi_hw *adapter, u8 *flash_content,
status = bl_cmd(adapter, EOF_REACHED, FW_LOADING_SUCCESSFUL, "EOF_REACHED"); - if (status) { - bl_stop_cmd_timer(adapter); + if (status) return status; - } + rsi_dbg(INFO_ZONE, "FW loading is done and FW is running..\n"); return 0; } @@ -846,6 +845,7 @@ static int rsi_load_firmware(struct rsi_hw *adapter) status = hif_ops->master_reg_read(adapter, SWBL_REGOUT, &regout_val, 2); if (status < 0) { + bl_stop_cmd_timer(adapter); rsi_dbg(ERR_ZONE, "%s: REGOUT read failed\n", __func__); return status;
From: Johan Hovold johan@kernel.org
commit 47768297481184932844ab01a86752ba31a38861 upstream.
Make sure to free the skb on failed receive-URB submission (e.g. on disconnect or currently also due to a missing endpoint).
Fixes: a1854fae1414 ("rsi: improve RX packet handling in USB interface") Cc: stable stable@vger.kernel.org # 4.17 Cc: Prameela Rani Garnepudi prameela.j04cs@gmail.com Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/wireless/rsi/rsi_91x_usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index 14e56be..183cb39 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -327,8 +327,10 @@ static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num) rx_cb);
status = usb_submit_urb(urb, GFP_KERNEL); - if (status) + if (status) { rsi_dbg(ERR_ZONE, "%s: Failed in urb submission\n", __func__); + dev_kfree_skb(skb); + }
return status; }
From: Johan Hovold johan@kernel.org
commit b9b9f9fea21830f85cf0148cd8dce001ae55ead1 upstream.
USB completion handlers are called in atomic context and must specifically not allocate memory using GFP_KERNEL.
Fixes: a1854fae1414 ("rsi: improve RX packet handling in USB interface") Cc: stable stable@vger.kernel.org # 4.17 Cc: Prameela Rani Garnepudi prameela.j04cs@gmail.com Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Kalle Valo kvalo@codeaurora.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/wireless/rsi/rsi_91x_usb.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index 183cb39..0ddd722 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -16,6 +16,7 @@ */
#include <linux/module.h> +#include <linux/types.h> #include <net/rsi_91x.h> #include "rsi_usb.h" #include "rsi_hal.h" @@ -29,7 +30,7 @@ "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n" "6[AP + BT classic], 14[AP + BT classic + BT LE]");
-static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num); +static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num, gfp_t flags);
/** * rsi_usb_card_write() - This function writes to the USB Card. @@ -283,7 +284,7 @@ static void rsi_rx_done_handler(struct urb *urb) status = 0;
out: - if (rsi_rx_urb_submit(dev->priv, rx_cb->ep_num)) + if (rsi_rx_urb_submit(dev->priv, rx_cb->ep_num, GFP_ATOMIC)) rsi_dbg(ERR_ZONE, "%s: Failed in urb submission", __func__);
if (status) @@ -296,7 +297,7 @@ static void rsi_rx_done_handler(struct urb *urb) * * Return: 0 on success, a negative error code on failure. */ -static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num) +static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num, gfp_t mem_flags) { struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)adapter->rsi_dev; struct rx_usb_ctrl_block *rx_cb = &dev->rx_cb[ep_num - 1]; @@ -326,7 +327,7 @@ static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num) rsi_rx_done_handler, rx_cb);
- status = usb_submit_urb(urb, GFP_KERNEL); + status = usb_submit_urb(urb, mem_flags); if (status) { rsi_dbg(ERR_ZONE, "%s: Failed in urb submission\n", __func__); dev_kfree_skb(skb); @@ -783,12 +784,12 @@ static int rsi_probe(struct usb_interface *pfunction, rsi_dbg(INIT_ZONE, "%s: Device Init Done\n", __func__); }
- status = rsi_rx_urb_submit(adapter, WLAN_EP); + status = rsi_rx_urb_submit(adapter, WLAN_EP, GFP_KERNEL); if (status) goto err1;
if (adapter->priv->coex_mode > 1) { - status = rsi_rx_urb_submit(adapter, BT_EP); + status = rsi_rx_urb_submit(adapter, BT_EP, GFP_KERNEL); if (status) goto err1; }
From: Herbert Xu herbert@gondor.apana.org.au
commit 37f96694cf73ba116993a9d2d99ad6a75fa7fdb0 upstream.
As af_alg_release_parent may be called from BH context (most notably due to an async request that only completes after socket closure, or as reported here because of an RCU-delayed sk_destruct call), we must use bh_lock_sock instead of lock_sock.
Reported-by: syzbot+c2f1558d49e25cc36e5e@syzkaller.appspotmail.com Reported-by: Eric Dumazet eric.dumazet@gmail.com Fixes: c840ac6af3f8 ("crypto: af_alg - Disallow bind/setkey/...") Cc: stable@vger.kernel.org Signed-off-by: Herbert Xu herbert@gondor.apana.org.au Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- crypto/af_alg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/crypto/af_alg.c b/crypto/af_alg.c index ed643ce..4fc8e6a 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -139,11 +139,13 @@ void af_alg_release_parent(struct sock *sk) sk = ask->parent; ask = alg_sk(sk);
- lock_sock(sk); + local_bh_disable(); + bh_lock_sock(sk); ask->nokey_refcnt -= nokey; if (!last) last = !--ask->refcnt; - release_sock(sk); + bh_unlock_sock(sk); + local_bh_enable();
if (last) sock_put(sk);
From: Linus Torvalds torvalds@linux-foundation.org
commit 50ee7529ec4500c88f8664560770a7a1b65db72b upstream.
For 5.3 we had to revert a nice ext4 IO pattern improvement, because it caused a bootup regression due to lack of entropy at bootup together with arguably broken user space that was asking for secure random numbers when it really didn't need to.
See commit 72dbcf721566 (Revert "ext4: make __ext4_get_inode_loc plug").
This aims to solve the issue by actively generating entropy noise using the CPU cycle counter when waiting for the random number generator to initialize. This only works when you have a high-frequency time stamp counter available, but that's the case on all modern x86 CPUs, and on most other modern CPUs too.
What we do is to generate jitter entropy from the CPU cycle counter under a somewhat complex load: calling the scheduler while also guaranteeing a certain amount of timing noise by also triggering a timer.
I'm sure we can tweak this, and that people will want to look at other alternatives, but there's been a number of papers written on jitter entropy, and this should really be fairly conservative by crediting one bit of entropy for every timer-induced jump in the cycle counter. Not because the timer itself would be all that unpredictable, but because the interaction between the timer and the loop is going to be.
Even if (and perhaps particularly if) the timer actually happens on another CPU, the cacheline interaction between the loop that reads the cycle counter and the timer itself firing is going to add perturbations to the cycle counter values that get mixed into the entropy pool.
As Thomas pointed out, with a modern out-of-order CPU, even quite simple loops show a fair amount of hard-to-predict timing variability even in the absence of external interrupts. But this tries to take that further by actually having a fairly complex interaction.
This is not going to solve the entropy issue for architectures that have no CPU cycle counter, but it's not clear how (and if) that is solvable, and the hardware in question is largely starting to be irrelevant. And by doing this we can at least avoid some of the even more contentious approaches (like making the entropy waiting time out in order to avoid the possibly unbounded waiting).
Cc: Ahmed Darwish darwish.07@gmail.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Theodore Ts'o tytso@mit.edu Cc: Nicholas Mc Guire hofrat@opentech.at Cc: Andy Lutomirski luto@kernel.org Cc: Kees Cook keescook@chromium.org Cc: Willy Tarreau w@1wt.eu Cc: Alexander E. Patrakov patrakov@gmail.com Cc: Lennart Poettering mzxreary@0pointer.de Cc: Noah Meyerhans noahm@debian.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/char/random.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-)
diff --git a/drivers/char/random.c b/drivers/char/random.c index 86fe1df..53e8227 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1653,6 +1653,56 @@ void get_random_bytes(void *buf, int nbytes) } EXPORT_SYMBOL(get_random_bytes);
+ +/* + * Each time the timer fires, we expect that we got an unpredictable + * jump in the cycle counter. Even if the timer is running on another + * CPU, the timer activity will be touching the stack of the CPU that is + * generating entropy.. + * + * Note that we don't re-arm the timer in the timer itself - we are + * happy to be scheduled away, since that just makes the load more + * complex, but we do not want the timer to keep ticking unless the + * entropy loop is running. + * + * So the re-arming always happens in the entropy loop itself. + */ +static void entropy_timer(struct timer_list *t) +{ + credit_entropy_bits(&input_pool, 1); +} + +/* + * If we have an actual cycle counter, see if we can + * generate enough entropy with timing noise + */ +static void try_to_generate_entropy(void) +{ + struct { + unsigned long now; + struct timer_list timer; + } stack; + + stack.now = random_get_entropy(); + + /* Slow counter - or none. Don't even bother */ + if (stack.now == random_get_entropy()) + return; + + timer_setup_on_stack(&stack.timer, entropy_timer, 0); + while (!crng_ready()) { + if (!timer_pending(&stack.timer)) + mod_timer(&stack.timer, jiffies+1); + mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); + schedule(); + stack.now = random_get_entropy(); + } + + del_timer_sync(&stack.timer); + destroy_timer_on_stack(&stack.timer); + mix_pool_bytes(&input_pool, &stack.now, sizeof(stack.now)); +} + /* * Wait for the urandom pool to be seeded and thus guaranteed to supply * cryptographically secure random numbers. This applies to: the /dev/urandom @@ -1667,7 +1717,17 @@ int wait_for_random_bytes(void) { if (likely(crng_ready())) return 0; - return wait_event_interruptible(crng_init_wait, crng_ready()); + + do { + int ret; + ret = wait_event_interruptible_timeout(crng_init_wait, crng_ready(), HZ); + if (ret) + return ret > 0 ? 0 : ret; + + try_to_generate_entropy(); + } while (!crng_ready()); + + return 0; } EXPORT_SYMBOL(wait_for_random_bytes);
From: Andrew Murray andrew.murray@arm.com
commit 4942dc6638b07b5326b6d2faa142635c559e7cd5 upstream.
On VHE systems arch.mdcr_el2 is written to mdcr_el2 at vcpu_load time to set options for self-hosted debug and the performance monitors extension.
Unfortunately the value of arch.mdcr_el2 is not calculated until kvm_arm_setup_debug() in the run loop after the vcpu has been loaded. This means that the initial brief iterations of the run loop use a zero value of mdcr_el2 - until the vcpu is preempted. This also results in a delay between changes to vcpu->guest_debug taking effect.
Fix this by writing to mdcr_el2 in kvm_arm_setup_debug() on VHE systems when a change to arch.mdcr_el2 has been detected.
Fixes: d5a21bcc2995 ("KVM: arm64: Move common VHE/non-VHE trap config in separate functions") Cc: stable@vger.kernel.org # 4.17.x- Suggested-by: James Morse james.morse@arm.com Acked-by: Will Deacon will@kernel.org Reviewed-by: Marc Zyngier maz@kernel.org Signed-off-by: Andrew Murray andrew.murray@arm.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kvm/debug.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c index 00d4223..4e722d7 100644 --- a/arch/arm64/kvm/debug.c +++ b/arch/arm64/kvm/debug.c @@ -112,7 +112,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) { bool trap_debug = !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY); - unsigned long mdscr; + unsigned long mdscr, orig_mdcr_el2 = vcpu->arch.mdcr_el2;
trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
@@ -208,6 +208,10 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) if (vcpu_read_sys_reg(vcpu, MDSCR_EL1) & (DBG_MDSCR_KDE | DBG_MDSCR_MDE)) vcpu->arch.flags |= KVM_ARM64_DEBUG_DIRTY;
+ /* Write mdcr_el2 changes since vcpu_load on VHE systems */ + if (has_vhe() && orig_mdcr_el2 != vcpu->arch.mdcr_el2) + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2); trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_read_sys_reg(vcpu, MDSCR_EL1)); }
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
Merge 46 patches from the 4.19.101 stable branch (50 total), excluding 4 patches that were already merged: 6c11530 sched/fair: Add tmp_alone_branch assertion 2d935df sched/fair: Fix insertion in rq->leaf_cfs_rq_list b0be61a block: cleanup __blkdev_issue_discard() f387897 block: fix 32 bit overflow in __blkdev_issue_discard()
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile index f1e4282..ca186bc 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 100 +SUBLEVEL = 101 EXTRAVERSION = NAME = "People's Front"