Brian Norris (1): tracefs: Only clobber mode/uid/gid on remount if asked
David Hildenbrand (1): mm/hugetlb: fix hugetlb not supporting softdirty tracking
Denis V. Lunev (1): neigh: fix possible DoS due to net iface start/stop loop
Eric Dumazet (1): tcp: annotate data-race around challenge_timestamp
Florian Westphal (2): netfilter: ebtables: reject blobs that don't provide all entry points netfilter: ebtables: fix memory leak when blob is malformed
Gaosheng Cui (1): audit: fix potential double free on error path from fsnotify_add_inode_mark
Greg Kroah-Hartman (1): debugfs: add debugfs_lookup_and_remove()
Harsh Modi (1): netfilter: br_netfilter: Drop dst references before setting.
Jann Horn (1): mm: Fix TLB flush for not-first PFNMAP mappings in unmap_region()
Jonathan Toppins (1): bonding: 802.3ad: fix no transmission of LACPDUs
Kuniyuki Iwashima (9): net: Fix data-races around weight_p and dev_weight_[rt]x_bias. net: Fix data-races around netdev_tstamp_prequeue. ratelimit: Fix data-races in ___ratelimit(). net: Fix a data-race around sysctl_tstamp_allow_data. net: Fix a data-race around sysctl_net_busy_poll. net: Fix a data-race around sysctl_net_busy_read. net: Fix a data-race around netdev_budget. net: Fix a data-race around netdev_budget_usecs. net: Fix a data-race around sysctl_somaxconn.
Maciej Żenczykowski (1): net: ipvtap - add __init/__exit annotations to module init/exit funcs
Mike Snitzer (1): dm: return early from dm_pr_call() if DM device is suspended
Neal Cardwell (1): tcp: fix early ETIMEDOUT after spurious non-SACK RTO
NeilBrown (1): SUNRPC: use _bh spinlocking on ->transport_lock
Quanyang Wang (1): asm-generic: sections: refactor memory_intersects
Siddh Raman Pant (1): loop: Check for overflow while configuring loop
Trond Myklebust (1): NFSv4: Fix races in the legacy idmapper upcall
Xin Xiong (1): xfrm: fix refcount leak in __xfrm_policy_check()
Yang Yingliang (1): net: neigh: don't call kfree_skb() under spin_lock_irqsave()
Yee Lee (1): Revert "mm: kmemleak: take a full lowmem check in kmemleak_*_phys()"
drivers/block/loop.c | 5 +++ drivers/md/dm.c | 5 +++ drivers/net/bonding/bond_3ad.c | 38 ++++++++----------- drivers/net/ipvlan/ipvtap.c | 4 +- fs/debugfs/inode.c | 22 +++++++++++ fs/nfs/nfs4idmap.c | 46 ++++++++++++----------- fs/tracefs/inode.c | 29 ++++++++++---- include/asm-generic/sections.h | 7 +++- include/linux/debugfs.h | 6 +++ include/linux/netfilter_bridge/ebtables.h | 4 -- include/net/busy_poll.h | 2 +- kernel/audit_fsnotify.c | 1 + lib/ratelimit.c | 12 ++++-- mm/kmemleak.c | 8 ++-- mm/mmap.c | 17 +++++++-- net/bridge/br_netfilter_hooks.c | 2 + net/bridge/br_netfilter_ipv6.c | 1 + net/bridge/netfilter/ebtable_broute.c | 8 ---- net/bridge/netfilter/ebtable_filter.c | 8 ---- net/bridge/netfilter/ebtable_nat.c | 8 ---- net/bridge/netfilter/ebtables.c | 10 ++--- net/core/dev.c | 14 +++---- net/core/neighbour.c | 27 ++++++++++--- net/core/skbuff.c | 2 +- net/core/sock.c | 2 +- net/core/sysctl_net_core.c | 15 +++++--- net/ipv4/tcp_input.c | 29 +++++++++----- net/sched/sch_generic.c | 2 +- net/socket.c | 2 +- net/sunrpc/xprt.c | 4 +- net/xfrm/xfrm_policy.c | 1 + 31 files changed, 205 insertions(+), 136 deletions(-)
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from stable-v4.19.256 commit bb4a4b31c29dd04d50e2b34b780acc0e1bbb8ce2 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
commit 51fd2eb52c0ca8275a906eed81878ef50ae94eb0 upstream.
nfs_idmap_instantiate() will cause the process that is waiting in request_key_with_auxdata() to wake up and exit. If there is a second process waiting for the idmap->idmap_mutex, then it may wake up and start a new call to request_key_with_auxdata(). If the call to idmap_pipe_downcall() from the first process has not yet finished calling nfs_idmap_complete_pipe_upcall_locked(), then we may end up triggering the WARN_ON_ONCE() in nfs_idmap_prepare_pipe_upcall().
The fix is to ensure that we clear idmap->idmap_upcall_data before calling nfs_idmap_instantiate().
Fixes: e9ab41b620e4 ("NFSv4: Clean up the legacy idmapper upcall") Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- fs/nfs/nfs4idmap.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-)
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index bf34ddaa2ad7..c1c26b06764f 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -547,22 +547,20 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap, return true; }
-static void -nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret) +static void nfs_idmap_complete_pipe_upcall(struct idmap_legacy_upcalldata *data, + int ret) { - struct key *authkey = idmap->idmap_upcall_data->authkey; - - kfree(idmap->idmap_upcall_data); - idmap->idmap_upcall_data = NULL; - complete_request_key(authkey, ret); - key_put(authkey); + complete_request_key(data->authkey, ret); + key_put(data->authkey); + kfree(data); }
-static void -nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret) +static void nfs_idmap_abort_pipe_upcall(struct idmap *idmap, + struct idmap_legacy_upcalldata *data, + int ret) { - if (idmap->idmap_upcall_data != NULL) - nfs_idmap_complete_pipe_upcall_locked(idmap, ret); + if (cmpxchg(&idmap->idmap_upcall_data, data, NULL) == data) + nfs_idmap_complete_pipe_upcall(data, ret); }
static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux) @@ -599,7 +597,7 @@ static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
ret = rpc_queue_upcall(idmap->idmap_pipe, msg); if (ret < 0) - nfs_idmap_abort_pipe_upcall(idmap, ret); + nfs_idmap_abort_pipe_upcall(idmap, data, ret);
return ret; out2: @@ -655,6 +653,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) struct request_key_auth *rka; struct rpc_inode *rpci = RPC_I(file_inode(filp)); struct idmap *idmap = (struct idmap *)rpci->private; + struct idmap_legacy_upcalldata *data; struct key *authkey; struct idmap_msg im; size_t namelen_in; @@ -664,10 +663,11 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) * will have been woken up and someone else may now have used * idmap_key_cons - so after this point we may no longer touch it. */ - if (idmap->idmap_upcall_data == NULL) + data = xchg(&idmap->idmap_upcall_data, NULL); + if (data == NULL) goto out_noupcall;
- authkey = idmap->idmap_upcall_data->authkey; + authkey = data->authkey; rka = get_request_key_auth(authkey);
if (mlen != sizeof(im)) { @@ -689,18 +689,17 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { ret = -EINVAL; goto out; -} + }
- ret = nfs_idmap_read_and_verify_message(&im, - &idmap->idmap_upcall_data->idmap_msg, - rka->target_key, authkey); + ret = nfs_idmap_read_and_verify_message(&im, &data->idmap_msg, + rka->target_key, authkey); if (ret >= 0) { key_set_timeout(rka->target_key, nfs_idmap_cache_timeout); ret = mlen; }
out: - nfs_idmap_complete_pipe_upcall_locked(idmap, ret); + nfs_idmap_complete_pipe_upcall(data, ret); out_noupcall: return ret; } @@ -714,7 +713,7 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg) struct idmap *idmap = data->idmap;
if (msg->errno) - nfs_idmap_abort_pipe_upcall(idmap, msg->errno); + nfs_idmap_abort_pipe_upcall(idmap, data, msg->errno); }
static void @@ -722,8 +721,11 @@ idmap_release_pipe(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); struct idmap *idmap = (struct idmap *)rpci->private; + struct idmap_legacy_upcalldata *data;
- nfs_idmap_abort_pipe_upcall(idmap, -EPIPE); + data = xchg(&idmap->idmap_upcall_data, NULL); + if (data) + nfs_idmap_complete_pipe_upcall(data, -EPIPE); }
int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, kuid_t *uid)
From: Mike Snitzer snitzer@kernel.org
stable inclusion from stable-v4.19.256 commit 4f040ba9f15d047f173ecc01d3bba3d1e9fbee08 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit e120a5f1e78fab6223544e425015f393d90d6f0d ]
Otherwise PR ops may be issued while the broader DM device is being reconfigured, etc.
Fixes: 9c72bad1f31a ("dm: call PR reserve/unreserve on each underlying device") Signed-off-by: Mike Snitzer snitzer@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/md/dm.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 86a233731876..4c46f030eed2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -3128,6 +3128,11 @@ static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn, goto out; ti = dm_table_get_target(table, 0);
+ if (dm_suspended_md(md)) { + ret = -EAGAIN; + goto out; + } + ret = -EINVAL; if (!ti->type->iterate_devices) goto out;
From: Gaosheng Cui cuigaosheng1@huawei.com
stable inclusion from stable-v4.19.257 commit 1133d90d9d9ff3def7fc5ba160381cd611aa51ee category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
commit ad982c3be4e60c7d39c03f782733503cbd88fd2a upstream.
Audit_alloc_mark() assign pathname to audit_mark->path, on error path from fsnotify_add_inode_mark(), fsnotify_put_mark will free memory of audit_mark->path, but the caller of audit_alloc_mark will free the pathname again, so there will be double free problem.
Fix this by resetting audit_mark->path to NULL pointer on error path from fsnotify_add_inode_mark().
Cc: stable@vger.kernel.org Fixes: 7b1293234084d ("fsnotify: Add group pointer in fsnotify_init_mark()") Signed-off-by: Gaosheng Cui cuigaosheng1@huawei.com Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Paul Moore paul@paul-moore.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- kernel/audit_fsnotify.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c index fba78047fb37..57404292c6d1 100644 --- a/kernel/audit_fsnotify.c +++ b/kernel/audit_fsnotify.c @@ -111,6 +111,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
ret = fsnotify_add_inode_mark(&audit_mark->mark, inode, true); if (ret < 0) { + audit_mark->path = NULL; fsnotify_put_mark(&audit_mark->mark); audit_mark = ERR_PTR(ret); }
From: Xin Xiong xiongx18@fudan.edu.cn
stable inclusion from stable-v4.19.257 commit 0769491a8acd3e85ca4c3f65080eac2c824262df category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 9c9cb23e00ddf45679b21b4dacc11d1ae7961ebe ]
The issue happens on an error path in __xfrm_policy_check(). When the fetching process of the object `pols[1]` fails, the function simply returns 0, forgetting to decrement the reference count of `pols[0]`, which is incremented earlier by either xfrm_sk_policy_lookup() or xfrm_policy_lookup(). This may result in memory leaks.
Fix it by decreasing the reference count of `pols[0]` in that path.
Fixes: 134b0fc544ba ("IPsec: propagate security module errors up from flow_cache_lookup") Signed-off-by: Xin Xiong xiongx18@fudan.edu.cn Signed-off-by: Xin Tan tanxin.ctf@gmail.com Signed-off-by: Steffen Klassert steffen.klassert@secunet.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/xfrm/xfrm_policy.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 408e3f34d16c..e02421ca9609 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2409,6 +2409,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (pols[1]) { if (IS_ERR(pols[1])) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); + xfrm_pol_put(pols[0]); return 0; } pols[1]->curlft.use_time = ktime_get_real_seconds();
From: Jonathan Toppins jtoppins@redhat.com
stable inclusion from stable-v4.19.257 commit d60e70a898cbee907c686c75fb06b680ec37b94e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit d745b5062ad2b5da90a5e728d7ca884fc07315fd ]
This is caused by the global variable ad_ticks_per_sec being zero as demonstrated by the reproducer script discussed below. This causes all timer values in __ad_timer_to_ticks to be zero, resulting in the periodic timer to never fire.
To reproduce: Run the script in `tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh` which puts bonding into a state where it never transmits LACPDUs.
line 44: ip link add fbond type bond mode 4 miimon 200 \ xmit_hash_policy 1 ad_actor_sys_prio 65535 lacp_rate fast setting bond param: ad_actor_sys_prio given: params.ad_actor_system = 0 call stack: bond_option_ad_actor_sys_prio() -> bond_3ad_update_ad_actor_settings() -> set ad.system.sys_priority = bond->params.ad_actor_sys_prio -> ad.system.sys_mac_addr = bond->dev->dev_addr; because params.ad_actor_system == 0 results: ad.system.sys_mac_addr = bond->dev->dev_addr
line 48: ip link set fbond address 52:54:00:3B:7C:A6 setting bond MAC addr call stack: bond->dev->dev_addr = new_mac
line 52: ip link set fbond type bond ad_actor_sys_prio 65535 setting bond param: ad_actor_sys_prio given: params.ad_actor_system = 0 call stack: bond_option_ad_actor_sys_prio() -> bond_3ad_update_ad_actor_settings() -> set ad.system.sys_priority = bond->params.ad_actor_sys_prio -> ad.system.sys_mac_addr = bond->dev->dev_addr; because params.ad_actor_system == 0 results: ad.system.sys_mac_addr = bond->dev->dev_addr
line 60: ip link set veth1-bond down master fbond given: params.ad_actor_system = 0 params.mode = BOND_MODE_8023AD ad.system.sys_mac_addr == bond->dev->dev_addr call stack: bond_enslave -> bond_3ad_initialize(); because first slave -> if ad.system.sys_mac_addr != bond->dev->dev_addr return results: Nothing is run in bond_3ad_initialize() because dev_addr equals sys_mac_addr leaving the global ad_ticks_per_sec zero as it is never initialized anywhere else.
The if check around the contents of bond_3ad_initialize() is no longer needed due to commit 5ee14e6d336f ("bonding: 3ad: apply ad_actor settings changes immediately") which sets ad.system.sys_mac_addr if any one of the bonding parameters whos set function calls bond_3ad_update_ad_actor_settings(). This is because if ad.system.sys_mac_addr is zero it will be set to the current bond mac address, this causes the if check to never be true.
Fixes: 5ee14e6d336f ("bonding: 3ad: apply ad_actor settings changes immediately") Signed-off-by: Jonathan Toppins jtoppins@redhat.com Acked-by: Jay Vosburgh jay.vosburgh@canonical.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/net/bonding/bond_3ad.c | 38 ++++++++++++++-------------------- 1 file changed, 16 insertions(+), 22 deletions(-)
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index b3eaef31b767..a6bb7e915f74 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1977,30 +1977,24 @@ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) */ void bond_3ad_initialize(struct bonding *bond, u16 tick_resolution) { - /* check that the bond is not initialized yet */ - if (!MAC_ADDRESS_EQUAL(&(BOND_AD_INFO(bond).system.sys_mac_addr), - bond->dev->dev_addr)) { - - BOND_AD_INFO(bond).aggregator_identifier = 0; - - BOND_AD_INFO(bond).system.sys_priority = - bond->params.ad_actor_sys_prio; - if (is_zero_ether_addr(bond->params.ad_actor_system)) - BOND_AD_INFO(bond).system.sys_mac_addr = - *((struct mac_addr *)bond->dev->dev_addr); - else - BOND_AD_INFO(bond).system.sys_mac_addr = - *((struct mac_addr *)bond->params.ad_actor_system); + BOND_AD_INFO(bond).aggregator_identifier = 0; + BOND_AD_INFO(bond).system.sys_priority = + bond->params.ad_actor_sys_prio; + if (is_zero_ether_addr(bond->params.ad_actor_system)) + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->dev->dev_addr); + else + BOND_AD_INFO(bond).system.sys_mac_addr = + *((struct mac_addr *)bond->params.ad_actor_system);
- /* initialize how many times this module is called in one - * second (should be about every 100ms) - */ - ad_ticks_per_sec = tick_resolution; + /* initialize how many times this module is called in one + * second (should be about every 100ms) + */ + ad_ticks_per_sec = tick_resolution;
- bond_3ad_initiate_agg_selection(bond, - AD_AGGREGATOR_SELECTION_TIMER * - ad_ticks_per_sec); - } + bond_3ad_initiate_agg_selection(bond, + AD_AGGREGATOR_SELECTION_TIMER * + ad_ticks_per_sec); }
/**
From: Maciej Żenczykowski maze@google.com
stable inclusion from stable-v4.19.257 commit 4b07805e34a6b2e27cc07d6f0d0c228d46130cb1 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 4b2e3a17e9f279325712b79fb01d1493f9e3e005 ]
Looks to have been left out in an oversight.
Cc: Mahesh Bandewar maheshb@google.com Cc: Sainath Grandhi sainath.grandhi@intel.com Fixes: 235a9d89da97 ('ipvtap: IP-VLAN based tap driver') Signed-off-by: Maciej Żenczykowski maze@google.com Link: https://lore.kernel.org/r/20220821130808.12143-1-zenczykowski@gmail.com Signed-off-by: Paolo Abeni pabeni@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/net/ipvlan/ipvtap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c index 0bcc07f346c3..2e517e30c5ac 100644 --- a/drivers/net/ipvlan/ipvtap.c +++ b/drivers/net/ipvlan/ipvtap.c @@ -193,7 +193,7 @@ static struct notifier_block ipvtap_notifier_block __read_mostly = { .notifier_call = ipvtap_device_event, };
-static int ipvtap_init(void) +static int __init ipvtap_init(void) { int err;
@@ -227,7 +227,7 @@ static int ipvtap_init(void) } module_init(ipvtap_init);
-static void ipvtap_exit(void) +static void __exit ipvtap_exit(void) { rtnl_link_unregister(&ipvtap_link_ops); unregister_netdevice_notifier(&ipvtap_notifier_block);
From: Kuniyuki Iwashima kuniyu@amazon.com
stable inclusion from stable-v4.19.257 commit d6be3137d5b9b5e9c6828d4d7eb7d73141895c7f category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit bf955b5ab8f6f7b0632cdef8e36b14e4f6e77829 ]
While reading weight_p, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader.
Also, dev_[rt]x_weight can be read/written at the same time. So, we need to use READ_ONCE() and WRITE_ONCE() for its access. Moreover, to use the same weight_p while changing dev_[rt]x_weight, we add a mutex in proc_do_dev_weight().
Fixes: 3d48b53fb2ae ("net: dev_weight: TX/RX orthogonality") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/core/dev.c | 2 +- net/core/sysctl_net_core.c | 15 +++++++++------ net/sched/sch_generic.c | 2 +- 3 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c index 078e8a231616..f7cc5430745e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5849,7 +5849,7 @@ static int process_backlog(struct napi_struct *napi, int quota) net_rps_action_and_irq_enable(sd); }
- napi->weight = dev_rx_weight; + napi->weight = READ_ONCE(dev_rx_weight); while (again) { struct sk_buff *skb;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index aa254c609ca9..79164fc37359 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -231,14 +231,17 @@ static int set_default_qdisc(struct ctl_table *table, int write, static int proc_do_dev_weight(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret; + static DEFINE_MUTEX(dev_weight_mutex); + int ret, weight;
+ mutex_lock(&dev_weight_mutex); ret = proc_dointvec(table, write, buffer, lenp, ppos); - if (ret != 0) - return ret; - - dev_rx_weight = weight_p * dev_weight_rx_bias; - dev_tx_weight = weight_p * dev_weight_tx_bias; + if (!ret && write) { + weight = READ_ONCE(weight_p); + WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias); + WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias); + } + mutex_unlock(&dev_weight_mutex);
return ret; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ef41320a9e96..64ae84bea587 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -397,7 +397,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q) { - int quota = dev_tx_weight; + int quota = READ_ONCE(dev_tx_weight); int packets;
while (qdisc_restart(q, &packets)) {
From: Kuniyuki Iwashima kuniyu@amazon.com
stable inclusion from stable-v4.19.257 commit 8121bda0093a1e4ad517d93a5707e2a5615c6f99 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 61adf447e38664447526698872e21c04623afb8e ]
While reading netdev_tstamp_prequeue, it can be changed concurrently. Thus, we need to add READ_ONCE() to its readers.
Fixes: 3b098e2d7c69 ("net: Consistent skb timestamping") Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c index f7cc5430745e..c7a21172c482 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4473,7 +4473,7 @@ static int netif_rx_internal(struct sk_buff *skb) { int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb); + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_rx(skb);
@@ -4793,7 +4793,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, int ret = NET_RX_DROP; __be16 type;
- net_timestamp_check(!netdev_tstamp_prequeue, skb); + net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5145,7 +5145,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb) { int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb); + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; @@ -5175,7 +5175,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist); list_for_each_entry_safe(skb, next, head, list) { - net_timestamp_check(netdev_tstamp_prequeue, skb); + net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb); skb_list_del_init(skb); if (!skb_defer_rx_timestamp(skb)) list_add_tail(&skb->list, &sublist);
From: Kuniyuki Iwashima kuniyu@amazon.com
stable inclusion from stable-v4.19.257 commit 0b6dccd3077ad91ba6fd368fd77cb6b792faa1ac category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 6bae8ceb90ba76cdba39496db936164fa672b9be ]
While reading rs->interval and rs->burst, they can be changed concurrently via sysctl (e.g. net_ratelimit_state). Thus, we need to add READ_ONCE() to their readers.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- lib/ratelimit.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/lib/ratelimit.c b/lib/ratelimit.c index d01f47135239..b805702de84d 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -27,10 +27,16 @@ */ int ___ratelimit(struct ratelimit_state *rs, const char *func) { + /* Paired with WRITE_ONCE() in .proc_handler(). + * Changing two values seperately could be inconsistent + * and some message could be lost. (See: net_ratelimit_state). + */ + int interval = READ_ONCE(rs->interval); + int burst = READ_ONCE(rs->burst); unsigned long flags; int ret;
- if (!rs->interval) + if (!interval) return 1;
/* @@ -45,7 +51,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) if (!rs->begin) rs->begin = jiffies;
- if (time_is_before_jiffies(rs->begin + rs->interval)) { + if (time_is_before_jiffies(rs->begin + interval)) { if (rs->missed) { if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) { printk_deferred(KERN_WARNING @@ -57,7 +63,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) rs->begin = jiffies; rs->printed = 0; } - if (rs->burst && rs->burst > rs->printed) { + if (burst && burst > rs->printed) { rs->printed++; ret = 1; } else {
From: Kuniyuki Iwashima kuniyu@amazon.com
stable inclusion from stable-v4.19.257 commit 04ec1e942cff42fecdde5ee30f42fc28b822379b category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit d2154b0afa73c0159b2856f875c6b4fe7cf6a95e ]
While reading sysctl_tstamp_allow_data, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader.
Fixes: b245be1f4db1 ("net-timestamp: no-payload only sysctl") Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c623c129d0ab..e0be1f8651bb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4377,7 +4377,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) { bool ret;
- if (likely(sysctl_tstamp_allow_data || tsonly)) + if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly)) return true;
read_lock_bh(&sk->sk_callback_lock);
From: Kuniyuki Iwashima kuniyu@amazon.com
stable inclusion from stable-v4.19.257 commit da89cab514b3ec7fdedb378617160c47fe3b60a9 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit c42b7cddea47503411bfb5f2f93a4154aaffa2d9 ]
While reading sysctl_net_busy_poll, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader.
Fixes: 060212928670 ("net: add low latency socket poll") Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- include/net/busy_poll.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index c76a5e9894da..8f42f6f3af86 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -43,7 +43,7 @@ extern unsigned int sysctl_net_busy_poll __read_mostly;
static inline bool net_busy_loop_on(void) { - return sysctl_net_busy_poll; + return READ_ONCE(sysctl_net_busy_poll); }
static inline bool sk_can_busy_loop(const struct sock *sk)
From: Kuniyuki Iwashima kuniyu@amazon.com
stable inclusion from stable-v4.19.257 commit d908fd060927cf23bae5b2975140c7ed81bd4013 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit e59ef36f0795696ab229569c153936bfd068d21c ]
While reading sysctl_net_busy_read, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader.
Fixes: 2d48d67fa8cd ("net: poll/select low latency socket support") Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/core/sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/sock.c b/net/core/sock.c index 246abac5e8d2..6e891dbb08e5 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2878,7 +2878,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
#ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; - sk->sk_ll_usec = sysctl_net_busy_read; + sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read); #endif
sk->sk_max_pacing_rate = ~0U;
From: Kuniyuki Iwashima kuniyu@amazon.com
stable inclusion from stable-v4.19.257 commit c3d0b6b3c808ff7d5439fbe6a41108bbb000ab26 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 2e0c42374ee32e72948559d2ae2f7ba3dc6b977c ]
While reading netdev_budget, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader.
Fixes: 51b0bdedb8e7 ("[NET]: Separate two usages of netdev_max_backlog.") Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c index c7a21172c482..b4b84d55bdfa 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6327,7 +6327,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + usecs_to_jiffies(netdev_budget_usecs); - int budget = netdev_budget; + int budget = READ_ONCE(netdev_budget); LIST_HEAD(list); LIST_HEAD(repoll);
From: Kuniyuki Iwashima kuniyu@amazon.com
stable inclusion from stable-v4.19.257 commit 2a341830849e1ced57e40455bcaa6fc9b79662d5 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit fa45d484c52c73f79db2c23b0cdfc6c6455093ad ]
While reading netdev_budget_usecs, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader.
Fixes: 7acf8a1e8a28 ("Replace 2 jiffies with sysctl netdev_budget_usecs to enable softirq tuning") Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/dev.c b/net/core/dev.c index b4b84d55bdfa..09b5b73ecdd8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6326,7 +6326,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); unsigned long time_limit = jiffies + - usecs_to_jiffies(netdev_budget_usecs); + usecs_to_jiffies(READ_ONCE(netdev_budget_usecs)); int budget = READ_ONCE(netdev_budget); LIST_HEAD(list); LIST_HEAD(repoll);
From: Kuniyuki Iwashima kuniyu@amazon.com
stable inclusion from stable-v4.19.257 commit 1e4142b95269a394229c89d132f8e226fb1b4f71 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 3c9ba81d72047f2e81bb535d42856517b613aba7 ]
While reading sysctl_somaxconn, it can be changed concurrently. Thus, we need to add READ_ONCE() to its reader.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Kuniyuki Iwashima kuniyu@amazon.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/socket.c b/net/socket.c index bf943e6d47fe..553045441a2c 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1630,7 +1630,7 @@ int __sys_listen(int fd, int backlog)
sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; + somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn); if ((unsigned int)backlog > somaxconn) backlog = somaxconn;
From: Siddh Raman Pant code@siddh.me
stable inclusion from stable-v4.19.257 commit 2035c770bfdbcc82bd52e05871a7c82db9529e0f category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
commit c490a0b5a4f36da3918181a8acdc6991d967c5f3 upstream.
The userspace can configure a loop using an ioctl call, wherein a configuration of type loop_config is passed (see lo_ioctl()'s case on line 1550 of drivers/block/loop.c). This proceeds to call loop_configure() which in turn calls loop_set_status_from_info() (see line 1050 of loop.c), passing &config->info which is of type loop_info64*. This function then sets the appropriate values, like the offset.
loop_device has lo_offset of type loff_t (see line 52 of loop.c), which is typdef-chained to long long, whereas loop_info64 has lo_offset of type __u64 (see line 56 of include/uapi/linux/loop.h).
The function directly copies offset from info to the device as follows (See line 980 of loop.c): lo->lo_offset = info->lo_offset;
This results in an overflow, which triggers a warning in iomap_iter() due to a call to iomap_iter_done() which has: WARN_ON_ONCE(iter->iomap.offset > iter->pos);
Thus, check for negative value during loop_set_status_from_info().
Bug report: https://syzkaller.appspot.com/bug?id=c620fe14aac810396d3c3edc9ad73848bf69a29...
Reported-and-tested-by: syzbot+a8e049cd3abd342936b6@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Reviewed-by: Matthew Wilcox (Oracle) willy@infradead.org Signed-off-by: Siddh Raman Pant code@siddh.me Reviewed-by: Christoph Hellwig hch@lst.de Link: https://lore.kernel.org/r/20220823160810.181275-1-code@siddh.me Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- drivers/block/loop.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 94582da07a98..108a4ff27bcd 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1354,6 +1354,11 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) info->lo_number = lo->lo_number; info->lo_offset = lo->lo_offset; info->lo_sizelimit = lo->lo_sizelimit; + + /* loff_t vars have been assigned __u64 */ + if (lo->lo_offset < 0 || lo->lo_sizelimit < 0) + return -EOVERFLOW; + info->lo_flags = lo->lo_flags; memcpy(info->lo_file_name, lo->lo_file_name, LO_NAME_SIZE); memcpy(info->lo_crypt_name, lo->lo_crypt_name, LO_NAME_SIZE);
From: Quanyang Wang quanyang.wang@windriver.com
stable inclusion from stable-v4.19.257 commit b5cc57e43f2e5c51ef6ee6d45d97ff7844e9d4ba category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
commit 0c7d7cc2b4fe2e74ef8728f030f0f1674f9f6aee upstream.
There are two problems with the current code of memory_intersects:
First, it doesn't check whether the region (begin, end) falls inside the region (virt, vend), that is (virt < begin && vend > end).
The second problem is if vend is equal to begin, it will return true but this is wrong since vend (virt + size) is not the last address of the memory region but (virt + size -1) is. The wrong determination will trigger the misreporting when the function check_for_illegal_area calls memory_intersects to check if the dma region intersects with stext region.
The misreporting is as below (stext is at 0x80100000): WARNING: CPU: 0 PID: 77 at kernel/dma/debug.c:1073 check_for_illegal_area+0x130/0x168 DMA-API: chipidea-usb2 e0002000.usb: device driver maps memory from kernel text or rodata [addr=800f0000] [len=65536] Modules linked in: CPU: 1 PID: 77 Comm: usb-storage Not tainted 5.19.0-yocto-standard #5 Hardware name: Xilinx Zynq Platform unwind_backtrace from show_stack+0x18/0x1c show_stack from dump_stack_lvl+0x58/0x70 dump_stack_lvl from __warn+0xb0/0x198 __warn from warn_slowpath_fmt+0x80/0xb4 warn_slowpath_fmt from check_for_illegal_area+0x130/0x168 check_for_illegal_area from debug_dma_map_sg+0x94/0x368 debug_dma_map_sg from __dma_map_sg_attrs+0x114/0x128 __dma_map_sg_attrs from dma_map_sg_attrs+0x18/0x24 dma_map_sg_attrs from usb_hcd_map_urb_for_dma+0x250/0x3b4 usb_hcd_map_urb_for_dma from usb_hcd_submit_urb+0x194/0x214 usb_hcd_submit_urb from usb_sg_wait+0xa4/0x118 usb_sg_wait from usb_stor_bulk_transfer_sglist+0xa0/0xec usb_stor_bulk_transfer_sglist from usb_stor_bulk_srb+0x38/0x70 usb_stor_bulk_srb from usb_stor_Bulk_transport+0x150/0x360 usb_stor_Bulk_transport from usb_stor_invoke_transport+0x38/0x440 usb_stor_invoke_transport from usb_stor_control_thread+0x1e0/0x238 usb_stor_control_thread from kthread+0xf8/0x104 kthread from ret_from_fork+0x14/0x2c
Refactor memory_intersects to fix the two problems above.
Before the 1d7db834a027e ("dma-debug: use memory_intersects() directly"), memory_intersects is called only by printk_late_init:
printk_late_init -> init_section_intersects ->memory_intersects.
There were few places where memory_intersects was called.
When commit 1d7db834a027e ("dma-debug: use memory_intersects() directly") was merged and CONFIG_DMA_API_DEBUG is enabled, the DMA subsystem uses it to check for an illegal area and the calltrace above is triggered.
[akpm@linux-foundation.org: fix nearby comment typo] Link: https://lkml.kernel.org/r/20220819081145.948016-1-quanyang.wang@windriver.co... Fixes: 979559362516 ("asm/sections: add helpers to check for section data") Signed-off-by: Quanyang Wang quanyang.wang@windriver.com Cc: Ard Biesheuvel ardb@kernel.org Cc: Arnd Bergmann arnd@arndb.de Cc: Thierry Reding treding@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- include/asm-generic/sections.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 849cd8eb5ca0..53f5c46debe9 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -97,7 +97,7 @@ static inline bool memory_contains(void *begin, void *end, void *virt, /** * memory_intersects - checks if the region occupied by an object intersects * with another memory region - * @begin: virtual address of the beginning of the memory regien + * @begin: virtual address of the beginning of the memory region * @end: virtual address of the end of the memory region * @virt: virtual address of the memory object * @size: size of the memory object @@ -110,7 +110,10 @@ static inline bool memory_intersects(void *begin, void *end, void *virt, { void *vend = virt + size;
- return (virt >= begin && virt < end) || (vend >= begin && vend < end); + if (virt < end && vend > begin) + return true; + + return false; }
/**
From: David Hildenbrand david@redhat.com
stable inclusion from stable-v4.19.257 commit b18376f359baff4fa5132d3accd1a05cd8d526e4 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
commit f96f7a40874d7c746680c0b9f57cef2262ae551f upstream.
Patch series "mm/hugetlb: fix write-fault handling for shared mappings", v2.
I observed that hugetlb does not support/expect write-faults in shared mappings that would have to map the R/O-mapped page writable -- and I found two case where we could currently get such faults and would erroneously map an anon page into a shared mapping.
Reproducers part of the patches.
I propose to backport both fixes to stable trees. The first fix needs a small adjustment.
This patch (of 2):
Staring at hugetlb_wp(), one might wonder where all the logic for shared mappings is when stumbling over a write-protected page in a shared mapping. In fact, there is none, and so far we thought we could get away with that because e.g., mprotect() should always do the right thing and map all pages directly writable.
Looks like we were wrong:
-------------------------------------------------------------------------- #include <stdio.h> #include <stdlib.h> #include <string.h> #include <fcntl.h> #include <unistd.h> #include <errno.h> #include <sys/mman.h>
#define HUGETLB_SIZE (2 * 1024 * 1024u)
static void clear_softdirty(void) { int fd = open("/proc/self/clear_refs", O_WRONLY); const char *ctrl = "4"; int ret;
if (fd < 0) { fprintf(stderr, "open(clear_refs) failed\n"); exit(1); } ret = write(fd, ctrl, strlen(ctrl)); if (ret != strlen(ctrl)) { fprintf(stderr, "write(clear_refs) failed\n"); exit(1); } close(fd); }
int main(int argc, char **argv) { char *map; int fd;
fd = open("/dev/hugepages/tmp", O_RDWR | O_CREAT); if (!fd) { fprintf(stderr, "open() failed\n"); return -errno; } if (ftruncate(fd, HUGETLB_SIZE)) { fprintf(stderr, "ftruncate() failed\n"); return -errno; }
map = mmap(NULL, HUGETLB_SIZE, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (map == MAP_FAILED) { fprintf(stderr, "mmap() failed\n"); return -errno; }
*map = 0;
if (mprotect(map, HUGETLB_SIZE, PROT_READ)) { fprintf(stderr, "mmprotect() failed\n"); return -errno; }
clear_softdirty();
if (mprotect(map, HUGETLB_SIZE, PROT_READ|PROT_WRITE)) { fprintf(stderr, "mmprotect() failed\n"); return -errno; }
*map = 0;
return 0; } --------------------------------------------------------------------------
Above test fails with SIGBUS when there is only a single free hugetlb page. # echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages # ./test Bus error (core dumped)
And worse, with sufficient free hugetlb pages it will map an anonymous page into a shared mapping, for example, messing up accounting during unmap and breaking MAP_SHARED semantics: # echo 2 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages # ./test # cat /proc/meminfo | grep HugePages_ HugePages_Total: 2 HugePages_Free: 1 HugePages_Rsvd: 18446744073709551615 HugePages_Surp: 0
Reason in this particular case is that vma_wants_writenotify() will return "true", removing VM_SHARED in vma_set_page_prot() to map pages write-protected. Let's teach vma_wants_writenotify() that hugetlb does not support softdirty tracking.
Link: https://lkml.kernel.org/r/20220811103435.188481-1-david@redhat.com Link: https://lkml.kernel.org/r/20220811103435.188481-2-david@redhat.com Fixes: 64e455079e1b ("mm: softdirty: enable write notifications on VMAs after VM_SOFTDIRTY cleared") Signed-off-by: David Hildenbrand david@redhat.com Reviewed-by: Mike Kravetz mike.kravetz@oracle.com Cc: Peter Feiner pfeiner@google.com Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Cyrill Gorcunov gorcunov@openvz.org Cc: Pavel Emelyanov xemul@parallels.com Cc: Jamie Liu jamieliu@google.com Cc: Hugh Dickins hughd@google.com Cc: Naoya Horiguchi n-horiguchi@ah.jp.nec.com Cc: Bjorn Helgaas bhelgaas@google.com Cc: Muchun Song songmuchun@bytedance.com Cc: Peter Xu peterx@redhat.com Cc: stable@vger.kernel.org [3.18+] Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: David Hildenbrand david@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- mm/mmap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c index 3bf113a24599..8b5f3e45613e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1924,8 +1924,12 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot) pgprot_val(vm_pgprot_modify(vm_page_prot, vm_flags))) return 0;
- /* Do we need to track softdirty? */ - if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY)) + /* + * Do we need to track softdirty? hugetlb does not support softdirty + * tracking yet. + */ + if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && !(vm_flags & VM_SOFTDIRTY) && + !is_vm_hugetlb_page(vma)) return 1;
/* Specialty mapping? */
From: "Denis V. Lunev" den@openvz.org
stable inclusion from stable-v4.19.257 commit 05fdce1ae744dee43c9181fd063c9c0db4f777f2 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 66ba215cb51323e4e55e38fd5f250e0fae0cbc94 ]
Normal processing of ARP request (usually this is Ethernet broadcast packet) coming to the host is looking like the following: * the packet comes to arp_process() call and is passed through routing procedure * the request is put into the queue using pneigh_enqueue() if corresponding ARP record is not local (common case for container records on the host) * the request is processed by timer (within 80 jiffies by default) and ARP reply is sent from the same arp_process() using NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED condition (flag is set inside pneigh_enqueue())
And here the problem comes. Linux kernel calls pneigh_queue_purge() which destroys the whole queue of ARP requests on ANY network interface start/stop event through __neigh_ifdown().
This is actually not a problem within the original world as network interface start/stop was accessible to the host 'root' only, which could do more destructive things. But the world is changed and there are Linux containers available. Here container 'root' has an access to this API and could be considered as untrusted user in the hosting (container's) world.
Thus there is an attack vector to other containers on node when container's root will endlessly start/stop interfaces. We have observed similar situation on a real production node when docker container was doing such activity and thus other containers on the node become not accessible.
The patch proposed doing very simple thing. It drops only packets from the same namespace in the pneigh_queue_purge() where network interface state change is detected. This is enough to prevent the problem for the whole node preserving original semantics of the code.
v2: - do del_timer_sync() if queue is empty after pneigh_queue_purge() v3: - rebase to net tree
Cc: "David S. Miller" davem@davemloft.net Cc: Eric Dumazet edumazet@google.com Cc: Jakub Kicinski kuba@kernel.org Cc: Paolo Abeni pabeni@redhat.com Cc: Daniel Borkmann daniel@iogearbox.net Cc: David Ahern dsahern@kernel.org Cc: Yajun Deng yajun.deng@linux.dev Cc: Roopa Prabhu roopa@nvidia.com Cc: Christian Brauner brauner@kernel.org Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Alexey Kuznetsov kuznet@ms2.inr.ac.ru Cc: Alexander Mikhalitsyn alexander.mikhalitsyn@virtuozzo.com Cc: Konstantin Khorenko khorenko@virtuozzo.com Cc: kernel@openvz.org Cc: devel@openvz.org Investigated-by: Alexander Mikhalitsyn alexander.mikhalitsyn@virtuozzo.com Signed-off-by: Denis V. Lunev den@openvz.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/core/neighbour.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 6233e9856016..65e80aaa0948 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -224,14 +224,23 @@ static int neigh_del_timer(struct neighbour *n) return 0; }
-static void pneigh_queue_purge(struct sk_buff_head *list) +static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) { + unsigned long flags; struct sk_buff *skb;
- while ((skb = skb_dequeue(list)) != NULL) { - dev_put(skb->dev); - kfree_skb(skb); + spin_lock_irqsave(&list->lock, flags); + skb = skb_peek(list); + while (skb != NULL) { + struct sk_buff *skb_next = skb_peek_next(skb, list); + if (net == NULL || net_eq(dev_net(skb->dev), net)) { + __skb_unlink(skb, list); + dev_put(skb->dev); + kfree_skb(skb); + } + skb = skb_next; } + spin_unlock_irqrestore(&list->lock, flags); }
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) @@ -297,9 +306,9 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) write_lock_bh(&tbl->lock); neigh_flush_dev(tbl, dev); pneigh_ifdown_and_unlock(tbl, dev); - - del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue); + pneigh_queue_purge(&tbl->proxy_queue, dev_net(dev)); + if (skb_queue_empty_lockless(&tbl->proxy_queue)) + del_timer_sync(&tbl->proxy_timer); return 0; } EXPORT_SYMBOL(neigh_ifdown); @@ -1614,7 +1623,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl) /* It is not clean... Fix it to unload IPv6 module safely */ cancel_delayed_work_sync(&tbl->gc_work); del_timer_sync(&tbl->proxy_timer); - pneigh_queue_purge(&tbl->proxy_queue); + pneigh_queue_purge(&tbl->proxy_queue, NULL); neigh_ifdown(tbl, NULL); if (atomic_read(&tbl->entries)) pr_crit("neighbour leakage\n");
From: Yang Yingliang yangyingliang@huawei.com
stable inclusion from stable-v4.19.257 commit 509e1b324aad95019ff14b8eb88fdfa500d9a489 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
commit d5485d9dd24e1d04e5509916515260186eb1455c upstream.
It is not allowed to call kfree_skb() from hardware interrupt context or with interrupts being disabled. So add all skb to a tmp list, then free them after spin_unlock_irqrestore() at once.
Fixes: 66ba215cb513 ("neigh: fix possible DoS due to net iface start/stop loop") Suggested-by: Denis V. Lunev den@openvz.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Nikolay Aleksandrov razor@blackwall.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/core/neighbour.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 65e80aaa0948..73042407eb5b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -226,21 +226,27 @@ static int neigh_del_timer(struct neighbour *n)
static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net) { + struct sk_buff_head tmp; unsigned long flags; struct sk_buff *skb;
+ skb_queue_head_init(&tmp); spin_lock_irqsave(&list->lock, flags); skb = skb_peek(list); while (skb != NULL) { struct sk_buff *skb_next = skb_peek_next(skb, list); if (net == NULL || net_eq(dev_net(skb->dev), net)) { __skb_unlink(skb, list); - dev_put(skb->dev); - kfree_skb(skb); + __skb_queue_tail(&tmp, skb); } skb = skb_next; } spin_unlock_irqrestore(&list->lock, flags); + + while ((skb = __skb_dequeue(&tmp))) { + dev_put(skb->dev); + kfree_skb(skb); + } }
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
From: Yee Lee yee.lee@mediatek.com
stable inclusion from stable-v4.19.258 commit aabdbd63c46ff6fa6661dd9d3aaaebb62141db90 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
This reverts commit 23c2d497de21f25898fbea70aeb292ab8acc8c94.
Commit 23c2d497de21 ("mm: kmemleak: take a full lowmem check in kmemleak_*_phys()") brought false leak alarms on some archs like arm64 that does not init pfn boundary in early booting. The final solution lands on linux-6.0: commit 0c24e061196c ("mm: kmemleak: add rbtree and store physical address for objects allocated with PA").
Revert this commit before linux-6.0. The original issue of invalid PA can be mitigated by additional check in devicetree.
The false alarm report is as following: Kmemleak output: (Qemu/arm64) unreferenced object 0xffff0000c0170a00 (size 128): comm "swapper/0", pid 1, jiffies 4294892404 (age 126.208s) hex dump (first 32 bytes): 62 61 73 65 00 00 00 00 00 00 00 00 00 00 00 00 base............ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<(____ptrval____)>] __kmalloc_track_caller+0x1b0/0x2e4 [<(____ptrval____)>] kstrdup_const+0x8c/0xc4 [<(____ptrval____)>] kvasprintf_const+0xbc/0xec [<(____ptrval____)>] kobject_set_name_vargs+0x58/0xe4 [<(____ptrval____)>] kobject_add+0x84/0x100 [<(____ptrval____)>] __of_attach_node_sysfs+0x78/0xec [<(____ptrval____)>] of_core_init+0x68/0x104 [<(____ptrval____)>] driver_init+0x28/0x48 [<(____ptrval____)>] do_basic_setup+0x14/0x28 [<(____ptrval____)>] kernel_init_freeable+0x110/0x178 [<(____ptrval____)>] kernel_init+0x20/0x1a0 [<(____ptrval____)>] ret_from_fork+0x10/0x20
This pacth is also applicable to linux-5.17.y/linux-5.18.y/linux-5.19.y
Cc: stable@vger.kernel.org Signed-off-by: Yee Lee yee.lee@mediatek.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- mm/kmemleak.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 639acbb91fd5..f54734abf946 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1196,7 +1196,7 @@ EXPORT_SYMBOL(kmemleak_no_scan); void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count, gfp_t gfp) { - if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) + if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) kmemleak_alloc(__va(phys), size, min_count, gfp); } EXPORT_SYMBOL(kmemleak_alloc_phys); @@ -1210,7 +1210,7 @@ EXPORT_SYMBOL(kmemleak_alloc_phys); */ void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size) { - if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) + if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) kmemleak_free_part(__va(phys), size); } EXPORT_SYMBOL(kmemleak_free_part_phys); @@ -1222,7 +1222,7 @@ EXPORT_SYMBOL(kmemleak_free_part_phys); */ void __ref kmemleak_not_leak_phys(phys_addr_t phys) { - if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) + if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) kmemleak_not_leak(__va(phys)); } EXPORT_SYMBOL(kmemleak_not_leak_phys); @@ -1234,7 +1234,7 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys); */ void __ref kmemleak_ignore_phys(phys_addr_t phys) { - if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn) + if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn) kmemleak_ignore(__va(phys)); } EXPORT_SYMBOL(kmemleak_ignore_phys);
From: Eric Dumazet edumazet@google.com
stable inclusion from stable-v4.19.258 commit 12f99f07a5f4d7ec8d72da6ee8ef66f048634f6a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 8c70521238b7863c2af607e20bcba20f974c969b ]
challenge_timestamp can be read an written by concurrent threads.
This was expected, but we need to annotate the race to avoid potential issues.
Following patch moves challenge_timestamp and challenge_count to per-netns storage to provide better isolation.
Fixes: 354e4aa391ed ("tcp: RFC 5961 5.2 Blind Data Injection Attack Mitigation") Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: Eric Dumazet edumazet@google.com Acked-by: Neal Cardwell ncardwell@google.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/ipv4/tcp_input.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 921e09c7d716..bc66d65d8423 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3471,11 +3471,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
/* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; - if (now != challenge_timestamp) { + if (now != READ_ONCE(challenge_timestamp)) { u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit); u32 half = (ack_limit + 1) >> 1;
- challenge_timestamp = now; + WRITE_ONCE(challenge_timestamp, now); WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit)); } count = READ_ONCE(challenge_count);
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
stable inclusion from stable-v4.19.258 commit ebfb744bb6036485dac2a45c46e82b911c3726d1 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
commit dec9b2f1e0455a151a7293c367da22ab973f713e upstream.
There is a very common pattern of using debugfs_remove(debufs_lookup(..)) which results in a dentry leak of the dentry that was looked up. Instead of having to open-code the correct pattern of calling dput() on the dentry, create debugfs_lookup_and_remove() to handle this pattern automatically and properly without any memory leaks.
Cc: stable stable@kernel.org Reported-by: Kuyo Chang kuyo.chang@mediatek.com Tested-by: Kuyo Chang kuyo.chang@mediatek.com Link: https://lore.kernel.org/r/YxIaQ8cSinDR881k@kroah.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- fs/debugfs/inode.c | 22 ++++++++++++++++++++++ include/linux/debugfs.h | 6 ++++++ 2 files changed, 28 insertions(+)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 11664fdf5a3c..780d66d3463f 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -672,6 +672,28 @@ void debugfs_remove(struct dentry *dentry) } EXPORT_SYMBOL_GPL(debugfs_remove);
+/** + * debugfs_lookup_and_remove - lookup a directory or file and recursively remove it + * @name: a pointer to a string containing the name of the item to look up. + * @parent: a pointer to the parent dentry of the item. + * + * This is the equlivant of doing something like + * debugfs_remove(debugfs_lookup(..)) but with the proper reference counting + * handled for the directory being looked up. + */ +void debugfs_lookup_and_remove(const char *name, struct dentry *parent) +{ + struct dentry *dentry; + + dentry = debugfs_lookup(name, parent); + if (!dentry) + return; + + debugfs_remove(dentry); + dput(dentry); +} +EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove); + /** * debugfs_rename - rename a file/directory in the debugfs filesystem * @old_dir: a pointer to the parent dentry for the renamed object. This diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 5fdcfeff9308..6b268857cf5d 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -84,6 +84,8 @@ struct dentry *debugfs_create_automount(const char *name, void debugfs_remove(struct dentry *dentry); #define debugfs_remove_recursive debugfs_remove
+void debugfs_lookup_and_remove(const char *name, struct dentry *parent); + const struct file_operations *debugfs_real_fops(const struct file *filp);
int debugfs_file_get(struct dentry *dentry); @@ -216,6 +218,10 @@ static inline void debugfs_remove(struct dentry *dentry) static inline void debugfs_remove_recursive(struct dentry *dentry) { }
+static inline void debugfs_lookup_and_remove(const char *name, + struct dentry *parent) +{ } + const struct file_operations *debugfs_real_fops(const struct file *filp);
static inline int debugfs_file_get(struct dentry *dentry)
From: Harsh Modi harshmodi@google.com
stable inclusion from stable-v4.19.258 commit 89810dbbffff7efae8d83b39557a50a7dfc65b44 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit d047283a7034140ea5da759a494fd2274affdd46 ]
The IPv6 path already drops dst in the daddr changed case, but the IPv4 path does not. This change makes the two code paths consistent.
Further, it is possible that there is already a metadata_dst allocated from ingress that might already be attached to skbuff->dst while following the bridge path. If it is not released before setting a new metadata_dst, it will be leaked. This is similar to what is done in bpf_set_tunnel_key() or ip6_route_input().
It is important to note that the memory being leaked is not the dst being set in the bridge code, but rather memory allocated from some other code path that is not being freed correctly before the skb dst is overwritten.
An example of the leakage fixed by this commit found using kmemleak:
unreferenced object 0xffff888010112b00 (size 256): comm "softirq", pid 0, jiffies 4294762496 (age 32.012s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 80 16 f1 83 ff ff ff ff ................ e1 4e f6 82 ff ff ff ff 00 00 00 00 00 00 00 00 .N.............. backtrace: [<00000000d79567ea>] metadata_dst_alloc+0x1b/0xe0 [<00000000be113e13>] udp_tun_rx_dst+0x174/0x1f0 [<00000000a36848f4>] geneve_udp_encap_recv+0x350/0x7b0 [<00000000d4afb476>] udp_queue_rcv_one_skb+0x380/0x560 [<00000000ac064aea>] udp_unicast_rcv_skb+0x75/0x90 [<000000009a8ee8c5>] ip_protocol_deliver_rcu+0xd8/0x230 [<00000000ef4980bb>] ip_local_deliver_finish+0x7a/0xa0 [<00000000d7533c8c>] __netif_receive_skb_one_core+0x89/0xa0 [<00000000a879497d>] process_backlog+0x93/0x190 [<00000000e41ade9f>] __napi_poll+0x28/0x170 [<00000000b4c0906b>] net_rx_action+0x14f/0x2a0 [<00000000b20dd5d4>] __do_softirq+0xf4/0x305 [<000000003a7d7e15>] __irq_exit_rcu+0xc3/0x140 [<00000000968d39a2>] sysvec_apic_timer_interrupt+0x9e/0xc0 [<000000009e920794>] asm_sysvec_apic_timer_interrupt+0x16/0x20 [<000000008942add0>] native_safe_halt+0x13/0x20
Florian Westphal says: "Original code was likely fine because nothing ever did set a skb->dst entry earlier than bridge in those days."
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Harsh Modi harshmodi@google.com Acked-by: Florian Westphal fw@strlen.de Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/bridge/br_netfilter_hooks.c | 2 ++ net/bridge/br_netfilter_ipv6.c | 1 + 2 files changed, 3 insertions(+)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index fd5e3470560a..f1b9aaccc89f 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -385,6 +385,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. */ if (rt->dst.dev == dev) { + skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); goto bridged_dnat; } @@ -414,6 +415,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ kfree_skb(skb); return 0; } + skb_dst_drop(skb); skb_dst_set_noref(skb, &rt->dst); }
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 09d5e0c7b3ba..995d86777e7c 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -201,6 +201,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc kfree_skb(skb); return 0; } + skb_dst_drop(skb); skb_dst_set_noref(skb, &rt->dst); }
From: Neal Cardwell ncardwell@google.com
stable inclusion from stable-v4.19.258 commit 4d941fdf910bd6da3a017627ef9c08e9f3f17c06 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 686dc2db2a0fdc1d34b424ec2c0a735becd8d62b ]
Fix a bug reported and analyzed by Nagaraj Arankal, where the handling of a spurious non-SACK RTO could cause a connection to fail to clear retrans_stamp, causing a later RTO to very prematurely time out the connection with ETIMEDOUT.
Here is the buggy scenario, expanding upon Nagaraj Arankal's excellent report:
(*1) Send one data packet on a non-SACK connection
(*2) Because no ACK packet is received, the packet is retransmitted and we enter CA_Loss; but this retransmission is spurious.
(*3) The ACK for the original data is received. The transmitted packet is acknowledged. The TCP timestamp is before the retrans_stamp, so tcp_may_undo() returns true, and tcp_try_undo_loss() returns true without changing state to Open (because tcp_is_sack() is false), and tcp_process_loss() returns without calling tcp_try_undo_recovery(). Normally after undoing a CA_Loss episode, tcp_fastretrans_alert() would see that the connection has returned to CA_Open and fall through and call tcp_try_to_open(), which would set retrans_stamp to 0. However, for non-SACK connections we hold the connection in CA_Loss, so do not fall through to call tcp_try_to_open() and do not set retrans_stamp to 0. So retrans_stamp is (erroneously) still non-zero.
At this point the first "retransmission event" has passed and been recovered from. Any future retransmission is a completely new "event". However, retrans_stamp is erroneously still set. (And we are still in CA_Loss, which is correct.)
(*4) After 16 minutes (to correspond with tcp_retries2=15), a new data packet is sent. Note: No data is transmitted between (*3) and (*4) and we disabled keep alives.
The socket's timeout SHOULD be calculated from this point in time, but instead it's calculated from the prior "event" 16 minutes ago (step (*2)).
(*5) Because no ACK packet is received, the packet is retransmitted.
(*6) At the time of the 2nd retransmission, the socket returns ETIMEDOUT, prematurely, because retrans_stamp is (erroneously) too far in the past (set at the time of (*2)).
This commit fixes this bug by ensuring that we reuse in tcp_try_undo_loss() the same careful logic for non-SACK connections that we have in tcp_try_undo_recovery(). To avoid duplicating logic, we factor out that logic into a new tcp_is_non_sack_preventing_reopen() helper and call that helper from both undo functions.
Fixes: da34ac7626b5 ("tcp: only undo on partial ACKs in CA_Loss") Reported-by: Nagaraj Arankal nagaraj.p.arankal@hpe.com Link: https://lore.kernel.org/all/SJ0PR84MB1847BE6C24D274C46A1B9B0EB27A9@SJ0PR84MB... Signed-off-by: Neal Cardwell ncardwell@google.com Signed-off-by: Yuchung Cheng ycheng@google.com Reviewed-by: Eric Dumazet edumazet@google.com Link: https://lore.kernel.org/r/20220903121023.866900-1-ncardwell.kernel@gmail.com Signed-off-by: Paolo Abeni pabeni@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/ipv4/tcp_input.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bc66d65d8423..94123a220875 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2376,6 +2376,21 @@ static inline bool tcp_may_undo(const struct tcp_sock *tp) return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); }
+static bool tcp_is_non_sack_preventing_reopen(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { + /* Hold old state until something *above* high_seq + * is ACKed. For Reno it is MUST to prevent false + * fast retransmits (RFC2582). SACK TCP is safe. */ + if (!tcp_any_retrans_done(sk)) + tp->retrans_stamp = 0; + return true; + } + return false; +} + /* People celebrate: "We love our President!" */ static bool tcp_try_undo_recovery(struct sock *sk) { @@ -2398,14 +2413,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) } else if (tp->rack.reo_wnd_persist) { tp->rack.reo_wnd_persist--; } - if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { - /* Hold old state until something *above* high_seq - * is ACKed. For Reno it is MUST to prevent false - * fast retransmits (RFC2582). SACK TCP is safe. */ - if (!tcp_any_retrans_done(sk)) - tp->retrans_stamp = 0; + if (tcp_is_non_sack_preventing_reopen(sk)) return true; - } tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; return false; @@ -2441,6 +2450,8 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; + if (tcp_is_non_sack_preventing_reopen(sk)) + return true; if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0;
From: NeilBrown neilb@suse.de
stable inclusion from stable-v4.19.258 commit bcab4d551a3d15bdf8efaac7b61420219bd154b6 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
Prior to Linux 5.3, ->transport_lock in sunrpc required the _bh style spinlocks (when not called from a bottom-half handler).
When upstream 3848e96edf4788f772d83990022fa7023a233d83 was backported to stable kernels, the spin_lock/unlock calls should have been changed to the _bh version, but this wasn't noted in the patch and didn't happen.
So convert these lock/unlock calls to the _bh versions.
This patch is required for any stable kernel prior to 5.3 to which the above mentioned patch was backported. Namely 4.9.y, 4.14.y, 4.19.y.
Signed-off-by: NeilBrown neilb@suse.de Reported-by: Eugeniu Rosca erosca@de.adit-jv.com Reviewed-by: Eugeniu Rosca erosca@de.adit-jv.com Tested-by: Eugeniu Rosca erosca@de.adit-jv.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/sunrpc/xprt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index b2e7ed0f0e1b..c912bf20faa2 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1572,9 +1572,9 @@ static void xprt_destroy(struct rpc_xprt *xprt) * is cleared. We use ->transport_lock to ensure the mod_timer() * can only run *before* del_time_sync(), never after. */ - spin_lock(&xprt->transport_lock); + spin_lock_bh(&xprt->transport_lock); del_timer_sync(&xprt->timer); - spin_unlock(&xprt->transport_lock); + spin_unlock_bh(&xprt->transport_lock);
/* * Destroy sockets etc from the system workqueue so they can
From: Jann Horn jannh@google.com
stable inclusion from stable-v4.19.259 commit 56fa5f3dd44a05a5eacd75ae9d00c5415046d371 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
This is a stable-specific patch. I botched the stable-specific rewrite of commit b67fbebd4cf98 ("mmu_gather: Force tlb-flush VM_PFNMAP vmas"): As Hugh pointed out, unmap_region() actually operates on a list of VMAs, and the variable "vma" merely points to the first VMA in that list. So if we want to check whether any of the VMAs we're operating on is PFNMAP or MIXEDMAP, we have to iterate through the list and check each VMA.
Signed-off-by: Jann Horn jannh@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- mm/mmap.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c index 8b5f3e45613e..0576cad8ccb6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2898,6 +2898,7 @@ static void unmap_region(struct mm_struct *mm, { struct vm_area_struct *next = prev ? prev->vm_next : mm->mmap; struct mmu_gather tlb; + struct vm_area_struct *cur_vma;
lru_add_drain(); tlb_gather_mmu(&tlb, mm, start, end); @@ -2912,8 +2913,12 @@ static void unmap_region(struct mm_struct *mm, * concurrent flush in this region has to be coming through the rmap, * and we synchronize against that using the rmap lock. */ - if ((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) != 0) - tlb_flush_mmu(&tlb); + for (cur_vma = vma; cur_vma; cur_vma = cur_vma->vm_next) { + if ((cur_vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) != 0) { + tlb_flush_mmu(&tlb); + break; + } + }
free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS, next ? next->vm_start : USER_PGTABLES_CEILING);
From: Florian Westphal fw@strlen.de
stable inclusion from stable-v4.19.257 commit 358765beb836f5fc2ed26b5df4140d5d3548ac11 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 7997eff82828304b780dc0a39707e1946d6f1ebf ]
Harshit Mogalapalli says: In ebt_do_table() function dereferencing 'private->hook_entry[hook]' can lead to NULL pointer dereference. [..] Kernel panic:
general protection fault, probably for non-canonical address 0xdffffc0000000005: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000028-0x000000000000002f] [..] RIP: 0010:ebt_do_table+0x1dc/0x1ce0 Code: 89 fa 48 c1 ea 03 80 3c 02 00 0f 85 5c 16 00 00 48 b8 00 00 00 00 00 fc ff df 49 8b 6c df 08 48 8d 7d 2c 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 88 [..] Call Trace: nf_hook_slow+0xb1/0x170 __br_forward+0x289/0x730 maybe_deliver+0x24b/0x380 br_flood+0xc6/0x390 br_dev_xmit+0xa2e/0x12c0
For some reason ebtables rejects blobs that provide entry points that are not supported by the table, but what it should instead reject is the opposite: blobs that DO NOT provide an entry point supported by the table.
t->valid_hooks is the bitmask of hooks (input, forward ...) that will see packets. Providing an entry point that is not support is harmless (never called/used), but the inverse isn't: it results in a crash because the ebtables traverser doesn't expect a NULL blob for a location its receiving packets for.
Instead of fixing all the individual checks, do what iptables is doing and reject all blobs that differ from the expected hooks.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Harshit Mogalapalli harshit.m.mogalapalli@oracle.com Reported-by: syzkaller syzkaller@googlegroups.com Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Xu Jia xujia39@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- include/linux/netfilter_bridge/ebtables.h | 4 ---- net/bridge/netfilter/ebtable_broute.c | 8 -------- net/bridge/netfilter/ebtable_filter.c | 8 -------- net/bridge/netfilter/ebtable_nat.c | 8 -------- net/bridge/netfilter/ebtables.c | 8 +------- 5 files changed, 1 insertion(+), 35 deletions(-)
diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index c6935be7c6ca..954ffe32f622 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -94,10 +94,6 @@ struct ebt_table { struct ebt_replace_kernel *table; unsigned int valid_hooks; rwlock_t lock; - /* e.g. could be the table explicitly only allows certain - * matches, targets, ... 0 == let it in */ - int (*check)(const struct ebt_table_info *info, - unsigned int valid_hooks); /* the data used by the kernel */ struct ebt_table_info *private; struct module *me; diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 276b60262981..b21c8a317be7 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -33,18 +33,10 @@ static struct ebt_replace_kernel initial_table = { .entries = (char *)&initial_chain, };
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks) -{ - if (valid_hooks & ~(1 << NF_BR_BROUTING)) - return -EINVAL; - return 0; -} - static const struct ebt_table broute_table = { .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, - .check = check, .me = THIS_MODULE, };
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 550324c516ee..c71795e4c18c 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -42,18 +42,10 @@ static struct ebt_replace_kernel initial_table = { .entries = (char *)initial_chains, };
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks) -{ - if (valid_hooks & ~FILTER_VALID_HOOKS) - return -EINVAL; - return 0; -} - static const struct ebt_table frame_filter = { .name = "filter", .table = &initial_table, .valid_hooks = FILTER_VALID_HOOKS, - .check = check, .me = THIS_MODULE, };
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index c0fb3ca518af..44dde9e635e2 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -42,18 +42,10 @@ static struct ebt_replace_kernel initial_table = { .entries = (char *)initial_chains, };
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks) -{ - if (valid_hooks & ~NAT_VALID_HOOKS) - return -EINVAL; - return 0; -} - static const struct ebt_table frame_nat = { .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, - .check = check, .me = THIS_MODULE, };
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f59230e4fc29..ea27bacbd005 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1003,8 +1003,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, goto free_iterate; }
- /* the table doesn't like it */ - if (t->check && (ret = t->check(newinfo, repl->valid_hooks))) + if (repl->valid_hooks != t->valid_hooks) goto free_unlock;
if (repl->num_counters && repl->num_counters != t->private->nentries) { @@ -1197,11 +1196,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table, if (ret != 0) goto free_chainstack;
- if (table->check && table->check(newinfo, table->valid_hooks)) { - ret = -EINVAL; - goto free_chainstack; - } - table->private = newinfo; rwlock_init(&table->lock); mutex_lock(&ebt_mutex);
From: Florian Westphal fw@strlen.de
stable inclusion from stable-v4.19.260 commit 1e98318af2f163eadaff815abcef38d27ca92c1e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5UQH4 CVE: NA
--------------------------------
[ Upstream commit 62ce44c4fff947eebdf10bb582267e686e6835c9 ]
The bug fix was incomplete, it "replaced" crash with a memory leak. The old code had an assignment to "ret" embedded into the conditional, restore this.
Fixes: 7997eff82828 ("netfilter: ebtables: reject blobs that don't provide all entry points") Reported-and-tested-by: syzbot+a24c5252f3e3ab733464@syzkaller.appspotmail.com Signed-off-by: Florian Westphal fw@strlen.de Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Xu Jia xujia39@huawei.com Reviewed-by: Wei Yongjun weiyongjun1@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- net/bridge/netfilter/ebtables.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index ea27bacbd005..59d8974ee92b 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1003,8 +1003,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, goto free_iterate; }
- if (repl->valid_hooks != t->valid_hooks) + if (repl->valid_hooks != t->valid_hooks) { + ret = -EINVAL; goto free_unlock; + }
if (repl->num_counters && repl->num_counters != t->private->nentries) { ret = -EINVAL;
From: Brian Norris briannorris@chromium.org
stable inclusion from stable-v4.19.259 commit 27591187f95e6f67a40bba46f8306699d2ecbd81 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5V45M CVE: NA
---------------------------
[ Upstream commit 47311db8e8f33011d90dee76b39c8886120cdda4 ]
Users may have explicitly configured their tracefs permissions; we shouldn't overwrite those just because a second mount appeared.
Only clobber if the options were provided at mount time.
Note: the previous behavior was especially surprising in the presence of automounted /sys/kernel/debug/tracing/.
Existing behavior:
## Pre-existing status: tracefs is 0755. # stat -c '%A' /sys/kernel/tracing/ drwxr-xr-x
## (Re)trigger the automount. # umount /sys/kernel/debug/tracing # stat -c '%A' /sys/kernel/debug/tracing/. drwx------
## Unexpected: the automount changed mode for other mount instances. # stat -c '%A' /sys/kernel/tracing/ drwx------
New behavior (after this change):
## Pre-existing status: tracefs is 0755. # stat -c '%A' /sys/kernel/tracing/ drwxr-xr-x
## (Re)trigger the automount. # umount /sys/kernel/debug/tracing # stat -c '%A' /sys/kernel/debug/tracing/. drwxr-xr-x
## Expected: the automount does not change other mount instances. # stat -c '%A' /sys/kernel/tracing/ drwxr-xr-x
Link: https://lkml.kernel.org/r/20220826174353.2.Iab6e5ea57963d6deca5311b27fb72267...
Cc: stable@vger.kernel.org Fixes: 4282d60689d4f ("tracefs: Add new tracefs file system") Signed-off-by: Brian Norris briannorris@chromium.org Signed-off-by: Steven Rostedt (Google) rostedt@goodmis.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- fs/tracefs/inode.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index f4ea7bf13386..ae0a660b98b6 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -142,6 +142,8 @@ struct tracefs_mount_opts { kuid_t uid; kgid_t gid; umode_t mode; + /* Opt_* bitfield. */ + unsigned int opts; };
enum { @@ -171,6 +173,7 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) kgid_t gid; char *p;
+ opts->opts = 0; opts->mode = TRACEFS_DEFAULT_MODE;
while ((p = strsep(&data, ",")) != NULL) { @@ -205,22 +208,34 @@ static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) * but traditionally tracefs has ignored all mount options */ } + + opts->opts |= BIT(token); }
return 0; }
-static int tracefs_apply_options(struct super_block *sb) +static int tracefs_apply_options(struct super_block *sb, bool remount) { struct tracefs_fs_info *fsi = sb->s_fs_info; struct inode *inode = sb->s_root->d_inode; struct tracefs_mount_opts *opts = &fsi->mount_opts;
- inode->i_mode &= ~S_IALLUGO; - inode->i_mode |= opts->mode; + /* + * On remount, only reset mode/uid/gid if they were provided as mount + * options. + */ + + if (!remount || opts->opts & BIT(Opt_mode)) { + inode->i_mode &= ~S_IALLUGO; + inode->i_mode |= opts->mode; + } + + if (!remount || opts->opts & BIT(Opt_uid)) + inode->i_uid = opts->uid;
- inode->i_uid = opts->uid; - inode->i_gid = opts->gid; + if (!remount || opts->opts & BIT(Opt_gid)) + inode->i_gid = opts->gid;
return 0; } @@ -235,7 +250,7 @@ static int tracefs_remount(struct super_block *sb, int *flags, char *data) if (err) goto fail;
- tracefs_apply_options(sb); + tracefs_apply_options(sb, true);
fail: return err; @@ -287,7 +302,7 @@ static int trace_fill_super(struct super_block *sb, void *data, int silent)
sb->s_op = &tracefs_super_operations;
- tracefs_apply_options(sb); + tracefs_apply_options(sb, false);
return 0;