From: Jiasheng Jiang jiasheng@iscas.ac.cn
stable inclusion from stable-v6.0.14 commit abfaf0eee97925905e742aa3b0b72e04a918fa9e category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6694U CVE: CVE-2022-3108
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
Because the allocation can fail, kmemdup() may return a NULL pointer. Therefore, 'props2' should be checked before use in order to prevent a NULL pointer dereference.
Fixes: 3a87177eb141 ("drm/amdkfd: Add topology support for dGPUs") Signed-off-by: Jiasheng Jiang jiasheng@iscas.ac.cn Reviewed-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Felix Kuehling Felix.Kuehling@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com Reviewed-by: songping yu yusongping@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 86b4dadf772e..02e3c650ed1c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -408,6 +408,9 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, return -ENODEV; /* same everything but the other direction */ props2 = kmemdup(props, sizeof(*props2), GFP_KERNEL); + if (!props2) + return -ENOMEM; + props2->node_from = id_to; props2->node_to = id_from; props2->kobj = NULL;
From: Phil Turnbull philipturnbull@github.com
stable inclusion from stable-v5.10.157 commit 905f886eae4b065656a575e8a02544045cbaadcf category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I66M3L CVE: CVE-2022-47519
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 051ae669e4505abbe05165bebf6be7922de11f41 upstream.
Validate that the IEEE80211_P2P_ATTR_OPER_CHANNEL attribute contains enough space for a 'struct wilc_attr_oper_ch'. If the attribute is too small then it triggers an out-of-bounds write later in the function.
Signed-off-by: Phil Turnbull philipturnbull@github.com Tested-by: Ajay Kathat ajay.kathat@microchip.com Acked-by: Ajay Kathat ajay.kathat@microchip.com Signed-off-by: Kalle Valo kvalo@kernel.org Link: https://lore.kernel.org/r/20221123153543.8568-3-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Baisong Zhong zhongbaisong@huawei.com Reviewed-by: Liu Jian liujian56@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 6be5ac8ba518..b42e9eb2631c 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -939,14 +939,24 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) return;
while (index + sizeof(*e) <= len) { + u16 attr_size; + e = (struct wilc_attr_entry *)&buf[index]; + attr_size = le16_to_cpu(e->attr_len); + + if (index + sizeof(*e) + attr_size > len) + return; + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) ch_list_idx = index; - else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL) + else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && + attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e))) op_ch_idx = index; + if (ch_list_idx && op_ch_idx) break; - index += le16_to_cpu(e->attr_len) + sizeof(*e); + + index += sizeof(*e) + attr_size; }
if (ch_list_idx) {
From: Phil Turnbull philipturnbull@github.com
stable inclusion from stable-v5.10.157 commit 3eb6b89a4e9f9e44c3170d70d8d16c3c8dc8c800 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I66LQ3?from=project-issue CVE: CVE-2022-47518
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 0cdfa9e6f0915e3d243e2393bfa8a22e12d553b0 upstream.
There is no validation of 'e->no_of_channels' which can trigger an out-of-bounds write in the following 'memset' call. Validate that the number of channels does not extend beyond the size of the channel list element.
Signed-off-by: Phil Turnbull philipturnbull@github.com Tested-by: Ajay Kathat ajay.kathat@microchip.com Acked-by: Ajay Kathat ajay.kathat@microchip.com Signed-off-by: Kalle Valo kvalo@kernel.org Link: https://lore.kernel.org/r/20221123153543.8568-5-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Ziyang Xuan william.xuanziyang@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- .../wireless/microchip/wilc1000/cfg80211.c | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-)
diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index b42e9eb2631c..64e01b84d98d 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -960,19 +960,30 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) }
if (ch_list_idx) { - u16 attr_size; - struct wilc_ch_list_elem *e; - int i; + unsigned int i; + u16 elem_size;
ch_list = (struct wilc_attr_ch_list *)&buf[ch_list_idx]; - attr_size = le16_to_cpu(ch_list->attr_len); - for (i = 0; i < attr_size;) { + /* the number of bytes following the final 'elem' member */ + elem_size = le16_to_cpu(ch_list->attr_len) - + (sizeof(*ch_list) - sizeof(struct wilc_attr_entry)); + for (i = 0; i < elem_size;) { + struct wilc_ch_list_elem *e; + e = (struct wilc_ch_list_elem *)(ch_list->elem + i); + + i += sizeof(*e); + if (i > elem_size) + break; + + i += e->no_of_channels; + if (i > elem_size) + break; + if (e->op_class == WILC_WLAN_OPERATING_CLASS_2_4GHZ) { memset(e->ch_list, sta_ch, e->no_of_channels); break; } - i += e->no_of_channels; } }
From: Phil Turnbull philipturnbull@github.com
stable inclusion from stable-v5.10.156 commit 7c6535fb4d67ea37c98a1d1d24ca33dd5ec42693 category: bugfix bugzilla: 188177, https://gitee.com/src-openeuler/kernel/issues/I66M3K CVE: CVE-2022-47520
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit cd21d99e595ec1d8721e1058dcdd4f1f7de1d793 upstream.
There is no validation of 'offset' which can trigger an out-of-bounds read when extracting RSN capabilities.
Signed-off-by: Phil Turnbull philipturnbull@github.com Tested-by: Ajay Kathat ajay.kathat@microchip.com Acked-by: Ajay Kathat ajay.kathat@microchip.com Signed-off-by: Kalle Valo kvalo@kernel.org Link: https://lore.kernel.org/r/20221123153543.8568-2-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Dong Chenchen dongchenchen2@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/net/wireless/microchip/wilc1000/hif.c | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/drivers/net/wireless/microchip/wilc1000/hif.c b/drivers/net/wireless/microchip/wilc1000/hif.c index d025a3093015..b25847799138 100644 --- a/drivers/net/wireless/microchip/wilc1000/hif.c +++ b/drivers/net/wireless/microchip/wilc1000/hif.c @@ -467,14 +467,25 @@ void *wilc_parse_join_bss_param(struct cfg80211_bss *bss,
rsn_ie = cfg80211_find_ie(WLAN_EID_RSN, ies->data, ies->len); if (rsn_ie) { + int rsn_ie_len = sizeof(struct element) + rsn_ie[1]; int offset = 8;
- param->mode_802_11i = 2; - param->rsn_found = true; /* extract RSN capabilities */ - offset += (rsn_ie[offset] * 4) + 2; - offset += (rsn_ie[offset] * 4) + 2; - memcpy(param->rsn_cap, &rsn_ie[offset], 2); + if (offset < rsn_ie_len) { + /* skip over pairwise suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset < rsn_ie_len) { + /* skip over authentication suites */ + offset += (rsn_ie[offset] * 4) + 2; + + if (offset + 1 < rsn_ie_len) { + param->mode_802_11i = 2; + param->rsn_found = true; + memcpy(param->rsn_cap, &rsn_ie[offset], 2); + } + } + } }
if (param->rsn_found) {
From: Takashi Iwai tiwai@suse.de
stable inclusion from stable-v6.1.1 commit fd3d91ab1c6ab0628fe642dd570b56302c30a792 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5SDE4 CVE: CVE-2022-41218
--------------------------------
The dvb-core tries to sync the releases of opened files at dvb_dmxdev_release() with two refcounts: dvbdev->users and dvr_dvbdev->users. A problem is present in those two syncs: when yet another dvb_demux_open() is called during those sync waits, dvb_demux_open() continues to process even if the device is being closed. This includes the increment of the former refcount, resulting in the leftover refcount after the sync of the latter refcount at dvb_dmxdev_release(). It ends up with use-after-free, since the function believes that all usages were gone and releases the resources.
This patch addresses the problem by adding the check of dmxdev->exit flag at dvb_demux_open(), just like dvb_dvr_open() already does. With the exit flag check, the second call of dvb_demux_open() fails, hence the further corruption can be avoided.
Also, to avoid races on the dmxdev->exit flag, this patch serializes the dmxdev->exit set up and the sync waits with the dmxdev->mutex lock at dvb_dmxdev_release(). Without the mutex lock, dvb_demux_open() (or dvb_dvr_open()) may run concurrently with dvb_dmxdev_release(), which allows the open call to skip the exit flag check and continue opening a device that is being closed.
CVE-2022-41218 is assigned to those bugs above.
Reported-by: Hyunwoo Kim imv4bel@gmail.com Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20220908132754.30532-1-tiwai@suse.de Signed-off-by: Takashi Iwai tiwai@suse.de Signed-off-by: Hans Verkuil hverkuil-cisco@xs4all.nl Signed-off-by: Ren Zhijie renzhijie2@huawei.com Reviewed-by: songping yu yusongping@huawei.com Reviewed-by: Zhang Qiao zhangqiao22@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/media/dvb-core/dmxdev.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index e58cb8434daf..12b7f698f562 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -800,6 +800,11 @@ static int dvb_demux_open(struct inode *inode, struct file *file) if (mutex_lock_interruptible(&dmxdev->mutex)) return -ERESTARTSYS;
+ if (dmxdev->exit) { + mutex_unlock(&dmxdev->mutex); + return -ENODEV; + } + for (i = 0; i < dmxdev->filternum; i++) if (dmxdev->filter[i].state == DMXDEV_STATE_FREE) break; @@ -1458,7 +1463,10 @@ EXPORT_SYMBOL(dvb_dmxdev_init);
void dvb_dmxdev_release(struct dmxdev *dmxdev) { + mutex_lock(&dmxdev->mutex); dmxdev->exit = 1; + mutex_unlock(&dmxdev->mutex); + if (dmxdev->dvbdev->users > 1) { wait_event(dmxdev->dvbdev->wait_queue, dmxdev->dvbdev->users == 1);
From: Juergen Gross jgross@suse.com
stable inclusion from stable-v5.10.159 commit 83632fc41449c480f2d0193683ec202caaa186c9 category: bugfix bugzilla: 188137, https://gitee.com/src-openeuler/kernel/issues/I651DP CVE: CVE-2022-42328
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
[ Upstream commit 74e7e1efdad45580cc3839f2a155174cf158f9b5 ]
It is not allowed to call kfree_skb() from hardware interrupt context or with interrupts being disabled. So remove kfree_skb() from the spin_lock_irqsave() section and use the already existing "drop" label in xenvif_start_xmit() for dropping the SKB. At the same time replace the dev_kfree_skb() call there with a call of dev_kfree_skb_any(), as xenvif_start_xmit() can be called with disabled interrupts.
This is XSA-424 / CVE-2022-42328 / CVE-2022-42329.
Fixes: be81992f9086 ("xen/netback: don't queue unlimited number of packages") Reported-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Juergen Gross jgross@suse.com Reviewed-by: Jan Beulich jbeulich@suse.com Signed-off-by: Juergen Gross jgross@suse.com Signed-off-by: Sasha Levin sashal@kernel.org
conflict: drivers/net/xen-netback/common.h
Signed-off-by: Lu Wei luwei32@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/net/xen-netback/common.h | 2 +- drivers/net/xen-netback/interface.c | 6 ++++-- drivers/net/xen-netback/rx.c | 8 +++++--- 3 files changed, 10 insertions(+), 6 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 6a9178896c90..962e654d4a8d 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -395,7 +395,7 @@ irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); void xenvif_rx_action(struct xenvif_queue *queue); -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
void xenvif_carrier_on(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 7ce9807fc24c..c98890674e5d 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -269,14 +269,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) skb_clear_hash(skb);
- xenvif_rx_queue_tail(queue, skb); + if (!xenvif_rx_queue_tail(queue, skb)) + goto drop; + xenvif_kick_thread(queue);
return NETDEV_TX_OK;
drop: vif->dev->stats.tx_dropped++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; }
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index a0335407be42..c2671eb6ad93 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) return false; }
-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) { unsigned long flags; + bool ret = true;
spin_lock_irqsave(&queue->rx_queue.lock, flags);
@@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) struct net_device *dev = queue->vif->dev;
netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); - kfree_skb(skb); - queue->vif->dev->stats.rx_dropped++; + ret = false; } else { if (skb_queue_empty(&queue->rx_queue)) xenvif_update_needed_slots(queue, skb); @@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) }
spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + + return ret; }
static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
From: Phil Turnbull philipturnbull@github.com
stable inclusion from stable-v5.10.157 commit 5a068535c0073c8402aa0755e8ef259fb98a33c5 category: bugfix bugzilla: 188173 https://gitee.com/src-openeuler/kernel/issues/I66M3J CVE: CVE-2022-47521
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit f9b62f9843c7b0afdaecabbcebf1dbba18599408 upstream.
Validate that the IEEE80211_P2P_ATTR_CHANNEL_LIST attribute contains enough space for a 'struct wilc_attr_ch_list'. If the attribute is too small then it can trigger an out-of-bounds write later in the function.
'struct wilc_attr_ch_list' is variable sized so also check 'attr_len' does not extend beyond the end of 'buf'.
Signed-off-by: Phil Turnbull philipturnbull@github.com Tested-by: Ajay Kathat ajay.kathat@microchip.com Acked-by: Ajay Kathat ajay.kathat@microchip.com Signed-off-by: Kalle Valo kvalo@kernel.org Link: https://lore.kernel.org/r/20221123153543.8568-4-philipturnbull@github.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Liu Jian liujian56@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com --- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 64e01b84d98d..dd26f2086180 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -947,7 +947,8 @@ static inline void wilc_wfi_cfg_parse_ch_attr(u8 *buf, u32 len, u8 sta_ch) if (index + sizeof(*e) + attr_size > len) return;
- if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST) + if (e->attr_type == IEEE80211_P2P_ATTR_CHANNEL_LIST && + attr_size >= (sizeof(struct wilc_attr_ch_list) - sizeof(*e))) ch_list_idx = index; else if (e->attr_type == IEEE80211_P2P_ATTR_OPER_CHANNEL && attr_size == (sizeof(struct wilc_attr_oper_ch) - sizeof(*e)))
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit fdee117ee86479fd2644bcd9ac2b2469e55722d1 category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
The current ll_rw_block() helper is fragile because it assumes that a locked buffer is under IO submitted by whoever holds the lock; it skips the buffer if it fails to get the lock, so it is only safe on the readahead path. Unfortunately, most filesystems still mistakenly use this helper on the sync metadata read path. There is no guarantee that the one who holds the buffer lock always submits IO (e.g. buffer_migrate_folio_norefs() after commit 88dbcbb3a484 ("blkdev: avoid migration stalls for blkdev pages")), so it could lead to a false positive -EIO when submitting read IO.
This patch adds some friendly buffer read helpers to prepare for replacing ll_rw_block() and similar calls. The bh_readahead() and bh_readahead_batch() helpers may only be called on the readahead paths.
Link: https://lkml.kernel.org/r/20220901133505.2510834-3-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflict: fs/buffer.c include/linux/buffer_head.h
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/buffer.c | 65 +++++++++++++++++++++++++++++++++++++ include/linux/buffer_head.h | 38 ++++++++++++++++++++++ 2 files changed, 103 insertions(+)
diff --git a/fs/buffer.c b/fs/buffer.c index 37a08026d3ef..a10dfa3a0f59 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3391,6 +3391,71 @@ int bh_uptodate_or_lock(struct buffer_head *bh) } EXPORT_SYMBOL(bh_uptodate_or_lock);
+/** + * __bh_read - Submit read for a locked buffer + * @bh: struct buffer_head + * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ + * @wait: wait until reading finish + * + * Returns zero on success or don't wait, and -EIO on error. + */ +int __bh_read(struct buffer_head *bh, unsigned int op_flags, bool wait) +{ + int ret = 0; + + BUG_ON(!buffer_locked(bh)); + + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(REQ_OP_READ, op_flags, bh); + if (wait) { + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) + ret = -EIO; + } + return ret; +} +EXPORT_SYMBOL(__bh_read); + +/** + * __bh_read_batch - Submit read for a batch of unlocked buffers + * @nr: entry number of the buffer batch + * @bhs: a batch of struct buffer_head + * @op_flags: appending REQ_OP_* flags besides REQ_OP_READ + * @force_lock: force to get a lock on the buffer if set, otherwise drops any + * buffer that cannot lock. + * + * Returns zero on success or don't wait, and -EIO on error. + */ +void __bh_read_batch(int nr, struct buffer_head *bhs[], + unsigned int op_flags, bool force_lock) +{ + int i; + + for (i = 0; i < nr; i++) { + struct buffer_head *bh = bhs[i]; + + if (buffer_uptodate(bh)) + continue; + + if (force_lock) + lock_buffer(bh); + else + if (!trylock_buffer(bh)) + continue; + + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + continue; + } + + bh->b_end_io = end_buffer_read_sync; + get_bh(bh); + submit_bh(REQ_OP_READ, op_flags, bh); + } +} +EXPORT_SYMBOL(__bh_read_batch); + /** * bh_submit_read - Submit a locked buffer for reading * @bh: struct buffer_head diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 6b47f94378c5..8577ab2ef446 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -207,6 +207,9 @@ void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); int bh_submit_read(struct buffer_head *bh); +int __bh_read(struct buffer_head 
*bh, unsigned int op_flags, bool wait); +void __bh_read_batch(int nr, struct buffer_head *bhs[], + unsigned int op_flags, bool force_lock);
extern int buffer_heads_over_limit;
@@ -380,6 +383,41 @@ static inline struct buffer_head *__getblk(struct block_device *bdev, return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); }
+static inline void bh_readahead(struct buffer_head *bh, unsigned int op_flags) +{ + if (!buffer_uptodate(bh) && trylock_buffer(bh)) { + if (!buffer_uptodate(bh)) + __bh_read(bh, op_flags, false); + else + unlock_buffer(bh); + } +} + +static inline void bh_read_nowait(struct buffer_head *bh, unsigned int op_flags) +{ + if (!bh_uptodate_or_lock(bh)) + __bh_read(bh, op_flags, false); +} + +/* Returns 1 if buffer uptodated, 0 on success, and -EIO on error. */ +static inline int bh_read(struct buffer_head *bh, unsigned int op_flags) +{ + if (bh_uptodate_or_lock(bh)) + return 1; + return __bh_read(bh, op_flags, true); +} + +static inline void bh_read_batch(int nr, struct buffer_head *bhs[]) +{ + __bh_read_batch(nr, bhs, 0, true); +} + +static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[], + unsigned int op_flags) +{ + __bh_read_batch(nr, bhs, op_flags, false); +} + /** * __bread() - reads a specified block and returns the bh * @bdev: the block_device to read from
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit e7ea1129afab0e63af2c2d0e6e9fb7651f0982b3 category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
ll_rw_block() is not safe for the sync IO path because it skips buffers which have been locked by others; this could lead to a false positive EIO when submitting read IO. So stop using ll_rw_block() and switch to the new helpers, which guarantee that the buffer is locked and submit IO if needed.
Link: https://lkml.kernel.org/r/20220901133505.2510834-4-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflict: fs/buffer.c
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/buffer.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c index a10dfa3a0f59..93324b06ecb4 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -562,7 +562,7 @@ void write_boundary_block(struct block_device *bdev, struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) - ll_rw_block(REQ_OP_WRITE, 0, 1, &bh); + write_dirty_buffer(bh, 0); put_bh(bh); } } @@ -1363,7 +1363,7 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = __getblk(bdev, block, size); if (likely(bh)) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); + bh_readahead(bh, REQ_RAHEAD); brelse(bh); } } @@ -2038,7 +2038,7 @@ int __block_write_begin_int(struct page *page, loff_t pos, unsigned len, if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh) && (block_start < from || block_end > to)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + bh_read_nowait(bh, 0); *wait_bh++=bh; } } @@ -2927,11 +2927,9 @@ int block_truncate_page(struct address_space *mapping, set_buffer_uptodate(bh);
if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) { - err = -EIO; - ll_rw_block(REQ_OP_READ, 0, 1, &bh); - wait_on_buffer(bh); + err = bh_read(bh, 0); /* Uhhuh. Read error. Complain and punt. */ - if (!buffer_uptodate(bh)) + if (err < 0) goto unlock; }
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit 86a020cc7232c3defad370852415876bbe4576dc category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
ll_rw_block() is not safe for the sync read path because it cannot guarantee that read IO is always submitted if the buffer has been locked, so stop using it. We also switch to the new bh_readahead() helper for the readahead path.
Link: https://lkml.kernel.org/r/20220901133505.2510834-5-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Andreas Gruenbacher agruenba@redhat.com Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflict: fs/gfs2/meta_io.c fs/gfs2/quota.c
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/gfs2/meta_io.c | 8 ++------ fs/gfs2/quota.c | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 2db573e31f78..1ce73794a1aa 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -521,8 +521,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
if (buffer_uptodate(first_bh)) goto out; - if (!buffer_locked(first_bh)) - ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &first_bh); + bh_read_nowait(first_bh, REQ_META | REQ_PRIO);
dblock++; extlen--; @@ -530,10 +529,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) while (extlen) { bh = gfs2_getbuf(gl, dblock, CREATE);
- if (!buffer_uptodate(bh) && !buffer_locked(bh)) - ll_rw_block(REQ_OP_READ, - REQ_RAHEAD | REQ_META | REQ_PRIO, - 1, &bh); + bh_readahead(bh, REQ_RAHEAD | REQ_META | REQ_PRIO); brelse(bh); dblock++; extlen--; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index ad953ecb5853..065ddb8792f3 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -741,12 +741,8 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, } if (PageUptodate(page)) set_buffer_uptodate(bh); - if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - goto unlock_out; - } + if (bh_read(bh, REQ_META | REQ_PRIO) < 0) + goto unlock_out; if (gfs2_is_jdata(ip)) gfs2_trans_add_data(ip->i_gl, bh); else
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit 0ed48061887f603b33b7dcb9075cbfaaa8d02723 category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
ll_rw_block() is not safe for the sync read path because it cannot guarantee that read IO is submitted if the buffer has been locked. We could get a false positive EIO return from zisofs_uncompress_block() if the buffer has been locked by others. So stop using ll_rw_block() and switch to the sync helper instead.
Link: https://lkml.kernel.org/r/20220901133505.2510834-6-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflict: fs/isofs/compress.c
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/isofs/compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index bc12ac7e2312..c9fea93b1ee7 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -82,7 +82,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, return 0; } haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks); - ll_rw_block(REQ_OP_READ, 0, haveblocks, bhs); + bh_read_batch(haveblocks, bhs);
curbh = 0; curpage = 0;
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit 8c004d1fc1497d9a6d92ea968bd58230af59a492 category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
ll_rw_block() is not safe for the sync read path because it cannot guarantee that read IO is submitted if the buffer has been locked. We could get a false positive EIO after wait_on_buffer() if the buffer has been locked by others. So stop using ll_rw_block() in journal_get_superblock(). We also switch to the new bh_readahead_batch() helper for the buffer array readahead path.
Link: https://lkml.kernel.org/r/20220901133505.2510834-7-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Theodore Ts'o tytso@mit.edu Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflict: fs/jbd2/journal.c fs/jbd2/recovery.c
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/jbd2/journal.c | 15 ++++++--------- fs/jbd2/recovery.c | 16 ++++++++++------ 2 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index aae412b0bfae..40d2edd55f85 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1793,19 +1793,16 @@ static int journal_get_superblock(journal_t *journal) { struct buffer_head *bh; journal_superblock_t *sb; - int err = -EIO; + int err;
bh = journal->j_sb_buffer;
J_ASSERT(bh != NULL); - if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - printk(KERN_ERR - "JBD2: IO error reading journal superblock\n"); - goto out; - } + err = bh_read(bh, 0); + if (err < 0) { + printk(KERN_ERR + "JBD2: IO error reading journal superblock\n"); + goto out; }
if (buffer_verified(bh)) diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 1e07dfac4d81..f5e3bb411953 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -100,7 +100,7 @@ static int do_readahead(journal_t *journal, unsigned int start) if (!buffer_uptodate(bh) && !buffer_locked(bh)) { bufs[nbufs++] = bh; if (nbufs == MAXBUF) { - ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); + bh_readahead_batch(nbufs, bufs, 0); journal_brelse_array(bufs, nbufs); nbufs = 0; } @@ -109,7 +109,7 @@ static int do_readahead(journal_t *journal, unsigned int start) }
if (nbufs) - ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); + bh_readahead_batch(nbufs, bufs, 0); err = 0;
failed: @@ -152,9 +152,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal, return -ENOMEM;
if (!buffer_uptodate(bh)) { - /* If this is a brand new buffer, start readahead. - Otherwise, we assume we are already reading it. */ - if (!buffer_req(bh)) + /* + * If this is a brand new buffer, start readahead. + * Otherwise, we assume we are already reading it. + */ + bool need_readahead = !buffer_req(bh); + + bh_read_nowait(bh, 0); + if (need_readahead) do_readahead(journal, offset); wait_on_buffer(bh); } @@ -687,7 +692,6 @@ static int do_one_pass(journal_t *journal, mark_buffer_dirty(nbh); BUFFER_TRACE(nbh, "marking uptodate"); ++info->nr_replays; - /* ll_rw_block(WRITE, 1, &nbh); */ unlock_buffer(nbh); brelse(obh); brelse(nbh);
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit 6bf414a00ae7688fac1e03f63431a355068a1d79 category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
ll_rw_block() is not safe for the sync read path because it cannot guarantee that read IO is actually submitted if the buffer is already locked. We could get a false positive EIO after wait_on_buffer() if the buffer has been locked by others. So stop using ll_rw_block() in ntfs_get_block_vbo().
Link: https://lkml.kernel.org/r/20220901133505.2510834-8-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflict: fs/ntfs3/inode.c
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/ntfs3/inode.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index 866691331a36..eecf78e5d754 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -629,12 +629,9 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, bh->b_size = block_size; off = vbo & (PAGE_SIZE - 1); set_bh_page(bh, page, off); - ll_rw_block(REQ_OP_READ, 0, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - err = -EIO; + err = bh_read(bh, 0); + if (err < 0) goto out; - } zero_user_segment(page, off + voff, off + block_size); } }
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit 54d9171d38d904f5afde76e51bed416aaf144975 category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
ll_rw_block() is not safe for the sync read path because it cannot guarantee that read IO is actually submitted if the buffer is already locked. We could get a false positive EIO after wait_on_buffer() if the buffer has been locked by others. So stop using ll_rw_block() in ocfs2.
Link: https://lkml.kernel.org/r/20220901133505.2510834-9-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflict: fs/ocfs2/aops.c fs/ocfs2/super.c
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/ocfs2/aops.c | 2 +- fs/ocfs2/super.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index ad20403b383f..6b06de78f2af 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -640,7 +640,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, !buffer_new(bh) && ocfs2_should_read_blk(inode, page, block_start) && (block_start < from || block_end > to)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + bh_read_nowait(bh, 0); *wait_bh++=bh; }
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index c0e5f1bad499..01ac71723859 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1772,9 +1772,7 @@ static int ocfs2_get_sector(struct super_block *sb, if (!buffer_dirty(*bh)) clear_buffer_uptodate(*bh); unlock_buffer(*bh); - ll_rw_block(REQ_OP_READ, 0, 1, bh); - wait_on_buffer(*bh); - if (!buffer_uptodate(*bh)) { + if (bh_read(*bh, 0) < 0) { mlog_errno(-EIO); brelse(*bh); *bh = NULL;
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit d554822e82cc99db53b845f3e60dc13e56ad4575 category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
ll_rw_block() is not safe for the sync read/write path because it cannot guarantee that read/write IO is actually submitted if the buffer is already locked. We could get a false positive EIO after wait_on_buffer() in the read path if the buffer has been locked by others. So stop using ll_rw_block() in reiserfs. We also switch to the new bh_readahead_batch() helper for the buffer array readahead path.
Link: https://lkml.kernel.org/r/20220901133505.2510834-10-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflict: fs/reiserfs/journal.c fs/reiserfs/stree.c fs/reiserfs/super.c
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/reiserfs/journal.c | 11 ++++++----- fs/reiserfs/stree.c | 4 ++-- fs/reiserfs/super.c | 4 +--- 3 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index df5fc12a6cee..55df528e9876 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -870,7 +870,7 @@ static int write_ordered_buffers(spinlock_t * lock, */ if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { spin_unlock(lock); - ll_rw_block(REQ_OP_WRITE, 0, 1, &bh); + write_dirty_buffer(bh, 0); spin_lock(lock); } put_bh(bh); @@ -1054,7 +1054,7 @@ static int flush_commit_list(struct super_block *s, if (tbh) { if (buffer_dirty(tbh)) { depth = reiserfs_write_unlock_nested(s); - ll_rw_block(REQ_OP_WRITE, 0, 1, &tbh); + write_dirty_buffer(tbh, 0); reiserfs_write_lock_nested(s, depth); } put_bh(tbh) ; @@ -2239,7 +2239,7 @@ static int journal_read_transaction(struct super_block *sb, } } /* read in the log blocks, memcpy to the corresponding real block */ - ll_rw_block(REQ_OP_READ, 0, get_desc_trans_len(desc), log_blocks); + bh_read_batch(get_desc_trans_len(desc), log_blocks); for (i = 0; i < get_desc_trans_len(desc); i++) {
wait_on_buffer(log_blocks[i]); @@ -2341,10 +2341,11 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev, } else bhlist[j++] = bh; } - ll_rw_block(REQ_OP_READ, 0, j, bhlist); + bh = bhlist[0]; + bh_read_nowait(bh, 0); + bh_readahead_batch(j - 1, &bhlist[1], 0); for (i = 1; i < j; i++) brelse(bhlist[i]); - bh = bhlist[0]; wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index ef42729216d1..84c12a1947b2 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -579,7 +579,7 @@ static int search_by_key_reada(struct super_block *s, if (!buffer_uptodate(bh[j])) { if (depth == -1) depth = reiserfs_write_unlock_nested(s); - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j); + bh_readahead(bh[j], REQ_RAHEAD); } brelse(bh[j]); } @@ -685,7 +685,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, if (!buffer_uptodate(bh) && depth == -1) depth = reiserfs_write_unlock_nested(sb);
- ll_rw_block(REQ_OP_READ, 0, 1, &bh); + bh_read_nowait(bh, 0); wait_on_buffer(bh);
if (depth != -1) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 913f5af9bf24..e5fd44b4a8ea 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1708,9 +1708,7 @@ static int read_super_block(struct super_block *s, int offset) /* after journal replay, reread all bitmap and super blocks */ static int reread_meta_blocks(struct super_block *s) { - ll_rw_block(REQ_OP_READ, 0, 1, &SB_BUFFER_WITH_SB(s)); - wait_on_buffer(SB_BUFFER_WITH_SB(s)); - if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { + if (bh_read(SB_BUFFER_WITH_SB(s), 0) < 0) { reiserfs_warning(s, "reiserfs-2504", "error reading the super"); return 1; }
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit 59a16786fa7a77dd383a62271e0102f1455bccea category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
ll_rw_block() is not safe for the sync read path because it cannot guarantee that read IO is actually submitted if the buffer is already locked. We could get a false positive EIO after wait_on_buffer() if the buffer has been locked by others. So stop using ll_rw_block(). We also switch to the new bh_readahead_batch() helper for the buffer array readahead path.
Link: https://lkml.kernel.org/r/20220901133505.2510834-11-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflict: fs/udf/dir.c fs/udf/directory.c fs/udf/inode.c
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/udf/dir.c | 2 +- fs/udf/directory.c | 2 +- fs/udf/inode.c | 8 +------- 3 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/fs/udf/dir.c b/fs/udf/dir.c index d0f92a52e3ba..02bf94e3e666 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -131,7 +131,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) brelse(tmp); } if (num) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha); + bh_readahead_batch(num, bha, REQ_RAHEAD); for (i = 0; i < num; i++) brelse(bha[i]); } diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 73720320f0ab..16bcf2c6b8b3 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -89,7 +89,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, brelse(tmp); } if (num) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha); + bh_readahead_batch(num, bha, REQ_RAHEAD); for (i = 0; i < num; i++) brelse(bha[i]); } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index d32b836f6ca7..3ae9955c42b0 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1210,13 +1210,7 @@ struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block, if (!bh) return NULL;
- if (buffer_uptodate(bh)) - return bh; - - ll_rw_block(REQ_OP_READ, 0, 1, &bh); - - wait_on_buffer(bh); - if (buffer_uptodate(bh)) + if (bh_read(bh, 0) >= 0) return bh;
brelse(bh);
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit 6799b6983170c6dfdb2fcea8c97058557ea7b5b6 category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
ll_rw_block() is not safe for the sync read path because it cannot guarantee that read IO is actually submitted if the buffer is already locked. We could get a false positive EIO after wait_on_buffer() if the buffer has been locked by others. So stop using ll_rw_block() in ufs.
Link: https://lkml.kernel.org/r/20220901133505.2510834-12-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflict: fs/ufs/balloc.c
Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/ufs/balloc.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-)
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 075d3d9114c8..2436e3f82147 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -295,14 +295,10 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
if (!buffer_mapped(bh)) map_bh(bh, inode->i_sb, oldb + pos); - if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - ufs_error(inode->i_sb, __func__, - "read of block failed\n"); - break; - } + if (bh_read(bh, 0) < 0) { + ufs_error(inode->i_sb, __func__, + "read of block failed\n"); + break; }
UFSD(" change from %llu to %llu, pos %u\n",
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-v6.1-rc1 commit 28cf75591008eef5e1649de31c4ddee5bf20081d category: bugfix bugzilla: 187878,https://gitee.com/openeuler/kernel/issues/I5QJH9 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
--------------------------------
bh_submit_read() and the uptodate check logic in bh_uptodate_or_lock() have been integrated into the bh_read() helper, so switch to using it directly.
Link: https://lkml.kernel.org/r/20220901133505.2510834-14-yi.zhang@huawei.com Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- fs/ext2/balloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index c17ccc19b938..5dc0a31f4a08 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -126,6 +126,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) struct ext2_group_desc * desc; struct buffer_head * bh = NULL; ext2_fsblk_t bitmap_blk; + int ret;
desc = ext2_get_group_desc(sb, block_group, NULL); if (!desc) @@ -139,10 +140,10 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) block_group, le32_to_cpu(desc->bg_block_bitmap)); return NULL; } - if (likely(bh_uptodate_or_lock(bh))) + ret = bh_read(bh, 0); + if (ret > 0) return bh; - - if (bh_submit_read(bh) < 0) { + if (ret < 0) { brelse(bh); ext2_error(sb, __func__, "Cannot read block bitmap - "
From: Ming Lei ming.lei@redhat.com
mainline inclusion from mainline-v5.13-rc1 commit 580dca8143d215977811bd2ff881e1e4f6ff39f0 category: performance bugzilla: 187991,https://gitee.com/openeuler/kernel/issues/I66VDN CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------
Yanhui found that write performance is degraded a lot after applying hctx shared tagset on one test machine with megaraid_sas. It turns out this is caused by the 'none' scheduler, which became the default elevator as a result of the hctx shared tagset patchset.
More SCSI HBAs will apply hctx shared tagset, and a similar performance degradation exists for them too.
So keep previous behavior by still using default mq-deadline for queues which apply hctx shared tagset, just like before.
Fixes: 32bc15afed04 ("blk-mq: Facilitate a shared sbitmap per tagset") Reported-by: Yanhui Ma yama@redhat.com Cc: John Garry john.garry@huawei.com Cc: Hannes Reinecke hare@suse.de Signed-off-by: Ming Lei ming.lei@redhat.com Reviewed-by: Martin K. Petersen martin.petersen@oracle.com Reviewed-by: Bart Van Assche bvanassche@acm.org Reviewed-by: John Garry john.garry@huawei.com Link: https://lore.kernel.org/r/20210406031933.767228-1-ming.lei@redhat.com Signed-off-by: Jens Axboe axboe@kernel.dk
Conflict: block/elevator.c
Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- block/elevator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/block/elevator.c b/block/elevator.c index 76f70f679a1b..b66afb571e15 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -630,7 +630,8 @@ static struct elevator_type *elevator_get_default(struct request_queue *q) if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) return NULL;
- if (q->nr_hw_queues != 1) + if (q->nr_hw_queues != 1 && + !blk_mq_is_sbitmap_shared(q->tag_set->flags)) return NULL;
return elevator_get(q, "mq-deadline", false);
From: Yu Kuai yukuai3@huawei.com
mainline inclusion from v6.1-rc5 commit f02be9002c480cd3ec0fcf184ad27cf531bd6ece category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I610OR CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Our test found the following problem in kernel 5.10, and the same problem should exist in mainline:
BUG: kernel NULL pointer dereference, address: 0000000000000094 PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 7 PID: 155 Comm: kworker/7:1 Not tainted 5.10.0-01932-g19e0ace2ca1d-dirty 4 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-b4 Workqueue: kthrotld blk_throtl_dispatch_work_fn RIP: 0010:bfq_bio_bfqg+0x52/0xc0 Code: 94 00 00 00 00 75 2e 48 8b 40 30 48 83 05 35 06 c8 0b 01 48 85 c0 74 3d 4b RSP: 0018:ffffc90001a1fba0 EFLAGS: 00010002 RAX: ffff888100d60400 RBX: ffff8881132e7000 RCX: 0000000000000000 RDX: 0000000000000017 RSI: ffff888103580a18 RDI: ffff888103580a18 RBP: ffff8881132e7000 R08: 0000000000000000 R09: ffffc90001a1fe10 R10: 0000000000000a20 R11: 0000000000034320 R12: 0000000000000000 R13: ffff888103580a18 R14: ffff888114447000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88881fdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000094 CR3: 0000000100cdb000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bfq_bic_update_cgroup+0x3c/0x350 ? ioc_create_icq+0x42/0x270 bfq_init_rq+0xfd/0x1060 bfq_insert_requests+0x20f/0x1cc0 ? ioc_create_icq+0x122/0x270 blk_mq_sched_insert_requests+0x86/0x1d0 blk_mq_flush_plug_list+0x193/0x2a0 blk_flush_plug_list+0x127/0x170 blk_finish_plug+0x31/0x50 blk_throtl_dispatch_work_fn+0x151/0x190 process_one_work+0x27c/0x5f0 worker_thread+0x28b/0x6b0 ? rescuer_thread+0x590/0x590 kthread+0x153/0x1b0 ? 
kthread_flush_work+0x170/0x170 ret_from_fork+0x1f/0x30 Modules linked in: CR2: 0000000000000094 ---[ end trace e2e59ac014314547 ]--- RIP: 0010:bfq_bio_bfqg+0x52/0xc0 Code: 94 00 00 00 00 75 2e 48 8b 40 30 48 83 05 35 06 c8 0b 01 48 85 c0 74 3d 4b RSP: 0018:ffffc90001a1fba0 EFLAGS: 00010002 RAX: ffff888100d60400 RBX: ffff8881132e7000 RCX: 0000000000000000 RDX: 0000000000000017 RSI: ffff888103580a18 RDI: ffff888103580a18 RBP: ffff8881132e7000 R08: 0000000000000000 R09: ffffc90001a1fe10 R10: 0000000000000a20 R11: 0000000000034320 R12: 0000000000000000 R13: ffff888103580a18 R14: ffff888114447000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88881fdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000094 CR3: 0000000100cdb000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Root cause is quite complex:
1) use bfq elevator for the test device. 2) create a cgroup CG 3) config blk throtl in CG
blkg_conf_prep blkg_create
4) create a thread T1 and issue async io in CG:
bio_init bio_associate_blkg ... submit_bio submit_bio_noacct blk_throtl_bio -> io is throttled // io submit is done
5) switch elevator:
bfq_exit_queue blkcg_deactivate_policy list_for_each_entry(blkg, &q->blkg_list, q_node) blkg->pd[] = NULL // bfq policy is removed
6) thread T1 exits, then remove the cgroup CG:
blkcg_unpin_online blkcg_destroy_blkgs blkg_destroy list_del_init(&blkg->q_node) // blkg is removed from queue list
7) switch elevator back to bfq
bfq_init_queue bfq_create_group_hierarchy blkcg_activate_policy list_for_each_entry_reverse(blkg, &q->blkg_list) // blkg is removed from list, hence bfq policy is still NULL
8) throttled io is dispatched to bfq:
bfq_insert_requests bfq_init_rq bfq_bic_update_cgroup bfq_bio_bfqg bfqg = blkg_to_bfqg(blkg) // bfqg is NULL because bfq policy is NULL
The problem is only possible in bfq because only bfq can be deactivated and activated while queue is online, while others can only be deactivated while the device is removed.
Fix the problem in bfq by checking if blkg is online before calling blkg_to_bfqg().
Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20221108103434.2853269-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe axboe@kernel.dk Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- block/bfq-cgroup.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index f99351017182..36ba7324f685 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -613,6 +613,10 @@ struct bfq_group *bfq_bio_bfqg(struct bfq_data *bfqd, struct bio *bio) struct bfq_group *bfqg;
while (blkg) { + if (!blkg->online) { + blkg = blkg->parent; + continue; + } bfqg = blkg_to_bfqg(blkg); if (bfqg->online) { bio_associate_blkg_from_css(bio, &blkg->blkcg->css);
From: Yu Kuai yukuai3@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I66VJO CVE: NA
--------------------------------
Our test reported a UAF for 'bfqq->bic' in 5.10:
================================================================== BUG: KASAN: use-after-free in bfq_select_queue+0x378/0xa30 Read of size 8 at addr ffff88810efb42d8 by task fsstress/2318352
CPU: 6 PID: 2318352 Comm: fsstress Kdump: loaded Not tainted 5.10.0-60.18.0.50.h602.kasan.eulerosv2r11.x86_64 #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.1-0-ga5cab58-20220320_160524-szxrtosci10000 04/01/2014 Call Trace: dump_stack+0x9c/0xd3 print_address_description.constprop.0+0x19/0x170 ? bfq_select_queue+0x378/0xa30 __kasan_report.cold+0x6c/0x84 ? bfq_select_queue+0x378/0xa30 kasan_report+0x3a/0x50 bfq_select_queue+0x378/0xa30 ? bfq_bfqq_expire+0x6c0/0x6c0 ? bfq_mark_bfqq_busy+0x1f/0x30 ? _raw_spin_lock_irq+0x7b/0xd0 __bfq_dispatch_request+0x1c4/0x220 bfq_dispatch_request+0xe8/0x130 __blk_mq_do_dispatch_sched+0x3f4/0x560 ? blk_mq_sched_mark_restart_hctx+0x50/0x50 ? bfq_init_rq+0x128/0x940 ? pvclock_clocksource_read+0xf6/0x1d0 blk_mq_do_dispatch_sched+0x62/0xb0 __blk_mq_sched_dispatch_requests+0x215/0x2a0 ? blk_mq_do_dispatch_ctx+0x3a0/0x3a0 ? bfq_insert_request+0x193/0x3f0 blk_mq_sched_dispatch_requests+0x8f/0xd0 __blk_mq_run_hw_queue+0x98/0x180 __blk_mq_delay_run_hw_queue+0x22b/0x240 ? bfq_asymmetric_scenario+0x160/0x160 blk_mq_run_hw_queue+0xe3/0x190 ? bfq_insert_request+0x3f0/0x3f0 blk_mq_sched_insert_requests+0x107/0x200 blk_mq_flush_plug_list+0x26e/0x3c0 ? blk_mq_insert_requests+0x250/0x250 ? blk_check_plugged+0x190/0x190 blk_finish_plug+0x63/0x90 __iomap_dio_rw+0x7b5/0x910 ? iomap_dio_actor+0x150/0x150 ? userns_put+0x70/0x70 ? userns_put+0x70/0x70 ? avc_has_perm_noaudit+0x1d0/0x1d0 ? down_read+0xd5/0x1a0 ? down_read_killable+0x1b0/0x1b0 ? from_kgid+0xa0/0xa0 iomap_dio_rw+0x36/0x80 ext4_dio_read_iter+0x146/0x190 [ext4] ext4_file_read_iter+0x1e2/0x230 [ext4] new_sync_read+0x29f/0x400 ? default_llseek+0x160/0x160 ? find_isec_ns+0x8d/0x2e0 ? avc_policy_seqno+0x27/0x40 ? selinux_file_permission+0x34/0x180 ? security_file_permission+0x135/0x2b0 vfs_read+0x24e/0x2d0 ksys_read+0xd5/0x1b0 ? __ia32_sys_pread64+0x160/0x160 ? 
__audit_syscall_entry+0x1cc/0x220 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x61/0xc6 RIP: 0033:0x7ff05f96fe62 Code: c0 e9 b2 fe ff ff 50 48 8d 3d 12 04 0c 00 e8 b5 fe 01 00 0f 1f 44 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 0f 05 <48> 3d 00 f0 ff ff 77 56 c3 0f 1f 44 00 00 48 83 ec 28 48 89 54 24 RSP: 002b:00007fffd30c0ff8 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: 00000000000000a5 RCX: 00007ff05f96fe62 RDX: 000000000001d000 RSI: 0000000001ffc000 RDI: 0000000000000003 RBP: 0000000000000003 R08: 0000000002019000 R09: 0000000000000000 R10: 00007ff05fa65290 R11: 0000000000000246 R12: 0000000000131800 R13: 000000000001d000 R14: 0000000000000000 R15: 0000000001ffc000
Allocated by task 2318348: kasan_save_stack+0x1b/0x40 __kasan_kmalloc.constprop.0+0xb5/0xe0 kmem_cache_alloc_node+0x15d/0x480 ioc_create_icq+0x68/0x2e0 blk_mq_sched_assign_ioc+0xbc/0xd0 blk_mq_rq_ctx_init+0x4b0/0x600 __blk_mq_alloc_request+0x21f/0x2e0 blk_mq_submit_bio+0x27a/0xd60 __submit_bio_noacct_mq+0x10b/0x270 submit_bio_noacct+0x13d/0x150 submit_bio+0xbf/0x280 iomap_dio_submit_bio+0x155/0x180 iomap_dio_bio_actor+0x2f0/0x770 iomap_dio_actor+0xd9/0x150 iomap_apply+0x1d2/0x4f0 __iomap_dio_rw+0x43a/0x910 iomap_dio_rw+0x36/0x80 ext4_dio_write_iter+0x46f/0x730 [ext4] ext4_file_write_iter+0xd8/0x100 [ext4] new_sync_write+0x2ac/0x3a0 vfs_write+0x365/0x430 ksys_write+0xd5/0x1b0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x61/0xc6
Freed by task 2320929: kasan_save_stack+0x1b/0x40 kasan_set_track+0x1c/0x30 kasan_set_free_info+0x20/0x40 __kasan_slab_free+0x151/0x180 kmem_cache_free+0x9e/0x540 rcu_do_batch+0x292/0x700 rcu_core+0x270/0x2d0 __do_softirq+0xfd/0x402
Last call_rcu(): kasan_save_stack+0x1b/0x40 kasan_record_aux_stack+0xa8/0xf0 __call_rcu+0xa4/0x3a0 ioc_release_fn+0x45/0x120 process_one_work+0x3c5/0x730 worker_thread+0x93/0x650 kthread+0x1ba/0x210 ret_from_fork+0x22/0x30
Second to last call_rcu(): kasan_save_stack+0x1b/0x40 kasan_record_aux_stack+0xa8/0xf0 __call_rcu+0xa4/0x3a0 ioc_release_fn+0x45/0x120 process_one_work+0x3c5/0x730 worker_thread+0x93/0x650 kthread+0x1ba/0x210 ret_from_fork+0x22/0x30
The buggy address belongs to the object at ffff88810efb42a0 which belongs to the cache bfq_io_cq of size 160 The buggy address is located 56 bytes inside of 160-byte region [ffff88810efb42a0, ffff88810efb4340) The buggy address belongs to the page: page:00000000a519c14c refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88810efb4000 pfn:0x10efb4 head:00000000a519c14c order:1 compound_mapcount:0 flags: 0x17ffffc0010200(slab|head|node=0|zone=2|lastcpupid=0x1fffff) raw: 0017ffffc0010200 0000000000000000 dead000000000122 ffff8881407c8600 raw: ffff88810efb4000 000000008024001a 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected
Memory state around the buggy address: ffff88810efb4180: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff88810efb4200: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
ffff88810efb4280: fc fc fc fc fa fb fb fb fb fb fb fb fb fb fb fb
^ ffff88810efb4300: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff88810efb4380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ==================================================================
Commit 3bc5e683c67d ("bfq: Split shared queues on move between cgroups") changed the behavior so that moving a process to a new cgroup allocates a new bfqq to use. However, the old bfqq and the new bfqq can point to the same bic:
1) Initial state, two processes with io in the same cgroup.
Process 1 Process 2 (BIC1) (BIC2) | Λ | Λ | | | | V | V | bfqq1 bfqq2
2) bfqq1 is merged to bfqq2.
Process 1 Process 2(cg1) (BIC1) (BIC2) | | -------------| V bfqq1 bfqq2(coop)
3) Process 1 exits, then new io (denoted IOA) is issued from Process 2.
(BIC2) | Λ | | V | bfqq2(coop)
4) Before IOA is completed, move Process 2 to another cgroup and issue io.
Process 2 (BIC2) Λ |--------------\ | V bfqq2 bfqq3
Now BIC2 points to bfqq3, while bfqq2 and bfqq3 both point to BIC2. If all the requests are completed and Process 2 exits, BIC2 will be freed while there is no guarantee that bfqq2 will be freed before BIC2.
Fix the problem by clearing bfqq->bic when the process references drop to zero, since they are not related anymore.
Fixes: 3bc5e683c67d ("bfq: Split shared queues on move between cgroups") Signed-off-by: Yu Kuai yukuai3@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- block/bfq-iosched.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 6edc00da5b57..829e713639ad 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2775,6 +2775,15 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq != bfqd->in_service_queue) bfq_del_bfqq_busy(bfqd, bfqq, false);
+ /* + * __bfq_bic_change_cgroup() just reset bic->bfqq so that a new bfqq + * will be created to handle new io, while old bfqq will stay around + * until all the requests are completed. It's unsafe to keep bfqq->bic + * since they are not related anymore. + */ + if (bfqq_process_refs(bfqq) == 1) + bfqq->bic = NULL; + bfq_put_queue(bfqq); }
From: Li Nan linan122@huawei.com
hulk inclusion category: bugfix bugzilla: 187921, https://gitee.com/openeuler/kernel/issues/I66VDB CVE: NA
--------------------------------
Enabling CONFIG_BLK_RQ_ALLOC_TIME breaks kABI; use the request wrapper to fix it.
Signed-off-by: Li Nan linan122@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- block/blk-iocost.c | 11 ++++++++--- block/blk-mq.c | 2 +- block/blk-mq.h | 4 ++++ include/linux/blkdev.h | 4 ---- 4 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 9207850d3056..c87320fa221e 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2747,8 +2747,13 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq) struct ioc_pcpu_stat *ccs; u64 on_q_ns, rq_wait_ns, size_nsec; int pidx, rw; + struct request_wrapper *rq_wrapper;
- if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns) + if (WARN_ON_ONCE(!(rq->rq_flags & RQF_FROM_BLOCK))) + return; + + rq_wrapper = request_to_wrapper(rq); + if (!ioc->enabled || !rq_wrapper->alloc_time_ns || !rq->start_time_ns) return;
switch (req_op(rq) & REQ_OP_MASK) { @@ -2764,8 +2769,8 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq) return; }
- on_q_ns = ktime_get_ns() - rq->alloc_time_ns; - rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns; + on_q_ns = ktime_get_ns() - rq_wrapper->alloc_time_ns; + rq_wait_ns = rq->start_time_ns - rq_wrapper->alloc_time_ns; size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
ccs = get_cpu_ptr(ioc->pcpu_stat); diff --git a/block/blk-mq.c b/block/blk-mq.c index ffabe9c3de31..c02e42071615 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -386,7 +386,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->rq_disk = NULL; rq->part = NULL; #ifdef CONFIG_BLK_RQ_ALLOC_TIME - rq->alloc_time_ns = alloc_time_ns; + request_to_wrapper(rq)->alloc_time_ns = alloc_time_ns; #endif request_to_wrapper(rq)->stat_time_ns = 0; if (blk_mq_need_time_stamp(rq)) diff --git a/block/blk-mq.h b/block/blk-mq.h index 358659fd3175..7bb0b82bfbe9 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -40,6 +40,10 @@ struct blk_mq_ctx { struct request_wrapper { /* Time that I/O was counted in part_get_stat_info(). */ u64 stat_time_ns; +#ifdef CONFIG_BLK_RQ_ALLOC_TIME + /* Time that the first bio started allocating this request. */ + u64 alloc_time_ns; +#endif } ____cacheline_aligned;
static inline struct request_wrapper *request_to_wrapper(void *rq) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index eed319e5d192..171884608cad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -202,10 +202,6 @@ struct request {
struct gendisk *rq_disk; struct hd_struct *part; -#ifdef CONFIG_BLK_RQ_ALLOC_TIME - /* Time that the first bio started allocating this request. */ - u64 alloc_time_ns; -#endif /* Time that this request was allocated for this IO. */ u64 start_time_ns; /* Time that I/O was submitted to the device. */
From: Li Nan linan122@huawei.com
hulk inclusion category: bugfix bugzilla: 187921, https://gitee.com/openeuler/kernel/issues/I66VDB CVE: NA
--------------------------------
Enabling CONFIG_BLK_CGROUP_IOCOST breaks kABI; use reserved fields to fix it.
Signed-off-by: Li Nan linan122@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/blk_types.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fbea6b0aef3e..1853ec569b72 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -239,9 +239,6 @@ struct bio { */ struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; -#ifdef CONFIG_BLK_CGROUP_IOCOST - u64 bi_iocost_cost; -#endif #endif
#ifdef CONFIG_BLK_INLINE_ENCRYPTION @@ -268,7 +265,11 @@ struct bio {
struct bio_set *bi_pool;
+#ifdef CONFIG_BLK_CGROUP_IOCOST + KABI_USE(1, u64 bi_iocost_cost) +#else KABI_RESERVE(1) +#endif KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4)
From: Ming Lei ming.lei@redhat.com
mainline inclusion from mainline-v5.16-rc1 commit 8c54499a59b026a3dc2afccf6e1b36d5700d2fef category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I674BF CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
When the zram module is being unloaded, no one should be using the zram disks. However even while being unloaded the zram module's sysfs attributes might be poked at to re-configure zram devices. This is expected, and kernfs ensures that these operations complete before device_del() completes.
But reset_store() may set ->claim, which makes zram_remove() fail. When this happens, zram_reset_device() is bypassed and zram->comp can't be destroyed, so the warning 'Error: Removing state 63 which has instances left.' is triggered while unloading the module, together with memory leaks and the like.
Fix the issue by not failing zram_remove() if ->claim is set. We actually need to do nothing in the case that zram_reset() is running, since del_gendisk() will wait until zram_reset() is done.
Reported-by: Luis Chamberlain mcgrof@kernel.org Reviewed-by: Luis Chamberlain mcgrof@kernel.org Signed-off-by: Ming Lei ming.lei@redhat.com Acked-by: Minchan Kim minchan@kernel.org Link: https://lore.kernel.org/r/20211025025426.2815424-3-ming.lei@redhat.com Signed-off-by: Jens Axboe axboe@kernel.dk
Conflicts: drivers/block/zram/zram_drv.c
Signed-off-by: Longlong Xia xialonglong1@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/block/zram/zram_drv.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0636df6b67db..4916c02d4f8f 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -1983,31 +1983,46 @@ static int zram_add(void) static int zram_remove(struct zram *zram) { struct block_device *bdev; + bool claimed;
bdev = bdget_disk(zram->disk, 0); if (!bdev) return -ENOMEM;
mutex_lock(&bdev->bd_mutex); - if (bdev->bd_openers || zram->claim) { + if (bdev->bd_openers) { mutex_unlock(&bdev->bd_mutex); bdput(bdev); return -EBUSY; }
- zram->claim = true; + claimed = zram->claim; + if (!claimed) + zram->claim = true; mutex_unlock(&bdev->bd_mutex);
zram_debugfs_unregister(zram);
- /* Make sure all the pending I/O are finished */ - fsync_bdev(bdev); - zram_reset_device(zram); + if (claimed) { + /* + * If we were claimed by reset_store(), del_gendisk() will + * wait until reset_store() is done, so nothing need to do. + */ + ; + } else { + /* Make sure all the pending I/O are finished */ + fsync_bdev(bdev); + zram_reset_device(zram); + } bdput(bdev);
pr_info("Removed device: %s\n", zram->disk->disk_name);
del_gendisk(zram->disk); + + /* del_gendisk drains pending reset_store */ + WARN_ON_ONCE(claimed && zram->claim); + blk_cleanup_queue(zram->disk->queue); put_disk(zram->disk); kfree(zram); @@ -2085,7 +2100,7 @@ static struct class zram_control_class = {
static int zram_remove_cb(int id, void *ptr, void *data) { - zram_remove(ptr); + WARN_ON_ONCE(zram_remove(ptr)); return 0; }
From: Ming Lei ming.lei@redhat.com
mainline inclusion from mainline-v5.16-rc1 commit 5a4b653655d554b5f51a5d2252882708c56a6f7e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I674BF CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
After resetting device in zram_remove(), disksize_store still may come and allocate resources again before deleting gendisk, fix the race by resetting zram after del_gendisk() returns. At that time, disksize_store can't come any more.
Reported-by: Luis Chamberlain mcgrof@kernel.org Reviewed-by: Luis Chamberlain mcgrof@kernel.org Signed-off-by: Ming Lei ming.lei@redhat.com Acked-by: Minchan Kim minchan@kernel.org Link: https://lore.kernel.org/r/20211025025426.2815424-4-ming.lei@redhat.com Signed-off-by: Jens Axboe axboe@kernel.dk
Conflicts: drivers/block/zram/zram_drv.c
Signed-off-by: Longlong Xia xialonglong1@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/block/zram/zram_drv.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4916c02d4f8f..40df7f994b89 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2023,6 +2023,13 @@ static int zram_remove(struct zram *zram) /* del_gendisk drains pending reset_store */ WARN_ON_ONCE(claimed && zram->claim);
+ /* + * disksize_store() may be called in between zram_reset_device() + * and del_gendisk(), so run the last reset to avoid leaking + * anything allocated with disksize_store() + */ + zram_reset_device(zram); + blk_cleanup_queue(zram->disk->queue); put_disk(zram->disk); kfree(zram);
From: Zhihao Cheng chengzhihao1@huawei.com
mainline inclusion from mainline-v6.2-rc1 commit 7991dbff6849f67e823b7cc0c15e5a90b0549b9f category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I65M32
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id...
--------------------------------
Recently we found a softlockup problem in the dm thin pool btree lookup code due to corrupted metadata:
Kernel panic - not syncing: softlockup: hung tasks CPU: 7 PID: 2669225 Comm: kworker/u16:3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) Workqueue: dm-thin do_worker [dm_thin_pool] Call Trace: <IRQ> dump_stack+0x9c/0xd3 panic+0x35d/0x6b9 watchdog_timer_fn.cold+0x16/0x25 __run_hrtimer+0xa2/0x2d0 </IRQ> RIP: 0010:__relink_lru+0x102/0x220 [dm_bufio] __bufio_new+0x11f/0x4f0 [dm_bufio] new_read+0xa3/0x1e0 [dm_bufio] dm_bm_read_lock+0x33/0xd0 [dm_persistent_data] ro_step+0x63/0x100 [dm_persistent_data] btree_lookup_raw.constprop.0+0x44/0x220 [dm_persistent_data] dm_btree_lookup+0x16f/0x210 [dm_persistent_data] dm_thin_find_block+0x12c/0x210 [dm_thin_pool] __process_bio_read_only+0xc5/0x400 [dm_thin_pool] process_thin_deferred_bios+0x1a4/0x4a0 [dm_thin_pool] process_one_work+0x3c5/0x730
Following process may generate a broken btree mixed with fresh and stale btree nodes, which could get dm thin trapped in an infinite loop while looking up data block: Transaction 1: pmd->root = A, A->B->C // One path in btree pmd->root = X, X->Y->Z // Copy-up Transaction 2: X,Z is updated on disk, Y write failed. // Commit failed, dm thin becomes read-only. process_bio_read_only dm_thin_find_block __find_block dm_btree_lookup(pmd->root) The pmd->root points to a broken btree, Y may contain stale node pointing to any block, for example X, which gets dm thin trapped into a dead loop while looking up Z.
Fix this by setting pmd->root in __open_metadata(), so that dm thin will use the last transaction's pmd->root if commit failed.
Fetch a reproducer in [Link].
Link: https://bugzilla.kernel.org/show_bug.cgi?id=216790 Cc: stable@vger.kernel.org Fixes: 991d9fa02da0 ("dm: add thin provisioning target") Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Acked-by: Joe Thornber ejt@redhat.com Signed-off-by: Mike Snitzer snitzer@kernel.org Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/md/dm-thin-metadata.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index d1e0ae06f04d..8f4d149bb99d 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -701,6 +701,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd) goto bad_cleanup_data_sm; }
+ /* + * For pool metadata opening process, root setting is redundant + * because it will be set again in __begin_transaction(). But dm + * pool aborting process really needs to get last transaction's + * root to avoid accessing broken btree. + */ + pmd->root = le64_to_cpu(disk_super->data_mapping_root); + pmd->details_root = le64_to_cpu(disk_super->device_details_root); + __setup_btree_details(pmd); dm_bm_unlock(sblock);
From: Zheng Yejian zhengyejian1@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I67QMO CVE: NA
--------------------------------
When CONFIG_LIVEPATCH_WO_FTRACE is disabled, the compiler reports the following error: kernel/livepatch/core.c: In function ‘check_address_conflict’: kernel/livepatch/core.c:1214:18: error: ‘KLP_MAX_REPLACE_SIZE’ undeclared (first use in this function) 1214 | end = start + KLP_MAX_REPLACE_SIZE - 1; | ^~~~~~~~~~~~~~~~~~~~ kernel/livepatch/core.c:1214:18: note: each undeclared identifier is reported only once for each function it appears in At top level: kernel/livepatch/core.c:1195:12: warning: ‘check_address_conflict’ defined but not used [-Wunused-function] 1195 | static int check_address_conflict(struct klp_patch *patch) | ^~~~~~~~~~~~~~~~~~~~~~
Fixes: 2c3c0b3af9f9 ("livepatch/x86: Avoid conflict with static {call,key}") Fixes: ed8c4c729c5f ("livepatch/core: Restrict minimum size of function that can be patched") Signed-off-by: Zheng Yejian zhengyejian1@huawei.com Reviewed-by: Kuohai Xu xukuohai@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- kernel/livepatch/core.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 9e65f6ae4061..a00aa45eb065 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -1041,11 +1041,13 @@ static int klp_init_object_loaded(struct klp_patch *patch, func->old_name); return -ENOENT; } +#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY if (func->old_size < KLP_MAX_REPLACE_SIZE) { pr_err("%s size less than limit (%lu < %zu)\n", func->old_name, func->old_size, KLP_MAX_REPLACE_SIZE); return -EINVAL; } +#endif
#ifdef PPC64_ELF_ABI_v1 /* @@ -1195,6 +1197,7 @@ extern int klp_static_call_register(struct module *mod); static inline int klp_static_call_register(struct module *mod) { return 0; } #endif
+#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY static int check_address_conflict(struct klp_patch *patch) { struct klp_object *obj; @@ -1231,6 +1234,7 @@ static int check_address_conflict(struct klp_patch *patch) } return 0; } +#endif
static int klp_init_patch(struct klp_patch *patch) { @@ -1278,11 +1282,11 @@ static int klp_init_patch(struct klp_patch *patch) } module_enable_ro(patch->mod, true);
+#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY ret = check_address_conflict(patch); if (ret) return ret;
-#ifdef CONFIG_LIVEPATCH_STOP_MACHINE_CONSISTENCY klp_for_each_object(patch, obj) klp_load_hook(obj); #endif
From: Zheng Wang zyytlz.wz@163.com
mainline inclusion from mainline-v6.2-rc1 commit 643a16a0eb1d6ac23744bb6e90a00fc21148a9dc category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5UBUF CVE: CVE-2022-3424
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?id...
--------------------------------
In some bad situations, the gts may be freed in gru_check_chiplet_assignment. The call chain can be gru_unload_context->gru_free_gru_context->gts_drop and finally kfree. However, the caller doesn't know whether the gts has been freed and uses it afterwards. This will trigger a use-after-free bug.
Fix it by introducing a return value to see if it's in error path or not. Free the gts in caller if gru_check_chiplet_assignment check failed.
Fixes: 55484c45dbec ("gru: allow users to specify gru chiplet 2") Signed-off-by: Zheng Wang zyytlz.wz@163.com Acked-by: Dimitri Sivanich sivanich@hpe.com Link: https://lore.kernel.org/r/20221110035033.19498-1-zyytlz.wz@163.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Zheng Yejian zhengyejian1@huawei.com Reviewed-by: Kuohai Xu xukuohai@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/misc/sgi-gru/grufault.c | 13 +++++++++++-- drivers/misc/sgi-gru/grumain.c | 22 ++++++++++++++++++---- drivers/misc/sgi-gru/grutables.h | 2 +- 3 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c index 723825524ea0..9c7d475d1890 100644 --- a/drivers/misc/sgi-gru/grufault.c +++ b/drivers/misc/sgi-gru/grufault.c @@ -648,6 +648,7 @@ int gru_handle_user_call_os(unsigned long cb) if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB) return -EINVAL;
+again: gts = gru_find_lock_gts(cb); if (!gts) return -EINVAL; @@ -656,7 +657,11 @@ int gru_handle_user_call_os(unsigned long cb) if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE) goto exit;
- gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + gru_unlock_gts(gts); + gru_unload_context(gts, 1); + goto again; + }
/* * CCH may contain stale data if ts_force_cch_reload is set. @@ -874,7 +879,11 @@ int gru_set_context_option(unsigned long arg) } else { gts->ts_user_blade_id = req.val1; gts->ts_user_chiplet_id = req.val0; - gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + gru_unlock_gts(gts); + gru_unload_context(gts, 1); + return ret; + } } break; case sco_gseg_owner: diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 40ac59dd018c..e2325e3d077e 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -716,9 +716,10 @@ static int gru_check_chiplet_assignment(struct gru_state *gru, * chiplet. Misassignment can occur if the process migrates to a different * blade or if the user changes the selected blade/chiplet. */ -void gru_check_context_placement(struct gru_thread_state *gts) +int gru_check_context_placement(struct gru_thread_state *gts) { struct gru_state *gru; + int ret = 0;
/* * If the current task is the context owner, verify that the @@ -726,15 +727,23 @@ void gru_check_context_placement(struct gru_thread_state *gts) * references. Pthread apps use non-owner references to the CBRs. */ gru = gts->ts_gru; + /* + * If gru or gts->ts_tgid_owner isn't initialized properly, return + * success to indicate that the caller does not need to unload the + * gru context.The caller is responsible for their inspection and + * reinitialization if needed. + */ if (!gru || gts->ts_tgid_owner != current->tgid) - return; + return ret;
if (!gru_check_chiplet_assignment(gru, gts)) { STAT(check_context_unload); - gru_unload_context(gts, 1); + ret = -EINVAL; } else if (gru_retarget_intr(gts)) { STAT(check_context_retarget_intr); } + + return ret; }
@@ -934,7 +943,12 @@ vm_fault_t gru_fault(struct vm_fault *vmf) mutex_lock(&gts->ts_ctxlock); preempt_disable();
- gru_check_context_placement(gts); + if (gru_check_context_placement(gts)) { + preempt_enable(); + mutex_unlock(&gts->ts_ctxlock); + gru_unload_context(gts, 1); + return VM_FAULT_NOPAGE; + }
if (!gts->ts_gru) { STAT(load_user_context); diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h index 5ce8f3081e96..10f0a083b1fa 100644 --- a/drivers/misc/sgi-gru/grutables.h +++ b/drivers/misc/sgi-gru/grutables.h @@ -637,7 +637,7 @@ extern int gru_user_flush_tlb(unsigned long arg); extern int gru_user_unload_context(unsigned long arg); extern int gru_get_exception_detail(unsigned long arg); extern int gru_set_context_option(unsigned long address); -extern void gru_check_context_placement(struct gru_thread_state *gts); +extern int gru_check_context_placement(struct gru_thread_state *gts); extern int gru_cpu_fault_map_id(void); extern struct vm_area_struct *gru_find_vma(unsigned long vaddr); extern void gru_flush_all_tlb(struct gru_state *gru);
From: Kefeng Wang wangkefeng.wang@huawei.com
mainline inclusion from mainline-v6.2-rc1 commit de2e5171433126d340573cb7d0d4fcac084ab2a0 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I67BWQ CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
When handling MADV_WILLNEED in madvise(), a softlockup may occur in swapin_walk_pmd_entry() if swapping in lots of memory on a slow device. Add a cond_resched() to avoid the possible softlockup.
Link: https://lkml.kernel.org/r/20221205140327.72304-1-wangkefeng.wang@huawei.com Fixes: 1998cc048901 ("mm: make madvise(MADV_WILLNEED) support swap file prefetch") Signed-off-by: Kefeng Wang wangkefeng.wang@huawei.com Cc: Shaohua Li shli@fusionio.com Cc: Hugh Dickins hughd@google.com Cc: Rik van Riel riel@redhat.com Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflicts: mm/madvise.c
Signed-off-by: Longlong Xia xialonglong1@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/madvise.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/madvise.c b/mm/madvise.c index ba666aa1203b..1af2e4377bbc 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -221,6 +221,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, if (page) put_page(page); } + cond_resched();
return 0; }
From: Junhao He hejunhao3@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I68F5A CVE: NA
--------------------------------
In TSV200 platform, SCCL is Aff3[7:0], CCL is Aff2[7:0] if mt. Fixes: 1af26389a3 ("perf: hisi: Fix read sccl_id and ccl_id error in some platform")
Signed-off-by: Junhao He hejunhao3@huawei.com Reviewed-by: Yang Shen shenyang39@huawei.com Reviewed-by: Yang Jihong yangjihong1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 66ed28c2c544..9dbd3a98f7ad 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -437,7 +437,6 @@ static void hisi_read_sccl_and_ccl_id(int *scclp, int *cclp) if (mt) { switch (read_cpuid_part_number()) { case HISI_CPU_PART_TSV110: - case HISI_CPU_PART_TSV200: case ARM_CPU_PART_CORTEX_A55: sccl = aff2 >> 3; ccl = aff2 & 0x7;
From: Alan Stern stern@rowland.harvard.edu
mainline inclusion from mainline-v6.1-rc2 commit 41fd1cb6151439b205ac7611883d85ae14250172 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6068W CVE: CVE-2022-3903
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Automatic kernel fuzzing led to a WARN about invalid pipe direction in the mceusb driver:
------------[ cut here ]------------ usb 6-1: BOGUS control dir, pipe 80000380 doesn't match bRequestType 40 WARNING: CPU: 0 PID: 2465 at drivers/usb/core/urb.c:410 usb_submit_urb+0x1326/0x1820 drivers/usb/core/urb.c:410 Modules linked in: CPU: 0 PID: 2465 Comm: kworker/0:2 Not tainted 5.19.0-rc4-00208-g69cb6c6556ad #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014 Workqueue: usb_hub_wq hub_event RIP: 0010:usb_submit_urb+0x1326/0x1820 drivers/usb/core/urb.c:410 Code: 7c 24 40 e8 ac 23 91 fd 48 8b 7c 24 40 e8 b2 70 1b ff 45 89 e8 44 89 f1 4c 89 e2 48 89 c6 48 c7 c7 a0 30 a9 86 e8 48 07 11 02 <0f> 0b e9 1c f0 ff ff e8 7e 23 91 fd 0f b6 1d 63 22 83 05 31 ff 41 RSP: 0018:ffffc900032becf0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff8881100f3058 RCX: 0000000000000000 RDX: ffffc90004961000 RSI: ffff888114c6d580 RDI: fffff52000657d90 RBP: ffff888105ad90f0 R08: ffffffff812c3638 R09: 0000000000000000 R10: 0000000000000005 R11: ffffed1023504ef1 R12: ffff888105ad9000 R13: 0000000000000040 R14: 0000000080000380 R15: ffff88810ba96500 FS: 0000000000000000(0000) GS:ffff88811a800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffe810bda58 CR3: 000000010b720000 CR4: 0000000000350ef0 Call Trace: <TASK> usb_start_wait_urb+0x101/0x4c0 drivers/usb/core/message.c:58 usb_internal_control_msg drivers/usb/core/message.c:102 [inline] usb_control_msg+0x31c/0x4a0 drivers/usb/core/message.c:153 mceusb_gen1_init drivers/media/rc/mceusb.c:1431 [inline] mceusb_dev_probe+0x258e/0x33f0 drivers/media/rc/mceusb.c:1807
The reason for the warning is clear enough; the driver sends an unusual read request on endpoint 0 but does not set the USB_DIR_IN bit in the bRequestType field.
More importantly, the whole situation can be avoided and the driver simplified by converting it over to the relatively new usb_control_msg_recv() and usb_control_msg_send() routines. That's what this fix does.
Reported-and-tested-by: Rondreis linhaoguo86@gmail.com Link: https://lore.kernel.org/all/CAB7eexLLApHJwZfMQ=X-PtRhw0BgO+5KcSMS05FNUYejJXq...
Signed-off-by: Alan Stern stern@rowland.harvard.edu Cc: stable@vger.kernel.org Signed-off-by: Sean Young sean@mess.org Signed-off-by: Mauro Carvalho Chehab mchehab@kernel.org Signed-off-by: Zhang Peng zhangpeng362@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/media/rc/mceusb.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index dbb5a4f44bda..de4cf6eb5258 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -1416,42 +1416,37 @@ static void mceusb_gen1_init(struct mceusb_dev *ir) { int ret; struct device *dev = ir->dev; - char *data; - - data = kzalloc(USB_CTRL_MSG_SZ, GFP_KERNEL); - if (!data) { - dev_err(dev, "%s: memory allocation failed!", __func__); - return; - } + char data[USB_CTRL_MSG_SZ];
/* * This is a strange one. Windows issues a set address to the device * on the receive control pipe and expect a certain value pair back */ - ret = usb_control_msg(ir->usbdev, usb_rcvctrlpipe(ir->usbdev, 0), - USB_REQ_SET_ADDRESS, USB_TYPE_VENDOR, 0, 0, - data, USB_CTRL_MSG_SZ, 3000); + ret = usb_control_msg_recv(ir->usbdev, 0, USB_REQ_SET_ADDRESS, + USB_DIR_IN | USB_TYPE_VENDOR, + 0, 0, data, USB_CTRL_MSG_SZ, 3000, + GFP_KERNEL); dev_dbg(dev, "set address - ret = %d", ret); dev_dbg(dev, "set address - data[0] = %d, data[1] = %d", data[0], data[1]);
/* set feature: bit rate 38400 bps */ - ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), - USB_REQ_SET_FEATURE, USB_TYPE_VENDOR, - 0xc04e, 0x0000, NULL, 0, 3000); + ret = usb_control_msg_send(ir->usbdev, 0, + USB_REQ_SET_FEATURE, USB_TYPE_VENDOR, + 0xc04e, 0x0000, NULL, 0, 3000, GFP_KERNEL);
dev_dbg(dev, "set feature - ret = %d", ret);
/* bRequest 4: set char length to 8 bits */ - ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), - 4, USB_TYPE_VENDOR, - 0x0808, 0x0000, NULL, 0, 3000); + ret = usb_control_msg_send(ir->usbdev, 0, + 4, USB_TYPE_VENDOR, + 0x0808, 0x0000, NULL, 0, 3000, GFP_KERNEL); dev_dbg(dev, "set char length - retB = %d", ret);
/* bRequest 2: set handshaking to use DTR/DSR */ - ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), - 2, USB_TYPE_VENDOR, - 0x0000, 0x0100, NULL, 0, 3000); + ret = usb_control_msg_send(ir->usbdev, 0, + 2, USB_TYPE_VENDOR, + 0x0000, 0x0100, NULL, 0, 3000, GFP_KERNEL); dev_dbg(dev, "set handshake - retC = %d", ret);
/* device resume */ @@ -1459,8 +1454,6 @@ static void mceusb_gen1_init(struct mceusb_dev *ir)
/* get hw/sw revision? */ mce_command_out(ir, GET_REVISION, sizeof(GET_REVISION)); - - kfree(data); }
static void mceusb_gen2_init(struct mceusb_dev *ir)