From: Ma Wupeng <mawupeng1@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
For a reliable memory allocation bound to nodes that do not have any reliable zones, the allocation will fail and a warning will be produced at the end of __alloc_pages_slowpath().
Although such an allocation can fall back to the movable zone in check_after_alloc() if fallback is enabled, the pointless warning should still be prevented.
To solve this problem, fall back to the movable zone if no suitable zone is found.
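As a side note, the decision this change adds can be modeled outside the kernel. The sketch below is a stand-alone illustration with invented types and names (node_model, first_suitable_zone and the fallback_enabled flag are not kernel code); it only shows the idea of retrying the zone lookup without the reliability requirement when the bound node offers no reliable (mirrored) zone.

/* Stand-alone model (invented names, illustration only) of the fallback
 * decision: if binding to a node without a reliable zone leaves the
 * allocation with no usable zone, and fallback is enabled, retry the
 * lookup without the reliability requirement so it can land in movable. */
#include <stdbool.h>
#include <stdio.h>

enum zone_type { ZONE_NORMAL, ZONE_MOVABLE, NR_ZONES };

struct node_model {
        bool has_zone[NR_ZONES];        /* which zones this node provides */
        bool zone_reliable[NR_ZONES];   /* which of them are mirrored/reliable */
};

/* Return the highest suitable zone index, or -1 if none qualifies. */
static int first_suitable_zone(const struct node_model *node, bool want_reliable)
{
        for (int z = NR_ZONES - 1; z >= 0; z--) {
                if (!node->has_zone[z])
                        continue;
                if (want_reliable && !node->zone_reliable[z])
                        continue;
                return z;
        }
        return -1;
}

int main(void)
{
        /* A node that only has a non-mirrored movable zone. */
        struct node_model node = {
                .has_zone = { [ZONE_MOVABLE] = true },
                .zone_reliable = { false, false },
        };
        bool fallback_enabled = true;

        int z = first_suitable_zone(&node, true);
        if (z < 0 && fallback_enabled)          /* the step this patch adds */
                z = first_suitable_zone(&node, false);

        printf("selected zone: %d\n", z);       /* ZONE_MOVABLE instead of failure */
        return 0;
}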
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
---
 mm/page_alloc.c | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1e59761354e1..44d286286dbb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4272,6 +4272,25 @@ check_retry_cpuset(int cpuset_mems_cookie, struct alloc_context *ac)
 }
 #ifdef CONFIG_MEMORY_RELIABLE
+/*
+ * if fallback is enabled, fallback to movable zone if no dma/normal zone
+ * found
+ */
+static inline struct zone *mem_reliable_fallback_zone(gfp_t gfp_mask,
+                                                      struct alloc_context *ac)
+{
+        if (!reliable_allow_fb_enabled())
+                return NULL;
+
+        if (!(gfp_mask & ___GFP_RELIABILITY))
+                return NULL;
+
+        ac->high_zoneidx = gfp_zone(gfp_mask & ~___GFP_RELIABILITY);
+        ac->preferred_zoneref = first_zones_zonelist(
+                        ac->zonelist, ac->high_zoneidx, ac->nodemask);
+        return ac->preferred_zoneref->zone;
+}
+
 static inline void mem_reliable_fallback_slowpath(gfp_t gfp_mask,
                                                   struct alloc_context *ac)
 {
@@ -4290,6 +4309,11 @@ static inline void mem_reliable_fallback_slowpath(gfp_t gfp_mask,
         }
 }
 #else
+static inline struct zone *mem_reliable_fallback_zone(gfp_t gfp_mask,
+                                                      struct alloc_context *ac)
+{
+        return NULL;
+}
 static inline void mem_reliable_fallback_slowpath(gfp_t gfp_mask,
                                                   struct alloc_context *ac) {}
 #endif
@@ -4339,8 +4363,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
          */
         ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
                                         ac->high_zoneidx, ac->nodemask);
-        if (!ac->preferred_zoneref->zone)
-                goto nopage;
+        if (!ac->preferred_zoneref->zone) {
+                if (!mem_reliable_fallback_zone(gfp_mask, ac))
+                        goto nopage;
+        }
         if (gfp_mask & __GFP_KSWAPD_RECLAIM)
                 wake_all_kswapds(order, gfp_mask, ac);
From: David Jeffery <djeffery@redhat.com>
mainline inclusion
from mainline-v5.18-rc1
commit 8f5fea65b06de1cc51d4fc23fb4d378d1abd6ed7
category: bugfix
bugzilla: 187541, https://gitee.com/openeuler/kernel/issues/I5RUM6
CVE: NA
--------------------------------
When blk_mq_delay_run_hw_queues sets an hctx to run in the future, it can reset the delay length for an already pending delayed work run_work. This creates a scenario where multiple hctx may have their queues set to run, but if one runs first and finds nothing to do, it can reset the delay of another hctx and stall the other hctx's ability to run requests.
To avoid this I/O stall, when an hctx's run_work is already pending, leave it untouched so it runs at its currently designated time rather than extending its delay. The work will still run, which keeps closed the race that blk_mq_delay_run_hw_queues() is needed for, while also avoiding the stall.
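The behaviour difference is easy to model in user space. The snippet below is a hypothetical, self-contained sketch (delayed_work_model and the helper names are invented, not blk-mq APIs): a "mod"-style re-arm always overwrites the pending expiry, whereas the patched check leaves an already pending deadline alone.

/* Minimal user-space model (hypothetical, illustration only) of why
 * re-arming an already pending delayed work extends its expiry. */
#include <stdbool.h>
#include <stdio.h>

struct delayed_work_model {
        bool pending;
        unsigned long expires;  /* absolute "jiffies" when it will run */
};

/* mod_delayed_work()-like behaviour: always (re)arm the timer. */
static void mod_delayed_work_model(struct delayed_work_model *dw,
                                   unsigned long now, unsigned long delay)
{
        dw->pending = true;
        dw->expires = now + delay;      /* overwrites any earlier deadline */
}

/* Patched behaviour: leave a pending work's deadline untouched. */
static void delay_run_patched(struct delayed_work_model *dw,
                              unsigned long now, unsigned long delay)
{
        if (dw->pending)
                return;
        mod_delayed_work_model(dw, now, delay);
}

int main(void)
{
        struct delayed_work_model dw = { 0 };

        mod_delayed_work_model(&dw, 0, 10);     /* first caller: run at t=10 */
        mod_delayed_work_model(&dw, 5, 100);    /* second caller pushes it to t=105 */
        printf("unpatched: expires at %lu\n", dw.expires);

        dw.pending = false;
        delay_run_patched(&dw, 0, 10);          /* first caller: run at t=10 */
        delay_run_patched(&dw, 5, 100);         /* pending -> deadline kept at t=10 */
        printf("patched:   expires at %lu\n", dw.expires);
        return 0;
}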
Signed-off-by: David Jeffery <djeffery@redhat.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20220131203337.GA17666@redhat
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
---
 block/blk-mq.c | 9 +++++++++
 1 file changed, 9 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 690dbc02ab38..96debbe63ad2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1550,6 +1550,15 @@ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs)
                 if (blk_mq_hctx_stopped(hctx))
                         continue;
+                /*
+                 * If there is already a run_work pending, leave the
+                 * pending delay untouched. Otherwise, a hctx can stall
+                 * if another hctx is re-delaying the other's work
+                 * before the work executes.
+                 */
+                if (delayed_work_pending(&hctx->run_work))
+                        continue;
+
                 blk_mq_delay_run_hw_queue(hctx, msecs);
         }
 }
From: Wang Wensheng <wangwensheng4@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5PD4P
CVE: NA
--------------------------------
[ 2058.802818][ T290] BUG: KASAN: use-after-free in get_process_sp_res+0x70/0x134
[ 2058.810194][ T290] Read of size 8 at addr ffff00088dc6ab28 by task test_debug_loop/290
[ 2058.820520][ T290] CPU: 5 PID: 290 Comm: test_debug_loop Tainted: G W OE 5.10.0+ #2
[ 2058.829377][ T290] Hardware name: EVB(EP) (DT)
[ 2058.833982][ T290] Call trace:
[ 2058.837217][ T290]  dump_backtrace+0x0/0x30c
[ 2058.841660][ T290]  show_stack+0x20/0x30
[ 2058.845758][ T290]  dump_stack+0x120/0x1b0
[ 2058.850028][ T290]  print_address_description.constprop.0+0x2c/0x1fc
[ 2058.856555][ T290]  __kasan_report+0xfc/0x160
[ 2058.861086][ T290]  kasan_report+0x44/0xb0
[ 2058.865356][ T290]  __asan_load8+0x94/0xd0
[ 2058.869623][ T290]  get_process_sp_res+0x70/0x134
[ 2058.874501][ T290]  proc_usage_show+0x1ac/0x304
[ 2058.879208][ T290]  seq_read_iter+0x254/0x750
[ 2058.883728][ T290]  proc_reg_read_iter+0x100/0x140
[ 2058.888689][ T290]  new_sync_read+0x1cc/0x2c0
[ 2058.893215][ T290]  vfs_read+0x1f4/0x250
[ 2058.897304][ T290]  ksys_read+0xcc/0x170
[ 2058.901399][ T290]  __arm64_sys_read+0x4c/0x60
[ 2058.906016][ T290]  el0_svc_common.constprop.0+0xb4/0x2a0
[ 2058.911584][ T290]  do_el0_svc+0x8c/0xb0
[ 2058.915677][ T290]  el0_svc+0x20/0x30
[ 2058.919503][ T290]  el0_sync_handler+0xb0/0xbc
[ 2058.924114][ T290]  el0_sync+0x180/0x1c0
[ 2058.928190][ T290]
[ 2058.930444][ T290] Allocated by task 2176:
[ 2058.934714][ T290]  kasan_save_stack+0x28/0x60
[ 2058.939328][ T290]  __kasan_kmalloc.constprop.0+0xc8/0xf0
[ 2058.944909][ T290]  kasan_kmalloc+0x10/0x20
[ 2058.949268][ T290]  kmem_cache_alloc_trace+0x128/0xabc
[ 2058.954577][ T290]  create_spg_node+0x58/0x214
[ 2058.959188][ T290]  local_group_add_task+0x30/0x14c
[ 2058.964231][ T290]  init_local_group+0xd0/0x1a0
[ 2058.968936][ T290]  sp_init_group_master_locked.part.0+0x19c/0x290
[ 2058.975298][ T290]  mg_sp_group_add_task+0x73c/0xdb0
[ 2058.980456][ T290]  dev_sp_add_group+0x124/0x2dc [sharepool_dev]
[ 2058.986647][ T290]  dev_ioctl+0x21c/0x2ec [sharepool_dev]
[ 2058.992222][ T290]  __arm64_sys_ioctl+0xd8/0x120
[ 2058.997010][ T290]  el0_svc_common.constprop.0+0xb4/0x2a0
[ 2059.002572][ T290]  do_el0_svc+0x8c/0xb0
[ 2059.006662][ T290]  el0_svc+0x20/0x30
[ 2059.010489][ T290]  el0_sync_handler+0xb0/0xbc
[ 2059.015101][ T290]  el0_sync+0x180/0x1c0
[ 2059.019176][ T290]
[ 2059.021427][ T290] Freed by task 4125:
[ 2059.025343][ T290]  kasan_save_stack+0x28/0x60
[ 2059.029949][ T290]  kasan_set_track+0x28/0x40
[ 2059.034476][ T290]  kasan_set_free_info+0x24/0x50
[ 2059.039347][ T290]  __kasan_slab_free+0x104/0x1ac
[ 2059.044227][ T290]  kasan_slab_free+0x14/0x20
[ 2059.048744][ T290]  kfree+0x164/0xb94
[ 2059.052576][ T290]  sp_group_post_exit+0xf0/0x980
[ 2059.057448][ T290]  mmput.part.0+0xb4/0x220
[ 2059.061790][ T290]  mmput+0x2c/0x40
[ 2059.065450][ T290]  exit_mm+0x27c/0x3a0
[ 2059.069450][ T290]  do_exit+0x2a0/0x790
[ 2059.073448][ T290]  do_group_exit+0x64/0x100
[ 2059.077884][ T290]  get_signal+0x1fc/0x9fc
[ 2059.082144][ T290]  do_signal+0x110/0x2cc
[ 2059.086320][ T290]  do_notify_resume+0x158/0x2b0
[ 2059.091108][ T290]  work_pending+0xc/0x6d4
[ 2059.095358][ T290]
Root cause: sp_group_post_exit() frees the spg_node while it is still linked into the group-node list that get_process_sp_res() walks, so a later read of the procfs file (proc_usage_show() in the report above) dereferences freed memory. Unlink the node from the list before freeing it.

Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
Reviewed-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
---
 mm/share_pool.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 661036a7df64..bd021da5a930 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -4749,6 +4749,7 @@ void sp_group_post_exit(struct mm_struct *mm)
                 /* match with refcount inc in sp_group_add_task */
                 if (atomic_dec_and_test(&spg->use_count))
                         free_sp_group_locked(spg);
+                list_del(&spg_node->group_node);
                 kfree(spg_node);
         }
         up_write(&sp_group_sem);
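As a general illustration of the bug class fixed above (a node freed while still linked into a list that is walked later), the stand-alone sketch below uses an invented singly linked list rather than the kernel's list_head; the point is simply that the node must be unlinked before it is freed.

/* Stand-alone sketch (invented names): freeing a node that is still
 * linked into a list leaves a dangling pointer behind, and any later
 * walk of the list reads freed memory. */
#include <stdio.h>
#include <stdlib.h>

struct node {
        struct node *next;
        long payload;
};

/* Unlink @victim from the singly linked list headed by @head. */
static void list_remove(struct node **head, struct node *victim)
{
        for (struct node **pp = head; *pp; pp = &(*pp)->next) {
                if (*pp == victim) {
                        *pp = victim->next;
                        return;
                }
        }
}

int main(void)
{
        struct node *head = NULL;

        for (long i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));
                n->payload = i;
                n->next = head;
                head = n;
        }

        /* The fix: unlink before freeing (the share_pool bug freed first). */
        struct node *victim = head->next;
        list_remove(&head, victim);
        free(victim);

        for (struct node *n = head; n; n = n->next)
                printf("%ld\n", n->payload);    /* walks only live nodes now */
        return 0;
}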
From: Lu Wei <luwei32@huawei.com>
mainline inclusion
from mainline-v6.0-rc6
commit 81225b2ea161af48e093f58e8dfee6d705b16af4
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5SYBY
CVE: NA
--------------------------------
If an AF_PACKET socket is used to send packets through ipvlan, and the default xmit function of the AF_PACKET socket is changed from dev_queue_xmit() to packet_direct_xmit() via setsockopt() with the option name PACKET_QDISC_BYPASS, skb->mac_header may not be reset and remains at its initial value of 65535. This may trigger a slab-out-of-bounds bug as follows:
=================================================================
BUG: KASAN: slab-out-of-bounds in ipvlan_xmit_mode_l2+0xdb/0x330 [ipvlan]
CPU: 2 PID: 1768 Comm: raw_send Kdump: loaded Not tainted 6.0.0-rc4+ #6
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33
Call Trace:
 print_address_description.constprop.0+0x1d/0x160
 print_report.cold+0x4f/0x112
 kasan_report+0xa3/0x130
 ipvlan_xmit_mode_l2+0xdb/0x330 [ipvlan]
 ipvlan_start_xmit+0x29/0xa0 [ipvlan]
 __dev_direct_xmit+0x2e2/0x380
 packet_direct_xmit+0x22/0x60
 packet_snd+0x7c9/0xc40
 sock_sendmsg+0x9a/0xa0
 __sys_sendto+0x18a/0x230
 __x64_sys_sendto+0x74/0x90
 do_syscall_64+0x3b/0x90
 entry_SYSCALL_64_after_hwframe+0x63/0xcd
The root cause is:
1. packet_snd() only resets skb->mac_header when sock->type is SOCK_RAW and skb->protocol is not specified, as in packet_parse_headers()
2. packet_direct_xmit() does not reset skb->mac_header the way dev_queue_xmit() does
In this case, skb->mac_header is 65535 when ipvlan_xmit_mode_l2() is called. So when ipvlan_xmit_mode_l2() fetches the mac header with eth_hdr(), which uses "skb->head + skb->mac_header", an out-of-bounds access occurs.
This patch replaces eth_hdr() with skb_eth_hdr() in ipvlan_xmit_mode_l2() and resets the mac header in the multicast path to solve this out-of-bounds bug.
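For reference, the out-of-bounds arithmetic can be reproduced in a stand-alone model. The sketch below uses an invented skb_model struct (not the real sk_buff) and hypothetical helper names; it only mirrors the fact that eth_hdr() resolves through head + mac_header while skb_eth_hdr() resolves through the current data pointer, so an unset mac_header of 0xffff points far past the buffer.

/* Stand-alone model (invented names, illustration only) of the two ways
 * of locating the Ethernet header from a socket buffer. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct skb_model {
        unsigned char *head;
        unsigned char *data;
        uint16_t mac_header;    /* offset from head; 0xffff means "never set" */
};

/* Offset eth_hdr() would dereference: head + mac_header in the real helper. */
static size_t eth_hdr_offset_model(const struct skb_model *skb)
{
        return skb->mac_header;
}

/* Offset skb_eth_hdr() would dereference: the current data pointer. */
static size_t skb_eth_hdr_offset_model(const struct skb_model *skb)
{
        return (size_t)(skb->data - skb->head);
}

int main(void)
{
        unsigned char buf[2048];
        struct skb_model skb = {
                .head = buf,
                .data = buf + 14,       /* where the Ethernet header actually sits */
                .mac_header = 0xffff,   /* left untouched on the packet_direct_xmit() path */
        };

        printf("buffer size:                %zu bytes\n", sizeof(buf));
        printf("eth_hdr()-style offset:     %zu (out of bounds)\n",
               eth_hdr_offset_model(&skb));
        printf("skb_eth_hdr()-style offset: %zu (inside the buffer)\n",
               skb_eth_hdr_offset_model(&skb));
        return 0;
}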
Fixes: 2ad7bf363841 ("ipvlan: Initial check-in of the IPVLAN driver.")
Signed-off-by: Lu Wei <luwei32@huawei.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Lu Wei <luwei32@huawei.com>
Reviewed-by: Yue Haibing <yuehaibing@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
---
 drivers/net/ipvlan/ipvlan_core.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 02549a380f52..a83c22d46896 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -502,7 +502,6 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
 static int ipvlan_process_outbound(struct sk_buff *skb)
 {
-        struct ethhdr *ethh = eth_hdr(skb);
         int ret = NET_XMIT_DROP;
         /* The ipvlan is a pseudo-L2 device, so the packets that we receive
@@ -512,6 +511,8 @@ static int ipvlan_process_outbound(struct sk_buff *skb)
         if (skb_mac_header_was_set(skb)) {
                 /* In this mode we dont care about
                  * multicast and broadcast traffic */
+                struct ethhdr *ethh = eth_hdr(skb);
+
                 if (is_multicast_ether_addr(ethh->h_dest)) {
                         pr_debug_ratelimited(
                                 "Dropped {multi|broad}cast of type=[%x]\n",
@@ -620,7 +621,7 @@ static int ipvlan_process_v6_forward(struct sk_buff *skb)
 static int ipvlan_process_forward(struct sk_buff *skb)
 {
-        struct ethhdr *ethh = eth_hdr(skb);
+        struct ethhdr *ethh = skb_eth_hdr(skb);
         int ret = NET_XMIT_DROP;
         /* In this mode we dont care about multicast and broadcast traffic */
@@ -712,7 +713,7 @@ static int ipvlan_xmit_mode_l3(struct sk_buff *skb, struct net_device *dev)
 static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
 {
         const struct ipvl_dev *ipvlan = netdev_priv(dev);
-        struct ethhdr *eth = eth_hdr(skb);
+        struct ethhdr *eth = skb_eth_hdr(skb);
         struct ipvl_addr *addr;
         void *lyr3h;
         int addr_type;
@@ -742,6 +743,7 @@ static int ipvlan_xmit_mode_l2(struct sk_buff *skb, struct net_device *dev)
                 return dev_forward_skb(ipvlan->phy_dev, skb);
         } else if (is_multicast_ether_addr(eth->h_dest)) {
+                skb_reset_mac_header(skb);
                 ipvlan_skb_crossing_ns(skb, NULL);
                 ipvlan_multicast_enqueue(ipvlan->port, skb, true);
                 return NET_XMIT_SUCCESS;
@@ -782,7 +784,7 @@ static int ipvlan_l2e_local_xmit_event(struct ipvl_dev *ipvlan,
 static int ipvlan_xmit_mode_l2e(struct sk_buff *skb, struct net_device *dev)
 {
         struct ipvl_dev *ipvlan = netdev_priv(dev);
-        struct ethhdr *eth = eth_hdr(skb);
+        struct ethhdr *eth = skb_eth_hdr(skb);
         struct ipvl_addr *addr;
         void *lyr3h;
         int addr_type;
@@ -815,6 +817,7 @@ static int ipvlan_xmit_mode_l2e(struct sk_buff *skb, struct net_device *dev)
                 ipvlan_skb_crossing_ns(skb, ipvlan->phy_dev);
                 return ipvlan_process_forward(skb);
         } else if (is_multicast_ether_addr(eth->h_dest)) {
+                skb_reset_mac_header(skb);
                 ipvlan_skb_crossing_ns(skb, NULL);
                 ipvlan_multicast_enqueue(ipvlan->port, skb, true);
                 return NET_XMIT_SUCCESS;