[PATCH OLK-6.6 0/3] some bugfixes for NUMA.remote
Jinjiang Tu (3): mm/numa_remote: fix memory leak of struct memory_block mm/numa_remote: fix restoring to preonline when failing to offline from preonline mm/numa_remote: check if memory block is preonlined when undoing fake-online drivers/base/memory.c | 52 ++++++++++++++++++++++-------------- drivers/base/numa_remote.c | 54 ++++++++++++++++++++++---------------- include/linux/memory.h | 1 + 3 files changed, 66 insertions(+), 41 deletions(-) -- 2.43.0
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID4X9G ---------------------------------------- check_memory_block_nid(), check_memory_block_pre_online() and set_memory_block_pre_online() call find_memory_block_by_id() but forget to call put_device(), so the refcount of struct memory_block never drops to zero, leading to a memory leak. Add the missing put_device() calls to fix it. Besides, there is a lot of duplicated code; introduce a new helper to factor out the common code. Currently, these three functions cannot handle a size smaller than the memory block size. Extend them to do so for a future patch. Fixes: 8e27b94c109c ("mm/numa_remote: introduce pre-online mode to support hotplug unready memory") Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com> --- drivers/base/memory.c | 46 ++++++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 1582cbe8ee6d..2d6ba0b423e4 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -932,57 +932,63 @@ void remove_memory_block_devices(unsigned long start, unsigned long size) } #ifdef CONFIG_NUMA_REMOTE -bool check_memory_block_nid(unsigned long start, unsigned long size, int nid) + +enum check_state_type { + CHECK_NID, + CHECK_PREONLINE, +}; + +static inline bool check_memory_block_state(unsigned long start, unsigned long size, + enum check_state_type type, int check_val) { unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); - unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); + unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size - 1)); unsigned long block_id; struct memory_block *mem; + bool check_res = true; - for (block_id = start_block_id; block_id != end_block_id; block_id++) { + for (block_id = start_block_id; block_id <= end_block_id; block_id++) { mem = find_memory_block_by_id(block_id); if (!mem) return false; - if (mem->nid != nid) + if (type == CHECK_NID) + 
check_res = (mem->nid == check_val); + else if (type == CHECK_PREONLINE) + check_res = (mem->pre_online == check_val); + put_device(&mem->dev); + if (!check_res) return false; } return true; } +bool check_memory_block_nid(unsigned long start, unsigned long size, int nid) +{ + return check_memory_block_state(start, size, CHECK_NID, nid); +} + bool check_memory_block_pre_online(unsigned long start, unsigned long size, bool pre_online) { - unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); - unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); - unsigned long block_id; - struct memory_block *mem; - - for (block_id = start_block_id; block_id != end_block_id; block_id++) { - mem = find_memory_block_by_id(block_id); - if (!mem) - return false; - - if (mem->pre_online != pre_online) - return false; - } - return true; + return check_memory_block_state(start, size, CHECK_PREONLINE, pre_online); } void set_memory_block_pre_online(unsigned long start, unsigned long size, bool pre_online) { unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); - unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); + unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size - 1)); unsigned long block_id; struct memory_block *mem; - for (block_id = start_block_id; block_id != end_block_id; block_id++) { + for (block_id = start_block_id; block_id <= end_block_id; block_id++) { mem = find_memory_block_by_id(block_id); if (!mem) continue; mem->pre_online = pre_online; + put_device(&mem->dev); } } #endif -- 2.43.0
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID4X9G ---------------------------------------- When offlining memory from preonline fails (due to a pending signal, etc.), the offlined memory blocks will be re-onlined. However, the online_page callback is only set when doing preonline, so the memory blocks intended to be preonlined get onlined instead. To fix it, always set numa_remote_preonline_pages() as the online_page callback, make it distinguish whether preonline is needed, and rename it to numa_remote_online_pages_cb() to reflect its functionality. Fixes: 8e27b94c109c ("mm/numa_remote: introduce pre-online mode to support hotplug unready memory") Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com> --- drivers/base/numa_remote.c | 47 +++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/base/numa_remote.c b/drivers/base/numa_remote.c index 1a96026b2846..0336c7c574bc 100644 --- a/drivers/base/numa_remote.c +++ b/drivers/base/numa_remote.c @@ -213,6 +213,7 @@ static int numa_remote_memory_notifier_cb(struct notifier_block *nb, struct memory_notify *mhp = arg; const unsigned long start = PFN_PHYS(mhp->start_pfn); const unsigned long size = PFN_PHYS(mhp->nr_pages); + int nid = pfn_to_nid(mhp->start_pfn); if (!check_memory_block_pre_online(start, size, true)) return NOTIFY_DONE; @@ -224,6 +225,10 @@ static int numa_remote_memory_notifier_cb(struct notifier_block *nb, case MEM_CANCEL_OFFLINE: numa_remote_preonline_cancel_offline(mhp->start_pfn, mhp->nr_pages); break; + case MEM_OFFLINE: + atomic_long_add(-mhp->nr_pages, &pre_online_pages_node[nid]); + atomic_long_add(-mhp->nr_pages, &pre_online_pages); + break; default: break; } @@ -235,7 +240,7 @@ struct notifier_block numa_remote_memory_notifier = { .notifier_call = numa_remote_memory_notifier_cb, }; -static void numa_remote_preonline_pages(struct page *page, unsigned int order) +static void numa_remote_online_pages_cb(struct page 
*page, unsigned int order) { unsigned long start_pfn, end_pfn, pfn, nr_pages; int nid = page_to_nid(page); @@ -244,6 +249,12 @@ static void numa_remote_preonline_pages(struct page *page, unsigned int order) start_pfn = page_to_pfn(page); nr_pages = 1 << order; end_pfn = start_pfn + nr_pages; + + if (!check_memory_block_pre_online(PFN_PHYS(start_pfn), nr_pages * PAGE_SIZE, true)) { + generic_online_page(page, order); + return; + } + for (pfn = start_pfn; pfn < end_pfn; pfn++) { p = pfn_to_page(pfn); __SetPageOffline(p); @@ -320,9 +331,6 @@ static int __ref numa_remote_restore_isolation(u64 start, u64 size) { unsigned long start_pfn = PFN_DOWN(start); unsigned long end_pfn = PFN_DOWN(start + size); - unsigned long nr_pages = end_pfn - start_pfn; - struct zone *zone = page_zone(phys_to_page(start)); - int nid = zone_to_nid(zone); int ret = 0; mem_hotplug_begin(); @@ -333,8 +341,6 @@ static int __ref numa_remote_restore_isolation(u64 start, u64 size) goto out; } - atomic_long_add(-nr_pages, &pre_online_pages_node[nid]); - atomic_long_add(-nr_pages, &pre_online_pages); out: mem_hotplug_done(); return ret; @@ -487,22 +493,12 @@ int add_memory_remote(int nid, u64 start, u64 size, int flags) if (real_nid == NUMA_NO_NODE) goto unlock; - if (flags & MEMORY_KEEP_ISOLATED) { - int rc; - - rc = set_online_page_callback(&numa_remote_preonline_pages); - if (rc) { - real_nid = NUMA_NO_NODE; - goto unlock; - } + if (flags & MEMORY_KEEP_ISOLATED) mhp_flags |= MHP_PREONLINE; - } if (__add_memory(real_nid, start, size, mhp_flags)) real_nid = NUMA_NO_NODE; - if (flags & MEMORY_KEEP_ISOLATED) - restore_online_page_callback(&numa_remote_preonline_pages); unlock: unlock_device_hotplug(); out: @@ -661,12 +657,21 @@ static int __init numa_remote_init(void) if (!numa_remote_preonline_mode) return 0; + ret = set_online_page_callback(&numa_remote_online_pages_cb); + if (ret) + goto err_online_callback; + ret = register_memory_notifier(&numa_remote_memory_notifier); - if (ret) { - 
numa_remote_preonline_mode = false; - pr_err("fail to enanble preonline mode\n"); - } + if (ret) + goto err_register_notifier; + + return 0; +err_register_notifier: + restore_online_page_callback(&numa_remote_online_pages_cb); +err_online_callback: + numa_remote_preonline_mode = false; + pr_err("fail to enanble preonline mode\n"); return ret; } late_initcall(numa_remote_init); -- 2.43.0
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/ID4X9G ---------------------------------------- During memory preonline, add_memory_resource() iterates over all memory blocks to preonline them. However, the result isn't returned to the caller, so some memory blocks may be offline in numa_remote_undo_fake_online(). Bail out with -EINVAL if any memory block in the range is not online. Fixes: 8e27b94c109c ("mm/numa_remote: introduce pre-online mode to support hotplug unready memory") Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com> --- drivers/base/memory.c | 8 ++++++++ drivers/base/numa_remote.c | 7 ++++++- include/linux/memory.h | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 2d6ba0b423e4..32a7bbc00599 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -936,6 +936,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size) enum check_state_type { CHECK_NID, CHECK_PREONLINE, + CHECK_ONLINE, }; static inline bool check_memory_block_state(unsigned long start, unsigned long size, @@ -956,6 +957,8 @@ static inline bool check_memory_block_state(unsigned long start, unsigned long s check_res = (mem->nid == check_val); else if (type == CHECK_PREONLINE) check_res = (mem->pre_online == check_val); + else if (type == CHECK_ONLINE) + check_res = (mem->state == check_val); put_device(&mem->dev); if (!check_res) return false; @@ -974,6 +977,11 @@ bool check_memory_block_pre_online(unsigned long start, unsigned long size, return check_memory_block_state(start, size, CHECK_PREONLINE, pre_online); } +bool check_memory_block_online(unsigned long start, unsigned long size) +{ + return check_memory_block_state(start, size, CHECK_ONLINE, MEM_ONLINE); +} + void set_memory_block_pre_online(unsigned long start, unsigned long size, bool pre_online) { diff --git a/drivers/base/numa_remote.c b/drivers/base/numa_remote.c index 0336c7c574bc..d31c5858977d 100644 --- 
a/drivers/base/numa_remote.c +++ b/drivers/base/numa_remote.c @@ -300,6 +300,11 @@ static int __ref numa_remote_undo_fake_online(u64 start, u64 size) goto out; } + if (!check_memory_block_online(start, size)) { + ret = -EINVAL; + goto out; + } + zone = page_zone(phys_to_page(start)); nid = zone_to_nid(zone); if (!check_memory_block_nid(start, size, nid)) { @@ -317,12 +322,12 @@ static int __ref numa_remote_undo_fake_online(u64 start, u64 size) atomic_long_add(-nr_pages, &pre_online_pages_node[nid]); atomic_long_add(-nr_pages, &pre_online_pages); numa_remote_online_pages(start_pfn, end_pfn); - atomic_long_add(-nr_pages, &undo_fake_online_pages_node[nid]); init_per_zone_wmark_min(); writeback_set_ratelimit(); out: + atomic_long_add(-nr_pages, &undo_fake_online_pages_node[nid]); mem_hotplug_done(); return ret; } diff --git a/include/linux/memory.h b/include/linux/memory.h index 9d7431ff1282..be51c078c497 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -155,6 +155,7 @@ bool check_memory_block_pre_online(unsigned long start, unsigned long size, bool pre_online); void set_memory_block_pre_online(unsigned long start, unsigned long size, bool pre_online); +bool check_memory_block_online(unsigned long start, unsigned long size); static inline bool memory_block_is_pre_online(struct memory_block *mem) { return mem->pre_online; -- 2.43.0
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/18894 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/PTF... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/18894 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/PTF...
participants (2)
-
Jinjiang Tu -
patchwork bot