From: Nick Desaulniers <ndesaulniers@google.com>
maillist inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I6V709
CVE: NA
Reference: https://lore.kernel.org/lkml/20221103210748.1343090-1-ndesaulniers@google.co...
--------------------------------
GNU binutils' assembler (GAS) did not support the 'L' suffix on immediates until the binutils 2.28 release. Building arch/x86/entry/entry_64.S with GAS v2.27 therefore produces the following assembler errors:
arch/x86/entry/entry_64.S: Assembler messages:
arch/x86/entry/entry_64.S:308: Error: found 'L', expected: ')'
arch/x86/entry/entry_64.S:308: Error: found 'L', expected: ')'
arch/x86/entry/entry_64.S:308: Error: junk `L<<(0)))' after expression
arch/x86/entry/entry_64.S:596: Error: found 'L', expected: ')'
arch/x86/entry/entry_64.S:596: Error: found 'L', expected: ')'
arch/x86/entry/entry_64.S:596: Error: junk `L<<(0)))' after expression
These come from the use of the preprocessor define SPEC_CTRL_IBRS in the IBRS_ENTER and IBRS_EXIT assembler macros. SPEC_CTRL_IBRS was defined via the BIT() macro from include/linux/bits.h, which is portable between C and assembler only for assemblers that understand the 'L' suffix on immediate operands, i.e. GAS v2.28 (or newer) and clang; older GAS releases cannot parse it. The kernel still supports GAS v2.23 and newer (and older still on stable branches). Let's expand the value of SPEC_CTRL_IBRS in place so that older assemblers have no trouble parsing it.
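For illustration, a minimal sketch of the expansion in question, assuming
the BIT() definition in this tree's include/linux/bits.h:

	/* include/linux/bits.h (this tree): */
	#define BIT(nr)			(1UL << (nr))

	/* arch/x86/include/asm/msr-index.h, before this patch: */
	#define SPEC_CTRL_IBRS		BIT(0)	/* -> (1UL << (0)) */

	/*
	 * IBRS_ENTER/IBRS_EXIT in entry_64.S then hand the 'UL' suffix
	 * straight to GAS, roughly:
	 *
	 *	movl	$(1UL << (0)), %eax
	 *
	 * which GAS < 2.28 rejects ("Error: found 'L', expected: ')'").
	 * A bare immediate carries no suffix at all:
	 */
	#define SPEC_CTRL_IBRS		1	/* parses on GAS v2.23+ */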
Fixes: 2dbb887e875b ("x86/entry: Add kernel IBRS implementation")
Reported-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Lin Yujun <linyujun809@huawei.com>
Reviewed-by: Liao Chang <liaochang1@huawei.com>
Reviewed-by: Zhang Jianhua <chris.zjh@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
---
 arch/x86/include/asm/msr-index.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1336c900e723..779b653f6546 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -42,7 +42,7 @@
 
 /* Intel MSRs. Some also available on other CPUs */
 #define MSR_IA32_SPEC_CTRL		0x00000048 /* Speculation Control */
-#define SPEC_CTRL_IBRS			BIT(0)	   /* Indirect Branch Restricted Speculation */
+#define SPEC_CTRL_IBRS			1	   /* Indirect Branch Restricted Speculation */
 #define SPEC_CTRL_STIBP_SHIFT		1	   /* Single Thread Indirect Branch Predictor (STIBP) bit */
 #define SPEC_CTRL_STIBP			BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
 #define SPEC_CTRL_SSBD_SHIFT		2	   /* Speculative Store Bypass Disable bit */
From: Bin Wang <wangbin224@huawei.com>
If a process frees part of a transparent huge page, the kernel does not split the huge page and reclaim the memory immediately. Instead, the huge page is added to the deferred_split_queue and is only reclaimed later, when shrink_slab runs.
This behavior leads to a problem: processes have released memory, but the system's available memory does not increase, which makes users believe there is a memory leak. To solve this, add a kernel command-line parameter that makes the kernel split and reclaim transparent huge pages as soon as they are partially freed.
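For illustration, the new behavior is opt-in at boot via the parameter
registered by the __setup() call added below, e.g. (hypothetical command
line; only the last token comes from this patch):

	linux /vmlinuz root=/dev/sda1 ... force_split_huge_page

When the parameter is present, a partially freed THP is split and
reclaimed synchronously from zap_pte_range() instead of waiting for the
deferred_split shrinker to run.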
Signed-off-by: Bin Wang <wangbin224@huawei.com>
Reviewed-by: Pan Zhang <zhangpan26@huawei.com>
---
 mm/Kconfig       |  7 +++++++
 mm/huge_memory.c |  5 +++++
 mm/memory.c      | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+)
diff --git a/mm/Kconfig b/mm/Kconfig
index b367e0cf9..42fdd0e8d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -961,4 +961,11 @@ config EULEROS_EXTEND_MODULE_AREA
 	  Say Y here to extend the module area. Reserve enough memory for
 	  loading modules when the KASAN is open.
 
+config EULEROS_FORCE_SPLIT_HUGE_PAGE
+	bool "force split huge page"
+	depends on TRANSPARENT_HUGEPAGE
+	default y
+	help
+	  Say Y here to force split transparent huge pages when partially freed.
+
 endmenu
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index acb2e2c9e..cd71bc08f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2850,8 +2850,13 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
 	return READ_ONCE(ds_queue->split_queue_len);
 }
 
+#ifdef CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE
+unsigned long deferred_split_scan(struct shrinker *shrink,
+		struct shrink_control *sc)
+#else
 static unsigned long deferred_split_scan(struct shrinker *shrink,
 		struct shrink_control *sc)
+#endif
 {
 	struct pglist_data *pgdata = NODE_DATA(sc->nid);
 	struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
diff --git a/mm/memory.c b/mm/memory.c
index 58e3e276d..7fcc4fa44 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -146,6 +146,18 @@ EXPORT_SYMBOL(zero_pfn);
 
 unsigned long highest_memmap_pfn __read_mostly;
 
+#ifdef CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE
+extern unsigned long deferred_split_scan(struct shrinker *shrink,
+					 struct shrink_control *sc);
+bool force_split_huge_page = false;
+static int __init set_force_split_huge_page(char *s)
+{
+	force_split_huge_page = true;
+	return 1;
+}
+__setup("force_split_huge_page", set_force_split_huge_page);
+#endif
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */
@@ -1216,6 +1228,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	pte_t *start_pte;
 	pte_t *pte;
 	swp_entry_t entry;
+#ifdef CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE
+	bool is_trans_compound = false;
+	struct page *tmp_page = NULL;
+#endif
 
 	tlb_change_page_size(tlb, PAGE_SIZE);
 again:
@@ -1263,6 +1279,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			rss[mm_counter(page)]--;
 			reliable_page_counter(page, mm, -1);
 			page_remove_rmap(page, false);
+#ifdef CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE
+			if (unlikely(force_split_huge_page) &&
+			    unlikely(PageTransCompound(page))) {
+				is_trans_compound = true;
+				tmp_page = page;
+			}
+#endif
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
 			if (unlikely(__tlb_remove_page(tlb, page))) {
@@ -1320,6 +1343,23 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		tlb_flush_mmu_tlbonly(tlb);
 	pte_unmap_unlock(start_pte, ptl);
 
+#ifdef CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE
+	if (unlikely(force_split_huge_page) && unlikely(is_trans_compound)) {
+		struct shrink_control sc = {
+			/* all subpages of a compound page have the same nid */
+			.nid = page_to_nid(tmp_page),
+			/* force split at most 256 THPs in one call */
+			.nr_to_scan = 256,
+#ifdef CONFIG_MEMCG
+			/* memcg aware */
+			.memcg = page_memcg(compound_head(tmp_page)),
+#endif
+		};
+
+		deferred_split_scan(NULL, &sc);
+	}
+#endif
+
 	/*
 	 * If we forced a TLB flush (either due to running out of
 	 * batch buffers or because we needed to flush dirty TLB
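For reference, a simplified sketch of why passing a NULL shrinker to
deferred_split_scan() above is safe: the scan callback reads only the
shrink_control, never the shrinker pointer (abridged from this tree's
mm/huge_memory.c; details vary by kernel version):

	unsigned long deferred_split_scan(struct shrinker *shrink, /* unused */
					  struct shrink_control *sc)
	{
		struct pglist_data *pgdata = NODE_DATA(sc->nid);
		struct deferred_split *ds_queue = &pgdata->deferred_split_queue;

	#ifdef CONFIG_MEMCG
		/* memcg aware: use the memcg's own queue when sc->memcg is set */
		if (sc->memcg)
			ds_queue = &sc->memcg->deferred_split_queue;
	#endif
		/*
		 * ...pop up to sc->nr_to_scan entries off ds_queue and try
		 * split_huge_page() on each...
		 */
	}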