From: Ma Wupeng <mawupeng1@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Count the reliable memory allocated by reliable user tasks.

The accounting of reliable memory usage follows the RSS statistics:
wherever an mm counter is updated, the reliable page counter must be
updated as well. Pages identified as reliable by page_reliable() update
the per-mm counter through reliable_page_counter().

Updating the reliable page counter should be considered wherever the
following logic is used (a call-site sketch follows the diffstat below):
 - add_mm_counter
 - dec_mm_counter
 - inc_mm_counter_fast
 - dec_mm_counter_fast
 - rss[mm_counter(page)]
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
---
 Documentation/filesystems/proc.rst |  2 ++
 fs/proc/task_mmu.c                 |  1 +
 include/linux/mem_reliable.h       | 39 ++++++++++++++++++++++++++++++
 include/linux/mm_types.h           |  4 +++
 kernel/events/uprobes.c            |  2 ++
 mm/huge_memory.c                   |  4 +++
 mm/khugepaged.c                    |  3 +++
 mm/ksm.c                           |  2 ++
 mm/mem_reliable.c                  | 11 +++++++++
 mm/memory.c                        | 10 ++++++++
 mm/migrate_device.c                |  1 +
 mm/rmap.c                          |  7 ++++++
 mm/swapfile.c                      |  1 +
 mm/userswap.c                      |  4 +++
 14 files changed, 91 insertions(+)
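[Illustration, not part of the patch: the call-site pattern the hunks
below rely on. add_reliable_page_counter() is the helper introduced in
include/linux/mem_reliable.h; the two wrappers map_one_page() and
unmap_one_page() are hypothetical and exist only to show how the
reliable counter is mirrored next to the RSS counters.]

	/* map: RSS update mirrored into the per-mm reliable counter */
	static void map_one_page(struct mm_struct *mm, struct page *page)
	{
		inc_mm_counter(mm, MM_ANONPAGES);
		/* no-op unless page_reliable(page) is true */
		add_reliable_page_counter(page, mm, 1);
	}

	/* unmap: decrement alongside the RSS counter */
	static void unmap_one_page(struct mm_struct *mm, struct page *page)
	{
		dec_mm_counter(mm, MM_ANONPAGES);
		add_reliable_page_counter(page, mm, -1);
	}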
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 3bbae7b35bb0..56d9ba24bd14 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -197,6 +197,7 @@ read the file /proc/PID/status::
   VmPTE:              20 kb
   VmSwap:              0 kB
   HugetlbPages:        0 kB
+  Reliable:         1608 kB
   CoreDumping:         0
   THP_enabled:         1
   Threads:             1
@@ -280,6 +281,7 @@ It's slow but very precise.
 VmSwap                      amount of swap used by anonymous private data
                             (shmem swap usage is not included)
 HugetlbPages                size of hugetlb memory portions
+Reliable                    size of reliable memory used
 CoreDumping                 process's memory is currently being dumped
                             (killing the process may lead to a corrupted core)
 THP_enabled                 process is allowed to use THP (returns 0 when
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fe12b057d077..fac21bcba8a6 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -77,6 +77,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
 	seq_puts(m, " kB\n");
 	hugetlb_report_usage(m, mm);
+	reliable_report_usage(m, mm);
 }
 #undef SEQ_PUT_DEC
 
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h
index 09621a4d5ae5..15f69349a2a8 100644
--- a/include/linux/mem_reliable.h
+++ b/include/linux/mem_reliable.h
@@ -36,6 +36,7 @@ bool mem_reliable_counter_initialized(void);
 void reliable_report_meminfo(struct seq_file *m);
 void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order,
 				int preferred_nid, nodemask_t *nodemask);
+void reliable_report_usage(struct seq_file *m, struct mm_struct *mm);
 
 static inline bool mem_reliable_is_enabled(void)
 {
@@ -180,6 +181,38 @@ static inline bool mem_reliable_should_reclaim(void)
 
 	return false;
 }
+
+static inline void reliable_page_counter_inner(struct mm_struct *mm, int val)
+{
+	atomic_long_add(val, &mm->reliable_nr_page);
+
+	/*
+	 * Update reliable page counter to zero if underflows.
+	 *
+	 * Since reliable page counter is used for debug purpose only,
+	 * there is no real function problem by doing this.
+	 */
+	if (unlikely(atomic_long_read(&mm->reliable_nr_page) < 0))
+		atomic_long_set(&mm->reliable_nr_page, 0);
+}
+
+static inline void add_reliable_folio_counter(struct folio *folio,
+					      struct mm_struct *mm, int val)
+{
+	if (!folio_reliable(folio))
+		return;
+
+	reliable_page_counter_inner(mm, val);
+}
+
+static inline void add_reliable_page_counter(struct page *page,
+					     struct mm_struct *mm, int val)
+{
+	if (!page_reliable(page))
+		return;
+
+	reliable_page_counter_inner(mm, val);
+}
 
 #else
 #define reliable_enabled 0
@@ -217,6 +250,12 @@ static inline void mem_reliable_out_of_memory(gfp_t gfp_mask,
 						  int preferred_nid,
 						  nodemask_t *nodemask) {}
 static inline bool reliable_allow_fb_enabled(void) { return false; }
+static inline void add_reliable_page_counter(struct page *page,
+					     struct mm_struct *mm, int val) {}
+static inline void add_reliable_folio_counter(struct folio *folio,
+					      struct mm_struct *mm, int val) {}
+static inline void reliable_report_usage(struct seq_file *m,
+					 struct mm_struct *mm) {}
 #endif
 
 #endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0bc3c7c191a5..a077f60819d9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -936,6 +936,10 @@ struct mm_struct {
 #endif /* CONFIG_LRU_GEN */
 #ifdef CONFIG_SHARE_POOL
 		struct sp_group_master *sp_group_master;
+#endif
+#ifdef CONFIG_MEMORY_RELIABLE
+		/* total used reliable pages */
+		atomic_long_t reliable_nr_page;
 #endif
 	} __randomize_layout;
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 3048589e2e85..789966263996 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -181,6 +181,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	if (new_page) {
 		folio_get(new_folio);
+		add_reliable_folio_counter(new_folio, mm, folio_nr_pages(new_folio));
 		page_add_new_anon_rmap(new_page, vma, addr);
 		folio_add_lru_vma(new_folio, vma);
 	} else
@@ -198,6 +199,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 		set_pte_at_notify(mm, addr, pvmw.pte,
 				  mk_pte(new_page, vma->vm_page_prot));
 
+	add_reliable_page_counter(old_page, mm, -1);
 	page_remove_rmap(old_page, vma, false);
 	if (!folio_mapped(old_folio))
 		folio_free_swap(old_folio);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a13418df1115..65421d751a9d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -704,6 +704,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+		add_reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
 		mm_inc_nr_ptes(vma->vm_mm);
 		spin_unlock(vmf->ptl);
 		count_vm_event(THP_FAULT_ALLOC);
@@ -1143,6 +1144,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		return -EAGAIN;
 	}
 	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+	add_reliable_page_counter(src_page, dst_mm, HPAGE_PMD_NR);
 out_zero_page:
 	mm_inc_nr_ptes(dst_mm);
 	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
@@ -1687,6 +1689,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
 		if (pmd_present(orig_pmd)) {
 			page = pmd_page(orig_pmd);
+			add_reliable_page_counter(page, tlb->mm, -HPAGE_PMD_NR);
 			page_remove_rmap(page, vma, true);
 			VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
 			VM_BUG_ON_PAGE(!PageHead(page), page);
@@ -2103,6 +2106,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 				set_page_dirty(page);
 			if (!PageReferenced(page) && pmd_young(old_pmd))
 				SetPageReferenced(page);
+			add_reliable_page_counter(page, mm, -HPAGE_PMD_NR);
 			page_remove_rmap(page, vma, true);
 			put_page(page);
 		}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index e28b79d02f31..6f2787d3b682 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -717,6 +717,7 @@ static void __collapse_huge_page_copy_succeeded(pte_t *pte,
 			 */
 			spin_lock(ptl);
 			ptep_clear(vma->vm_mm, address, _pte);
+			add_reliable_page_counter(src_page, vma->vm_mm, 1);
 			page_remove_rmap(src_page, vma, false);
 			spin_unlock(ptl);
 			free_page_and_swap_cache(src_page);
@@ -1224,6 +1225,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
 
 	spin_lock(pmd_ptl);
 	BUG_ON(!pmd_none(*pmd));
+	add_reliable_page_counter(hpage, vma->vm_mm, HPAGE_PMD_NR);
 	page_add_new_anon_rmap(hpage, vma, address);
 	lru_cache_add_inactive_or_unevictable(hpage, vma);
 	pgtable_trans_huge_deposit(mm, pmd, pgtable);
@@ -1631,6 +1633,7 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 		 */
 		ptep_clear(mm, addr, pte);
 		page_remove_rmap(page, vma, false);
+		add_reliable_page_counter(page, mm, -1);
 		nr_ptes++;
 	}
 
diff --git a/mm/ksm.c b/mm/ksm.c
index 981af9c72e7a..7401a6c87a4b 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1233,6 +1233,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	 */
 	if (!is_zero_pfn(page_to_pfn(kpage))) {
 		get_page(kpage);
+		add_reliable_page_counter(kpage, mm, 1);
 		page_add_anon_rmap(kpage, vma, addr, RMAP_NONE);
 		newpte = mk_pte(kpage, vma->vm_page_prot);
 	} else {
@@ -1262,6 +1263,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	 */
 	ptep_clear_flush(vma, addr, ptep);
 	set_pte_at_notify(mm, addr, ptep, newpte);
+	add_reliable_page_counter(page, mm, -1);
 
 	folio = page_folio(page);
 	page_remove_rmap(page, vma, false);
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c
index a77d0cc4100d..de90d9b97243 100644
--- a/mm/mem_reliable.c
+++ b/mm/mem_reliable.c
@@ -316,3 +316,14 @@ static int __init setup_reliable_debug(char *str)
 	return 1;
 }
 __setup("reliable_debug", setup_reliable_debug);
+
+#define SEQ_PUT_DEC(str, val) \
+		seq_put_decimal_ull_width(m, str, (val) << (PAGE_SHIFT-10), 8)
+void reliable_report_usage(struct seq_file *m, struct mm_struct *mm)
+{
+	if (!mem_reliable_is_enabled())
+		return;
+
+	SEQ_PUT_DEC("Reliable:\t", atomic_long_read(&mm->reliable_nr_page));
+	seq_puts(m, " kB\n");
+}
diff --git a/mm/memory.c b/mm/memory.c
index 6569c9e97c9d..944c2ce2756b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -963,6 +963,7 @@ copy_present_pte(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma,
 		folio_get(folio);
 		page_dup_file_rmap(page, false);
 		rss[mm_counter_file(page)]++;
+		add_reliable_folio_counter(folio, dst_vma->vm_mm, 1);
 	}
 
 	/*
@@ -1463,6 +1464,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 					mark_page_accessed(page);
 			}
 			rss[mm_counter(page)]--;
+			add_reliable_page_counter(page, mm, -1);
 			if (!delay_rmap) {
 				page_remove_rmap(page, vma, false);
 				if (unlikely(page_mapcount(page) < 0))
@@ -1490,6 +1492,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			 */
 			WARN_ON_ONCE(!vma_is_anonymous(vma));
 			rss[mm_counter(page)]--;
+			add_reliable_page_counter(page, mm, -1);
 			if (is_device_private_entry(entry))
 				page_remove_rmap(page, vma, false);
 			put_page(page);
@@ -3166,10 +3169,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 				dec_mm_counter(mm, mm_counter_file(&old_folio->page));
 				inc_mm_counter(mm, MM_ANONPAGES);
 			}
+			add_reliable_folio_counter(old_folio, mm, -1);
 		} else {
 			ksm_might_unmap_zero_page(mm, vmf->orig_pte);
 			inc_mm_counter(mm, MM_ANONPAGES);
 		}
+
+		add_reliable_folio_counter(new_folio, mm, 1);
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 		entry = mk_pte(&new_folio->page, vma->vm_page_prot);
 		entry = pte_sw_mkyoung(entry);
@@ -4023,6 +4029,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 		folio_free_swap(folio);
 
 	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
+	add_reliable_folio_counter(folio, vma->vm_mm, 1);
 	dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
 
@@ -4198,6 +4205,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	}
 
 	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
+	add_reliable_folio_counter(folio, vma->vm_mm, 1);
 	folio_add_new_anon_rmap(folio, vma, vmf->address);
 	folio_add_lru_vma(folio, vma);
 setpte:
@@ -4340,6 +4348,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
 	add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
+	add_reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
 	page_add_file_rmap(page, vma, true);
 
 	/*
@@ -4396,6 +4405,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
 	if (unlikely(uffd_wp))
 		entry = pte_mkuffd_wp(entry);
 	/* copy-on-write page */
+	add_reliable_folio_counter(folio, vma->vm_mm, nr);
 	if (write && !(vma->vm_flags & VM_SHARED)) {
 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr);
 		VM_BUG_ON_FOLIO(nr != 1, folio);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 8ac1f79f754a..2e5bce2f1cb9 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -652,6 +652,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 		goto unlock_abort;
 
 	inc_mm_counter(mm, MM_ANONPAGES);
+	add_reliable_page_counter(page, mm, 1);
 	page_add_new_anon_rmap(page, vma, addr);
 	if (!is_zone_device_page(page))
 		lru_cache_add_inactive_or_unevictable(page, vma);
diff --git a/mm/rmap.c b/mm/rmap.c
index 9f795b93cf40..93ea81fe5180 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1636,6 +1636,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 						       hsz);
 			} else {
 				dec_mm_counter(mm, mm_counter(&folio->page));
+				add_reliable_page_counter(&folio->page, mm, -1);
 				set_pte_at(mm, address, pvmw.pte, pteval);
 			}
 
@@ -1651,6 +1652,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			 * copied pages.
 			 */
 			dec_mm_counter(mm, mm_counter(&folio->page));
+			add_reliable_page_counter(&folio->page, mm, -1);
 		} else if (folio_test_anon(folio)) {
 			swp_entry_t entry = page_swap_entry(subpage);
 			pte_t swp_pte;
@@ -1693,6 +1695,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			if (ref_count == 1 + map_count &&
 			    !folio_test_dirty(folio)) {
 				dec_mm_counter(mm, MM_ANONPAGES);
+				add_reliable_folio_counter(folio, mm, -1);
 				goto discard;
 			}
 
@@ -1737,6 +1740,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 				spin_unlock(&mmlist_lock);
 			}
 			dec_mm_counter(mm, MM_ANONPAGES);
+			add_reliable_folio_counter(folio, mm, -1);
 			inc_mm_counter(mm, MM_SWAPENTS);
 			swp_pte = swp_entry_to_pte(entry);
 			if (anon_exclusive)
@@ -1759,6 +1763,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			 * See Documentation/mm/mmu_notifier.rst
 			 */
 			dec_mm_counter(mm, mm_counter_file(&folio->page));
+			add_reliable_folio_counter(folio, mm, -1);
 		}
 discard:
 		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio));
@@ -2033,6 +2038,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 						       hsz);
 			} else {
 				dec_mm_counter(mm, mm_counter(&folio->page));
+				add_reliable_page_counter(&folio->page, mm, -1);
 				set_pte_at(mm, address, pvmw.pte, pteval);
 			}
 
@@ -2048,6 +2054,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 			 * copied pages.
 			 */
 			dec_mm_counter(mm, mm_counter(&folio->page));
+			add_reliable_page_counter(&folio->page, mm, -1);
 		} else {
 			swp_entry_t entry;
 			pte_t swp_pte;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4bc70f459164..99b54ff74973 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1792,6 +1792,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 
 	dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
 	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
+	add_reliable_page_counter(page, vma->vm_mm, 1);
 	get_page(page);
 	if (page == swapcache) {
 		rmap_t rmap_flags = RMAP_NONE;
diff --git a/mm/userswap.c b/mm/userswap.c
index 18c99c2a0fc7..e76e9d7a40de 100644
--- a/mm/userswap.c
+++ b/mm/userswap.c
@@ -162,6 +162,7 @@ static int uswap_unmap_anon_page(struct mm_struct *mm,
 					   SWP_USERSWAP_ENTRY, page_to_pfn(page))));
 
 	dec_mm_counter(mm, MM_ANONPAGES);
+	add_reliable_page_counter(page, mm, -1);
 	page_remove_rmap(page, vma, false);
 	page->mapping = NULL;
 
@@ -192,6 +193,7 @@ static unsigned long vm_insert_anon_page(struct vm_area_struct *vma,
 	}
 
 	inc_mm_counter(mm, MM_ANONPAGES);
+	add_reliable_page_counter(page, mm, 1);
 	page_add_new_anon_rmap(page, vma, addr);
 	dst_pte = mk_pte(page, vma->vm_page_prot);
 	if (vma->vm_flags & VM_WRITE)
@@ -217,6 +219,7 @@ static void uswap_map_anon_page(struct mm_struct *mm,
 	flush_cache_page(vma, addr, pte_pfn(*pte));
 	set_pte_at(mm, addr, pte, old_pte);
 	inc_mm_counter(mm, MM_ANONPAGES);
+	add_reliable_page_counter(page, mm, 1);
 	page_add_new_anon_rmap(page, vma, addr);
 	pte_unmap_unlock(pte, ptl);
 }
@@ -531,6 +534,7 @@ int mfill_atomic_pte_nocopy(struct mm_struct *mm, pmd_t *dst_pmd,
 	}
 
 	inc_mm_counter(mm, MM_ANONPAGES);
+	add_reliable_page_counter(page, mm, 1);
 	page_add_new_anon_rmap(page, dst_vma, dst_addr);
 	set_pte_at(mm, dst_addr, pte, dst_pte);
 
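[Illustration, not part of the patch: with this series applied and memory
reliable active (mem_reliable_is_enabled()), the new per-task field
documented above can be read from /proc/<pid>/status. A minimal
userspace sketch:]

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/self/status", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (!strncmp(line, "Reliable:", 9))
				fputs(line, stdout);	/* e.g. "Reliable:    1608 kB" */
		fclose(f);
		return 0;
	}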