From: Peng Wu <wupeng58@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
----------------------------------------------
Count the reliable memory allocated by reliable user tasks.

The counting policy follows the RSS statistics: wherever an mm counter
is updated, reliable pages need to be counted as well. A page that
page_reliable() identifies as reliable updates the reliable page
counter through reliable_page_counter().

Updating the reliable page counter has to be considered wherever the
following logic appears (see the sketch after this list):

- add_mm_counter
- dec_mm_counter
- inc_mm_counter_fast
- dec_mm_counter_fast
- rss[mm_counter(page)]
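For reference, the counting rule behind this list can be shown with a
minimal user-space sketch (illustration only, not kernel code). It
mirrors the reliable_page_counter() helper added to
include/linux/mem_reliable.h in the diff below; the mm_sketch and
page_sketch types and the is_page_reliable() predicate are stand-ins
for struct mm_struct, struct page and page_reliable().

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* global counter, mirrors reliable_user_used_nr_page */
static atomic_long reliable_user_used_nr_page;

/* stand-in for the per-mm field mm->reliable_nr_page */
struct mm_sketch {
	atomic_long reliable_nr_page;
};

/* stand-in for struct page: only the zone property matters here */
struct page_sketch {
	bool movable;		/* true => page sits in ZONE_MOVABLE */
};

/* stand-in for page_reliable(): reliable means "not movable" */
static bool is_page_reliable(const struct page_sketch *page)
{
	return !page->movable;
}

/* same shape as the kernel helper: count only reliable pages */
static void reliable_page_counter(const struct page_sketch *page,
				  struct mm_sketch *mm, long val)
{
	if (is_page_reliable(page)) {
		atomic_fetch_add(&mm->reliable_nr_page, val);
		atomic_fetch_add(&reliable_user_used_nr_page, val);
	}
}

int main(void)
{
	struct mm_sketch mm = { 0 };
	struct page_sketch reliable_page = { .movable = false };
	struct page_sketch movable_page = { .movable = true };

	reliable_page_counter(&reliable_page, &mm, 1);	/* map: counted */
	reliable_page_counter(&reliable_page, &mm, 1);	/* map: counted */
	reliable_page_counter(&movable_page, &mm, 1);	/* map: skipped */
	reliable_page_counter(&reliable_page, &mm, -1);	/* unmap: dropped */

	printf("per-mm reliable pages: %ld\n",
	       atomic_load(&mm.reliable_nr_page));
	printf("global reliable pages: %ld\n",
	       atomic_load(&reliable_user_used_nr_page));
	return 0;
}

In the kernel the real helper is placed next to every counter update
listed above, and the per-mm value is what the new "Reliable:" line in
/proc/<pid>/status reports.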
Signed-off-by: Peng Wu <wupeng58@huawei.com>
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 Documentation/filesystems/proc.txt |  2 ++
 fs/proc/task_mmu.c                 |  1 +
 include/linux/mem_reliable.h       | 17 +++++++++++++++++
 kernel/events/uprobes.c            |  2 ++
 mm/huge_memory.c                   |  8 ++++++++
 mm/khugepaged.c                    |  1 +
 mm/ksm.c                           |  1 +
 mm/mem_reliable.c                  | 15 ++++++++++++++-
 mm/memory.c                        | 16 ++++++++++++++++
 mm/migrate.c                       |  1 +
 mm/rmap.c                          |  5 +++++
 mm/shmem.c                         |  1 +
 mm/swapfile.c                      |  1 +
 mm/userfaultfd.c                   |  1 +
 14 files changed, 71 insertions(+), 1 deletion(-)
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 690db5b3eb53b..1ef781f33b376 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -181,6 +181,7 @@ read the file /proc/PID/status:
   VmPTE:        20 kb
   VmSwap:        0 kB
   HugetlbPages:  0 kB
+  Reliable:   1608 KB
   CoreDumping:   0
   Threads:       1
   SigQ:   0/28578
@@ -254,6 +255,7 @@ Table 1-2: Contents of the status files (as of 4.8)
  VmSwap                      amount of swap used by anonymous private data
                              (shmem swap usage is not included)
  HugetlbPages                size of hugetlb memory portions
+ Reliable                    size of reliable memory used
  CoreDumping                 process's memory is currently being dumped
                              (killing the process may lead to a corrupted core)
  Threads                     number of threads
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 495044e1990bd..78ce353d0dfad 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -77,6 +77,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	SEQ_PUT_DEC(" kB\nVmSwap:\t", swap);
 	seq_puts(m, " kB\n");
 	hugetlb_report_usage(m, mm);
+	reliable_report_usage(m, mm);
 }
 #undef SEQ_PUT_DEC
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h
index 31be68fac330b..a18a843c7b52f 100644
--- a/include/linux/mem_reliable.h
+++ b/include/linux/mem_reliable.h
@@ -14,11 +14,14 @@ extern struct static_key_false mem_reliable;
 
 extern bool reliable_enabled;
+extern atomic_long_t reliable_user_used_nr_page;
 
 extern void add_reliable_mem_size(long sz);
 extern void mem_reliable_init(bool has_unmirrored_mem,
 			      unsigned long *zone_movable_pfn);
 extern void reliable_report_meminfo(struct seq_file *m);
+extern bool page_reliable(struct page *page);
+extern void reliable_report_usage(struct seq_file *m, struct mm_struct *mm);
 
 static inline bool mem_reliable_is_enabled(void)
 {
@@ -47,6 +50,15 @@ static inline bool skip_none_movable_zone(gfp_t gfp, struct zoneref *z)
 
 	return false;
 }
+
+static inline void reliable_page_counter(struct page *page,
+					 struct mm_struct *mm, int val)
+{
+	if (page_reliable(page)) {
+		atomic_long_add(val, &mm->reliable_nr_page);
+		atomic_long_add(val, &reliable_user_used_nr_page);
+	}
+}
 #else
 #define reliable_enabled 0
 
@@ -60,6 +72,11 @@ static inline bool skip_none_movable_zone(gfp_t gfp, struct zoneref *z)
 	return false;
 }
 static inline void reliable_report_meminfo(struct seq_file *m) {}
+static inline bool page_reliable(struct page *page) { return false; }
+static inline void reliable_page_counter(struct page *page,
+					 struct mm_struct *mm, int val) {}
+static inline void reliable_report_usage(struct seq_file *m,
+					 struct mm_struct *mm) {}
 
 #endif
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index c173e4131df88..de64e29830824 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -191,7 +191,9 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	if (!PageAnon(old_page)) {
 		dec_mm_counter(mm, mm_counter_file(old_page));
+		reliable_page_counter(old_page, mm, -1);
 		inc_mm_counter(mm, MM_ANONPAGES);
+		reliable_page_counter(new_page, mm, 1);
 	}
 
 	flush_cache_page(vma, addr, pte_pfn(*pvmw.pte));
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 31f1c580ba9c0..f8319265c1cf3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -673,6 +673,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
 		pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
 		set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
 		add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+		reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
 		mm_inc_nr_ptes(vma->vm_mm);
 		spin_unlock(vmf->ptl);
 		count_vm_event(THP_FAULT_ALLOC);
@@ -1080,6 +1081,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	get_page(src_page);
 	page_dup_rmap(src_page, true);
 	add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+	reliable_page_counter(src_page, dst_mm, HPAGE_PMD_NR);
 	mm_inc_nr_ptes(dst_mm);
 	pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
 
@@ -1468,6 +1470,8 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 		if (!page) {
 			add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+			reliable_page_counter(new_page, vma->vm_mm,
+					      HPAGE_PMD_NR);
 		} else {
 			VM_BUG_ON_PAGE(!PageHead(page), page);
 			page_remove_rmap(page, true);
@@ -1850,10 +1854,12 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		if (PageAnon(page)) {
 			zap_deposited_table(tlb->mm, pmd);
 			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+			reliable_page_counter(page, tlb->mm, -HPAGE_PMD_NR);
 		} else {
 			if (arch_needs_pgtable_deposit())
 				zap_deposited_table(tlb->mm, pmd);
 			add_mm_counter(tlb->mm, mm_counter_file(page), -HPAGE_PMD_NR);
+			reliable_page_counter(page, tlb->mm, -HPAGE_PMD_NR);
 		}
 
 		spin_unlock(ptl);
@@ -2209,6 +2215,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			put_page(page);
 		}
 		add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
+		reliable_page_counter(page, mm, -HPAGE_PMD_NR);
 		return;
 	}
 
@@ -3170,6 +3177,7 @@ vm_fault_t do_anon_huge_page_remap(struct vm_area_struct *vma, unsigned long add
 	pgtable_trans_huge_deposit(vma->vm_mm, pmd, pgtable);
 	set_pmd_at(vma->vm_mm, address, pmd, entry);
 	add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+	reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
 	mm_inc_nr_ptes(vma->vm_mm);
 	spin_unlock(ptl);
 	count_vm_event(THP_FAULT_ALLOC);
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 55f171ed2d08a..5ac2486327528 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -648,6 +648,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 		if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
 			clear_user_highpage(page, address);
 			add_mm_counter(vma->vm_mm, MM_ANONPAGES, 1);
+			reliable_page_counter(page, vma->vm_mm, 1);
 			if (is_zero_pfn(pte_pfn(pteval))) {
 				/*
 				 * ptl mostly unnecessary.
diff --git a/mm/ksm.c b/mm/ksm.c
index 9749729a5381a..b656fa77f92ff 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1184,6 +1184,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 		 * when tearing down the mm.
 		 */
 		dec_mm_counter(mm, MM_ANONPAGES);
+		reliable_page_counter(page, mm, -1);
 	}
 	flush_cache_page(vma, addr, pte_pfn(*ptep));
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c
index c03c77090cf5b..d6aec08638923 100644
--- a/mm/mem_reliable.c
+++ b/mm/mem_reliable.c
@@ -12,14 +12,19 @@ DEFINE_STATIC_KEY_FALSE(mem_reliable);
 
 bool reliable_enabled;
-
 static atomic_long_t total_reliable_mem;
+atomic_long_t reliable_user_used_nr_page;
 
 void add_reliable_mem_size(long sz)
 {
 	atomic_long_add(sz, &total_reliable_mem);
 }
 
+bool page_reliable(struct page *page)
+{
+	return mem_reliable_is_enabled() && page_zonenum(page) < ZONE_MOVABLE;
+}
+
 static int reliable_mem_notifier(struct notifier_block *nb,
 				 unsigned long action, void *arg)
 {
@@ -105,3 +110,11 @@ void reliable_report_meminfo(struct seq_file *m)
 			   used_reliable_mem_sz() >> 10);
 	}
 }
+
+void reliable_report_usage(struct seq_file *m, struct mm_struct *mm)
+{
+	if (mem_reliable_is_enabled()) {
+		seq_printf(m, "Reliable:\t%8lu kB\n",
+			   atomic_long_read(&mm->reliable_nr_page));
+	}
+}
diff --git a/mm/memory.c b/mm/memory.c
index 054e62292902a..d4853970a7c10 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -740,6 +740,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			rss[mm_counter(page)]++;
+			reliable_page_counter(page, dst_mm, 1);
 
 			if (is_write_migration_entry(entry) &&
 					is_cow_mapping(vm_flags)) {
 				/*
@@ -766,6 +767,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 */
 			get_page(page);
 			rss[mm_counter(page)]++;
+			reliable_page_counter(page, dst_mm, 1);
 			page_dup_rmap(page, false);
 
 			/*
@@ -807,6 +809,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		get_page(page);
 		page_dup_rmap(page, false);
 		rss[mm_counter(page)]++;
+		reliable_page_counter(page, dst_mm, 1);
 	} else if (pte_devmap(pte)) {
 		page = pte_page(pte);
 
@@ -819,6 +822,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			get_page(page);
 			page_dup_rmap(page, false);
 			rss[mm_counter(page)]++;
+			reliable_page_counter(page, dst_mm, 1);
 		}
 	}
 
@@ -1102,6 +1106,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 					mark_page_accessed(page);
 			}
 			rss[mm_counter(page)]--;
+			reliable_page_counter(page, mm, -1);
 			page_remove_rmap(page, false);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
@@ -1130,6 +1135,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 
 			pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 			rss[mm_counter(page)]--;
+			reliable_page_counter(page, mm, -1);
 			page_remove_rmap(page, false);
 			put_page(page);
 			continue;
@@ -1147,6 +1153,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 
 			page = migration_entry_to_page(entry);
 			rss[mm_counter(page)]--;
+			reliable_page_counter(page, mm, -1);
 		}
 		if (unlikely(!free_swap_and_cache(entry)))
 			print_bad_pte(vma, addr, ptent, NULL);
@@ -1490,6 +1497,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
 	inc_mm_counter_fast(mm, mm_counter_file(page));
+	reliable_page_counter(page, mm, 1);
 	page_add_file_rmap(page, false);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -2489,10 +2497,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 			if (!PageAnon(old_page)) {
 				dec_mm_counter_fast(mm, mm_counter_file(old_page));
+				reliable_page_counter(old_page, mm, -1);
 				inc_mm_counter_fast(mm, MM_ANONPAGES);
+				reliable_page_counter(new_page, mm, 1);
 			}
 		} else {
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
+			reliable_page_counter(new_page, mm, 1);
 		}
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
@@ -3051,6 +3062,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	 */
 
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+	reliable_page_counter(page, vma->vm_mm, 1);
 	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page, NULL)) {
@@ -3216,6 +3228,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 	}
 
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+	reliable_page_counter(page, vma->vm_mm, 1);
 	page_add_new_anon_rmap(page, vma, vmf->address, false);
 	mem_cgroup_commit_charge(page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(page, vma);
@@ -3416,6 +3429,7 @@ static vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
 	add_mm_counter(vma->vm_mm, mm_counter_file(page), HPAGE_PMD_NR);
+	reliable_page_counter(page, vma->vm_mm, HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
 	/*
 	 * deposit and withdraw with pmd lock held
@@ -3489,6 +3503,7 @@ vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 	if (write)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	/* copy-on-write page */
+	reliable_page_counter(page, vma->vm_mm, 1);
 	if (write && !(vma->vm_flags & VM_SHARED)) {
 		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, vmf->address, false);
@@ -4910,6 +4925,7 @@ vm_fault_t do_anon_page_remap(struct vm_area_struct *vma, unsigned long address,
 	if (ret)
 		goto release;
 	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+	reliable_page_counter(page, vma->vm_mm, 1);
 	page_add_new_anon_rmap(page, vma, address, false);
 	mem_cgroup_commit_charge(page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(page, vma);
diff --git a/mm/migrate.c b/mm/migrate.c
index 90aa493faa602..eb27e8e2bf213 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2714,6 +2714,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 	}
 	inc_mm_counter(mm, MM_ANONPAGES);
+	reliable_page_counter(page, mm, 1);
 	page_add_new_anon_rmap(page, vma, addr, false);
 	mem_cgroup_commit_charge(page, memcg, false, false);
 	if (!is_zone_device_page(page))
diff --git a/mm/rmap.c b/mm/rmap.c
index 7debdf0cc6785..224fac084ad0e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1548,6 +1548,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 						     vma_mmu_pagesize(vma));
 			} else {
 				dec_mm_counter(mm, mm_counter(page));
+				reliable_page_counter(page, mm, -1);
 				set_pte_at(mm, address, pvmw.pte, pteval);
 			}
 
@@ -1563,6 +1564,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 * copied pages.
 			 */
 			dec_mm_counter(mm, mm_counter(page));
+			reliable_page_counter(page, mm, -1);
 			/* We have to invalidate as we cleared the pte */
 			mmu_notifier_invalidate_range(mm, address,
 						      address + PAGE_SIZE);
@@ -1617,6 +1619,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				mmu_notifier_invalidate_range(mm, address,
 							      address + PAGE_SIZE);
 				dec_mm_counter(mm, MM_ANONPAGES);
+				reliable_page_counter(page, mm, -1);
 				goto discard;
 			}
 
@@ -1650,6 +1653,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 				spin_unlock(&mmlist_lock);
 			}
 			dec_mm_counter(mm, MM_ANONPAGES);
+			reliable_page_counter(page, mm, -1);
 			inc_mm_counter(mm, MM_SWAPENTS);
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_soft_dirty(pteval))
@@ -1670,6 +1674,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			 * See Documentation/vm/mmu_notifier.rst
 			 */
 			dec_mm_counter(mm, mm_counter_file(page));
+			reliable_page_counter(page, mm, -1);
 		}
 discard:
 		/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 8d32d49a4d7ba..16bb7806a25e6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2473,6 +2473,7 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
 	spin_unlock_irq(&info->lock);
 
 	inc_mm_counter(dst_mm, mm_counter_file(page));
+	reliable_page_counter(page, dst_mm, 1);
 	page_add_file_rmap(page, false);
 	set_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4028994a51ae6..2619729400d32 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1869,6 +1869,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
 	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
+	reliable_page_counter(page, vma->vm_mm, 1);
 	get_page(page);
 	set_pte_at(vma->vm_mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 1c86abd41c6d7..c26dd2040624f 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -116,6 +116,7 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
 #endif
 
 	inc_mm_counter(dst_mm, MM_ANONPAGES);
+	reliable_page_counter(page, dst_mm, 1);
 	page_add_new_anon_rmap(page, dst_vma, dst_addr, false);
 	mem_cgroup_commit_charge(page, memcg, false, false);
 	lru_cache_add_active_or_unevictable(page, dst_vma);