From: ZhangPeng zhangpeng362@huawei.com
Backport arch/mm/fault: accelerate pagefault when badaccess.
A test from lmbench shows a 34% improvement after these changes on arm64: lat_sig -P 1 prot lat_sig 0.29194 -> 0.19198
Kefeng Wang (2): arm64: mm: accelerate pagefault when VM_FAULT_BADACCESS x86: mm: accelerate pagefault when badaccess
arch/arm64/mm/fault.c | 4 +++- arch/x86/mm/fault.c | 23 ++++++++++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-)
From: Kefeng Wang wangkefeng.wang@huawei.com
maillist inclusion category: performance bugzilla: https://gitee.com/openeuler/kernel/issues/I9E07B CVE: NA
Reference: https://lore.kernel.org/linux-mm/20240403083805.1818160-1-wangkefeng.wang@hu...
--------------------------------
The vm_flags of the vma are already checked under the per-VMA lock; if it is a bad access, directly set fault to VM_FAULT_BADACCESS and handle the error, with no need to retry with mmap_lock again. The latency is reduced by 34% in the 'lat_sig -P 1 prot lat_sig' testcase from lmbench.
Since the page fault is handled under the per-VMA lock, count it as a vma lock event with VMA_LOCK_SUCCESS.
Reviewed-by: Suren Baghdasaryan surenb@google.com Signed-off-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: ZhangPeng zhangpeng362@huawei.com --- arch/arm64/mm/fault.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 7d304e0132b7..e5b38a26bc73 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -622,7 +622,9 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
if (!(vma->vm_flags & vm_flags)) { vma_end_read(vma); - goto lock_mmap; + fault = VM_FAULT_BADACCESS; + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + goto done; } fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs); if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
From: Kefeng Wang wangkefeng.wang@huawei.com
maillist inclusion category: performance bugzilla: https://gitee.com/openeuler/kernel/issues/I9E07B CVE: NA
Reference: https://lore.kernel.org/linux-mm/20240403083805.1818160-1-wangkefeng.wang@hu...
--------------------------------
The access_error() of the vma is already checked under the per-VMA lock; if it is a bad access, handle the error directly, with no need to retry with mmap_lock again. In order to release the correct lock, pass the mm_struct into bad_area_access_error(): if mm is NULL, release the vma lock, otherwise release mmap_lock. Since the page fault is handled under the per-VMA lock, count it as a vma lock event with VMA_LOCK_SUCCESS.
Reviewed-by: Suren Baghdasaryan surenb@google.com Signed-off-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: ZhangPeng zhangpeng362@huawei.com --- arch/x86/mm/fault.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 679b09cfe241..0a3962e32240 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -868,14 +868,17 @@ bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
static void __bad_area(struct pt_regs *regs, unsigned long error_code, - unsigned long address, u32 pkey, int si_code) + unsigned long address, struct mm_struct *mm, + struct vm_area_struct *vma, u32 pkey, int si_code) { - struct mm_struct *mm = current->mm; /* * Something tried to access memory that isn't in our memory map.. * Fix it, but check if it's kernel or user first.. */ - mmap_read_unlock(mm); + if (mm) + mmap_read_unlock(mm); + else + vma_end_read(vma);
__bad_area_nosemaphore(regs, error_code, address, pkey, si_code); } @@ -899,7 +902,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
static noinline void bad_area_access_error(struct pt_regs *regs, unsigned long error_code, - unsigned long address, struct vm_area_struct *vma) + unsigned long address, struct mm_struct *mm, + struct vm_area_struct *vma) { /* * This OSPKE check is not strictly necessary at runtime. @@ -929,9 +933,9 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, */ u32 pkey = vma_pkey(vma);
- __bad_area(regs, error_code, address, pkey, SEGV_PKUERR); + __bad_area(regs, error_code, address, mm, vma, pkey, SEGV_PKUERR); } else { - __bad_area(regs, error_code, address, 0, SEGV_ACCERR); + __bad_area(regs, error_code, address, mm, vma, 0, SEGV_ACCERR); } }
@@ -1358,8 +1362,9 @@ void do_user_addr_fault(struct pt_regs *regs, goto lock_mmap;
if (unlikely(access_error(error_code, vma))) { - vma_end_read(vma); - goto lock_mmap; + bad_area_access_error(regs, error_code, address, NULL, vma); + count_vm_vma_lock_event(VMA_LOCK_SUCCESS); + return; } fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs); if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) @@ -1395,7 +1400,7 @@ void do_user_addr_fault(struct pt_regs *regs, * we can handle it.. */ if (unlikely(access_error(error_code, vma))) { - bad_area_access_error(regs, error_code, address, vma); + bad_area_access_error(regs, error_code, address, mm, vma); return; }