From: Ma Wupeng <mawupeng1@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7ZC0H
--------------------------------
Introduce /proc/<pid>/pbha_bit0 to update PBHA bit0.
This procfs file accepts the value 0 or 1:
- 0: iterate over all VMAs of the task and clear VM_PBHA_BIT0 on each of
  them. New VMAs no longer carry VM_PBHA_BIT0. Bit 59 is cleared in all
  PTEs.
- 1: iterate over all VMAs of the task and set VM_PBHA_BIT0 on each of
  them. New VMAs carry VM_PBHA_BIT0 by default. Bit 59 is set in all PTEs.
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
---
 arch/arm64/include/asm/pgtable.h |   9 ++
 drivers/soc/hisilicon/pbha.c     | 145 +++++++++++++++++++++++++++++++
 fs/proc/base.c                   | 103 ++++++++++++++++++++++
 include/linux/mm.h               |   2 +-
 include/linux/pbha.h             |   3 +
 5 files changed, 261 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 1ca5e427c603..1999bda3be61 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -532,6 +532,15 @@ static inline pte_t pte_mkpbha(pte_t pte, unsigned long pbha_val) return set_pte_bit(pte, __pgprot(__pbha_check_perf_only(pbha_val))); }
+#define pmd_mkpbha(pmd, pbha_val) pte_pmd(pte_mkpbha(pmd_pte(pmd), pbha_val)) + /* Counterpart of pte_mkpbha(): clears the same __pbha_check_perf_only(pbha_val) bits that pte_mkpbha() sets. */ +static inline pte_t pte_rmpbha(pte_t pte, unsigned long pbha_val) +{ + return clear_pte_bit(pte, __pgprot(__pbha_check_perf_only(pbha_val))); +} + /* PMD form of pte_rmpbha(), applied through pmd_pte()/pte_pmd(). */ +#define pmd_rmpbha(pmd, pbha_val) pte_pmd(pte_rmpbha(pmd_pte(pmd), pbha_val)) + #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, diff --git a/drivers/soc/hisilicon/pbha.c b/drivers/soc/hisilicon/pbha.c index 95bf9de0e9e9..0e1366077d41 100644 --- a/drivers/soc/hisilicon/pbha.c +++ b/drivers/soc/hisilicon/pbha.c @@ -9,6 +9,9 @@ #include <linux/libfdt.h> #include <linux/printk.h> #include <linux/cpufeature.h> +#include <linux/mmu_notifier.h> +#include <linux/pagewalk.h> +#include <linux/pbha.h>
#include <asm/setup.h>
@@ -40,3 +43,145 @@ void __init early_pbha_bit0_init(void)
 	if (*prop == HBM_MODE_CACHE)
 		pbha_bit0_enabled = true;
 }
+
+#define pte_pbha_bit0(pte)	\
+	(!!(pte_val(pte) & (PBHA_VAL_BIT0 << PBHA_BITS_SHIFT)))
+
+enum {
+	CLEAR_PBHA_BIT0_FLAG,
+	SET_PBHA_BIT0_FLAG,
+};
+
+/* Set or clear PBHA bit0 in one present PTE; the caller holds the PTE lock. */
+static inline void pbha_bit0_update_pte_bits(struct vm_area_struct *vma,
+			unsigned long addr, pte_t *pte, bool set)
+{
+	pte_t ptent = *pte;
+
+	if (pte_present(ptent)) {
+		pte_t old_pte;
+
+		old_pte = ptep_modify_prot_start(vma, addr, pte);
+		if (set)
+			ptent = pte_mkpbha(old_pte, PBHA_VAL_BIT0);
+		else
+			ptent = pte_rmpbha(old_pte, PBHA_VAL_BIT0);
+		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
+	}
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/* Set or clear PBHA bit0 in a present huge PMD; caller holds the PMD lock. */
+static inline void pbha_bit0_update_pmd_bits(struct vm_area_struct *vma,
+			unsigned long addr, pmd_t *pmdp, bool set)
+{
+	pmd_t pmd = *pmdp;
+
+	if (pmd_present(pmd)) {
+		if (set)
+			pmd = pmd_mkpbha(pmd, PBHA_VAL_BIT0);
+		else
+			pmd = pmd_rmpbha(pmd, PBHA_VAL_BIT0);
+
+		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+	}
+}
+#else
+static inline void pbha_bit0_update_pmd_bits(struct vm_area_struct *vma,
+					     unsigned long addr, pmd_t *pmdp,
+					     bool set)
+{
+}
+#endif
+
+/*
+ * pmd_entry callback: update a huge PMD under its lock, otherwise update each
+ * PTE mapped by @pmd in [addr, end). walk->private points to the SET/CLEAR op.
+ */
+static int pbha_bit0_pte_range(pmd_t *pmd, unsigned long addr,
+			       unsigned long end, struct mm_walk *walk)
+{
+	int *op = (int *)walk->private;
+	struct vm_area_struct *vma = walk->vma;
+	bool set = (*op == SET_PBHA_BIT0_FLAG);
+	spinlock_t *ptl;
+	pte_t *pte;
+
+	ptl = pmd_trans_huge_lock(pmd, vma);
+	if (ptl) {
+		pbha_bit0_update_pmd_bits(vma, addr, pmd, set);
+		spin_unlock(ptl);
+		return 0;
+	}
+
+	if (pmd_trans_unstable(pmd))
+		return 0;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	for (; addr != end; pte++, addr += PAGE_SIZE)
+		pbha_bit0_update_pte_bits(vma, addr, pte, set);
+	pte_unmap_unlock(pte - 1, ptl);
+	cond_resched();
+	return 0;
+}
+
+/* test_walk callback: return 1 for VM_PFNMAP vmas so the walk skips them. */
+static int pbha_bit0_test_walk(unsigned long start, unsigned long end,
+			       struct mm_walk *walk)
+{
+	struct vm_area_struct *vma = walk->vma;
+
+	return (vma->vm_flags & VM_PFNMAP) ? 1 : 0;
+}
+
+struct mm_walk_ops pbha_bit0_walk_ops = {
+	.pmd_entry	= pbha_bit0_pte_range,
+	.test_walk	= pbha_bit0_test_walk,
+};
+
+/*
+ * Set (@val == 1) or clear PBHA bit0 on @mm: def_flags, all vmas, present PTEs.
+ * Returns 0, -EINVAL if unsupported, -EINTR if the mmap lock was not acquired.
+ */
+int pbha_bit0_update_vma(struct mm_struct *mm, int val)
+{
+	bool set = (val == SET_PBHA_BIT0_FLAG);
+	struct mmu_notifier_range range;
+	struct vm_area_struct *vma;
+
+	if (!system_support_pbha_bit0())
+		return -EINVAL;
+
+	if (mmap_write_lock_killable(mm))
+		return -EINTR;
+
+	/* def_flags is modified under mmap_lock, so compare it while locked. */
+	if (!!(mm->def_flags & VM_PBHA_BIT0) == set)
+		goto out;
+
+	if (set)
+		mm->def_flags |= VM_PBHA_BIT0;
+	else
+		mm->def_flags &= ~VM_PBHA_BIT0;
+
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (!!(vma->vm_flags & VM_PBHA_BIT0) == set)
+			continue;
+		if (set)
+			vma->vm_flags |= VM_PBHA_BIT0;
+		else
+			vma->vm_flags &= ~VM_PBHA_BIT0;
+		vma_set_page_prot(vma);
+	}
+
+	inc_tlb_flush_pending(mm);
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, 0, -1UL);
+	mmu_notifier_invalidate_range_start(&range);
+	walk_page_range(mm, 0, mm->highest_vm_end, &pbha_bit0_walk_ops, &val);
+	mmu_notifier_invalidate_range_end(&range);
+	flush_tlb_mm(mm);
+	dec_tlb_flush_pending(mm);
+out:
+	mmap_write_unlock(mm);
+	return 0;
+}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 24c70ff923b8..2a4cc5c796c7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -98,6 +98,7 @@
 #include <linux/resctrl.h>
 #include <linux/share_pool.h>
 #include <linux/ksm.h>
+#include <linux/pbha.h>
 #include <trace/events/oom.h>
 #include "internal.h"
 #include "fd.h"
@@ -1351,6 +1352,102 @@ static const struct file_operations proc_reliable_operations = {
 };
 #endif
+#ifdef CONFIG_ARM64_PBHA
+/* Return 0 if pbha_bit0 may be used on @task, -EACCES otherwise. */
+static inline int pbha_bit0_check(struct task_struct *task)
+{
+	if (!system_support_pbha_bit0() || is_global_init(task))
+		return -EACCES;
+
+	if (!task->mm || (task->flags & PF_KTHREAD) ||
+	    (task->flags & PF_EXITING))
+		return -EACCES;
+
+	return 0;
+}
+
+/* Report VM_PBHA_BIT0 of the task's mm->def_flags as "1\n" or "0\n". */
+static ssize_t pbha_bit0_read(struct file *file, char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file_inode(file));
+	char buffer[PROC_NUMBUF];
+	struct mm_struct *mm;
+	size_t len;
+	short val;
+	int err;
+
+	if (!task)
+		return -ESRCH;
+
+	err = pbha_bit0_check(task);
+	/* Pin the mm; task->mm can go away once the task starts exiting. */
+	mm = err ? NULL : get_task_mm(task);
+	put_task_struct(task);
+	if (err)
+		return err;
+	if (!mm)
+		return -ENOENT;
+
+	val = mm->def_flags & VM_PBHA_BIT0 ? 1 : 0;
+	mmput(mm);
+	len = snprintf(buffer, sizeof(buffer), "%hd\n", val);
+	return simple_read_from_buffer(buf, count, ppos, buffer, len);
+}
+
+/* Parse "0" or "1" from user space and apply it via pbha_bit0_update_vma(). */
+static ssize_t pbha_bit0_write(struct file *file, const char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	struct task_struct *task = get_proc_task(file_inode(file));
+	char buffer[PROC_NUMBUF];
+	struct mm_struct *mm;
+	int val, err;
+
+	if (!task)
+		return -ESRCH;
+
+	err = pbha_bit0_check(task);
+	if (err)
+		goto out;
+
+	memset(buffer, 0, sizeof(buffer));
+	if (count > sizeof(buffer) - 1)
+		count = sizeof(buffer) - 1;
+	if (copy_from_user(buffer, buf, count)) {
+		err = -EFAULT;
+		goto out;
+	}
+
+	err = kstrtoint(strstrip(buffer), 0, &val);
+	if (err)
+		goto out;
+	if (val != 0 && val != 1) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mm = get_task_mm(task);
+	if (!mm) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	/* Keep the real error code; it is returned instead of count below. */
+	err = pbha_bit0_update_vma(mm, val);
+	mmput(mm);
+out:
+	put_task_struct(task);
+	return err < 0 ? err : count;
+}
+
+static const struct file_operations proc_pbha_bit0_ops = {
+	.read		= pbha_bit0_read,
+	.write		= pbha_bit0_write,
+	.llseek		= generic_file_llseek,
+};
+#endif
+
 #ifdef CONFIG_AUDIT
 #define TMPBUFLEN 11
 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
@@ -3483,6 +3580,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_MEMORY_RELIABLE
 	REG("reliable", S_IRUGO|S_IWUSR, proc_reliable_operations),
 #endif
+#ifdef CONFIG_ARM64_PBHA
+	REG("pbha_bit0", 0644, proc_pbha_bit0_ops),
+#endif
 #ifdef CONFIG_AUDIT
 	REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
 	REG("sessionid", S_IRUGO, proc_sessionid_operations),
@@ -3902,6 +4002,9 @@ static const struct pid_entry tid_base_stuff[] = {
 #ifdef CONFIG_MEMORY_RELIABLE
 	REG("reliable", S_IRUGO|S_IWUSR, proc_reliable_operations),
 #endif
+#ifdef CONFIG_ARM64_PBHA
+	REG("pbha_bit0", 0644, proc_pbha_bit0_ops),
+#endif
 #ifdef CONFIG_AUDIT
 	REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
 	REG("sessionid", S_IRUGO, proc_sessionid_operations),
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 465a47afd2a9..a5316ffd4e1f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -457,7 +457,7 @@ static inline bool arch_is_platform_page(u64 paddr)
 #define VM_NO_KHUGEPAGED (VM_SPECIAL | VM_HUGETLB)
/* This mask defines which mm->def_flags a process can inherit its parent */ -#define VM_INIT_DEF_MASK VM_NOHUGEPAGE +#define VM_INIT_DEF_MASK (VM_NOHUGEPAGE | VM_PBHA_BIT0)
/* This mask is used to clear all the VMA flags used by mlock */ #define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT)) diff --git a/include/linux/pbha.h b/include/linux/pbha.h index b2b256696af3..d34ec0f4e1e8 100644 --- a/include/linux/pbha.h +++ b/include/linux/pbha.h @@ -10,6 +10,7 @@ #include <linux/pgtable.h>
#define PBHA_VAL_BIT0 1UL +#define PBHA_BITS_SHIFT 59
#define EFI_OEMCONFIG_VARIABLE_GUID \ EFI_GUID(0x21f3b3c5, 0x946d, 0x41c1, 0x83, 0x8c, 0x19, 0x4e, 0x48, \ @@ -20,7 +21,9 @@
#ifdef CONFIG_ARM64_PBHA extern bool __ro_after_init pbha_bit0_enabled; +extern struct mm_walk_ops pbha_bit0_walk_ops; extern void __init early_pbha_bit0_init(void); +extern int pbha_bit0_update_vma(struct mm_struct *mm, int val);
static inline bool system_support_pbha_bit0(void) {