From: Wang Wensheng <wangwensheng4@huawei.com>
Offering: HULK
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DS9S
--------------------------------------------------
This is not about THP, but the user page table looks just like THP: the
user allocates hugepages via a special driver and the resulting vma is
not marked with VM_HUGETLB. This commit allows sharing such vmas to the
kernel.
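For illustration only (not part of the patch): because such a vma carries
no VM_HUGETLB or THP hint, the page size can only be learned from the page
table itself during the walk. The helper below is a hypothetical sketch;
only is_vm_hugetlb_page() and pmd_huge() are existing kernel interfaces,
and sp_pmd_entry() in the diff is the real implementation.

#include <linux/mm.h>
#include <linux/hugetlb.h>

/* Illustrative sketch: decide the mapping size without trusting vma flags. */
static bool mapping_is_huge(struct vm_area_struct *vma, pmd_t *pmd)
{
	if (is_vm_hugetlb_page(vma))	/* false for the DVPP driver's vma */
		return true;

	return pmd_huge(*pmd);		/* the pmd is the only reliable hint */
}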
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 include/linux/share_pool.h |  1 +
 mm/share_pool.c            | 44 +++++++++++++++++++++++++++++++++-----
 2 files changed, 40 insertions(+), 5 deletions(-)
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index 5539a17da7a9..022e61bb6ce4 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -179,6 +179,7 @@ struct sp_walk_data {
 	unsigned long uva_aligned;
 	unsigned long page_size;
 	bool is_hugepage;
+	bool is_page_type_set;
 	pmd_t *pmd;
 };
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 35edab122509..115200a1ee0d 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -3013,9 +3013,40 @@ EXPORT_SYMBOL_GPL(mg_sp_make_share_k2u);
 static int sp_pmd_entry(pmd_t *pmd, unsigned long addr,
 			unsigned long next, struct mm_walk *walk)
 {
+	struct page *page;
 	struct sp_walk_data *sp_walk_data = walk->private;
+	/*
+	 * There is a scenario in DVPP where the page table uses huge pages
+	 * but the vma doesn't record that, something like THP.
+	 * So we cannot tell whether it is a hugepage mapping until we access
+	 * the pmd here. If mixed page sizes appear, just return an error.
+	 */
+	if (pmd_huge(*pmd)) {
+		if (!sp_walk_data->is_page_type_set) {
+			sp_walk_data->is_page_type_set = true;
+			sp_walk_data->is_hugepage = true;
+		} else if (!sp_walk_data->is_hugepage)
+			return -EFAULT;
+
+		/* To skip pte level walk */
+		walk->action = ACTION_CONTINUE;
+
+		page = pmd_page(*pmd);
+		get_page(page);
+		sp_walk_data->pages[sp_walk_data->page_count++] = page;
+
+		return 0;
+	}
+
+	if (!sp_walk_data->is_page_type_set) {
+		sp_walk_data->is_page_type_set = true;
+		sp_walk_data->is_hugepage = false;
+	} else if (sp_walk_data->is_hugepage)
+		return -EFAULT;
+
 	sp_walk_data->pmd = pmd;
+
 	return 0;
 }
@@ -3159,6 +3190,8 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size,
 		sp_walk.pmd_entry = sp_pmd_entry;
 	}
+	sp_walk_data->is_page_type_set = false;
+	sp_walk_data->page_count = 0;
 	sp_walk_data->page_size = page_size;
 	uva_aligned = ALIGN_DOWN(uva, page_size);
 	sp_walk_data->uva_aligned = uva_aligned;
@@ -3183,8 +3216,12 @@ static int __sp_walk_page_range(unsigned long uva, unsigned long size,
 	ret = walk_page_range(mm, uva_aligned, uva_aligned + size_aligned,
 			      &sp_walk, sp_walk_data);
-	if (ret)
+	if (ret) {
+		while (sp_walk_data->page_count--)
+			put_page(pages[sp_walk_data->page_count]);
 		kvfree(pages);
+		sp_walk_data->pages = NULL;
+	}
 	return ret;
 }
@@ -3220,9 +3257,7 @@ void *sp_make_share_u2k(unsigned long uva, unsigned long size, int pid)
 	int ret = 0;
 	struct mm_struct *mm = current->mm;
 	void *p = ERR_PTR(-ESRCH);
-	struct sp_walk_data sp_walk_data = {
-		.page_count = 0,
-	};
+	struct sp_walk_data sp_walk_data;
 	struct vm_struct *area;
 	check_interrupt_context();
@@ -3563,7 +3598,6 @@ int sp_walk_page_range(unsigned long uva, unsigned long size,
 		return -ESRCH;
 	}
-	sp_walk_data->page_count = 0;
 	down_write(&mm->mmap_lock);
 	if (likely(!mm->core_state))
 		ret = __sp_walk_page_range(uva, size, mm, sp_walk_data);
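A usage note, as a hedged sketch rather than part of the patch: with
page_count and is_page_type_set now initialized inside
__sp_walk_page_range(), and the page references dropped there on failure,
a u2k caller only has to check the returned pointer. IS_ERR()/PTR_ERR()
are the standard kernel helpers; uva, size and pid stand for
caller-provided values.

	void *kva;

	kva = sp_make_share_u2k(uva, size, pid);
	if (IS_ERR(kva))
		return PTR_ERR(kva);	/* e.g. -ESRCH when the target task is gone */

	/* kva now maps the user buffer, whether it is hugepage-backed or not */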