From: Daniel Jordan <daniel.m.jordan@oracle.com>
stable inclusion
from stable-v5.10.146
commit abb560abdf47f3091eb45d345b28397bb852ccc7
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I6D0VX
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 4b6c33b3229678e38a6b0bbd4367d4b91366b523 ]
Get ready to pin more pages at once with struct vfio_batch, which represents a batch of pinned pages.
The struct has a fallback page pointer to avoid two unlikely scenarios: pointlessly allocating a page if disable_hugepages is enabled or failing the whole pinning operation if the kernel can't allocate memory.
vaddr_get_pfn() becomes vaddr_get_pfns() to prepare for handling multiple pages, though for now only one page is stored in the pages array.
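As an aside for reviewers, the fallback pattern is small enough to sketch in isolation. Below is a minimal, compilable userspace illustration of the same init/fini lifecycle, with malloc() standing in for __get_free_page() and a hypothetical force_fallback flag standing in for disable_hugepages; apart from the field layout, every name here is an invented stand-in, not the kernel code. Note the capacity arithmetic: one page of pointers holds PAGE_SIZE / sizeof(struct page *) entries, i.e. 512 with 4 KiB pages and 8-byte pointers.

/* Userspace sketch only; names are hypothetical stand-ins. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL
#define BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(void *)) /* 512 on 64-bit */

struct batch {
	void **pages;        /* array of page pointers */
	void *fallback_page; /* single-entry storage if alloc fails */
	int capacity;        /* length of pages array */
};

static void batch_init(struct batch *b, int force_fallback)
{
	if (force_fallback)
		goto fallback;

	b->pages = malloc(PAGE_SIZE);
	if (!b->pages)
		goto fallback;

	b->capacity = (int)BATCH_MAX_CAPACITY;
	return;

fallback:
	/* Degrade to a one-entry batch instead of failing the caller. */
	b->pages = &b->fallback_page;
	b->capacity = 1;
}

static void batch_fini(struct batch *b)
{
	/* Free only what batch_init() actually allocated. */
	if (b->capacity == (int)BATCH_MAX_CAPACITY)
		free(b->pages);
}

int main(void)
{
	struct batch b;

	batch_init(&b, 0);
	printf("capacity = %d\n", b.capacity);
	batch_fini(&b);
	return 0;
}

The point of the fallback is that both unlikely cases collapse to the pre-batching behavior (one page at a time) rather than turning an allocation hiccup into a failed DMA map.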
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Stable-dep-of: 873aefb376bb ("vfio/type1: Unpin zero pages")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
Conflicts:
	drivers/vfio/vfio_iommu_type1.c

Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com>
---
 drivers/vfio/vfio_iommu_type1.c | 71 +++++++++++++++++++++++++++------
 1 file changed, 58 insertions(+), 13 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 62563e30a4b9..fb663e0f365c 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -111,6 +111,12 @@ struct vfio_sva_handle {
 	struct list_head	next;
 };
 
+struct vfio_batch {
+	struct page		**pages;	/* for pin_user_pages_remote */
+	struct page		*fallback_page; /* if pages alloc fails */
+	int			capacity;	/* length of pages array */
+};
+
 struct vfio_group {
 	struct iommu_group	*iommu_group;
 	struct list_head	next;
@@ -442,6 +448,31 @@ static int put_pfn(unsigned long pfn, int prot)
 	return 0;
 }
 
+#define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
+
+static void vfio_batch_init(struct vfio_batch *batch)
+{
+	if (unlikely(disable_hugepages))
+		goto fallback;
+
+	batch->pages = (struct page **) __get_free_page(GFP_KERNEL);
+	if (!batch->pages)
+		goto fallback;
+
+	batch->capacity = VFIO_BATCH_MAX_CAPACITY;
+	return;
+
+fallback:
+	batch->pages = &batch->fallback_page;
+	batch->capacity = 1;
+}
+
+static void vfio_batch_fini(struct vfio_batch *batch)
+{
+	if (batch->capacity == VFIO_BATCH_MAX_CAPACITY)
+		free_page((unsigned long)batch->pages);
+}
+
 static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
 			    unsigned long vaddr, unsigned long *pfn,
 			    bool write_fault)
@@ -482,10 +513,10 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
  * Returns the positive number of pfns successfully obtained or a negative
  * error code.
  */
-static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
-			 int prot, unsigned long *pfn)
+static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
+			  long npages, int prot, unsigned long *pfn,
+			  struct page **pages)
 {
-	struct page *page[1];
 	struct vm_area_struct *vma;
 	unsigned int flags = 0;
 	int ret;
@@ -494,10 +525,10 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 		flags |= FOLL_WRITE;
 
 	mmap_read_lock(mm);
-	ret = pin_user_pages_remote(mm, vaddr, 1, flags | FOLL_LONGTERM,
-				    page, NULL, NULL);
-	if (ret == 1) {
-		*pfn = page_to_pfn(page[0]);
+	ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
+				    pages, NULL, NULL);
+	if (ret > 0) {
+		*pfn = page_to_pfn(pages[0]);
 		goto done;
 	}
@@ -530,7 +561,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
  */
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 				  long npage, unsigned long *pfn_base,
-				  unsigned long limit)
+				  unsigned long limit, struct vfio_batch *batch)
 {
 	unsigned long pfn = 0;
 	long ret, pinned = 0, lock_acct = 0;
@@ -541,7 +572,8 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	if (!current->mm)
 		return -ENODEV;
 
-	ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, pfn_base);
+	ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, pfn_base,
+			     batch->pages);
 	if (ret < 0)
 		return ret;
 
@@ -568,7 +600,8 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	/* Lock all the consecutive pages from pfn_base */
 	for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
 	     pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
-		ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn);
+		ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, &pfn,
+				     batch->pages);
 		if (ret < 0)
 			break;
 
@@ -631,6 +664,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
 				  unsigned long *pfn_base, bool do_accounting)
 {
+	struct page *pages[1];
 	struct mm_struct *mm;
 	int ret;
 
@@ -638,7 +672,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
 	if (!mm)
 		return -ENODEV;
 
-	ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base);
+	ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
 	if (ret == 1 && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
 		ret = vfio_lock_acct(dma, 1, true);
 		if (ret) {
@@ -1447,15 +1481,19 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 {
 	dma_addr_t iova = dma->iova;
 	unsigned long vaddr = dma->vaddr;
+	struct vfio_batch batch;
 	size_t size = map_size;
 	long npage;
 	unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	int ret = 0;
 
+	vfio_batch_init(&batch);
+
 	while (size) {
 		/* Pin a contiguous chunk of memory */
 		npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
-					      size >> PAGE_SHIFT, &pfn, limit);
+					      size >> PAGE_SHIFT, &pfn, limit,
+					      &batch);
 		if (npage <= 0) {
 			WARN_ON(!npage);
 			ret = (int)npage;
@@ -1475,6 +1513,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 		dma->size += npage << PAGE_SHIFT;
 	}
 
+	vfio_batch_fini(&batch);
 	dma->iommu_mapped = true;
 
 	if (ret)
@@ -1638,6 +1677,7 @@ static int vfio_bus_type(struct device *dev, void *data)
 static int vfio_iommu_replay(struct vfio_iommu *iommu,
 			     struct vfio_domain *domain)
 {
+	struct vfio_batch batch;
 	struct vfio_domain *d = NULL;
 	struct rb_node *n;
 	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -1648,6 +1688,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 	d = list_first_entry(&iommu->domain_list,
 			     struct vfio_domain, next);
 
+	vfio_batch_init(&batch);
+
 	n = rb_first(&iommu->dma_list);
 
 	for (; n; n = rb_next(n)) {
@@ -1695,7 +1737,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 				npage = vfio_pin_pages_remote(dma, vaddr,
 							      n >> PAGE_SHIFT,
-							      &pfn, limit);
+							      &pfn, limit,
+							      &batch);
 				if (npage <= 0) {
 					WARN_ON(!npage);
 					ret = (int)npage;
@@ -1728,6 +1771,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 		dma->iommu_mapped = true;
 	}
 
+	vfio_batch_fini(&batch);
 	return 0;
 
unwind:
@@ -1768,6 +1812,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 		}
 	}
 
+	vfio_batch_fini(&batch);
 	return ret;
 }