From: Rik van Riel <riel@surriel.com>
next inclusion
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/IAO6NS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?...
--------------------------------
Take the end of a file write into consideration when deciding whether or not to use huge pages for tmpfs files when the tmpfs filesystem is mounted with huge=within_size.
This allows large writes that append to the end of a file to automatically use large pages.
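As a rough sketch of the decision this changes (a simplified, standalone rendition of the logic in __shmem_huge_global_enabled(), not the kernel code itself; the PAGE_SHIFT/HPAGE_PMD_NR values are the usual x86-64 ones and the helper names are made up for illustration):

```c
#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define HPAGE_PMD_NR	512	/* pages per 2MB PMD huge page on x86-64 */

static uint64_t round_up_to(uint64_t x, uint64_t align)
{
	return (x + align - 1) & ~(align - 1);
}

/*
 * Simplified model of the huge=within_size check: a PMD-sized page is
 * allowed at 'index' if either the current i_size or the end of the
 * write in progress covers the whole huge page containing that index.
 */
static bool within_size_allows_huge(uint64_t index, uint64_t i_size,
				    uint64_t write_end)
{
	uint64_t size = i_size;

	/* first page index past the huge page that contains 'index' */
	index = round_up_to(index + 1, HPAGE_PMD_NR);

	if (write_end > size)	/* new: also consider the write end */
		size = write_end;
	size = round_up_to(size, PAGE_SIZE);

	return (size >> PAGE_SHIFT) >= index;
}
```

Previously only i_size_read() was consulted here, so an appending write saw the old, smaller i_size and the within_size check kept failing until the file had already grown past the huge page boundary.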
With fio doing 4MB sequential writes without fallocate to a 16GB tmpfs file, the numbers without THP or with huge=always stay the same, but the performance with huge=within_size now matches that of huge=always.
huge		before		after
4kB pages	1560 MB/s	1560 MB/s
within_size	1560 MB/s	4720 MB/s
always:		4720 MB/s	4720 MB/s
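The measurement above was done with fio; the exact job parameters are not part of this changelog. As a hypothetical, minimal equivalent of the described access pattern (sequential 4MB appends, no fallocate), the following userspace loop exercises the same path — the mount point and total size here are assumptions, not the original test setup:

```c
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define CHUNK	(4UL << 20)	/* 4MB per write, as described above */

int main(void)
{
	/* Assumes a tmpfs mounted with "-o huge=within_size" at this path. */
	int fd = open("/mnt/tmpfs/testfile", O_CREAT | O_TRUNC | O_WRONLY, 0600);
	char *buf;
	long i;

	if (fd < 0)
		return 1;
	buf = malloc(CHUNK);
	if (!buf)
		return 1;
	memset(buf, 0xab, CHUNK);

	/* 4096 * 4MB = 16GB of sequential appends, growing the file as we go. */
	for (i = 0; i < 4096; i++)
		if (write(fd, buf, CHUNK) != (ssize_t)CHUNK)
			return 1;

	free(buf);
	close(fd);
	return 0;
}
```

With write_end (pos + len) now passed down from shmem_write_begin(), each of these appends already qualifies for a PMD-sized page under huge=within_size.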
[akpm@linux-foundation.org: coding-style cleanups]
Link: https://lkml.kernel.org/r/20240903111928.7171e60c@imladris.surriel.com
Signed-off-by: Rik van Riel <riel@surriel.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Darrick J. Wong <djwong@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Conflicts:
	fs/xfs/scrub/xfile.c
	fs/xfs/xfs_buf_mem.c
	mm/khugepaged.c
	mm/shmem.c
[ Conflict in xfile.c, xfs_buf_mem.c and shmem.c because shmem_get_folio()
  has not been exported and used in xfs. Conflict in khugepaged.c because
  some pages there have not been converted to folios. Context conflict with
  mm_in_dynamic_pool() in shmem.c ]
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 include/linux/shmem_fs.h |  8 +++---
 mm/huge_memory.c         |  2 +-
 mm/khugepaged.c          |  2 +-
 mm/shmem.c               | 58 +++++++++++++++++++++-------------------
 mm/userfaultfd.c         |  2 +-
 5 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 57e8a6689439..0880504a781e 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -117,11 +117,11 @@ int shmem_unuse(unsigned int type);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
-				bool shmem_huge_force);
+				loff_t write_end, bool shmem_huge_force);
 #else
 static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
-				bool shmem_huge_force)
+				loff_t write_end, bool shmem_huge_force)
 {
 	return 0;
 }
@@ -147,8 +147,8 @@ enum sgp_type {
 	SGP_FALLOC,	/* like SGP_WRITE, but make existing page Uptodate */
 };

-int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
-		enum sgp_type sgp);
+int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
+		struct folio **foliop, enum sgp_type sgp);
 struct folio *shmem_read_folio_gfp(struct address_space *mapping,
 		pgoff_t index, gfp_t gfp);

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fbcfbd5fa914..ea560ea7b39e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -158,7 +158,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
 	 */
 	if (!in_pf && shmem_file(vma->vm_file))
 		return shmem_allowable_huge_orders(file_inode(vma->vm_file),
-						   vma, vma->vm_pgoff,
+						   vma, vma->vm_pgoff, 0,
 						   !enforce_sysfs);

 	if (!vma_is_anonymous(vma)) {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 8006b13304de..c6379a2d55bc 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1900,7 +1900,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 		if (xa_is_value(page) || !PageUptodate(page)) {
 			xas_unlock_irq(&xas);
 			/* swap in or instantiate fallocated page */
-			if (shmem_get_folio(mapping->host, index,
+			if (shmem_get_folio(mapping->host, index, 0,
 					    &folio, SGP_NOALLOC)) {
 				result = SCAN_FAIL;
 				goto xa_unlocked;
diff --git a/mm/shmem.c b/mm/shmem.c
index 9ab915d5c060..700335e56e67 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -543,7 +543,8 @@ static bool shmem_confirm_swap(struct address_space *mapping,
 static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;

 static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-					bool shmem_huge_force, struct vm_area_struct *vma,
+					loff_t write_end, bool shmem_huge_force,
+					struct vm_area_struct *vma,
 					unsigned long vm_flags)
 {
 	struct mm_struct *mm = vma ? vma->vm_mm : NULL;
@@ -563,7 +564,8 @@ static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
 		return true;
 	case SHMEM_HUGE_WITHIN_SIZE:
 		index = round_up(index + 1, HPAGE_PMD_NR);
-		i_size = round_up(i_size_read(inode), PAGE_SIZE);
+		i_size = max(write_end, i_size_read(inode));
+		i_size = round_up(i_size, PAGE_SIZE);
 		if (i_size >> PAGE_SHIFT >= index)
 			return true;
 		fallthrough;
@@ -577,14 +579,14 @@ static bool __shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
 }

 static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-		bool shmem_huge_force, struct vm_area_struct *vma,
-		unsigned long vm_flags)
+		loff_t write_end, bool shmem_huge_force,
+		struct vm_area_struct *vma, unsigned long vm_flags)
 {
 	if (HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER)
 		return false;

-	return __shmem_huge_global_enabled(inode, index, shmem_huge_force,
-					   vma, vm_flags);
+	return __shmem_huge_global_enabled(inode, index, write_end,
+					   shmem_huge_force, vma, vm_flags);
 }

 #if defined(CONFIG_SYSFS)
@@ -769,8 +771,8 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
 }

 static bool shmem_huge_global_enabled(struct inode *inode, pgoff_t index,
-		bool shmem_huge_force, struct vm_area_struct *vma,
-		unsigned long vm_flags)
+		loff_t write_end, bool shmem_huge_force,
+		struct vm_area_struct *vma, unsigned long vm_flags)
 {
 	return false;
 }
@@ -976,7 +978,7 @@ static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index)
 	 * (although in some cases this is just a waste of time).
 	 */
 	folio = NULL;
-	shmem_get_folio(inode, index, &folio, SGP_READ);
+	shmem_get_folio(inode, index, 0, &folio, SGP_READ);
 	return folio;
 }

@@ -1161,7 +1163,7 @@ static int shmem_getattr(struct mnt_idmap *idmap,
 			STATX_ATTR_NODUMP);
 	generic_fillattr(idmap, request_mask, inode, stat);

-	if (shmem_huge_global_enabled(inode, 0, false, NULL, 0))
+	if (shmem_huge_global_enabled(inode, 0, 0, false, NULL, 0))
 		stat->blksize = HPAGE_PMD_SIZE;

 	if (request_mask & STATX_BTIME) {
@@ -1650,7 +1652,7 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 unsigned long shmem_allowable_huge_orders(struct inode *inode,
 				struct vm_area_struct *vma, pgoff_t index,
-				bool shmem_huge_force)
+				loff_t write_end, bool shmem_huge_force)
 {
 	unsigned long mask = READ_ONCE(huge_shmem_orders_always);
 	unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
@@ -1667,8 +1669,8 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
 	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
 		return 0;

-	global_huge = shmem_huge_global_enabled(inode, index, shmem_huge_force,
-						vma, vm_flags);
+	global_huge = shmem_huge_global_enabled(inode, index, write_end,
+						shmem_huge_force, vma, vm_flags);
 	if (!vma || !vma_is_anon_shmem(vma)) {
 		/*
 		 * For tmpfs, we now only support PMD sized THP if huge page
@@ -2112,8 +2114,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
  * vmf and fault_type are only supplied by shmem_fault: otherwise they are NULL.
  */
 static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
-		struct folio **foliop, enum sgp_type sgp, gfp_t gfp,
-		struct vm_fault *vmf, vm_fault_t *fault_type)
+		loff_t write_end, struct folio **foliop, enum sgp_type sgp,
+		gfp_t gfp, struct vm_fault *vmf, vm_fault_t *fault_type)
 {
 	struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
 	struct mm_struct *fault_mm;
@@ -2193,7 +2195,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	}

 	/* Find hugepage orders that are allowed for anonymous shmem and tmpfs. */
-	orders = shmem_allowable_huge_orders(inode, vma, index, false);
+	orders = shmem_allowable_huge_orders(inode, vma, index, write_end, false);
 	if (mm_in_dynamic_pool(vma ? vma->vm_mm : current->mm))
 		orders = 0;
 	if (orders > 0) {
@@ -2294,10 +2296,10 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	return error;
 }

-int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
-		enum sgp_type sgp)
+int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
+		struct folio **foliop, enum sgp_type sgp)
 {
-	return shmem_get_folio_gfp(inode, index, foliop, sgp,
+	return shmem_get_folio_gfp(inode, index, write_end, foliop, sgp,
 			mapping_gfp_mask(inode->i_mapping), NULL, NULL);
 }

@@ -2391,7 +2393,7 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
 	}

 	WARN_ON_ONCE(vmf->page != NULL);
-	err = shmem_get_folio_gfp(inode, vmf->pgoff, &folio, SGP_CACHE,
+	err = shmem_get_folio_gfp(inode, vmf->pgoff, 0, &folio, SGP_CACHE,
 				  gfp, vmf, &ret);
 	if (err)
 		return vmf_error(err);
@@ -2876,7 +2878,7 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
 			return -EPERM;
 	}

-	ret = shmem_get_folio(inode, index, &folio, SGP_WRITE);
+	ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
 	if (ret)
 		return ret;

@@ -2947,7 +2949,7 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 			break;
 		}

-		error = shmem_get_folio(inode, index, &folio, SGP_READ);
+		error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
 		if (error) {
 			if (error == -EINVAL)
 				error = 0;
@@ -3123,7 +3125,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 		if (*ppos >= i_size_read(inode))
 			break;

-		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio,
+		error = shmem_get_folio(inode, *ppos / PAGE_SIZE, 0, &folio,
 					SGP_READ);
 		if (error) {
 			if (error == -EINVAL)
@@ -3310,8 +3312,8 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
 			error = -ENOMEM;
 		else
-			error = shmem_get_folio(inode, index, &folio,
-						SGP_FALLOC);
+			error = shmem_get_folio(inode, index, offset + len,
+						&folio, SGP_FALLOC);
 		if (error) {
 			info->fallocend = undo_fallocend;
 			/* Remove the !uptodate folios we added */
@@ -3663,7 +3665,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 	} else {
 		inode_nohighmem(inode);
 		inode->i_mapping->a_ops = &shmem_aops;
-		error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
+		error = shmem_get_folio(inode, 0, 0, &folio, SGP_WRITE);
 		if (error)
 			goto out_remove_offset;
 		inode->i_op = &shmem_symlink_inode_operations;
@@ -3709,7 +3711,7 @@ static const char *shmem_get_link(struct dentry *dentry, struct inode *inode,
 			return ERR_PTR(-ECHILD);
 		}
 	} else {
-		error = shmem_get_folio(inode, 0, &folio, SGP_READ);
+		error = shmem_get_folio(inode, 0, 0, &folio, SGP_READ);
 		if (error)
 			return ERR_PTR(error);
 		if (!folio)
@@ -5168,7 +5170,7 @@ struct folio *shmem_read_folio_gfp(struct address_space *mapping,
 	struct folio *folio;
 	int error;

-	error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE,
+	error = shmem_get_folio_gfp(inode, index, 0, &folio, SGP_CACHE,
 				  gfp, NULL, NULL);
 	if (error)
 		return ERR_PTR(error);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 4ab24c56f660..8c22dd4e5e15 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -292,7 +292,7 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
 	struct page *page;
 	int ret;

-	ret = shmem_get_folio(inode, pgoff, &folio, SGP_NOALLOC);
+	ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
 	/* Our caller expects us to return -EFAULT if we failed to find folio */
 	if (ret == -ENOENT)
 		ret = -EFAULT;