From: Jens Axboe axboe@kernel.dk
mainline inclusion from mainline-5.2-rc1 commit 22f96b3808c12a218e9a3bce6e1bfbd74efbe374 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=27 CVE: NA ---------------------------
This just pulls out the ksys_sync_file_range() code to work on a struct file instead of an fd, so we can use it elsewhere.
Signed-off-by: Jens Axboe axboe@kernel.dk
Conflicts: fs/sync.c include/linux/fs.h [ Patch c553ea4fdf("fs/sync.c: sync_file_range(2) may use WB_SYNC_ALL writeback") applied earlier. ]
Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Signed-off-by: yangerkun yangerkun@huawei.com Reviewed-by: zhangyi (F) yi.zhang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- fs/sync.c | 141 ++++++++++++++++++++++++--------------------- include/linux/fs.h | 3 + 2 files changed, 77 insertions(+), 67 deletions(-)
diff --git a/fs/sync.c b/fs/sync.c index 9e8cd90e890f..4d1ff010bc5a 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -234,61 +234,10 @@ SYSCALL_DEFINE1(fdatasync, unsigned int, fd) return do_fsync(fd, 1); }
-/* - * ksys_sync_file_range() permits finely controlled syncing over a segment of - * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is - * zero then ksys_sync_file_range() will operate from offset out to EOF. - * - * The flag bits are: - * - * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range - * before performing the write. - * - * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the - * range which are not presently under writeback. Note that this may block for - * significant periods due to exhaustion of disk request structures. - * - * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range - * after performing the write. - * - * Useful combinations of the flag bits are: - * - * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages - * in the range which were dirty on entry to ksys_sync_file_range() are placed - * under writeout. This is a start-write-for-data-integrity operation. - * - * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which - * are not presently under writeout. This is an asynchronous flush-to-disk - * operation. Not suitable for data integrity operations. - * - * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for - * completion of writeout of all pages in the range. This will be used after an - * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait - * for that operation to complete and to return the result. - * - * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER - * (a.k.a. SYNC_FILE_RANGE_WRITE_AND_WAIT): - * a traditional sync() operation. This is a write-for-data-integrity operation - * which will ensure that all pages in the range which were dirty on entry to - * ksys_sync_file_range() are written to disk. It should be noted that disk - * caches are not flushed by this call, so there are no guarantees here that the - * data will be available on disk after a crash. - * - * - * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any - * I/O errors or ENOSPC conditions and will return those to the caller, after - * clearing the EIO and ENOSPC flags in the address_space. - * - * It should be noted that none of these operations write out the file's - * metadata. So unless the application is strictly performing overwrites of - * already-instantiated disk blocks, there are no guarantees here that the data - * will be available after a crash. - */ -int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, - unsigned int flags) +int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, + unsigned int flags) { int ret; - struct fd f; struct address_space *mapping; loff_t endbyte; /* inclusive */ umode_t i_mode; @@ -328,23 +277,18 @@ int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, else endbyte--; /* inclusive */
- ret = -EBADF; - f = fdget(fd); - if (!f.file) - goto out; - - i_mode = file_inode(f.file)->i_mode; + i_mode = file_inode(file)->i_mode; ret = -ESPIPE; if (!S_ISREG(i_mode) && !S_ISBLK(i_mode) && !S_ISDIR(i_mode) && !S_ISLNK(i_mode)) - goto out_put; + goto out;
- mapping = f.file->f_mapping; + mapping = file->f_mapping; ret = 0; if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) { - ret = file_fdatawait_range(f.file, offset, endbyte); + ret = file_fdatawait_range(file, offset, endbyte); if (ret < 0) - goto out_put; + goto out; }
if (flags & SYNC_FILE_RANGE_WRITE) { @@ -357,18 +301,81 @@ int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, ret = __filemap_fdatawrite_range(mapping, offset, endbyte, sync_mode); if (ret < 0) - goto out_put; + goto out; }
if (flags & SYNC_FILE_RANGE_WAIT_AFTER) - ret = file_fdatawait_range(f.file, offset, endbyte); + ret = file_fdatawait_range(file, offset, endbyte);
-out_put: - fdput(f); out: return ret; }
+/* + * ksys_sync_file_range() permits finely controlled syncing over a segment of + * a file in the range offset .. (offset+nbytes-1) inclusive. If nbytes is + * zero then ksys_sync_file_range() will operate from offset out to EOF. + * + * The flag bits are: + * + * SYNC_FILE_RANGE_WAIT_BEFORE: wait upon writeout of all pages in the range + * before performing the write. + * + * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the + * range which are not presently under writeback. Note that this may block for + * significant periods due to exhaustion of disk request structures. + * + * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range + * after performing the write. + * + * Useful combinations of the flag bits are: + * + * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE: ensures that all pages + * in the range which were dirty on entry to ksys_sync_file_range() are placed + * under writeout. This is a start-write-for-data-integrity operation. + * + * SYNC_FILE_RANGE_WRITE: start writeout of all dirty pages in the range which + * are not presently under writeout. This is an asynchronous flush-to-disk + * operation. Not suitable for data integrity operations. + * + * SYNC_FILE_RANGE_WAIT_BEFORE (or SYNC_FILE_RANGE_WAIT_AFTER): wait for + * completion of writeout of all pages in the range. This will be used after an + * earlier SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE operation to wait + * for that operation to complete and to return the result. + * + * SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE|SYNC_FILE_RANGE_WAIT_AFTER + * (a.k.a. SYNC_FILE_RANGE_WRITE_AND_WAIT): + * a traditional sync() operation. This is a write-for-data-integrity operation + * which will ensure that all pages in the range which were dirty on entry to + * ksys_sync_file_range() are written to disk. It should be noted that disk + * caches are not flushed by this call, so there are no guarantees here that the + * data will be available on disk after a crash. + * + * + * SYNC_FILE_RANGE_WAIT_BEFORE and SYNC_FILE_RANGE_WAIT_AFTER will detect any + * I/O errors or ENOSPC conditions and will return those to the caller, after + * clearing the EIO and ENOSPC flags in the address_space. + * + * It should be noted that none of these operations write out the file's + * metadata. So unless the application is strictly performing overwrites of + * already-instantiated disk blocks, there are no guarantees here that the data + * will be available after a crash. + */ +int ksys_sync_file_range(int fd, loff_t offset, loff_t nbytes, + unsigned int flags) +{ + int ret; + struct fd f; + + ret = -EBADF; + f = fdget(fd); + if (f.file) + ret = sync_file_range(f.file, offset, nbytes, flags); + + fdput(f); + return ret; +} + SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes, unsigned int, flags) { diff --git a/include/linux/fs.h b/include/linux/fs.h index db7dd25ce645..36d828c741d5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2804,6 +2804,9 @@ extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync);
+extern int sync_file_range(struct file *file, loff_t offset, loff_t nbytes, + unsigned int flags); + /* * Sync the bytes written if this was a synchronous write. Expect ki_pos * to already be updated for the write, and will return either the amount