hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IB5UKT
--------------------------------
ABBA deadlocks may be triggered during the following concurrency:
          write(2)              |        page fault         |     mmap(2)
------------------------------------------------------------------
                                 asm_exc_page_fault
                                  exc_page_fault
                                   handle_page_fault
                                    do_user_addr_fault
                                     mmap_read_lock(mm) ---> read lock B
vfs_write                                                    vm_mmap_pgoff()
 new_sync_write                                               mmap_write_lock_killable(mm)
  cachefiles_ondemand_fd_write_iter                            ---> try write lock B
   vfs_iocb_iter_write
    ext4_file_write_iter
     ext4_buffered_write_iter
      inode_lock(inode) ----------> write lock A
                                     handle_mm_fault
                                      __handle_mm_fault
                                       handle_pte_fault
                                        do_fault
                                         do_read_fault
                                          __do_fault
                                           filemap_fault
                                            erofs_fscache_readpage
                                             __fscache_read_or_alloc_page
                                              cachefiles_read_or_alloc_page
                                               bmap
                                                ext4_bmap
                                                 inode_lock_shared(inode) -> try read lock A
      generic_perform_write                |
       iov_iter_fault_in_readable          |
        __get_user_nocheck_1               |
         asm_exc_page_fault                |
          exc_page_fault                   |
           handle_page_fault         ABBA deadlock
            do_user_addr_fault             |
             mmap_read_lock(mm)            |
              down_read(&mm->mmap_lock) -----------------> try read lock B
This happens because the inode lock must not be taken anywhere in the page fault path, whereas in ondemand mode erofs calls bmap() when handling a page fault, and bmap() may try to lock the inode and trigger the deadlock shown above.
There is no good way to fix this completely, so fault in the user pages early in cachefiles_ondemand_fd_write_iter() so that generic_perform_write() does not trigger a page fault, which circumvents the problem.
Fixes: 9ae326a69004 ("CacheFiles: A cache that backs onto a mounted filesystem")
Signed-off-by: Baokun Li <libaokun1@huawei.com>
---
 fs/cachefiles/ondemand.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 76c936fc9a68..ed3b49a4fd4e 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -72,10 +72,11 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb, struct iov_iter *iter) { struct cachefiles_object *object = kiocb->ki_filp->private_data; - size_t len = iter->count; struct kiocb iocb; struct file *file; - int ret; + ssize_t ret = 0; + ssize_t written = 0; + size_t bytes;
rcu_read_lock(); file = rcu_dereference(object->file); @@ -95,12 +96,29 @@ static ssize_t cachefiles_ondemand_fd_write_iter(struct kiocb *kiocb,
if (!cachefiles_buffered_ondemand) iocb.ki_flags |= IOCB_DIRECT; +retry: + bytes = iov_iter_count(iter); + if (unlikely(!bytes)) + goto out; + + ret = iov_iter_fault_in_readable(iter, bytes); + if (unlikely(ret)) + goto out;
+ pagefault_disable(); ret = vfs_iocb_iter_write(file, &iocb, iter); + pagefault_enable(); + if (ret > 0) { + written += ret; + goto retry; + } else if (ret == -EFAULT) { + goto retry; + } +out: fput(file); - if (ret != len) + if (!ret && iov_iter_count(iter)) return -EIO; - return len; + return ret < 0 ? ret : written; }
static long cachefiles_ondemand_fd_ioctl(struct file *filp, unsigned int ioctl,