From: "Kirill A. Shutemov" <kirill@shutemov.name>
mainline inclusion from mainline-v5.9-rc1 commit e630bfac79456d3acd22c9286b50e83aafb7a07c category: bugfix bugzilla: 41542 CVE: NA
-----------------------------------------------
struct file_ra_state ra.mmap_miss could be accessed concurrently during page faults as noticed by KCSAN,
BUG: KCSAN: data-race in filemap_fault / filemap_map_pages
write to 0xffff9b1700a2c1b4 of 4 bytes by task 3292 on cpu 30: filemap_fault+0x920/0xfc0 do_sync_mmap_readahead at mm/filemap.c:2384 (inlined by) filemap_fault at mm/filemap.c:2486 __xfs_filemap_fault+0x112/0x3e0 [xfs] xfs_filemap_fault+0x74/0x90 [xfs] __do_fault+0x9e/0x220 do_fault+0x4a0/0x920 __handle_mm_fault+0xc69/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40
read to 0xffff9b1700a2c1b4 of 4 bytes by task 3313 on cpu 32: filemap_map_pages+0xc2e/0xd80 filemap_map_pages at mm/filemap.c:2625 do_fault+0x3da/0x920 __handle_mm_fault+0xc69/0xd00 handle_mm_fault+0xfc/0x2f0 do_page_fault+0x263/0x6f9 page_fault+0x34/0x40
Reported by Kernel Concurrency Sanitizer on: CPU: 32 PID: 3313 Comm: systemd-udevd Tainted: G W L 5.5.0-next-20200210+ #1 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019
ra.mmap_miss is used to contribute to the readahead decisions, so a data race could be undesirable. Both the read and the write happen only under the non-exclusive mmap_sem, so two concurrent writers could even underflow the counter. Fix the underflow by writing to a local variable before committing a final store to ra.mmap_miss, given that a small inaccuracy of the counter should be acceptable.
Signed-off-by: Kirill A. Shutemov kirill@shutemov.name Signed-off-by: Qian Cai cai@lca.pw Signed-off-by: Andrew Morton akpm@linux-foundation.org Tested-by: Qian Cai cai@lca.pw Reviewed-by: Matthew Wilcox (Oracle) willy@infradead.org Cc: Marco Elver elver@google.com Link: http://lkml.kernel.org/r/20200211030134.1847-1-cai@lca.pw Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Conflicts: mm/filemap.c [Peng Liu: cherry-pick from e630bfac79456d3acd22c9286b50e83aafb7a07c] Signed-off-by: Peng Liu liupeng256@huawei.com Reviewed-by: tong tiangen tongtiangen@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/filemap.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c index 5c33c40e80192..4cf4b096d5b63 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2528,6 +2528,7 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma, pgoff_t offset) { struct address_space *mapping = file->f_mapping; + unsigned int mmap_miss;
/* If we don't want any read-ahead, don't bother */ if (vma->vm_flags & VM_RAND_READ) @@ -2542,14 +2543,15 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma, }
/* Avoid banging the cache line if not needed */ - if (ra->mmap_miss < MMAP_LOTSAMISS * 10) - ra->mmap_miss++; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss < MMAP_LOTSAMISS * 10) + WRITE_ONCE(ra->mmap_miss, ++mmap_miss);
/* * Do we miss much more than hit in this file? If so, * stop bothering with read-ahead. It will only hurt. */ - if (ra->mmap_miss > MMAP_LOTSAMISS) + if (mmap_miss > MMAP_LOTSAMISS) return;
/* @@ -2572,12 +2574,14 @@ static void do_async_mmap_readahead(struct vm_area_struct *vma, pgoff_t offset) { struct address_space *mapping = file->f_mapping; + unsigned int mmap_miss;
/* If we don't want any read-ahead, don't bother */ if (vma->vm_flags & VM_RAND_READ) return; - if (ra->mmap_miss > 0) - ra->mmap_miss--; + mmap_miss = READ_ONCE(ra->mmap_miss); + if (mmap_miss) + WRITE_ONCE(ra->mmap_miss, --mmap_miss); if (PageReadahead(page)) page_cache_async_readahead(mapping, ra, file, page, offset, ra->ra_pages); @@ -2737,6 +2741,7 @@ void filemap_map_pages(struct vm_fault *vmf, pgoff_t last_pgoff = start_pgoff; unsigned long max_idx; struct page *head, *page; + unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss);
rcu_read_lock(); radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start_pgoff) { @@ -2784,8 +2789,8 @@ void filemap_map_pages(struct vm_fault *vmf, if (page->index >= max_idx) goto unlock;
- if (file->f_ra.mmap_miss > 0) - file->f_ra.mmap_miss--; + if (mmap_miss > 0) + mmap_miss--;
vmf->address += (iter.index - last_pgoff) << PAGE_SHIFT; if (vmf->pte) @@ -2807,6 +2812,7 @@ void filemap_map_pages(struct vm_fault *vmf, break; } rcu_read_unlock(); + WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); } EXPORT_SYMBOL(filemap_map_pages);