When memory is tight, some page cache can be reclaimed to avoid triggering OOM, but the reclaimed pages may be exactly what the program is currently reading. Those pages are reclaimed and then read back from disk again, and this cycle can repeat for a long time before OOM finally triggers. Due to read-ahead, there is also a large amount of redundant disk IO, which aggravates the problem (a hypothetical reproducer sketch follows the diffstat below).

These two patches lessen the problem by breaking out of read-ahead early.
Liu Shixin (2):
  mm/readahead: stop readahead loop if memcg charge fails
  mm/readahead: limit sync readahead while too many active refault
 include/linux/fs.h      |  1 +
 include/linux/pagemap.h |  1 +
 mm/filemap.c            | 16 ++++++++++++++++
 mm/readahead.c          | 21 ++++++++++++++++-----
 4 files changed, 34 insertions(+), 5 deletions(-)
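For context, a minimal userspace reproducer sketch (hypothetical, not part of the series): it sequentially re-reads a file whose size slightly exceeds a memcg's memory limit, so each pass evicts the page cache that the next pass needs. The memcg setup (e.g. writing the shell's PID to cgroup.procs and setting memory.max just below the file size) is assumed to be done beforehand.

/*
 * Hypothetical reproducer sketch: sequentially re-read a file that is
 * slightly larger than the memcg limit, so each pass evicts the page
 * cache the next pass needs. Run inside a memcg, e.g. after
 * "echo $$ > /sys/fs/cgroup/test/cgroup.procs" with memory.max set
 * just below the file size.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	static char buf[1 << 20];	/* 1 MiB read buffer */
	int fd, pass;
	ssize_t n;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <big-file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	for (pass = 0; pass < 10; pass++) {
		lseek(fd, 0, SEEK_SET);
		/*
		 * Each pass triggers sync readahead; under memcg pressure
		 * the pages read ahead are reclaimed before they are used.
		 */
		while ((n = read(fd, buf, sizeof(buf))) > 0)
			;
		if (n < 0)
			perror("read");
	}
	close(fd);
	return 0;
}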
FeedBack: The patch(es) which you have sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/4083
Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/X...
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8EXN6
--------------------------------
When a task in a memcg readaheads file pages, page_cache_ra_unbounded() tries to read ahead nr_to_read pages. Even if a newly allocated page fails to be charged to the memcg, page_cache_ra_unbounded() still tries to read ahead the next page. This leads to too much memory reclaim.
Stop readahead if mem_cgroup_charge() fails, i.e. if add_to_page_cache_lru() returns -ENOMEM (a userspace model of this change follows the diff below).
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
---
 mm/readahead.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/mm/readahead.c b/mm/readahead.c
index ed23d5dec123..22dd9c8fe808 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -220,11 +220,18 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 		if (mapping->a_ops->readpages) {
 			page->index = index + i;
 			list_add(&page->lru, &page_pool);
-		} else if (add_to_page_cache_lru(page, mapping, index + i,
-					gfp_mask) < 0) {
-			put_page(page);
-			read_pages(ractl, &page_pool, true);
-			continue;
+		} else {
+			int ret;
+
+			ret = add_to_page_cache_lru(page, mapping, index + i,
+						    gfp_mask);
+			if (ret < 0) {
+				put_page(page);
+				read_pages(ractl, &page_pool, true);
+				if (ret == -ENOMEM)
+					break;
+				continue;
+			}
 		}
 		if (i == nr_to_read - lookahead_size)
 			SetPageReadahead(page);
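To illustrate the behavioural change in isolation, here is a hedged userspace model of the loop above. charge_page() is a hypothetical stand-in for the memcg charge performed by add_to_page_cache_lru(); only the break-on-ENOMEM control flow mirrors the patch.

/*
 * Userspace model of the page_cache_ra_unbounded() change above.
 * charge_page() is a hypothetical stand-in for the memcg charge done
 * inside add_to_page_cache_lru(); it starts failing once the "memcg"
 * is full. With stop_on_enomem set, the loop gives up on the first
 * -ENOMEM instead of retrying for every remaining page.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define MEMCG_LIMIT 8

static int charged;

static int charge_page(void)
{
	if (charged >= MEMCG_LIMIT)
		return -ENOMEM;		/* memcg is at its limit */
	charged++;
	return 0;
}

static int do_readahead(int nr_to_read, bool stop_on_enomem)
{
	int i, ret, attempts = 0;

	for (i = 0; i < nr_to_read; i++) {
		attempts++;
		ret = charge_page();
		if (ret < 0) {
			if (ret == -ENOMEM && stop_on_enomem)
				break;	/* the fix: stop the whole loop */
			continue;	/* old behaviour: keep trying */
		}
	}
	return attempts;
}

int main(void)
{
	charged = 0;
	printf("old behaviour: %d charge attempts\n", do_readahead(64, false));
	charged = 0;
	printf("new behaviour: %d charge attempts\n", do_readahead(64, true));
	return 0;
}

With a limit of 8 pages and 64 pages requested, the old behaviour makes 64 charge attempts (each one potentially triggering memcg reclaim), while the new behaviour stops after 9.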
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8EXN6
--------------------------------
When there are too many refaults of active pages in a file, it means that memory reclaim is ongoing. Stop bothering with read-ahead since it will only waste IO (a userspace model of this heuristic follows the diff below).
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 include/linux/fs.h      |  1 +
 include/linux/pagemap.h |  1 +
 mm/filemap.c            | 16 ++++++++++++++++
 mm/readahead.c          |  4 ++++
 4 files changed, 22 insertions(+)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f66f51edd0ba..36f5741071eb 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -929,6 +929,7 @@ struct file_ra_state {
 
 	unsigned int ra_pages;		/* Maximum readahead window */
 	unsigned int mmap_miss;		/* Cache miss stat for mmap accesses */
+	unsigned int active_refault;	/* Number of active page refault */
 	loff_t prev_pos;		/* Cache last read() position */
 };
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0bfa9cce6589..b2f64a793379 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -806,6 +806,7 @@ struct readahead_control {
 	pgoff_t _index;
 	unsigned int _nr_pages;
 	unsigned int _batch_count;
+	unsigned int _active_refault;
 };
 
 #define DEFINE_READAHEAD(rac, f, m, i) \

diff --git a/mm/filemap.c b/mm/filemap.c
index 04e4aad7ed67..d1805c339653 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2859,6 +2859,7 @@ EXPORT_SYMBOL(mapping_seek_hole_data);
 
 #ifdef CONFIG_MMU
 #define MMAP_LOTSAMISS  (100)
+#define ACTIVE_REFAULT_LIMIT	(10000)
 /*
  * lock_page_maybe_drop_mmap - lock the page, possibly dropping the mmap_lock
  * @vmf - the vm_fault for this fault.
@@ -2943,6 +2944,18 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 	if (mmap_miss > MMAP_LOTSAMISS)
 		return fpin;
 
+	ractl._active_refault = READ_ONCE(ra->active_refault);
+	if (ractl._active_refault)
+		WRITE_ONCE(ra->active_refault, --ractl._active_refault);
+
+	/*
+	 * If there are a lot of refault of active pages in this file,
+	 * that means the memory reclaim is ongoing. Stop bothering with
+	 * read-ahead since it will only waste IO.
+	 */
+	if (ractl._active_refault >= ACTIVE_REFAULT_LIMIT)
+		return fpin;
+
 	/*
 	 * mmap read-around
 	 */
@@ -2952,6 +2965,9 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
 	ra->async_size = ra->ra_pages / 4;
 	ractl._index = ra->start;
 	do_page_cache_ra(&ractl, ra->size, ra->async_size);
+
+	WRITE_ONCE(ra->active_refault, ractl._active_refault);
+
 	return fpin;
 }
 
diff --git a/mm/readahead.c b/mm/readahead.c
index 22dd9c8fe808..0331dcad7805 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -236,6 +236,10 @@ void page_cache_ra_unbounded(struct readahead_control *ractl,
 		if (i == nr_to_read - lookahead_size)
 			SetPageReadahead(page);
 		ractl->_nr_pages++;
+		if (unlikely(PageActive(page)))
+			ractl->_active_refault++;
+		else if (unlikely(ractl->_active_refault))
+			ractl->_active_refault--;
 	}
 
 	/*
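To make the heuristic above concrete, here is a hedged userspace model (the harness is hypothetical; only the counter logic mirrors the patch). The per-file counter rises while readahead keeps inserting pages that refault as active, decays otherwise, and sync readahead is suppressed once the counter reaches ACTIVE_REFAULT_LIMIT.

/*
 * Userspace model of the active-refault heuristic (hypothetical
 * harness; only the counter logic mirrors the patch). The counter is
 * incremented for each readahead page that is found active (a refault
 * of a recently reclaimed hot page) and decremented otherwise; once it
 * reaches ACTIVE_REFAULT_LIMIT, sync mmap readahead bails out before
 * doing read-around.
 */
#include <stdbool.h>
#include <stdio.h>

#define ACTIVE_REFAULT_LIMIT 10000

static unsigned int active_refault;	/* models ra->active_refault */

/* Models the accounting added to page_cache_ra_unbounded(). */
static void account_page(bool page_was_active)
{
	if (page_was_active)
		active_refault++;
	else if (active_refault)
		active_refault--;
}

/* Models the early return added to do_sync_mmap_readahead(). */
static bool should_skip_readahead(void)
{
	if (active_refault)
		active_refault--;	/* slow decay, one step per fault */
	return active_refault >= ACTIVE_REFAULT_LIMIT;
}

int main(void)
{
	unsigned long i;

	/* Sustained refaults of active pages push the counter up... */
	for (i = 0; i < 20000; i++)
		account_page(true);
	printf("under thrashing: skip readahead = %d\n",
	       should_skip_readahead());

	/* ...and normal (inactive) pages let it decay back down. */
	for (i = 0; i < 20000; i++)
		account_page(false);
	printf("after recovery:  skip readahead = %d\n",
	       should_skip_readahead());
	return 0;
}

Note that the decay in do_sync_mmap_readahead() is one step per fault, so readahead stays suppressed for a while after thrashing stops, which matches the intent of not wasting IO while reclaim is still ongoing.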