From: Christoph Hellwig <hch@lst.de>
mainline inclusion
from mainline-v6.1-rc1
commit 176042404ee6a96ba7e9054e1bda6220360a26ad
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8BCRJ
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=176042404ee6a96ba7e9054e1bda6220360a26ad
--------------------------------
PSI tries to account for the cost of bringing back in pages discarded by the MM LRU management. Currently the prime place for that is hooked into the bio submission path, which is a rather bad place:
- it does not actually account I/O for non-block file systems, of which we have many
- it adds overhead and a layering violation to the block layer
Add the accounting into the two places in the core MM code that read pages into an address space by calling into ->read_folio and ->readahead so that the entire file system operations are covered, to broaden the coverage and allow removing the accounting in the block layer going forward.
As psi_memstall_enter can deal with nested calls this will not lead to double accounting even while the bio annotations are still present.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20220915094200.139713-2-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Conflicts:
	mm/filemap.c
	mm/readahead.c
[ Replace folio_test_workingset with PageWorkingset, and skip nonexistent function. ]
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 include/linux/pagemap.h | 2 ++
 mm/filemap.c            | 7 +++++++
 mm/readahead.c          | 7 +++++++
 3 files changed, 16 insertions(+)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0bfa9cce65890..728550720e126 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -806,6 +806,8 @@ struct readahead_control { pgoff_t _index; unsigned int _nr_pages; unsigned int _batch_count; + bool _workingset; + unsigned long _pflags; };
#define DEFINE_READAHEAD(rac, f, m, i) \ diff --git a/mm/filemap.c b/mm/filemap.c index fd4aae06ff150..8beb7ccae51f0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2294,6 +2294,8 @@ generic_file_buffered_read_readpage(struct kiocb *iocb, struct page *page) { struct file_ra_state *ra = &filp->f_ra; + bool workingset = PageWorkingset(page); + unsigned long pflags; int error;
if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) { @@ -2308,8 +2310,13 @@ generic_file_buffered_read_readpage(struct kiocb *iocb, * PG_error will be set again if readpage fails. */ ClearPageError(page); + /* Start the actual read. The read will unlock the page. */ + if (unlikely(workingset)) + psi_memstall_enter(&pflags); error = mapping->a_ops->readpage(filp, page); + if (unlikely(workingset)) + psi_memstall_leave(&pflags);
if (unlikely(error)) { put_page(page); diff --git a/mm/readahead.c b/mm/readahead.c index ed23d5dec1238..a9e6169cb3710 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -17,6 +17,7 @@ #include <linux/task_io_accounting_ops.h> #include <linux/pagevec.h> #include <linux/pagemap.h> +#include <linux/psi.h> #include <linux/syscalls.h> #include <linux/file.h> #include <linux/mm_inline.h> @@ -125,6 +126,8 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages, if (!readahead_count(rac)) goto out;
+ if (unlikely(rac->_workingset)) + psi_memstall_enter(&rac->_pflags); blk_start_plug(&plug);
if (aops->readahead) { @@ -149,6 +152,9 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages, }
blk_finish_plug(&plug); + if (unlikely(rac->_workingset)) + psi_memstall_leave(&rac->_pflags); + rac->_workingset = false;
BUG_ON(!list_empty(pages)); BUG_ON(readahead_count(rac)); @@ -228,6 +234,7 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, } if (i == nr_to_read - lookahead_size) SetPageReadahead(page); + ractl->_workingset |= PageWorkingset(page); ractl->_nr_pages++; }