hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8OHM4?from=project-issue
---------------------------
In order to analyse the IO performance when using buffer IO, it's useful to obtain the number of dirty pages for each inode in the filesystem.
This feature depends on 'CONFIG_DIRTY_PAGES'. It creates 2 interfaces by using procfs: /proc/dirty/page_threshold to filter the result, and /proc/dirty/dirty_list to get the dirty pages.
Signed-off-by: Zizhi Wo wozizhi@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + fs/Kconfig | 13 ++ fs/Makefile | 1 + fs/dirty_pages.c | 275 +++++++++++++++++++++++++ 5 files changed, 291 insertions(+) create mode 100644 fs/dirty_pages.c
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 3781d138c3e3..ce101e7d8fcc 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -6881,6 +6881,7 @@ CONFIG_PROC_PAGE_MONITOR=y CONFIG_PROC_CHILDREN=y CONFIG_KERNFS=y CONFIG_SYSFS=y +CONFIG_DIRTY_PAGES=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index e19bf53c0bd9..76b87ed62579 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -8088,6 +8088,7 @@ CONFIG_PROC_PID_ARCH_STATUS=y CONFIG_PROC_CPU_RESCTRL=y CONFIG_KERNFS=y CONFIG_SYSFS=y +CONFIG_DIRTY_PAGES=y CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_XATTR=y diff --git a/fs/Kconfig b/fs/Kconfig index aa7e03cc1941..a2280cf98729 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -170,6 +170,19 @@ source "fs/proc/Kconfig" source "fs/kernfs/Kconfig" source "fs/sysfs/Kconfig"
+config DIRTY_PAGES + bool "Dumps the number of dirty pages of each file" + depends on PROC_FS + default y + help + This config supports the rendering of dirty page data to the user, + which may be useful to analyze the IO performance when using buffer + IO. + + It creates 2 interfaces by using procfs: /proc/dirty/page_threshold + to filter the result by a minimum number of dirty pages, and + /proc/dirty/dirty_list to get the dirty pages of each file. + config TMPFS bool "Tmpfs virtual memory file system support (former shm fs)" depends on SHMEM diff --git a/fs/Makefile b/fs/Makefile index f9541f40be4e..6246e173e1e4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -44,6 +44,7 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_COREDUMP) += coredump.o obj-$(CONFIG_SYSCTL) += drop_caches.o sysctls.o +obj-$(CONFIG_DIRTY_PAGES) += dirty_pages.o
obj-$(CONFIG_FHANDLE) += fhandle.o obj-y += iomap/ diff --git a/fs/dirty_pages.c b/fs/dirty_pages.c new file mode 100644 index 000000000000..4edcba2bf1d0 --- /dev/null +++ b/fs/dirty_pages.c @@ -0,0 +1,314 @@
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/uaccess.h>
+#include <linux/pagemap.h>
+#include <linux/pagevec.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/kdev_t.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include "internal.h"
+
+static int buff_limit; /* filter threshold of dirty pages */
+
+static struct proc_dir_entry *dirty_dir;
+
+/* proc root directory */
+#define DIRTY_ROOT "dirty"
+/* proc file to obtain dirty pages of each inode */
+#define DIRTY_PAGES "dirty_list"
+/* proc file to filter result */
+#define DIRTY_LIMIT "page_threshold"
+
+/* marker stored at the tail of the seq_file buffer once it is exhausted */
+static const char dirty_trunc_msg[] = "terminated\n";
+
+/* Flag the seq_file buffer as full: m->count is raised to m->size. */
+static void seq_set_overflow(struct seq_file *m)
+{
+	m->count = m->size;
+}
+
+/*
+ * dump_dirtypages_inode - count the dirty pages of an inode
+ * @inode the inode whose page cache is scanned
+ *
+ * Look up the folios tagged PAGECACHE_TAG_DIRTY in inode->i_mapping batch
+ * by batch and sum the pages of every folio found, so that a folio made
+ * of several pages is not counted as one page.
+ */
+static unsigned long dump_dirtypages_inode(struct inode *inode)
+{
+	struct folio_batch fbatch;
+	unsigned long nr_dirtys = 0;
+	unsigned int nr_folios;
+	unsigned int i;
+	pgoff_t index = 0;
+
+	folio_batch_init(&fbatch);
+
+	while (1) {
+		nr_folios = filemap_get_folios_tag(inode->i_mapping, &index,
+				(pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch);
+		if (!nr_folios)
+			break;
+
+		for (i = 0; i < nr_folios; i++)
+			nr_dirtys += folio_nr_pages(fbatch.folios[i]);
+
+		folio_batch_release(&fbatch);
+		cond_resched();
+	}
+
+	return nr_dirtys;
+}
+
+/*
+ * inode_filename - get the path of an inode
+ * @inode the inode to look up
+ * @tmpname scratch buffer of PATH_MAX bytes used to build the path
+ *
+ * Return ERR_PTR(-ENOENT) if d_find_alias() finds no dentry for the inode,
+ * otherwise whatever dentry_path_raw() returns for that dentry.
+ */
+static char *inode_filename(struct inode *inode, char *tmpname)
+{
+	struct dentry *dentry;
+	char *filename;
+
+	dentry = d_find_alias(inode);
+	if (!dentry)
+		return ERR_PTR(-ENOENT);
+
+	tmpname[PATH_MAX-1] = '\0';
+	filename = dentry_path_raw(dentry, tmpname, PATH_MAX);
+
+	dput(dentry);
+
+	return filename;
+}
+
+/* A read-only or completely frozen sb is not scanned for dirty pages. */
+static inline bool is_sb_writable(struct super_block *sb)
+{
+	if (sb_rdonly(sb))
+		return false;
+
+	if (sb->s_writers.frozen == SB_FREEZE_COMPLETE)
+		return false;
+
+	return true;
+}
+
+/*
+ * dump_dirtypages_sb - dump the dirty pages of each inode in the sb
+ * @sb the super block
+ * @arg the seq_file which is initialized in proc_dpages_open
+ *
+ * For each inode in the sb, call dump_dirtypages_inode to get the number
+ * of dirty pages. And use seq_printf to store the result in the buffer
+ * if it's not less than the threshold. The inode in unusual state will
+ * be skipped.
+ *
+ * The prototype matches the callback type of iterate_supers(), so the
+ * function can be passed to it without a function pointer cast.
+ */
+static void dump_dirtypages_sb(struct super_block *sb, void *arg)
+{
+	struct seq_file *m = arg;
+	struct inode *inode, *toput_inode = NULL;
+	unsigned long limit = READ_ONCE(buff_limit);
+	unsigned long nr_dirtys;
+	const char *fstype;
+	char *filename;
+	char *tmpname;
+
+	if (!is_sb_writable(sb))
+		return;
+
+	/* The buffer is already full, nothing more can be stored. */
+	if (m->size <= m->count)
+		return;
+
+	tmpname = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!tmpname)
+		return;
+
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+		spin_lock(&inode->i_lock);
+
+		/*
+		 * We must skip inodes in unusual state. We may also skip
+		 * inodes without pages but we deliberately won't in case
+		 * we need to reschedule to avoid softlockups.
+		 */
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    (inode->i_mapping->nrpages == 0 && !need_resched())) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
+		__iget(inode);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&sb->s_inode_list_lock);
+
+		cond_resched();
+
+		nr_dirtys = dump_dirtypages_inode(inode);
+		if (!nr_dirtys || nr_dirtys < limit)
+			goto skip;
+
+		filename = inode_filename(inode, tmpname);
+		if (IS_ERR_OR_NULL(filename))
+			filename = "unknown";
+
+		if (sb->s_type && sb->s_type->name)
+			fstype = sb->s_type->name;
+		else
+			fstype = "unknown";
+		/*
+		 * seq_printf return nothing, if the buffer is exhausted
+		 * (m->size <= m->count), seq_printf will not store
+		 * anything, just set m->count = m->size and return. In
+		 * that case, log a warn message in buffer to remind users.
+		 */
+		if (m->size <= m->count) {
+			seq_set_overflow(m);
+			/* never write in front of the start of the buffer */
+			if (m->count >= sizeof(dirty_trunc_msg)) {
+				memcpy(m->buf + m->count - sizeof(dirty_trunc_msg),
+				       dirty_trunc_msg, sizeof(dirty_trunc_msg));
+			}
+			iput(inode);
+			goto done;
+		}
+		seq_printf(m, "FSType: %s, Dev ID: %u(%u:%u) ino %lu, dirty pages %lu, path %s\n",
+			   fstype, sb->s_dev, MAJOR(sb->s_dev),
+			   MINOR(sb->s_dev), inode->i_ino,
+			   nr_dirtys, filename);
+skip:
+		iput(toput_inode);
+		toput_inode = inode;
+		spin_lock(&sb->s_inode_list_lock);
+	}
+	spin_unlock(&sb->s_inode_list_lock);
+done:
+	iput(toput_inode);
+	kfree(tmpname);
+}
+
+/* Show callback of dirty_list: dump the dirty pages of every super block. */
+static int proc_dpages_show(struct seq_file *m, void *v)
+{
+	iterate_supers(dump_dirtypages_sb, m);
+	return 0;
+}
+
+static int proc_dpages_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, proc_dpages_show, NULL);
+}
+
+static const struct proc_ops proc_dpages_operations = {
+	.proc_open	= proc_dpages_open,
+	.proc_read	= seq_read,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+};
+
+/* Show callback of page_threshold: print the current threshold. */
+static int proc_limit_show(struct seq_file *m, void *v)
+{
+	seq_printf(m, "%d\n", READ_ONCE(buff_limit));
+	return 0;
+}
+
+static int proc_limit_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, proc_limit_show, NULL);
+}
+
+/*
+ * write_limit_proc - set the threshold from a decimal string
+ * @filp the page_threshold file (unused)
+ * @buf user buffer holding the new value
+ * @count number of bytes in @buf
+ * @offp file position (unused)
+ *
+ * The value must be in the range [0, INT_MAX]. Return @count on success,
+ * -EINVAL if the input is too long or is not a valid value, or the error
+ * of memdup_user_nul() if the user buffer cannot be copied.
+ */
+static ssize_t write_limit_proc(
+	struct file *filp,
+	const char __user *buf,
+	size_t count,
+	loff_t *offp)
+{
+	char *msg;
+	int limit;
+	int ret;
+
+	if (count > PAGE_SIZE)
+		return -EINVAL;
+
+	/* the copy is always NUL terminated, whatever @count is */
+	msg = memdup_user_nul(buf, count);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
+
+	ret = kstrtoint(msg, 10, &limit);
+	kfree(msg);
+	if (ret || limit < 0)
+		return -EINVAL;
+
+	WRITE_ONCE(buff_limit, limit);
+
+	return count;
+}
+
+static const struct proc_ops proc_limit_operations = {
+	.proc_open	= proc_limit_open,
+	.proc_read	= seq_read,
+	.proc_write	= write_limit_proc,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+};
+
+/* Create the proc directory DIRTY_ROOT and its two files. */
+static int __init dpages_proc_init(void)
+{
+	struct proc_dir_entry *proc_file;
+
+	dirty_dir = proc_mkdir(DIRTY_ROOT, NULL);
+	if (!dirty_dir)
+		goto fail_dir;
+
+	proc_file = proc_create(DIRTY_PAGES, 0440,
+		dirty_dir, &proc_dpages_operations);
+	if (!proc_file)
+		goto fail_pages;
+
+	proc_file = proc_create(DIRTY_LIMIT, 0640,
+		dirty_dir, &proc_limit_operations);
+	if (!proc_file)
+		goto fail_limit;
+
+	return 0;
+
+fail_limit:
+	remove_proc_entry(DIRTY_PAGES, dirty_dir);
+fail_pages:
+	remove_proc_entry(DIRTY_ROOT, NULL);
+fail_dir:
+	return -ENOMEM;
+}
+
+subsys_initcall(dpages_proc_init);
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3400 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/4...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3400 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/4...