From: yu kuai yukuai3@huawei.com
openEuler inclusion
category: feature
bugzilla: 46858
CVE: NA
---------------------------
In order to analyse the I/O performance of buffered I/O, it is useful to obtain the number of dirty pages of an inode in the filesystem.
This feature is migrated from RedHat 7.2. It creates 3 interfaces using procfs: /proc/dirty/buffer_size for buffer allocation and release; /proc/dirty/page_threshold to filter the result; /proc/dirty/dirty_list to get the dirty pages.
Visit http://openeuler.huawei.com/bugzilla/show_bug.cgi?id=23941 for details about modifications and implementations.
Signed-off-by: yu kuai yukuai3@huawei.com Reviewed-by: zhangyi (F) yi.zhang@huawei.com Signed-off-by: zhangyi (F) yi.zhang@huawei.com Signed-off-by: Dianfang Zhang zhangdianfang@huawei.com --- fs/Makefile | 1 + fs/dirty_pages.c | 474 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 475 insertions(+) create mode 100644 fs/dirty_pages.c
diff --git a/fs/Makefile b/fs/Makefile index 999d1a23f036..727338b4430d 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_COREDUMP) += coredump.o obj-$(CONFIG_SYSCTL) += drop_caches.o +obj-$(CONFIG_SYSCTL) += dirty_pages.o
obj-$(CONFIG_FHANDLE) += fhandle.o obj-y += iomap/ diff --git a/fs/dirty_pages.c b/fs/dirty_pages.c new file mode 100644 index 000000000000..9972b53d9acc --- /dev/null +++ b/fs/dirty_pages.c @@ -0,0 +1,474 @@ +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> +#include <linux/pagemap.h> +#include <linux/pagevec.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/proc_fs.h> +#include <linux/kdev_t.h> +#include <linux/vmalloc.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include "internal.h" + +static char *buf_dirty; /* buffer to store number of dirty pages */ +static unsigned long buf_size; /* size of buffer in bytes */ +static long buff_num; /* size of buffer in number of pages */ +static int buff_limit; /* filter threshold of dirty pages*/ +static spinlock_t inode_sb_list_lock; + +static struct proc_dir_entry *dirty_dir; + +static bool warn_once; /* print warn message once */ +static bool buff_used; /* buffer is in used */ +static struct mutex buff_lock; /* lock when buffer is changed */ + +/* proc root directory */ +#define DIRTY_ROOT "dirty" +/* proc file for buffer allocation and release */ +#define DIRTY_SWITCH "buffer_size" +/* proc file to obtain diry pages of each inode */ +#define DIRTY_PAGES "dirty_list" +/* proc file to filter result */ +#define DIRTY_LIMIT "page_threshold" + +static void seq_set_overflow(struct seq_file *m) +{ + m->count = m->size; +} + +static unsigned long dump_dirtypages_inode(struct inode *inode) +{ + struct pagevec pvec; + unsigned long nr_dirtys = 0; + unsigned int nr_pages; + pgoff_t index = 0; + + pagevec_init(&pvec); + + while (1) { + nr_pages = pagevec_lookup_range_tag(&pvec, inode->i_mapping, + &index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY); + if (!nr_pages) + break; + + pagevec_release(&pvec); + cond_resched(); + + nr_dirtys += nr_pages; + } + + return nr_dirtys; +} + +static char 
*inode_filename(struct inode *inode, char *tmpname) +{ + struct dentry *dentry; + char *filename; + + dentry = d_find_alias(inode); + if (!dentry) + return ERR_PTR(-ENOENT); + + tmpname[PATH_MAX-1] = '\0'; + filename = dentry_path_raw(dentry, tmpname, PATH_MAX); + + dput(dentry); + + return filename; +} + +static inline bool is_sb_writable(struct super_block *sb) +{ + if (sb_rdonly(sb)) + return false; + + if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) + return false; + + return true; +} + +/* + * dump_dirtypages_sb - dump the dirty pages of each inode in the sb + * @sb the super block + * @m the seq_file witch is initialized in proc_dpages_open + * + * For each inode in the sb, call dump_dirtypages_pages to get the number + * of dirty pages. And use seq_printf to store the result in the buffer + * if it's not less than the threshold. The inode in unusual state will + * be skipped. + */ +static void dump_dirtypages_sb(struct super_block *sb, struct seq_file *m) +{ + struct inode *inode, *toput_inode = NULL; + unsigned long nr_dirtys; + const char *fstype; + char *filename; + char *tmpname; + int limit = READ_ONCE(buff_limit); + + if (warn_once) + return; + + if (!is_sb_writable(sb)) + return; + + tmpname = kmalloc(PATH_MAX, GFP_KERNEL); + if (!tmpname) + return; + + spin_lock(&inode_sb_list_lock); + list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + spin_lock(&inode->i_lock); + /* + * We must skip inodes in unusual state. We may also skip + * inodes without pages but we deliberately won't in case + * we need to reschedule to avoid softlockups. 
+ */ + if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || + (inode->i_mapping->nrpages == 0 && !need_resched())) { + spin_unlock(&inode->i_lock); + continue; + } + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_sb_list_lock); + + cond_resched(); + + nr_dirtys = dump_dirtypages_inode(inode); + if (!nr_dirtys || nr_dirtys < limit) + goto skip; + + filename = inode_filename(inode, tmpname); + if (IS_ERR_OR_NULL(filename)) + filename = "unknown"; + + if (sb->s_type && sb->s_type->name) + fstype = sb->s_type->name; + else + fstype = "unknown"; + /* + * seq_printf return nothing, if the buffer is exhausted + * (m->size <= m->count), seq_printf will not store + * anything, just set m->count = m->size and return. In + * that case, log a warn message in buffer to remind users. + */ + if (!warn_once && m->size <= m->count) { + warn_once = true; + seq_set_overflow(m); + strncpy(m->buf+m->count-12, "terminated\n\0", 12); + goto done; + } + seq_printf(m, "FSType: %s, Dev ID: %u(%u:%u) ino %lu, dirty pages %lu, path %s\n", + fstype, sb->s_dev, MAJOR(sb->s_dev), + MINOR(sb->s_dev), inode->i_ino, + nr_dirtys, filename); +skip: + iput(toput_inode); + toput_inode = inode; + spin_lock(&inode_sb_list_lock); + } + spin_unlock(&inode_sb_list_lock); +done: + iput(toput_inode); + kfree(tmpname); +} + +static int proc_dpages_show(struct seq_file *m, void *v) +{ + iterate_supers((void *)dump_dirtypages_sb, (void *)m); + return 0; +} + + +static ssize_t seq_read_dirty( + struct file *file, + char __user *buf, + size_t size, + loff_t *ppos) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + size_t copied = 0; + size_t n; + int err = 0; + + buff_used = true; + if (m->count == 0) { + err = m->op->show(m, NULL); + if (err < 0) + goto done; + } + + n = min(m->count - m->from, size); + err = simple_read_from_buffer(buf, n, + (loff_t *) &m->from, m->buf, m->count); + if (err < 0) { + err = -EFAULT; + goto done; + } + copied += n; +done: + if (!copied) + 
copied = err; + else + *ppos += copied; + buff_used = false; + return copied; +} + +static void free_buf_dirty(void) +{ + if (buf_dirty != NULL) { + vfree(buf_dirty); + buf_dirty = NULL; + buf_size = 0; + } +} +static ssize_t write_proc( + struct file *filp, + const char *buf, + size_t count, + loff_t *offp) +{ + char *msg; + int ret = 0; + long old_buff_num; + + msg = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + if (count > PAGE_SIZE) { + ret = -EINVAL; + goto error; + } + + msg[count] = '\0'; + + if (copy_from_user(msg, buf, count)) { + ret = -EINVAL; + goto error; + } + + old_buff_num = buff_num; + ret = kstrtol(msg, 10, &buff_num); + if (ret != 0 || buff_num < 0 || buff_num > 102400) { + buff_num = 0; + ret = -EINVAL; + goto error; + } + + mutex_lock(&buff_lock); + + if (buff_used) { + ret = -EBUSY; + goto out; + } + + buff_used = true; + + ret = count; + if (buff_num == 0) { + free_buf_dirty(); + goto out; + } + if (buff_num == old_buff_num) + goto out; + + free_buf_dirty(); + buf_size = PAGE_SIZE * buff_num; + buf_dirty = vmalloc(buf_size); + + if (!buf_dirty) { + ret = -ENOMEM; + goto out; + } +out: + buff_used = false; + mutex_unlock(&buff_lock); +error: + kfree(msg); + return ret; +} + +static int proc_dpages_open(struct inode *inode, struct file *filp) +{ + int ret; + struct seq_file *m; + + ret = single_open(filp, proc_dpages_show, NULL); + m = filp->private_data; + mutex_lock(&buff_lock); + if (buff_used) { + ret = -EBUSY; + goto out; + } + if (!ret) { + if (buf_dirty == NULL || buf_size == 0) { + pr_info("please allocate buffer before getting dirty pages\n"); + ret = -ENOMEM; + goto out; + } else { + warn_once = false; + memset(buf_dirty, 0, buf_size); + if (!m->buf) { + m->size = buf_size; + m->buf = buf_dirty; + } + } + } +out: + mutex_unlock(&buff_lock); + return ret; +} + +static int seq_release_dirty(struct inode *inode, struct file *file) +{ + struct seq_file *m = file->private_data; + + buff_used = false; + /* we don't want 
to free the buf */ + m->buf = NULL; + single_release(inode, file); + return 0; +} + +static const struct proc_ops proc_dpages_operations = { + .proc_open = proc_dpages_open, + .proc_read = seq_read_dirty, + .proc_release = seq_release_dirty, +}; + +static int proc_switch_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%ld\n", buff_num); + return 0; +} + +static int proc_limit_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", READ_ONCE(buff_limit)); + return 0; +} + +static int proc_switch_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, proc_switch_show, NULL); +} + +static int proc_limit_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, proc_limit_show, NULL); +} + +static ssize_t write_limit_proc( + struct file *filp, + const char *buf, + size_t count, + loff_t *offp) +{ + char *msg; + int ret = 0; + long temp; + + msg = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + if (count > PAGE_SIZE) { + ret = -EINVAL; + goto error; + } + + msg[count] = '\0'; + if (copy_from_user(msg, buf, count)) { + ret = -EINVAL; + goto error; + } + ret = kstrtol(msg, 10, &temp); + if (ret != 0 || temp < 0) { + ret = -EINVAL; + goto error; +} + + WRITE_ONCE(buff_limit, temp); + ret = count; + +error: + kfree(msg); + return ret; +} + + +static const struct proc_ops proc_switch_operations = { + .proc_open = proc_switch_open, + .proc_read = seq_read, + .proc_write = write_proc, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static const struct proc_ops proc_limit_operations = { + .proc_open = proc_limit_open, + .proc_read = seq_read, + .proc_write = write_limit_proc, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + + +static int __init dpages_proc_init(void) +{ + static struct proc_dir_entry *proc_file; + + dirty_dir = proc_mkdir(DIRTY_ROOT, NULL); + if (!dirty_dir) + goto fail_dir; + + proc_file = proc_create(DIRTY_PAGES, 0440, + dirty_dir, &proc_dpages_operations); 
+ if (!proc_file) + goto fail_pages; + + proc_file = proc_create(DIRTY_SWITCH, 0640, + dirty_dir, &proc_switch_operations); + if (!proc_file) + goto fail_switch; + + proc_file = proc_create(DIRTY_LIMIT, 0640, + dirty_dir, &proc_limit_operations); + if (!proc_file) + goto fail_limit; + + mutex_init(&buff_lock); + return 0; + +fail_limit: + remove_proc_entry(DIRTY_SWITCH, dirty_dir); +fail_switch: + remove_proc_entry(DIRTY_PAGES, dirty_dir); +fail_pages: + remove_proc_entry(DIRTY_ROOT, NULL); +fail_dir: + return -ENOMEM; +} + +static void dpages_proc_exit(void) +{ + mutex_lock(&buff_lock); + free_buf_dirty(); + mutex_unlock(&buff_lock); + remove_proc_entry(DIRTY_PAGES, dirty_dir); + remove_proc_entry(DIRTY_SWITCH, dirty_dir); + remove_proc_entry(DIRTY_LIMIT, dirty_dir); + remove_proc_entry(DIRTY_ROOT, NULL); +} + +module_init(dpages_proc_init); +module_exit(dpages_proc_exit);