
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICA1GK -------------------------------- Create a bpf iter target for the 'diskstats' interface, to which the bpf prog can attach. Signed-off-by: GONG Ruiqi <gongruiqi1@huawei.com> --- block/genhd.c | 222 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 186 insertions(+), 36 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 61d340aa30f4..9d9b60501bcb 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -26,6 +26,10 @@ #include <linux/badblocks.h> #include <linux/part_stat.h> #include <linux/blktrace_api.h> +#ifdef CONFIG_BPF_RVI +#include <linux/bpf.h> +#include <linux/pid_namespace.h> +#endif #include "blk-throttle.h" #include "blk.h" @@ -1231,6 +1235,57 @@ const struct device_type disk_type = { }; #ifdef CONFIG_PROC_FS +static int native_diskstats_show(struct seq_file *seqf, struct block_device *hd, + struct disk_stats *stat, unsigned int inflight) +{ + seq_printf(seqf, "%4d %7d %pg " + "%lu %lu %lu %u " + "%lu %lu %lu %u " + "%u %u %u " + "%lu %lu %lu %u " + "%lu %u" + "\n", + MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd, + stat->ios[STAT_READ], + stat->merges[STAT_READ], + stat->sectors[STAT_READ], + (unsigned int)div_u64(stat->nsecs[STAT_READ], + NSEC_PER_MSEC), + stat->ios[STAT_WRITE], + stat->merges[STAT_WRITE], + stat->sectors[STAT_WRITE], + (unsigned int)div_u64(stat->nsecs[STAT_WRITE], + NSEC_PER_MSEC), + inflight, + jiffies_to_msecs(stat->io_ticks), + (unsigned int)div_u64(stat->nsecs[STAT_READ] + + stat->nsecs[STAT_WRITE] + + stat->nsecs[STAT_DISCARD] + + stat->nsecs[STAT_FLUSH], + NSEC_PER_MSEC), + stat->ios[STAT_DISCARD], + stat->merges[STAT_DISCARD], + stat->sectors[STAT_DISCARD], + (unsigned int)div_u64(stat->nsecs[STAT_DISCARD], + NSEC_PER_MSEC), + stat->ios[STAT_FLUSH], + (unsigned int)div_u64(stat->nsecs[STAT_FLUSH], + NSEC_PER_MSEC) + ); + return 0; +} + +#ifdef CONFIG_BPF_RVI +static int __diskstats_show(struct seq_file *seqf, struct 
block_device *hd, + struct disk_stats *stat, unsigned int inflight); +#else +static int __diskstats_show(struct seq_file *seqf, struct block_device *hd, + struct disk_stats *stat, unsigned int inflight) +{ + return native_diskstats_show(seqf, hd, stat, inflight); +} +#endif + /* * aggregate disk stat collector. Uses the same stats that the sysfs * entries do, above, but makes them available through one seq_file. @@ -1245,6 +1300,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) unsigned int inflight; struct disk_stats stat; unsigned long idx; + int ret = 0; /* if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) @@ -1269,44 +1325,13 @@ static int diskstats_show(struct seq_file *seqf, void *v) part_stat_unlock(); } part_stat_read_all(hd, &stat); - seq_printf(seqf, "%4d %7d %pg " - "%lu %lu %lu %u " - "%lu %lu %lu %u " - "%u %u %u " - "%lu %lu %lu %u " - "%lu %u" - "\n", - MAJOR(hd->bd_dev), MINOR(hd->bd_dev), hd, - stat.ios[STAT_READ], - stat.merges[STAT_READ], - stat.sectors[STAT_READ], - (unsigned int)div_u64(stat.nsecs[STAT_READ], - NSEC_PER_MSEC), - stat.ios[STAT_WRITE], - stat.merges[STAT_WRITE], - stat.sectors[STAT_WRITE], - (unsigned int)div_u64(stat.nsecs[STAT_WRITE], - NSEC_PER_MSEC), - inflight, - jiffies_to_msecs(stat.io_ticks), - (unsigned int)div_u64(stat.nsecs[STAT_READ] + - stat.nsecs[STAT_WRITE] + - stat.nsecs[STAT_DISCARD] + - stat.nsecs[STAT_FLUSH], - NSEC_PER_MSEC), - stat.ios[STAT_DISCARD], - stat.merges[STAT_DISCARD], - stat.sectors[STAT_DISCARD], - (unsigned int)div_u64(stat.nsecs[STAT_DISCARD], - NSEC_PER_MSEC), - stat.ios[STAT_FLUSH], - (unsigned int)div_u64(stat.nsecs[STAT_FLUSH], - NSEC_PER_MSEC) - ); + ret = __diskstats_show(seqf, hd, &stat, inflight); + if (ret) + break; } rcu_read_unlock(); - return 0; + return ret; } static const struct seq_operations diskstats_op = { @@ -1316,11 +1341,136 @@ static const struct seq_operations diskstats_op = { .show = diskstats_show }; +#ifdef CONFIG_BPF_RVI +struct 
diskstats_seq_priv { + struct class_dev_iter iter; // must be the first, + // to let us reuse disk_seqf_next() + struct blkcg *task_blkcg; +}; + +/* + * Basically the same as disk_seqf_start(), but without allocating iter and + * then overwriting seqf->private, which points to priv_data->target_private + * in the bpf_iter case (see prepare_seq_file()) and is needed to retrieve + * struct bpf_iter_priv_data. Here we allocate iter by setting + * .seq_priv_size and treating priv_data->target_private as iter. + */ +static void *bpf_disk_seqf_start(struct seq_file *seqf, loff_t *pos) +{ + loff_t skip = *pos; + struct diskstats_seq_priv *priv = seqf->private; + struct class_dev_iter *iter; + struct device *dev; + struct task_struct *task; + + task = get_current_level1_reaper(); + if (!task) + task = current; + priv->task_blkcg = css_to_blkcg(task_css(task, io_cgrp_id)); + + iter = &priv->iter; + class_dev_iter_init(iter, &block_class, NULL, &disk_type); + do { + dev = class_dev_iter_next(iter); + if (!dev) + return NULL; + } while (skip--); + + return dev_to_disk(dev); +} + +/* + * Similar to the difference between {bpf_,}disk_seqf_start, + * here we don't free iter. 
+ */ +static void bpf_disk_seqf_stop(struct seq_file *seqf, void *v) +{ + struct diskstats_seq_priv *priv = seqf->private; + struct class_dev_iter *iter = &priv->iter; + + /* stop is called even after start failed :-( */ + if (iter) + class_dev_iter_exit(iter); +} + +struct bpf_iter__diskstats { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct block_device *, bd); + __bpf_md_ptr(struct disk_stats *, native_stat); + unsigned int inflight __aligned(8); + __bpf_md_ptr(struct blkcg *, task_blkcg); +}; + +DEFINE_BPF_ITER_FUNC(diskstats, struct bpf_iter_meta *meta, + struct block_device *bd, struct disk_stats *native_stat, + uint inflight, + struct blkcg *task_blkcg) + +static int __diskstats_show(struct seq_file *seqf, struct block_device *hd, + struct disk_stats *stat, unsigned int inflight) +{ + struct bpf_iter__diskstats ctx; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + struct diskstats_seq_priv *priv = seqf->private; + + meta.seq = seqf; + prog = bpf_iter_get_info(&meta, false); + if (!prog) + return native_diskstats_show(seqf, hd, stat, inflight); + + ctx.meta = &meta; + ctx.bd = hd; + ctx.native_stat = stat; + ctx.inflight = inflight; + ctx.task_blkcg = priv->task_blkcg; + return bpf_iter_run_prog(prog, &ctx); +} + +static const struct seq_operations bpf_diskstats_op = { + .start = bpf_disk_seqf_start, + .next = disk_seqf_next, + .stop = bpf_disk_seqf_stop, + .show = diskstats_show +}; + +static const struct bpf_iter_seq_info diskstats_seq_info = { + .seq_ops = &bpf_diskstats_op, + .init_seq_private = NULL, + .fini_seq_private = NULL, + .seq_priv_size = sizeof(struct diskstats_seq_priv), +}; + +static struct bpf_iter_reg diskstats_reg_info = { + .target = "diskstats", + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__diskstats, bd), + PTR_TO_BTF_ID }, + { offsetof(struct bpf_iter__diskstats, native_stat), + PTR_TO_BTF_ID }, + }, + .seq_info = &diskstats_seq_info, +}; + +BTF_ID_LIST(btf_diststats_ids) 
+BTF_ID(struct, block_device) +BTF_ID(struct, disk_stats) +#endif /* CONFIG_BPF_RVI */ + static int __init proc_genhd_init(void) { + int err = 0; + proc_create_seq("diskstats", 0, NULL, &diskstats_op); proc_create_seq("partitions", 0, NULL, &partitions_op); - return 0; + +#ifdef CONFIG_BPF_RVI + diskstats_reg_info.ctx_arg_info[0].btf_id = btf_diststats_ids[0]; + diskstats_reg_info.ctx_arg_info[1].btf_id = btf_diststats_ids[1]; + err = bpf_iter_reg_target(&diskstats_reg_info); +#endif + return err; } module_init(proc_genhd_init); #endif /* CONFIG_PROC_FS */ -- 2.25.1