
The demo version of an inflight based cgroup IO controller: the disk's
total inflight budget is distributed among active cgroups according to
their io.inf.weight, and budget that one cgroup leaves idle can be lent
temporarily to cgroups that have exhausted theirs.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/Kconfig          |   5 +
 block/Makefile         |   1 +
 block/blk-ioinf.c      | 843 +++++++++++++++++++++++++++++++++++++++++
 block/blk-rq-qos.h     |   3 +
 include/linux/blkdev.h |   3 +
 5 files changed, 855 insertions(+)
 create mode 100644 block/blk-ioinf.c

diff --git a/block/Kconfig b/block/Kconfig
index 24c6bb87727d..280e076ecd75 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -155,6 +155,11 @@ config BLK_CGROUP_IOCOST
 	  distributes IO capacity between different groups based on
 	  their share of the overall weight distribution.
 
+config BLK_CGROUP_IOINFLIGHT
+	bool "Enable support for inflight based cgroup IO controller"
+	help
+	  Distribute a disk's inflight IO budget between cgroups by weight.
+
 config BLK_WBT_MQ
 	bool "Multiqueue writeback throttling"
 	default y
diff --git a/block/Makefile b/block/Makefile
index 29814c6bb2df..d1e00f7fc88d 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_BLK_CGROUP_RWSTAT)	+= blk-cgroup-rwstat.o
 obj-$(CONFIG_BLK_DEV_THROTTLING)	+= blk-throttle.o
 obj-$(CONFIG_BLK_CGROUP_IOLATENCY)	+= blk-iolatency.o
 obj-$(CONFIG_BLK_CGROUP_IOCOST)	+= blk-iocost.o
+obj-$(CONFIG_BLK_CGROUP_IOINFLIGHT)	+= blk-ioinf.o
 obj-$(CONFIG_MQ_IOSCHED_DEADLINE)	+= mq-deadline.o
 obj-$(CONFIG_MQ_IOSCHED_KYBER)	+= kyber-iosched.o
 bfq-y				:= bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
diff --git a/block/blk-ioinf.c b/block/blk-ioinf.c
new file mode 100644
index 000000000000..8d4d7a5d693b
--- /dev/null
+++ b/block/blk-ioinf.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IO inflight relative controller
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/time64.h>
+#include <linux/parser.h>
+#include <linux/blk-cgroup.h>
+#include "blk-rq-qos.h"
+#include "blk-mq.h"
+
+/* default weight for each cgroup */
+#define IOINF_DFL_WEIGHT	10
+/* default wake up time in jiffies for background work, see ioinf_timer_fn() */
+#define IOINF_TIMER_PERID	500
+/* default time in jiffies before an idle cgroup is deactivated */
+#define INFG_DFL_EXPIRE		100
+
+/* io.inf.qos controls */
+enum {
+	QOS_ENABLE,
+	QOS_INFLIGHT,
+	NR_QOS_CTRL_PARAMS,
+};
+
+/* ioinf_gq flags */
+enum {
+	INFG_EXHAUSTED,
+	INFG_LEND,
+	INFG_BORROW,
+};
+
+/* the global control structure */
+struct ioinf {
+	struct rq_qos rqos;
+
+	/* qos control params */
+	bool enabled;
+	u32 inflight;
+
+	/* default time for ioinf_timer_fn */
+	unsigned long inf_timer_perid;
+	struct timer_list inf_timer;
+
+	/* default time for infg_expire_fn */
+	unsigned long infg_expire_jiffies;
+
+	/* global lock */
+	spinlock_t lock;
+	/* list of active infgs */
+	struct list_head active_infgs;
+	/* list of active infgs that lend inflight budget to other infgs */
+	struct list_head lend_infgs;
+	/* list of active infgs that borrow inflight budget from other infgs */
+	struct list_head borrow_infgs;
+};
+
+/* per disk-cgroup pair structure */
+struct ioinf_gq {
+	struct blkg_policy_data pd;
+	struct ioinf *inf;
+
+	unsigned long flags;
+	/* head of the list is inf->active_infgs */
+	struct list_head active;
+	/* head of the list is inf->lend_infgs */
+	struct list_head lend;
+	/* head of the list is inf->borrow_infgs */
+	struct list_head borrow;
+
+	/* configured by user */
+	u32 weight;
+	/* normalized weight */
+	u32 hweight;
+	/* normalized inflight budget */
+	u32 hinflight;
+	/* inuse inflight budget */
+	u32 hinflight_inuse;
+	/* IO beyond budget will wait here */
+	struct rq_wait rqw;
+
+	struct timer_list expire_timer;
+
+	/* max inflight in the current period */
+	u32 max_inflight;
+	/* max inflight in the last period, gradually reduced */
+	u32 last_max_inflight;
+};
+
+/* per cgroup structure, used to record default weight for all disks */
+struct ioinf_cgrp {
+	struct blkcg_policy_data cpd;
+
+	u32 dfl_weight;
+};
+
+static struct blkcg_policy blkcg_policy_ioinf;
+
+static struct ioinf *rqos_to_inf(struct rq_qos *rqos)
+{
+	return container_of(rqos, struct ioinf, rqos);
+}
+
+static struct ioinf *q_to_inf(struct request_queue *q)
+{
+	return rqos_to_inf(rq_qos_id(q, RQ_QOS_INFLIGHT));
+}
+
+static struct ioinf_gq *pd_to_infg(struct blkg_policy_data *pd)
+{
+	if (!pd)
+		return NULL;
+
+	return container_of(pd, struct ioinf_gq, pd);
+}
+
+static struct ioinf_gq *blkg_to_infg(struct blkcg_gq *blkg)
+{
+	return pd_to_infg(blkg_to_pd(blkg, &blkcg_policy_ioinf));
+}
+
+static struct blkcg_gq *infg_to_blkg(struct ioinf_gq *infg)
+{
+	return pd_to_blkg(&infg->pd);
+}
+
+static struct ioinf_cgrp *blkcg_to_infcg(struct blkcg *blkcg)
+{
+	struct blkcg_policy_data *cpd =
+		blkcg_to_cpd(blkcg, &blkcg_policy_ioinf);
+
+	return container_of(cpd, struct ioinf_cgrp, cpd);
+}
+
+static struct blkcg_gq *ioinf_bio_blkg(struct bio *bio)
+{
+	struct blkcg_gq *blkg = bio->bi_blkg;
+
+	if (!blkg || !blkg->online)
+		return NULL;
+
+	if (blkg->blkcg->css.cgroup->level == 0)
+		return NULL;
+
+	return blkg;
+}
+
+static struct ioinf_gq *ioinf_bio_infg(struct bio *bio)
+{
+	struct ioinf_gq *infg;
+	struct blkcg_gq *blkg = ioinf_bio_blkg(bio);
+
+	if (!blkg)
+		return NULL;
+
+	infg = blkg_to_infg(blkg);
+	if (!infg)
+		return NULL;
+
+	return infg;
+}
+
+static u32 infg_weight(struct ioinf_gq *infg)
+{
+	struct ioinf_cgrp *infcg;
+	struct blkcg_gq *blkg;
+
+	if (infg->weight)
+		return infg->weight;
+
+	/* if the user doesn't set a per disk weight, use the cgroup default */
+	blkg = infg_to_blkg(infg);
+	infcg = blkcg_to_infcg(blkg->blkcg);
+
+	return infcg->dfl_weight;
+}
+
+static void infg_clear_loan(struct ioinf_gq *infg)
+{
+	if (!list_empty(&infg->lend)) {
+		clear_bit(INFG_LEND, &infg->flags);
+		list_del_init(&infg->lend);
+	}
+
+	if (!list_empty(&infg->borrow)) {
+		clear_bit(INFG_BORROW, &infg->flags);
+		list_del_init(&infg->borrow);
+	}
+}
+
+/*
+ * called when an infg is activated or deactivated
+ * TODO: support cgroup hierarchy, each infg is independent for now
+ */
+static void __propagate_weights(struct ioinf *inf)
+{
+	struct ioinf_gq *infg;
+	u32 total = 0;
+
+	if (list_empty(&inf->active_infgs))
+		return;
+
+	/*
+	 * TODO: instead of clearing loans and reinitializing everything, it's
+	 * better to keep loans and do minor incremental modifications.
+	 */
+	list_for_each_entry(infg, &inf->active_infgs, active) {
+		total += infg_weight(infg);
+		infg->max_inflight = 0;
+		infg->last_max_inflight = 0;
+		infg_clear_loan(infg);
+	}
+
+	list_for_each_entry(infg, &inf->active_infgs, active) {
+		u32 weight = infg_weight(infg);
+
+		infg->hweight = weight * 100 / total;
+		infg->hinflight = infg->inf->inflight * infg->hweight / 100;
+		if (!infg->hinflight)
+			infg->hinflight = 1;
+		infg->hinflight_inuse = infg->hinflight;
+	}
+
+	mod_timer(&inf->inf_timer, jiffies + inf->inf_timer_perid);
+}
+
+static void propagate_weights(struct ioinf *inf)
+{
+	spin_lock_irq(&inf->lock);
+	__propagate_weights(inf);
+	spin_unlock_irq(&inf->lock);
+}
+
+static void ioinf_active_infg(struct ioinf_gq *infg)
+{
+	struct ioinf *inf = infg->inf;
+
+	spin_lock_irq(&inf->lock);
+	if (list_empty(&infg->active)) {
+		list_add(&infg->active, &inf->active_infgs);
+		__propagate_weights(inf);
+	}
+	spin_unlock_irq(&inf->lock);
+}
+
+static unsigned int atomic_inc_below_return(atomic_t *v, unsigned int below)
+{
+	unsigned int cur = atomic_read(v);
+
+	for (;;) {
+		unsigned int old;
+
+		if (cur >= below)
+			return below + 1;
+
+		old = atomic_cmpxchg(v, cur, cur + 1);
+		if (old == cur)
+			break;
+		cur = old;
+	}
+
+	return cur + 1;
+}
+
+/*
+ * Called from the IO fast path; returning false means the inflight budget is
+ * exhausted, and the foreground thread will wait for inflight IO to complete.
+ */
+static bool ioinf_inflight_cb(struct rq_wait *rqw, void *private_data)
+{
+	struct ioinf_gq *infg = private_data;
+	unsigned int inflight;
+	unsigned int limit;
+
+retry:
+	limit = infg->hinflight_inuse;
+	inflight = atomic_inc_below_return(&infg->rqw.inflight, limit);
+
+	if (inflight > infg->max_inflight)
+		infg->max_inflight = inflight;
+
+	if (inflight <= limit)
+		return true;
+
+	if (infg->hinflight_inuse == limit) {
+		/*
+		 * This infg wants more inflight budget; set INFG_EXHAUSTED and
+		 * let ioinf_timer_fn() later check whether other infgs can
+		 * lend some budget.
+		 */
+		set_bit(INFG_EXHAUSTED, &infg->flags);
+		return false;
+	}
+
+	/* Stop lending inflight budget to other infgs */
+	infg->hinflight_inuse = infg->hinflight;
+	/* wake up ioinf_timer_fn() immediately to inform other infgs */
+	timer_reduce(&infg->inf->inf_timer, jiffies + 1);
+	goto retry;
+}
+
+static void ioinf_done(struct ioinf_gq *infg)
+{
+	int inflight = atomic_dec_return(&infg->rqw.inflight);
+
+	BUG_ON(inflight < 0);
+
+	if (inflight < infg->hinflight && wq_has_sleeper(&infg->rqw.wait))
+		wake_up_all(&infg->rqw.wait);
+
+	/* deactivate infg if there is no IO for infg_expire_jiffies */
+	if (inflight == 0)
+		mod_timer(&infg->expire_timer,
+			  jiffies + infg->inf->infg_expire_jiffies);
+}
+
+static void ioinf_cleanup_cb(struct rq_wait *rqw, void *private_data)
+{
+	ioinf_done(private_data);
+}
+
+static void ioinf_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
+{
+	struct ioinf *inf = rqos_to_inf(rqos);
+	struct ioinf_gq *infg = ioinf_bio_infg(bio);
+
+	if (!inf->enabled || !infg)
+		return;
+
+	if (list_empty_careful(&infg->active))
+		ioinf_active_infg(infg);
+
+	rq_qos_wait(&infg->rqw, infg, ioinf_inflight_cb, ioinf_cleanup_cb);
+}
+
+static void ioinf_rqos_track(struct rq_qos *rqos, struct request *rq,
+			     struct bio *bio)
+{
+	struct blkcg_gq *blkg = ioinf_bio_blkg(bio);
+
+	if (!blkg)
+		return;
+
+	rq->blkg = blkg;
+}
+
+static void ioinf_rqos_cleanup(struct rq_qos *rqos, struct bio *bio)
+{
+	struct ioinf_gq *infg = ioinf_bio_infg(bio);
+
+	if (!infg || !infg->inf->enabled ||
+	    list_empty_careful(&infg->active))
+		return;
+
+	ioinf_done(infg);
+}
+
+static void ioinf_rqos_done(struct rq_qos *rqos, struct request *rq)
+{
+	struct blkcg_gq *blkg = rq->blkg;
+
+	if (blkg) {
+		ioinf_done(blkg_to_infg(blkg));
+		rq->blkg = NULL;
+	}
+}
+
+static void ioinf_rqos_exit(struct rq_qos *rqos)
+{
+	struct ioinf *inf = rqos_to_inf(rqos);
+
+	blkcg_deactivate_policy(rqos->q, &blkcg_policy_ioinf);
+
+	del_timer_sync(&inf->inf_timer);
+	kfree(inf);
+}
+
+static int ioinf_stat_show(void *data, struct seq_file *m)
+{
+	struct rq_qos *rqos = data;
+	struct ioinf *inf = rqos_to_inf(rqos);
+	struct ioinf_gq *infg;
+	char path[32];
+
+	spin_lock_irq(&inf->lock);
+	list_for_each_entry(infg, &inf->active_infgs, active) {
+		blkg_path(infg_to_blkg(infg), path, sizeof(path));
+		seq_printf(m, "%s: hweight %u, inflight %d/(%u->%u) %u->%u\n", path,
+			   infg->hweight, atomic_read(&infg->rqw.inflight),
+			   infg->hinflight, infg->hinflight_inuse,
+			   infg->last_max_inflight,
+			   infg->max_inflight);
+	}
+	spin_unlock_irq(&inf->lock);
+
+	return 0;
+}
+
+static const struct blk_mq_debugfs_attr ioinf_debugfs_attrs[] = {
+	{"stat", 0400, ioinf_stat_show},
+	{},
+};
+
+static struct rq_qos_ops ioinf_rqos_ops = {
+	.throttle = ioinf_rqos_throttle,
+	.done = ioinf_rqos_done,
+	.track = ioinf_rqos_track,
+	.cleanup = ioinf_rqos_cleanup,
+	.exit = ioinf_rqos_exit,
+
+#ifdef CONFIG_BLK_DEBUG_FS
+	.debugfs_attrs = ioinf_debugfs_attrs,
+#endif
+};
+
+static void infg_update_inflight(struct ioinf_gq *infg, u32 *exhausted_count)
+{
+	unsigned int last_max_inflight = infg->last_max_inflight;
+
+	infg->hinflight_inuse = max(last_max_inflight, infg->max_inflight);
+
+	infg->last_max_inflight = max(last_max_inflight >> 1, infg->max_inflight);
+	infg->max_inflight = infg->max_inflight >> 1;
+
+	if (infg->hinflight_inuse < infg->hinflight &&
+	    list_empty(&infg->lend)) {
+		if (!list_empty(&infg->borrow)) {
+			clear_bit(INFG_BORROW, &infg->flags);
+			list_del_init(&infg->borrow);
+		}
+
+		set_bit(INFG_LEND, &infg->flags);
+		list_add_tail(&infg->lend, &infg->inf->lend_infgs);
+	}
+
+	if (test_bit(INFG_EXHAUSTED, &infg->flags)) {
+		(*exhausted_count)++;
+		if (list_empty(&infg->borrow)) {
+			set_bit(INFG_BORROW, &infg->flags);
+			list_add_tail(&infg->borrow, &infg->inf->borrow_infgs);
+		}
+	}
+}
+
+static void ioinf_timer_fn(struct timer_list *timer)
+{
+	struct ioinf *inf = container_of(timer, struct ioinf, inf_timer);
+	struct ioinf_gq *infg;
+	u32 exhausted_count = 0;
+	u32 lend_total = 0;
+	unsigned long flags;
+
+	if (list_empty(&inf->active_infgs))
+		return;
+
+	spin_lock_irqsave(&inf->lock, flags);
+
+	list_for_each_entry(infg, &inf->active_infgs, active)
+		infg_update_inflight(infg, &exhausted_count);
+
+	list_for_each_entry(infg, &inf->lend_infgs, lend)
+		lend_total += infg->hinflight - infg->hinflight_inuse;
+
+	/*
+	 * TODO: handle loan gracefully, equal division for now.
+	 */
+	if (exhausted_count) {
+		u32 borrow = lend_total / exhausted_count;
+
+		list_for_each_entry(infg, &inf->borrow_infgs, borrow) {
+			if (test_and_clear_bit(INFG_EXHAUSTED, &infg->flags))
+				infg->hinflight_inuse += borrow;
+		}
+	}
+
+	spin_unlock_irqrestore(&inf->lock, flags);
+}
+
+static int blk_ioinf_init(struct request_queue *q)
+{
+	struct rq_qos *rqos;
+	struct ioinf *inf;
+	int ret;
+
+	inf = kzalloc_node(sizeof(*inf), GFP_KERNEL, q->node);
+	if (!inf)
+		return -ENOMEM;
+
+	spin_lock_init(&inf->lock);
+	inf->inflight = q->nr_requests;
+	inf->infg_expire_jiffies = INFG_DFL_EXPIRE;
+	inf->inf_timer_perid = IOINF_TIMER_PERID;
+	INIT_LIST_HEAD(&inf->active_infgs);
+	INIT_LIST_HEAD(&inf->lend_infgs);
+	INIT_LIST_HEAD(&inf->borrow_infgs);
+	rqos = &inf->rqos;
+
+	rqos->q = q;
+	rqos->id = RQ_QOS_INFLIGHT;
+	rqos->ops = &ioinf_rqos_ops;
+
+	timer_setup(&inf->inf_timer, ioinf_timer_fn, 0);
+
+	ret = rq_qos_add(q, rqos);
+	if (ret)
+		goto err_free_inf;
+
+	ret = blkcg_activate_policy(q, &blkcg_policy_ioinf);
+	if (ret)
+		goto err_del_qos;
+	return 0;
+
+err_del_qos:
+	rq_qos_del(q, rqos);
+err_free_inf:
+	kfree(inf);
+	return ret;
+}
+
+static struct blkcg_policy_data *ioinf_cpd_alloc(gfp_t gfp)
+{
+	struct ioinf_cgrp *infcg = kzalloc(sizeof(*infcg), gfp);
+
+	if (!infcg)
+		return NULL;
+
+	infcg->dfl_weight = IOINF_DFL_WEIGHT;
+	return &infcg->cpd;
+}
+
+static void ioinf_cpd_free(struct blkcg_policy_data *cpd)
+{
+	kfree(container_of(cpd, struct ioinf_cgrp, cpd));
+}
+
+static struct blkg_policy_data *ioinf_pd_alloc(gfp_t gfp,
+					       struct request_queue *q,
+					       struct blkcg *blkcg)
+{
+	struct ioinf_gq *infg = kzalloc_node(sizeof(*infg), gfp, q->node);
+
+	if (!infg)
+		return NULL;
+
+	return &infg->pd;
+}
+
+static void infg_expire_fn(struct timer_list *timer)
+{
+	struct ioinf_gq *infg =
+		container_of(timer, struct ioinf_gq, expire_timer);
+	struct ioinf *inf = infg->inf;
+	unsigned long flags;
+
+	if (atomic_read(&infg->rqw.inflight) > 0)
+		return;
+
+	spin_lock_irqsave(&inf->lock, flags);
+	if (atomic_read(&infg->rqw.inflight) == 0) {
+		list_del_init(&infg->active);
+		if (atomic_read(&infg->rqw.inflight) == 0) {
+			infg_clear_loan(infg);
+			__propagate_weights(inf);
+		} else {
+			list_add(&infg->active, &inf->active_infgs);
+		}
+	}
+	spin_unlock_irqrestore(&inf->lock, flags);
+}
+
+static void ioinf_pd_init(struct blkg_policy_data *pd)
+{
+	struct ioinf_gq *infg = pd_to_infg(pd);
+	struct blkcg_gq *blkg = pd_to_blkg(pd);
+
+	INIT_LIST_HEAD(&infg->active);
+	INIT_LIST_HEAD(&infg->lend);
+	INIT_LIST_HEAD(&infg->borrow);
+	infg->inf = q_to_inf(blkg->q);
+	rq_wait_init(&infg->rqw);
+	timer_setup(&infg->expire_timer, infg_expire_fn, 0);
+}
+
+static void ioinf_pd_offline(struct blkg_policy_data *pd)
+{
+	struct ioinf_gq *infg = pd_to_infg(pd);
+	struct ioinf *inf = infg->inf;
+
+	if (list_empty_careful(&infg->active))
+		return;
+
+	del_timer_sync(&infg->expire_timer);
+
+	spin_lock_irq(&inf->lock);
+
+	if (!list_empty(&infg->lend))
+		list_del_init(&infg->lend);
+
+	if (!list_empty(&infg->borrow))
+		list_del_init(&infg->borrow);
+
+	if (!list_empty(&infg->active)) {
+		list_del_init(&infg->active);
+		__propagate_weights(inf);
+	}
+
+	spin_unlock_irq(&inf->lock);
+}
+
+static void ioinf_pd_free(struct blkg_policy_data *pd)
+{
+	struct ioinf_gq *infg = pd_to_infg(pd);
+
+	kfree(infg);
+}
+
+static u64 ioinf_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
+			       int off)
+{
+	const char *dname = blkg_dev_name(pd->blkg);
+	struct ioinf_gq *infg = pd_to_infg(pd);
+
+	if (dname && infg->weight)
+		seq_printf(sf, "%s %u\n", dname, infg->weight);
+
+	return 0;
+}
+
+static int ioinf_weight_show(struct seq_file *sf, void *v)
+{
+	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+	struct ioinf_cgrp *infcg = blkcg_to_infcg(blkcg);
+
+	seq_printf(sf, "default %u\n", infcg->dfl_weight);
+	blkcg_print_blkgs(sf, blkcg, ioinf_weight_prfill, &blkcg_policy_ioinf,
+			  seq_cft(sf)->private, false);
+
+	return 0;
+}
+
+static ssize_t ioinf_weight_write(struct kernfs_open_file *of, char *buf,
+				  size_t nbytes, loff_t off)
+{
+	struct blkcg *blkcg = css_to_blkcg(of_css(of));
+	struct ioinf_cgrp *infcg = blkcg_to_infcg(blkcg);
+	struct blkg_conf_ctx ctx;
+	struct ioinf_gq *infg;
+	int ret;
+	u32 v;
+
+	if (!strchr(buf, ':')) {
+		if (!sscanf(buf, "default %u", &v) && !sscanf(buf, "%u", &v))
+			return -EINVAL;
+		if (v <= 0)
+			return -EINVAL;
+
+		infcg->dfl_weight = v;
+
+		return nbytes;
+	}
+
+	ret = blkg_conf_prep(blkcg, &blkcg_policy_ioinf, buf, &ctx);
+	if (ret)
+		return ret;
+
+	infg = blkg_to_infg(ctx.blkg);
+	if (!strncmp(ctx.body, "default", 7)) {
+		v = IOINF_DFL_WEIGHT;
+	} else if (!sscanf(ctx.body, "%u", &v) ||
+		   v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX) {
+		blkg_conf_finish(&ctx);
+		return -EINVAL;
+	}
+
+	infg->weight = v;
+	blkg_conf_finish(&ctx);
+	propagate_weights(infg->inf);
+	return nbytes;
+}
+
+static u64 ioinf_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
+			    int off)
+{
+	const char *dname = blkg_dev_name(pd->blkg);
+	struct ioinf *inf = q_to_inf(pd->blkg->q);
+
+	if (!dname)
+		return 0;
+
+	seq_printf(sf, "%s enable=%d inflight=%u\n", dname, inf->enabled,
+		   inf->inflight);
+	return 0;
+}
+
+static int ioinf_qos_show(struct seq_file *sf, void *v)
+{
+	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+	blkcg_print_blkgs(sf, blkcg, ioinf_qos_prfill,
+			  &blkcg_policy_ioinf, seq_cft(sf)->private, false);
+	return 0;
+}
+
+static const match_table_t qos_ctrl_tokens = {
+	{ QOS_ENABLE,		"enable=%u"	},
+	{ QOS_INFLIGHT,		"inflight=%u"	},
+	{ NR_QOS_CTRL_PARAMS,	NULL		},
+};
+
+static ssize_t ioinf_qos_write(struct kernfs_open_file *of, char *input,
+			       size_t nbytes, loff_t off)
+{
+	struct gendisk *disk;
+	struct ioinf *inf;
+	u32 inflight;
+	bool enable;
+	char *p;
+	int ret;
+
+	disk = blkcg_conf_get_disk(&input);
+	if (IS_ERR(disk))
+		return PTR_ERR(disk);
+
+	if (!queue_is_mq(disk->queue)) {
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+
+	inf = q_to_inf(disk->queue);
+	if (!inf) {
+		ret = blk_ioinf_init(disk->queue);
+		if (ret)
+			goto err;
+
+		inf = q_to_inf(disk->queue);
+	}
+
+	enable = inf->enabled;
+	inflight = inf->inflight;
+
+	while ((p = strsep(&input, " \t\n"))) {
+		substring_t args[MAX_OPT_ARGS];
+		s64 v;
+
+		if (!*p)
+			continue;
+
+		switch (match_token(p, qos_ctrl_tokens, args)) {
+		case QOS_ENABLE:
+			if (match_u64(&args[0], &v))
+				goto einval;
+			enable = !!v;
+			continue;
+		case QOS_INFLIGHT:
+			if (match_u64(&args[0], &v))
+				goto einval;
+			inflight = v;
+			continue;
+		default:
+			goto einval;
+		}
+	}
+
+	inf->enabled = enable;
+
+	if (inflight == 0)
+		inflight = disk->queue->nr_requests;
+
+	if (inf->inflight != inflight) {
+		inf->inflight = inflight;
+		propagate_weights(inf);
+	}
+
+	put_disk_and_module(disk);
+	return nbytes;
+
+einval:
+	ret = -EINVAL;
+err:
+	put_disk_and_module(disk);
+	return ret;
+}
+
+static struct cftype ioinf_files[] = {
+	{
+		.name = "inf.weight",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = ioinf_weight_show,
+		.write = ioinf_weight_write,
+	},
+	{
+		.name = "inf.qos",
+		.flags = CFTYPE_ONLY_ON_ROOT,
+		.seq_show = ioinf_qos_show,
+		.write = ioinf_qos_write,
+	},
+	{}
+};
+
+static struct cftype ioinf_legacy_files[] = {
+	{
+		.name = "inf.weight",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = ioinf_weight_show,
+		.write = ioinf_weight_write,
+	},
+	{
+		.name = "inf.qos",
+		.flags = CFTYPE_ONLY_ON_ROOT,
+		.seq_show = ioinf_qos_show,
+		.write = ioinf_qos_write,
+	},
+	{}
+};
+
+static struct blkcg_policy blkcg_policy_ioinf = {
+	.dfl_cftypes = ioinf_files,
+	.legacy_cftypes = ioinf_legacy_files,
+
+	.cpd_alloc_fn = ioinf_cpd_alloc,
+	.cpd_free_fn = ioinf_cpd_free,
+
+	.pd_alloc_fn = ioinf_pd_alloc,
+	.pd_init_fn = ioinf_pd_init,
+	.pd_offline_fn = ioinf_pd_offline,
+	.pd_free_fn = ioinf_pd_free,
+};
+
+static int __init ioinf_init(void)
+{
+	return blkcg_policy_register(&blkcg_policy_ioinf);
+}
+
+static void __exit ioinf_exit(void)
+{
+	blkcg_policy_unregister(&blkcg_policy_ioinf);
+}
+
+module_init(ioinf_init);
+module_exit(ioinf_exit);
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 31e54f84ac89..6dde3815aa4f 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -17,6 +17,7 @@ enum rq_qos_id {
 	RQ_QOS_WBT,
 	RQ_QOS_LATENCY,
 	RQ_QOS_COST,
+	RQ_QOS_INFLIGHT,
 };
 
 struct rq_wait {
@@ -88,6 +89,8 @@ static inline const char *rq_qos_id_to_name(enum rq_qos_id id)
 		return "latency";
 	case RQ_QOS_COST:
 		return "cost";
+	case RQ_QOS_INFLIGHT:
+		return "inflight";
 	}
 	return "unknown";
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 49578094b500..671358c4c19d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -208,6 +208,9 @@ struct request {
 	u64 io_start_time_ns;
 #ifdef CONFIG_BLK_WBT
 	unsigned short wbt_flags;
+#endif
+#ifdef CONFIG_BLK_CGROUP_IOINFLIGHT
+	struct blkcg_gq *blkg;
 #endif
 	/*
 	 * rq sectors used for blk stats. It has the same value
-- 
2.39.2
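
P.S. for reviewers who want to play with the numbers: below is a small userspace
sketch of the budget math used by the patch. It mirrors the normalization in
__propagate_weights() (hweight = weight * 100 / total, hinflight =
inflight * hweight / 100, minimum 1) and the equal-division loan in
ioinf_timer_fn(); the rolling max_inflight estimate is left out, and the group
layout and sample numbers are made up for illustration — this is not part of
the patch.

	/*
	 * Userspace model of the ioinf budget math, illustration only.
	 */
	#include <stdio.h>

	struct grp {
		unsigned int weight;		/* configured io.inf.weight */
		unsigned int hweight;		/* normalized weight, in percent */
		unsigned int hinflight;		/* inflight budget from the weight */
		unsigned int hinflight_inuse;	/* budget the group actually uses */
		int exhausted;			/* group ran out of budget */
	};

	/* hweight = weight * 100 / total, hinflight = inflight * hweight / 100 */
	static void propagate_weights(struct grp *g, int nr, unsigned int inflight)
	{
		unsigned int total = 0;
		int i;

		for (i = 0; i < nr; i++)
			total += g[i].weight;

		for (i = 0; i < nr; i++) {
			g[i].hweight = g[i].weight * 100 / total;
			g[i].hinflight = inflight * g[i].hweight / 100;
			if (!g[i].hinflight)
				g[i].hinflight = 1;
			g[i].hinflight_inuse = g[i].hinflight;
		}
	}

	/* idle budget of lenders is split evenly between exhausted groups */
	static void rebalance(struct grp *g, int nr)
	{
		unsigned int lend_total = 0, exhausted_count = 0, borrow;
		int i;

		for (i = 0; i < nr; i++) {
			if (g[i].hinflight_inuse < g[i].hinflight)
				lend_total += g[i].hinflight - g[i].hinflight_inuse;
			if (g[i].exhausted)
				exhausted_count++;
		}

		if (!exhausted_count)
			return;

		borrow = lend_total / exhausted_count;
		for (i = 0; i < nr; i++)
			if (g[i].exhausted)
				g[i].hinflight_inuse += borrow;
	}

	int main(void)
	{
		/* two groups with weights 10 and 30 sharing 64 inflight slots */
		struct grp g[2] = { { .weight = 10 }, { .weight = 30 } };
		int i;

		propagate_weights(g, 2, 64);
		g[0].hinflight_inuse = 2;	/* group 0 barely submits IO ... */
		g[1].exhausted = 1;		/* ... while group 1 hit its limit */
		rebalance(g, 2);

		for (i = 0; i < 2; i++)
			printf("grp%d: hweight=%u%% hinflight=%u inuse=%u\n",
			       i, g[i].hweight, g[i].hinflight, g[i].hinflight_inuse);
		return 0;
	}

With the sample weights 10 and 30 over 64 inflight slots this prints budgets of
16 and 48; the 14 slots the first group leaves idle are lent to the second, so
its usable budget grows from 48 to 62. The controller itself is switched on per
disk through the root-only io.inf.qos file (enable=... inflight=...) and the
per-cgroup weights through io.inf.weight, as implemented by ioinf_qos_write()
and ioinf_weight_write() above.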