This is a demo version of the inflight based cgroup IO controller
(blk-ioinf): the disk's inflight IO budget is distributed among active
cgroups in proportion to their weights, and budget left unused by one
cgroup can be lent to cgroups that have exhausted their share.
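
The interface can be exercised as follows (the device number 8:0, the
cgroup name "test" and all values are only illustrative):

  # enable the controller and set the total inflight budget (root cgroup
  # only; inflight=0 falls back to the queue's nr_requests)
  echo "8:0 enable=1 inflight=64" > /sys/fs/cgroup/io.inf.qos

  # assign a per-disk weight to a child cgroup; writing "default <weight>"
  # sets the cgroup-wide default instead
  echo "8:0 200" > /sys/fs/cgroup/test/io.inf.weight
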
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
---
block/Kconfig | 5 +
block/Makefile | 1 +
block/blk-ioinf.c | 843 +++++++++++++++++++++++++++++++++++++++++
block/blk-rq-qos.h | 3 +
include/linux/blkdev.h | 3 +
5 files changed, 855 insertions(+)
create mode 100644 block/blk-ioinf.c
diff --git a/block/Kconfig b/block/Kconfig
index 24c6bb87727d..280e076ecd75 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -155,6 +155,11 @@ config BLK_CGROUP_IOCOST
distributes IO capacity between different groups based on
their share of the overall weight distribution.
+config BLK_CGROUP_IOINFLIGHT
+ bool "Enable support for inflight based cgroup IO controller"
+ help
+ This enables the inflight based cgroup IO controller, which
+ distributes the IO inflight budget between different groups based
+ on their share of the overall weight distribution.
+
config BLK_WBT_MQ
bool "Multiqueue writeback throttling"
default y
diff --git a/block/Makefile b/block/Makefile
index 29814c6bb2df..d1e00f7fc88d 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_BLK_CGROUP_RWSTAT) += blk-cgroup-rwstat.o
obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o
obj-$(CONFIG_BLK_CGROUP_IOLATENCY) += blk-iolatency.o
obj-$(CONFIG_BLK_CGROUP_IOCOST) += blk-iocost.o
+obj-$(CONFIG_BLK_CGROUP_IOINFLIGHT) += blk-ioinf.o
obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
diff --git a/block/blk-ioinf.c b/block/blk-ioinf.c
new file mode 100644
index 000000000000..8d4d7a5d693b
--- /dev/null
+++ b/block/blk-ioinf.c
@@ -0,0 +1,843 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IO inflight relative controller
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/time64.h>
+#include <linux/parser.h>
+#include <linux/blk-cgroup.h>
+#include "blk-rq-qos.h"
+#include "blk-mq.h"
+
+/* default weight for each cgroup */
+#define IOINF_DFL_WEIGHT 10
+/* default wakeup period in jiffies for the background job, see ioinf_timer_fn() */
+#define IOINF_TIMER_PERID 500
+/* default idle time in jiffies before an infg without IO is deactivated */
+#define INFG_DFL_EXPIRE 100
+
+/* io.inf.qos controls */
+enum {
+ QOS_ENABLE,
+ QOS_INFLIGHT,
+ NR_QOS_CTRL_PARAMS,
+};
+
+/* ioinf_gq flags */
+enum {
+ INFG_EXHAUSTED,
+ INFG_LEND,
+ INFG_BORROW,
+};
+
+/* the global control structure */
+struct ioinf {
+ struct rq_qos rqos;
+
+ /* qos control params */
+ bool enabled;
+ u32 inflight;
+
+ /* default time for ioinf_timer_fn */
+ unsigned long inf_timer_perid;
+ struct timer_list inf_timer;
+
+ /* default time for infg_expire_fn */
+ unsigned long infg_expire_jiffies;
+
+ /* global lock */
+ spinlock_t lock;
+ /* list of active infgs */
+ struct list_head active_infgs;
+ /* list of active infgs that lend inflight budget to other infgs */
+ struct list_head lend_infgs;
+ /* list of active infgs that borrow inflight budget from other infgs */
+ struct list_head borrow_infgs;
+};
+
+/* per disk-cgroup pair structure */
+struct ioinf_gq {
+ struct blkg_policy_data pd;
+ struct ioinf *inf;
+
+ unsigned long flags;
+ /* head of the list is inf->active_infgs */
+ struct list_head active;
+ /* head of the list is inf->lend_infgs */
+ struct list_head lend;
+ /* head of the list is inf->borrow_infgs */
+ struct list_head borrow;
+
+ /* configured by user */
+ u32 weight;
+ /* normalized weight */
+ u32 hweight;
+ /* normalized inflight budget */
+ u32 hinflight;
+ /* inuse inflight budget */
+ u32 hinflight_inuse;
+ /* IO beyond budget will wait here */
+ struct rq_wait rqw;
+
+ struct timer_list expire_timer;
+
+ /* max inflight in the current period */
+ u32 max_inflight;
+ /* max inflight in the last period, decayed gradually */
+ u32 last_max_inflight;
+};
+
+/* per-cgroup structure, used to record the default weight for all disks */
+struct ioinf_cgrp {
+ struct blkcg_policy_data cpd;
+
+ u32 dfl_weight;
+};
+
+static struct blkcg_policy blkcg_policy_ioinf;
+
+static struct ioinf *rqos_to_inf(struct rq_qos *rqos)
+{
+ return container_of(rqos, struct ioinf, rqos);
+}
+
+static struct ioinf *q_to_inf(struct request_queue *q)
+{
+ return rqos_to_inf(rq_qos_id(q, RQ_QOS_INFLIGHT));
+}
+
+static struct ioinf_gq *pd_to_infg(struct blkg_policy_data *pd)
+{
+ if (!pd)
+ return NULL;
+
+ return container_of(pd, struct ioinf_gq, pd);
+}
+
+static struct ioinf_gq *blkg_to_infg(struct blkcg_gq *blkg)
+{
+ return pd_to_infg(blkg_to_pd(blkg, &blkcg_policy_ioinf));
+}
+
+static struct blkcg_gq *infg_to_blkg(struct ioinf_gq *infg)
+{
+ return pd_to_blkg(&infg->pd);
+}
+
+static struct ioinf_cgrp *blkcg_to_infcg(struct blkcg *blkcg)
+{
+ struct blkcg_policy_data *cpd =
+ blkcg_to_cpd(blkcg, &blkcg_policy_ioinf);
+
+ return container_of(cpd, struct ioinf_cgrp, cpd);
+}
+
+static struct blkcg_gq *ioinf_bio_blkg(struct bio *bio)
+{
+ struct blkcg_gq *blkg = bio->bi_blkg;
+
+ if (!blkg || !blkg->online)
+ return NULL;
+
+ if (blkg->blkcg->css.cgroup->level == 0)
+ return NULL;
+
+ return blkg;
+}
+
+static struct ioinf_gq *ioinf_bio_infg(struct bio *bio)
+{
+ struct ioinf_gq *infg;
+ struct blkcg_gq *blkg = ioinf_bio_blkg(bio);
+
+ if (!blkg)
+ return NULL;
+
+ infg = blkg_to_infg(blkg);
+ if (!infg)
+ return NULL;
+
+ return infg;
+}
+
+static u32 infg_weight(struct ioinf_gq *infg)
+{
+ struct ioinf_cgrp *infcg;
+ struct blkcg_gq *blkg;
+
+ if (infg->weight)
+ return infg->weight;
+
+ /* if the user doesn't set a per-disk weight, use the cgroup default weight */
+ blkg = infg_to_blkg(infg);
+ infcg = blkcg_to_infcg(blkg->blkcg);
+
+ return infcg->dfl_weight;
+}
+
+static void infg_clear_loan(struct ioinf_gq *infg)
+{
+ if (!list_empty(&infg->lend)) {
+ clear_bit(INFG_LEND, &infg->flags);
+ list_del_init(&infg->lend);
+ }
+
+ if (!list_empty(&infg->borrow)) {
+ clear_bit(INFG_BORROW, &infg->flags);
+ list_del_init(&infg->borrow);
+ }
+}
+
+/*
+ * called when an infg is activated or deactivated
+ * TODO: support cgroup hierarchy, each infg is independent for now
+ */
+static void __propagate_weights(struct ioinf *inf)
+{
+ struct ioinf_gq *infg;
+ u32 total = 0;
+
+ if (list_empty(&inf->active_infgs))
+ return;
+
+ /*
+ * TODO: instead of clearing loan and reinitializing everything, it's
+ * better to keep loan and do minor incremental modification.
+ */
+ list_for_each_entry(infg, &inf->active_infgs, active) {
+ total += infg_weight(infg);
+ infg->max_inflight = 0;
+ infg->last_max_inflight = 0;
+ infg_clear_loan(infg);
+ }
+
+ list_for_each_entry(infg, &inf->active_infgs, active) {
+ u32 weight = infg_weight(infg);
+
+ infg->hweight = weight * 100 / total;
+ infg->hinflight = infg->inf->inflight * infg->hweight / 100;
+ if (!infg->hinflight)
+ infg->hinflight = 1;
+ infg->hinflight_inuse = infg->hinflight;
+ }
+
+ mod_timer(&inf->inf_timer, jiffies + inf->inf_timer_perid);
+}
+
+static void propagate_weights(struct ioinf *inf)
+{
+ spin_lock_irq(&inf->lock);
+ __propagate_weights(inf);
+ spin_unlock_irq(&inf->lock);
+}
+
+static void ioinf_active_infg(struct ioinf_gq *infg)
+{
+ struct ioinf *inf = infg->inf;
+
+ spin_lock_irq(&inf->lock);
+ if (list_empty(&infg->active)) {
+ list_add(&infg->active, &inf->active_infgs);
+ __propagate_weights(inf);
+ }
+ spin_unlock_irq(&inf->lock);
+}
+
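+/*
+ * Atomically increment @v only if it is currently below @below; returns the
+ * incremented value on success, or @below + 1 (without incrementing) if the
+ * limit has already been reached.
+ */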
+static unsigned int atomic_inc_below_return(atomic_t *v, unsigned int below)
+{
+ unsigned int cur = atomic_read(v);
+
+ for (;;) {
+ unsigned int old;
+
+ if (cur >= below)
+ return below + 1;
+
+ old = atomic_cmpxchg(v, cur, cur + 1);
+ if (old == cur)
+ break;
+ cur = old;
+ }
+
+ return cur + 1;
+}
+
+/*
+ * Called from the IO fast path; returning false means the inflight budget is
+ * exhausted and the foreground thread will wait for inflight IO to complete.
+ */
+static bool ioinf_inflight_cb(struct rq_wait *rqw, void *private_data)
+{
+ struct ioinf_gq *infg = private_data;
+ unsigned int inflight;
+ unsigned int limit;
+
+retry:
+ limit = infg->hinflight_inuse;
+ inflight = atomic_inc_below_return(&infg->rqw.inflight, limit);
+
+ if (inflight > infg->max_inflight)
+ infg->max_inflight = inflight;
+
+ if (inflight <= limit)
+ return true;
+
+ if (infg->hinflight_inuse == limit) {
+ /*
+ * This infg wants more inflight budget; set INFG_EXHAUSTED, and
+ * later ioinf_timer_fn() will check whether other infgs can lend
+ * budget.
+ */
+ set_bit(INFG_EXHAUSTED, &infg->flags);
+ return false;
+ }
+
+ /* Stop lending inflight budget to other infgs */
+ infg->hinflight_inuse = infg->hinflight;
+ /* wake up ioinf_timer_fn() immediately to inform other infgs */
+ timer_reduce(&infg->inf->inf_timer, jiffies + 1);
+ goto retry;
+}
+
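+/*
+ * Complete one IO for @infg: drop the inflight count, wake up throttled
+ * waiters once we are back under budget, and arm the expire timer so an idle
+ * infg gets deactivated later.
+ */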
+void ioinf_done(struct ioinf_gq *infg)
+{
+ int inflight = atomic_dec_return(&infg->rqw.inflight);
+
+ BUG_ON(inflight < 0);
+
+ if (inflight < infg->hinflight && wq_has_sleeper(&infg->rqw.wait))
+ wake_up_all(&infg->rqw.wait);
+
+ /* deactivate infg if there is no IO for infg_expire_jiffies */
+ if (inflight == 0)
+ mod_timer(&infg->expire_timer,
+ jiffies + infg->inf->infg_expire_jiffies);
+}
+
+static void ioinf_cleanup_cb(struct rq_wait *rqw, void *private_data)
+{
+ ioinf_done(private_data);
+}
+
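+/*
+ * rq_qos throttle hook: activate the infg on its first IO, then wait in
+ * rq_qos_wait() until the group's inflight budget admits this bio.
+ */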
+static void ioinf_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
+{
+ struct ioinf *inf = rqos_to_inf(rqos);
+ struct ioinf_gq *infg = ioinf_bio_infg(bio);
+
+ if (!inf->enabled || !infg)
+ return;
+
+ if (list_empty_careful(&infg->active))
+ ioinf_active_infg(infg);
+
+ rq_qos_wait(&infg->rqw, infg, ioinf_inflight_cb, ioinf_cleanup_cb);
+}
+
+static void ioinf_rqos_track(struct rq_qos *rqos, struct request *rq,
+ struct bio *bio)
+{
+ struct blkcg_gq *blkg = ioinf_bio_blkg(bio);
+
+ if (!blkg)
+ return;
+
+ rq->blkg = blkg;
+}
+
+static void ioinf_rqos_cleanup(struct rq_qos *rqos, struct bio *bio)
+{
+ struct ioinf_gq *infg = ioinf_bio_infg(bio);
+
+ if (!infg || !infg->inf->enabled ||
+ list_empty_careful(&infg->active))
+ return;
+
+ ioinf_done(infg);
+}
+
+static void ioinf_rqos_done(struct rq_qos *rqos, struct request *rq)
+{
+ struct blkcg_gq *blkg = rq->blkg;
+
+ if (blkg) {
+ ioinf_done(blkg_to_infg(blkg));
+ rq->blkg = NULL;
+ }
+}
+
+static void ioinf_rqos_exit(struct rq_qos *rqos)
+{
+ struct ioinf *inf = rqos_to_inf(rqos);
+
+ blkcg_deactivate_policy(rqos->q, &blkcg_policy_ioinf);
+
+ del_timer_sync(&inf->inf_timer);
+ kfree(inf);
+}
+
+static int ioinf_stat_show(void *data, struct seq_file *m)
+{
+ struct rq_qos *rqos = data;
+ struct ioinf *inf = rqos_to_inf(rqos);
+ struct ioinf_gq *infg;
+ char path[32];
+
+ spin_lock_irq(&inf->lock);
+ list_for_each_entry(infg, &inf->active_infgs, active) {
+ blkg_path(infg_to_blkg(infg), path, sizeof(path));
+ seq_printf(m, "%s: hweight %u, inflight %d/(%u->%u) %u->%u\n", path,
+ infg->hweight, atomic_read(&infg->rqw.inflight),
+ infg->hinflight, infg->hinflight_inuse,
+ infg->last_max_inflight,
+ infg->max_inflight);
+ }
+ spin_unlock_irq(&inf->lock);
+
+ return 0;
+}
+
+static const struct blk_mq_debugfs_attr ioinf_debugfs_attrs[] = {
+ {"stat", 0400, ioinf_stat_show},
+ {},
+};
+
+static struct rq_qos_ops ioinf_rqos_ops = {
+ .throttle = ioinf_rqos_throttle,
+ .done = ioinf_rqos_done,
+ .track = ioinf_rqos_track,
+ .cleanup = ioinf_rqos_cleanup,
+ .exit = ioinf_rqos_exit,
+
+#ifdef CONFIG_BLK_DEBUG_FS
+ .debugfs_attrs = ioinf_debugfs_attrs,
+#endif
+};
+
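+/*
+ * Refresh the in-use budget of @infg from its recent inflight peak (with
+ * decay), then classify it: a group using less than its fair share becomes a
+ * lender, a group that exhausted its budget becomes a borrower.
+ */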
+static void infg_update_inflight(struct ioinf_gq *infg, u32 *exhausted_count)
+{
+ unsigned int last_max_inflight = infg->last_max_inflight;
+
+ infg->hinflight_inuse = max(last_max_inflight, infg->max_inflight);
+
+ infg->last_max_inflight = max(last_max_inflight >> 1, infg->max_inflight);
+ infg->max_inflight = infg->max_inflight >> 1;
+
+ if (infg->hinflight_inuse < infg->hinflight &&
+ list_empty(&infg->lend)) {
+ if (!list_empty(&infg->borrow)) {
+ clear_bit(INFG_BORROW, &infg->flags);
+ list_del_init(&infg->borrow);
+ }
+
+ set_bit(INFG_LEND, &infg->flags);
+ list_add_tail(&infg->lend, &infg->inf->lend_infgs);
+ }
+
+ if (test_bit(INFG_EXHAUSTED, &infg->flags)) {
+ (*exhausted_count)++;
+ if (list_empty(&infg->borrow)) {
+ set_bit(INFG_BORROW, &infg->flags);
+ list_add_tail(&infg->borrow, &infg->inf->borrow_infgs);
+ }
+ }
+}
+
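+/*
+ * Periodic rebalancing: update the demand of every active infg, then split
+ * the unused budget of the lenders evenly among the infgs that exhausted
+ * their own budget.
+ */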
+static void ioinf_timer_fn(struct timer_list *timer)
+{
+ struct ioinf *inf = container_of(timer, struct ioinf, inf_timer);
+ struct ioinf_gq *infg;
+ u32 exhausted_count = 0;
+ u32 lend_total = 0;
+ unsigned long flags;
+
+ if (list_empty(&inf->active_infgs))
+ return;
+
+ spin_lock_irqsave(&inf->lock, flags);
+
+ list_for_each_entry(infg, &inf->active_infgs, active)
+ infg_update_inflight(infg, &exhausted_count);
+
+ list_for_each_entry(infg, &inf->lend_infgs, lend)
+ lend_total += infg->hinflight - infg->hinflight_inuse;
+
+ /*
+ * TODO: handle loan gracefully, equal division for now.
+ */
+ if (exhausted_count) {
+ u32 borrow = lend_total / exhausted_count;
+
+ list_for_each_entry(infg, &inf->borrow_infgs, borrow) {
+ if (test_and_clear_bit(INFG_EXHAUSTED, &infg->flags))
+ infg->hinflight_inuse += borrow;
+ }
+ }
+
+ spin_unlock_irqrestore(&inf->lock, flags);
+}
+
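+/* lazily set up the rq_qos policy for @q on the first io.inf.qos write */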
+static int blk_ioinf_init(struct request_queue *q)
+{
+ struct rq_qos *rqos;
+ struct ioinf *inf;
+ int ret;
+
+ inf = kzalloc_node(sizeof(*inf), GFP_KERNEL, q->node);
+ if (!inf)
+ return -ENOMEM;
+
+ spin_lock_init(&inf->lock);
+ inf->inflight = q->nr_requests;
+ inf->infg_expire_jiffies = INFG_DFL_EXPIRE;
+ inf->inf_timer_perid = IOINF_TIMER_PERID;
+ INIT_LIST_HEAD(&inf->active_infgs);
+ INIT_LIST_HEAD(&inf->lend_infgs);
+ INIT_LIST_HEAD(&inf->borrow_infgs);
+ rqos = &inf->rqos;
+
+ rqos->q = q;
+ rqos->id = RQ_QOS_INFLIGHT;
+ rqos->ops = &ioinf_rqos_ops;
+
+ timer_setup(&inf->inf_timer, ioinf_timer_fn, 0);
+
+ ret = rq_qos_add(q, rqos);
+ if (ret)
+ goto err_free_inf;
+
+ ret = blkcg_activate_policy(q, &blkcg_policy_ioinf);
+ if (ret)
+ goto err_del_qos;
+ return 0;
+
+err_del_qos:
+ rq_qos_del(q, rqos);
+err_free_inf:
+ kfree(inf);
+ return ret;
+}
+
+static struct blkcg_policy_data *ioinf_cpd_alloc(gfp_t gfp)
+{
+ struct ioinf_cgrp *infcg = kzalloc(sizeof(*infcg), gfp);
+
+ if (!infcg)
+ return NULL;
+
+ infcg->dfl_weight = IOINF_DFL_WEIGHT;
+ return &infcg->cpd;
+}
+
+static void ioinf_cpd_free(struct blkcg_policy_data *cpd)
+{
+ kfree(container_of(cpd, struct ioinf_cgrp, cpd));
+}
+
+static struct blkg_policy_data *ioinf_pd_alloc(gfp_t gfp,
+ struct request_queue *q,
+ struct blkcg *blkcg)
+{
+ struct ioinf_gq *infg = kzalloc_node(sizeof(*infg), gfp, q->node);
+
+ if (!infg)
+ return NULL;
+
+ return &infg->pd;
+}
+
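+/*
+ * Expire timer: the infg stayed idle for infg_expire_jiffies; if it is still
+ * idle, take it off the active list and redistribute weights among the
+ * remaining groups.
+ */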
+static void infg_expire_fn(struct timer_list *timer)
+{
+ struct ioinf_gq *infg =
+ container_of(timer, struct ioinf_gq, expire_timer);
+ struct ioinf *inf = infg->inf;
+ unsigned long flags;
+
+ if (atomic_read(&infg->rqw.inflight) > 0)
+ return;
+
+ spin_lock_irqsave(&inf->lock, flags);
+ if (atomic_read(&infg->rqw.inflight) == 0) {
+ list_del_init(&infg->active);
+ if (atomic_read(&infg->rqw.inflight) == 0) {
+ infg_clear_loan(infg);
+ __propagate_weights(inf);
+ } else {
+ list_add(&infg->active, &inf->active_infgs);
+ }
+ }
+ spin_unlock_irqrestore(&inf->lock, flags);
+}
+
+static void ioinf_pd_init(struct blkg_policy_data *pd)
+{
+ struct ioinf_gq *infg = pd_to_infg(pd);
+ struct blkcg_gq *blkg = pd_to_blkg(pd);
+
+ INIT_LIST_HEAD(&infg->active);
+ INIT_LIST_HEAD(&infg->lend);
+ INIT_LIST_HEAD(&infg->borrow);
+ infg->inf = q_to_inf(blkg->q);
+ rq_wait_init(&infg->rqw);
+ timer_setup(&infg->expire_timer, infg_expire_fn, 0);
+}
+
+static void ioinf_pd_offline(struct blkg_policy_data *pd)
+{
+ struct ioinf_gq *infg = pd_to_infg(pd);
+ struct ioinf *inf = infg->inf;
+
+ if (list_empty_careful(&infg->active))
+ return;
+
+ del_timer_sync(&infg->expire_timer);
+
+ spin_lock_irq(&inf->lock);
+
+ if (!list_empty(&infg->lend))
+ list_del_init(&infg->lend);
+
+ if (!list_empty(&infg->borrow))
+ list_del_init(&infg->borrow);
+
+ if (!list_empty(&infg->active)) {
+ list_del_init(&infg->active);
+ __propagate_weights(inf);
+ }
+
+ spin_unlock_irq(&inf->lock);
+}
+
+static void ioinf_pd_free(struct blkg_policy_data *pd)
+{
+ struct ioinf_gq *infg = pd_to_infg(pd);
+
+ kfree(infg);
+}
+
+static u64 ioinf_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
+ int off)
+{
+ const char *dname = blkg_dev_name(pd->blkg);
+ struct ioinf_gq *infg = pd_to_infg(pd);
+
+ if (dname && infg->weight)
+ seq_printf(sf, "%s %u\n", dname, infg->weight);
+
+ return 0;
+}
+
+static int ioinf_weight_show(struct seq_file *sf, void *v)
+{
+ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+ struct ioinf_cgrp *infcg = blkcg_to_infcg(blkcg);
+
+ seq_printf(sf, "default %u\n", infcg->dfl_weight);
+ blkcg_print_blkgs(sf, blkcg, ioinf_weight_prfill, &blkcg_policy_ioinf,
+ seq_cft(sf)->private, false);
+
+ return 0;
+}
+
+static ssize_t ioinf_weight_write(struct kernfs_open_file *of, char *buf,
+ size_t nbytes, loff_t off)
+{
+ struct blkcg *blkcg = css_to_blkcg(of_css(of));
+ struct ioinf_cgrp *infcg = blkcg_to_infcg(blkcg);
+ struct blkg_conf_ctx ctx;
+ struct ioinf_gq *infg;
+ int ret;
+ u32 v;
+
+ if (!strchr(buf, ':')) {
+ if (!sscanf(buf, "default %u", &v) && !sscanf(buf, "%u", &v))
+ return -EINVAL;
+ if (v <= 0)
+ return -EINVAL;
+
+ infcg->dfl_weight = v;
+
+ return nbytes;
+ }
+
+ ret = blkg_conf_prep(blkcg, &blkcg_policy_ioinf, buf, &ctx);
+ if (ret)
+ return ret;
+
+ infg = blkg_to_infg(ctx.blkg);
+ if (!strncmp(ctx.body, "default", 7)) {
+ v = IOINF_DFL_WEIGHT;
+ } else if (!sscanf(ctx.body, "%u", &v) ||
+ v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX) {
+ blkg_conf_finish(&ctx);
+ return -EINVAL;
+ }
+
+ infg->weight = v;
+ blkg_conf_finish(&ctx);
+ propagate_weights(infg->inf);
+ return nbytes;
+}
+
+static u64 ioinf_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
+ int off)
+{
+ const char *dname = blkg_dev_name(pd->blkg);
+ struct ioinf *inf = q_to_inf(pd->blkg->q);
+
+ if (!dname)
+ return 0;
+
+ seq_printf(sf, "%s enable=%d inflight=%u\n", dname, inf->enabled,
+ inf->inflight);
+ return 0;
+}
+
+static int ioinf_qos_show(struct seq_file *sf, void *v)
+{
+ struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+
+ blkcg_print_blkgs(sf, blkcg, ioinf_qos_prfill,
+ &blkcg_policy_ioinf, seq_cft(sf)->private, false);
+ return 0;
+}
+
+static const match_table_t qos_ctrl_tokens = {
+ { QOS_ENABLE, "enable=%u" },
+ { QOS_INFLIGHT, "inflight=%u" },
+ { NR_QOS_CTRL_PARAMS, NULL },
+};
+
+static ssize_t ioinf_qos_write(struct kernfs_open_file *of, char *input,
+ size_t nbytes, loff_t off)
+{
+ struct gendisk *disk;
+ struct ioinf *inf;
+ u32 inflight;
+ bool enable;
+ char *p;
+ int ret;
+
+ disk = blkcg_conf_get_disk(&input);
+ if (IS_ERR(disk))
+ return PTR_ERR(disk);
+
+ if (!queue_is_mq(disk->queue)) {
+ ret = -EOPNOTSUPP;
+ goto err;
+ }
+
+ inf = q_to_inf(disk->queue);
+ if (!inf) {
+ ret = blk_ioinf_init(disk->queue);
+ if (ret)
+ goto err;
+
+ inf = q_to_inf(disk->queue);
+ }
+
+ enable = inf->enabled;
+ inflight = inf->inflight;
+
+ while ((p = strsep(&input, " \t\n"))) {
+ substring_t args[MAX_OPT_ARGS];
+ s64 v;
+
+ if (!*p)
+ continue;
+
+ switch (match_token(p, qos_ctrl_tokens, args)) {
+ case QOS_ENABLE:
+ if (match_u64(&args[0], &v))
+ goto einval;
+ enable = !!v;
+ continue;
+ case QOS_INFLIGHT:
+ if (match_u64(&args[0], &v))
+ goto einval;
+ inflight = v;
+ continue;
+ default:
+ goto einval;
+ }
+ }
+
+ inf->enabled = enable;
+
+ if (inflight == 0)
+ inflight = disk->queue->nr_requests;
+
+ if (inf->inflight != inflight) {
+ inf->inflight = inflight;
+ propagate_weights(inf);
+ }
+
+ put_disk_and_module(disk);
+ return nbytes;
+
+einval:
+ ret = -EINVAL;
+err:
+ put_disk_and_module(disk);
+ return ret;
+}
+
+static struct cftype ioinf_files[] = {
+ {
+ .name = "inf.weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = ioinf_weight_show,
+ .write = ioinf_weight_write,
+ },
+ {
+ .name = "inf.qos",
+ .flags = CFTYPE_ONLY_ON_ROOT,
+ .seq_show = ioinf_qos_show,
+ .write = ioinf_qos_write,
+ },
+ {}
+};
+
+static struct cftype ioinf_legacy_files[] = {
+ {
+ .name = "inf.weight",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = ioinf_weight_show,
+ .write = ioinf_weight_write,
+ },
+ {
+ .name = "inf.qos",
+ .flags = CFTYPE_ONLY_ON_ROOT,
+ .seq_show = ioinf_qos_show,
+ .write = ioinf_qos_write,
+ },
+ {}
+};
+
+static struct blkcg_policy blkcg_policy_ioinf = {
+ .dfl_cftypes = ioinf_files,
+ .legacy_cftypes = ioinf_legacy_files,
+
+ .cpd_alloc_fn = ioinf_cpd_alloc,
+ .cpd_free_fn = ioinf_cpd_free,
+
+ .pd_alloc_fn = ioinf_pd_alloc,
+ .pd_init_fn = ioinf_pd_init,
+ .pd_offline_fn = ioinf_pd_offline,
+ .pd_free_fn = ioinf_pd_free,
+};
+
+static int __init ioinf_init(void)
+{
+ return blkcg_policy_register(&blkcg_policy_ioinf);
+}
+
+static void __exit ioinf_exit(void)
+{
+ blkcg_policy_unregister(&blkcg_policy_ioinf);
+}
+
+module_init(ioinf_init);
+module_exit(ioinf_exit);
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 31e54f84ac89..6dde3815aa4f 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -17,6 +17,7 @@ enum rq_qos_id {
RQ_QOS_WBT,
RQ_QOS_LATENCY,
RQ_QOS_COST,
+ RQ_QOS_INFLIGHT,
};
struct rq_wait {
@@ -88,6 +89,8 @@ static inline const char *rq_qos_id_to_name(enum rq_qos_id id)
return "latency";
case RQ_QOS_COST:
return "cost";
+ case RQ_QOS_INFLIGHT:
+ return "inflight";
}
return "unknown";
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 49578094b500..671358c4c19d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -208,6 +208,9 @@ struct request {
u64 io_start_time_ns;
#ifdef CONFIG_BLK_WBT
unsigned short wbt_flags;
+#endif
+#ifdef CONFIG_BLK_CGROUP_IOINFLIGHT
+ struct blkcg_gq *blkg;
#endif
/*
* rq sectors used for blk stats. It has the same value
--
2.39.2