On 2022/9/16 16:33, Yu Kuai wrote:
From: Yu Kuai <yukuai3@huawei.com>
hulk inclusion
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/I5QK5M
CVE: NA
On some architectures, memory access latency across nodes is much worse than on the local node. As a consequence, io performance is rather bad when users issue io from multiple nodes and lock contention exists in the driver.

This patch dispatches io asynchronously to a dedicated kthread that is bound to cpus belonging to the same node, so that cross-node memory access in the driver can be avoided.
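To make the handoff described above concrete, here is a small userspace analogy I put together (plain C with pthreads, nothing taken from the patch itself; node_worker/submit/work_item are made-up names): submitters only touch a shared list briefly and signal a worker pinned to a cpu of the target node, and that worker then does the actual processing with node-local memory.

/*
 * Userspace sketch only: one worker pinned to a cpu of the "home" node
 * drains a shared list that producers on other cpus only touch briefly.
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

struct work_item {
        struct work_item *next;
        int payload;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static struct work_item *head;
static int done;

/* worker bound to one cpu; does the "real" work with node-local memory */
static void *node_worker(void *arg)
{
        int cpu = (int)(long)arg;
        cpu_set_t set;

        CPU_ZERO(&set);
        CPU_SET(cpu, &set);
        pthread_setaffinity_np(pthread_self(), sizeof(set), &set);

        for (;;) {
                struct work_item *batch;
                int stop;

                pthread_mutex_lock(&lock);
                while (!head && !done)
                        pthread_cond_wait(&cond, &lock);
                batch = head;
                head = NULL;
                stop = done;
                pthread_mutex_unlock(&lock);

                while (batch) {
                        struct work_item *item = batch;

                        batch = item->next;
                        printf("handled item %d on cpu %d\n", item->payload, cpu);
                        free(item);
                }
                if (stop)
                        return NULL;
        }
}

/* submitter side: queue the item, wake the worker and return immediately */
static void submit(int payload)
{
        struct work_item *item = malloc(sizeof(*item));

        item->payload = payload;
        pthread_mutex_lock(&lock);
        item->next = head;
        head = item;
        pthread_mutex_unlock(&lock);
        pthread_cond_signal(&cond);
}

int main(void)
{
        pthread_t worker;
        int i;

        pthread_create(&worker, NULL, node_worker, (void *)0L);
        for (i = 0; i < 8; i++)
                submit(i);

        pthread_mutex_lock(&lock);
        done = 1;
        pthread_mutex_unlock(&lock);
        pthread_cond_broadcast(&cond);
        pthread_join(worker, NULL);
        return 0;
}

In the patch the same split shows up as blk_queue_make_request_async() on the submitter side and bio_dispatch_work() on the per-cpu kthread side.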
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-core.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 192 insertions(+), 1 deletion(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index fc81dff50a34..69bea96fab90 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -85,6 +85,29 @@ struct kmem_cache *blk_requestq_cachep;
  */
 static struct workqueue_struct *kblockd_workqueue;
+/* prevent false sharing */
+#define BIO_ASYNC_LIST_SHIFT 2
+#define BIO_ASYNC_LOCK_SHIFT 4
+#define bio_async_list(ctl, i) (&ctl->list[i << BIO_ASYNC_LIST_SHIFT])
+#define bio_async_lock(ctl, i) (&ctl->lock[i << BIO_ASYNC_LOCK_SHIFT])
+
+struct bio_dispatch_async_ctl {
+        /*
+         * Vector size is nr_cpu_ids, list stores bio dispatched from other cpu,
+         * such bio will be dispatched asynchronously to the cpu this structure
+         * is serviced.
+         */
+        struct bio_list *list;
+        /* list is protected by lock */
+        spinlock_t *lock;
+        /* kthread to dispatch bio asynchronously */
+        struct task_struct *thread;
+        /* thread will wait here if there are no bios in list */
+        wait_queue_head_t wait;
+};
+
+static struct bio_dispatch_async_ctl __percpu **bio_dispatch_async_ctl;
+
 /**
  * blk_queue_flag_set - atomically set a queue flag
  * @flag: flag to be set
@@ -2295,6 +2318,133 @@ static inline int blk_partition_remap(struct bio *bio)
         return ret;
 }
 
+static int collect_bio(struct bio_dispatch_async_ctl *ctl,
+                       struct bio_list *list)
+{
+        int count = 0;
+        int cpu;
+        struct bio *bio;
+
+        for_each_possible_cpu(cpu) {
+                spin_lock_irq(bio_async_lock(ctl, cpu));
+                while ((bio = bio_list_pop(bio_async_list(ctl, cpu)))) {
+                        bio_list_add(list, bio);
+                        count++;
+                }
+                spin_unlock_irq(bio_async_lock(ctl, cpu));
+        }
+
+        return count;
+}
+
+#define BIO_DISPATCH_MAX_LOOP 16
+static int bio_dispatch_work(void *data)
+{
+        int loop_count = 0;
+        int cpu = get_cpu();

As far as I can tell, get_cpu() disables preemption, so this thread keeps
preemption disabled for as long as it runs. Is that okay from a scheduling
point of view?
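One possible shape, as a sketch only (it assumes the thread really does stay bound to its cpu via kthread_create_on_cpu() and kthread_set_per_cpu(), so the cpu number is stable without disabling preemption):

        /*
         * Assumes the thread never migrates because it was created with
         * kthread_create_on_cpu() and marked per-cpu; then the cpu number
         * is stable and preemption does not have to stay disabled for the
         * whole lifetime of the thread (the put_cpu() at the end of the
         * function would go away as well).
         */
        int cpu = raw_smp_processor_id();
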
+        struct bio_dispatch_async_ctl *ctl =
+                *per_cpu_ptr(bio_dispatch_async_ctl, cpu);
+
+        for (;; loop_count++) {
+                struct bio_list bio_list_on_stack;
+                struct blk_plug plug;
+                struct bio *bio;
+                int count;
+
+                bio_list_init(&bio_list_on_stack);
+                count = collect_bio(ctl, &bio_list_on_stack);
+
+                if (!count) {
+                        DEFINE_WAIT(wait);
+
+                        for (;;) {
+                                prepare_to_wait(&ctl->wait, &wait,
+                                                TASK_INTERRUPTIBLE);
+                                count = collect_bio(ctl, &bio_list_on_stack);
+                                if (count)
+                                        break;
+                                schedule();
+                                loop_count = 0;
+                        }
+                        finish_wait(&ctl->wait, &wait);
+                }
+
+                blk_start_plug(&plug);
+                while ((bio = bio_list_pop(&bio_list_on_stack))) {
+                        struct request_queue *q = bio->bi_disk->queue;
+
+                        q->make_request_fn(q, bio);
+                }
+                blk_finish_plug(&plug);
+
+                /* prevent soft lockup */
+                if (loop_count >= BIO_DISPATCH_MAX_LOOP) {
+                        loop_count = 0;
+                        cond_resched();
+                }
+        }
+
+        put_cpu();
+        return 0;
+}
+
+static int get_dispatch_cpu(struct request_queue *q, int cpu)
+{
+        int *last_dispatch_cpu = per_cpu_ptr(q->last_dispatch_cpu, cpu);
+
+        cpu = cpumask_next(*last_dispatch_cpu, &q->dispatch_async_cpus);
+        if (cpu >= nr_cpu_ids)
+                cpu = cpumask_first(&q->dispatch_async_cpus);
+
+        *last_dispatch_cpu = cpu;
+
+        return cpu;
+}
+
+static void blk_queue_make_request_async(struct bio *bio)
+{
+        struct request_queue *q = bio->bi_disk->queue;
+        int cpu = get_cpu();
+        int dispatch_cpu = get_dispatch_cpu(q, cpu);
+        struct bio_dispatch_async_ctl *ctl =
+                *per_cpu_ptr(bio_dispatch_async_ctl, dispatch_cpu);
+
+        spin_lock_irq(bio_async_lock(ctl, cpu));
+        bio_list_add(bio_async_list(ctl, cpu), bio);
+        spin_unlock_irq(bio_async_lock(ctl, cpu));
+
+        if (wq_has_sleeper(&ctl->wait))
+                wake_up(&ctl->wait);

If the worker thread is not waiting here (or has just finished waiting), won't
this wakeup simply be skipped, so that nobody ever wakes it up?
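For what it's worth, the ordering this code seems to rely on is the usual prepare_to_wait()/wake_up() pattern; a rough sketch of the two sides (my own summary, not taken from the patch), and whether the barriers here are actually sufficient is exactly the question:

/*
 * submitter                              worker
 * ---------                              ------
 * spin_lock_irq(bio_async_lock())        prepare_to_wait()  <- on waitqueue
 * bio_list_add()                         collect_bio()      <- re-check lists
 * spin_unlock_irq(bio_async_lock())      if (count) break;
 * wq_has_sleeper()? -> wake_up()         schedule()
 *
 * The intent appears to be that either the worker's re-check in collect_bio()
 * already sees the new bio, or the submitter's wq_has_sleeper() already sees
 * the worker on the waitqueue.
 */
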
+
+        put_cpu();
+}
+
+static blk_qc_t blk_queue_do_make_request(struct bio *bio)
+{
+        struct request_queue *q = bio->bi_disk->queue;
+        int cpu = get_cpu();
+
+        put_cpu();
+
+        /*
+         * Don't dispatch bio asynchronously in following cases:
+         *
+         * 1) QUEUE_FLAG_DISPATCH_ASYNC is not set;
+         * 2) current cpu is the target cpu;
+         * 3) bio is flagged no wait;
+         * 4) TODO: return value of submit_bio() will be used in io polling.
+         */
+        if (!test_bit(QUEUE_FLAG_DISPATCH_ASYNC, &q->queue_flags) ||
+            cpumask_test_cpu(cpu, &q->dispatch_async_cpus) ||
+            bio->bi_opf & REQ_NOWAIT)
+                return q->make_request_fn(q, bio);
+
+        /* return value is not concerned */
+        blk_queue_make_request_async(bio);
+        return BLK_QC_T_NONE;
+}
+
 static noinline_for_stack bool generic_make_request_checks(struct bio *bio)
 {
@@ -2507,7 +2657,7 @@ blk_qc_t generic_make_request(struct bio *bio)
                         /* Create a fresh bio_list for all subordinate requests */
                         bio_list_on_stack[1] = bio_list_on_stack[0];
                         bio_list_init(&bio_list_on_stack[0]);
-                        ret = q->make_request_fn(q, bio);
+                        ret = blk_queue_do_make_request(bio);
 
                         /* sort new bios into those for a lower level
                          * and those for the same level
@@ -4026,6 +4176,45 @@ void blk_set_runtime_active(struct request_queue *q)
 EXPORT_SYMBOL(blk_set_runtime_active);
 #endif
 
+static void init_blk_queue_async_dispatch(void)
+{
+        int cpu;
+
+        bio_dispatch_async_ctl = alloc_percpu(struct bio_dispatch_async_ctl *);
+        if (!bio_dispatch_async_ctl)
+                panic("Failed to alloc bio_dispatch_async_ctl\n");
+
+        for_each_possible_cpu(cpu) {
+                int i;
+                struct bio_dispatch_async_ctl *ctl =
+                        kmalloc(sizeof(struct bio_dispatch_async_ctl),
+                                GFP_KERNEL | __GFP_NOFAIL);
+
+                *per_cpu_ptr(bio_dispatch_async_ctl, cpu) = ctl;
+                ctl->thread =
+                        kthread_create_on_cpu(bio_dispatch_work, NULL, cpu,
+                                              "bio_dispatch_work_%u");
+                if (IS_ERR_OR_NULL(ctl->thread))
+                        panic("Failed to create bio dispatch thread\n");
+
+                ctl->list = kmalloc_array(nr_cpu_ids,
+                                sizeof(struct bio_list) << BIO_ASYNC_LIST_SHIFT,
+                                GFP_KERNEL | __GFP_NOFAIL);
+                ctl->lock = kmalloc_array(nr_cpu_ids,
+                                sizeof(spinlock_t) << BIO_ASYNC_LOCK_SHIFT,
+                                GFP_KERNEL | __GFP_NOFAIL);
+                for (i = 0; i < nr_cpu_ids; ++i) {
+                        bio_list_init(bio_async_list(ctl, i));
+                        spin_lock_init(bio_async_lock(ctl, i));
+                }
+
+                kthread_set_per_cpu(ctl->thread, cpu);
+                wake_up_process(ctl->thread);
+                init_waitqueue_head(&ctl->wait);
+        }
+}
+
 int __init blk_dev_init(void)
 {
         BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
@@ -4047,6 +4236,8 @@ int __init blk_dev_init(void)
                         sizeof(struct request_queue_wrapper), 0, SLAB_PANIC, NULL);
 
+        init_blk_queue_async_dispatch();
+
 #ifdef CONFIG_DEBUG_FS
         blk_debugfs_root = debugfs_create_dir("block", NULL);
 #endif