From: Jan Kara <jack@suse.cz>
mainline inclusion
from mainline-v5.14-rc1
commit a921c655f2033dd1ce1379128efe881dda23ea37
category: bugfix
bugzilla: 185777, 185811
CVE: NA
Currently, bfq does very little in bfq_requests_merged() and handles all
the request cleanup in bfq_finish_requeue_request() called from
blk_mq_free_request(). That is safe only because blk_mq_free_request()
is called shortly after bfq_requests_merged(), while bfqd->lock is still
held. However, to fix a lock inversion between bfqd->lock and ioc->lock,
we need to call blk_mq_free_request() after dropping bfqd->lock. That
would mean that an already merged request could be seen by other
processes inside bfq queues and possibly dispatched to the device, which
is wrong. So move cleanup of the request from
bfq_finish_requeue_request() to bfq_requests_merged().
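To make the race window concrete, here is a minimal sketch of the
ordering the patch enforces (hypothetical helper names standing in for
the real bfq/blk-mq calls):

	spin_lock(&sched_lock);
	merge(rq, next);		/* 'next' is now redundant */
	remove_from_scheduler(next);	/* must happen before unlock */
	spin_unlock(&sched_lock);
	free_request(next);		/* free path may take other locks */

If remove_from_scheduler() were instead deferred into free_request(),
another CPU could observe and dispatch the stale 'next' in the window
between the unlock and the free.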
Acked-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210623093634.27879-2-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflict: in bfq_finish_requeue_request(), 4.19 does not have the
bfq_update_inject_limit branch.

Signed-off-by: zhangwensheng <zhangwensheng5@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 block/bfq-iosched.c | 41 +++++++++++++----------------------------
 1 file changed, 13 insertions(+), 28 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 7d77de9a0f5c0..5452d892480ba 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1937,7 +1937,7 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
 		*next_bfqq = bfq_init_rq(next);
 
 	if (!bfqq)
-		return;
+		goto remove;
 
 	/*
 	 * If next and rq belong to the same bfq_queue and next is older
@@ -1960,6 +1960,14 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq,
 		bfqq->next_rq = rq;
 
 	bfqg_stats_update_io_merged(bfqq_group(bfqq), next->cmd_flags);
+remove:
+	/* Merged request may be in the IO scheduler. Remove it. */
+	if (!RB_EMPTY_NODE(&next->rb_node)) {
+		bfq_remove_request(next->q, next);
+		if (next_bfqq)
+			bfqg_stats_update_io_remove(bfqq_group(next_bfqq),
+						    next->cmd_flags);
+	}
 }
 
 /* Must be called with bfqq != NULL */
@@ -4876,6 +4884,7 @@ static void bfq_finish_requeue_request(struct request *rq)
 {
 	struct bfq_queue *bfqq = RQ_BFQQ(rq);
 	struct bfq_data *bfqd;
+	unsigned long flags;
 
 	/*
 	 * rq either is not associated with any icq, or is an already
@@ -4893,36 +4902,12 @@ static void bfq_finish_requeue_request(struct request *rq)
 					     rq->io_start_time_ns,
 					     rq->cmd_flags);
 
+	spin_lock_irqsave(&bfqd->lock, flags);
 	if (likely(rq->rq_flags & RQF_STARTED)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&bfqd->lock, flags);
-
 		bfq_completed_request(bfqq, bfqd);
-		bfq_finish_requeue_request_body(bfqq);
-
-		spin_unlock_irqrestore(&bfqd->lock, flags);
-	} else {
-		/*
-		 * Request rq may be still/already in the scheduler,
-		 * in which case we need to remove it (this should
-		 * never happen in case of requeue). And we cannot
-		 * defer such a check and removal, to avoid
-		 * inconsistencies in the time interval from the end
-		 * of this function to the start of the deferred work.
-		 * This situation seems to occur only in process
-		 * context, as a consequence of a merge. In the
-		 * current version of the code, this implies that the
-		 * lock is held.
-		 */
-
-		if (!RB_EMPTY_NODE(&rq->rb_node)) {
-			bfq_remove_request(rq->q, rq);
-			bfqg_stats_update_io_remove(bfqq_group(bfqq),
-						    rq->cmd_flags);
-		}
-		bfq_finish_requeue_request_body(bfqq);
 	}
+	bfq_finish_requeue_request_body(bfqq);
+	spin_unlock_irqrestore(&bfqd->lock, flags);
 
 	/*
 	 * Reset private fields. In case of a requeue, this allows
From: Jan Kara <jack@suse.cz>
mainline inclusion
from mainline-v5.14-rc1
commit fd2ef39cc9a6b9c4c41864ac506906c52f94b06a
category: bugfix
bugzilla: 185777, 185811
CVE: NA
Lockdep complains about lock inversion between ioc->lock and bfqd->lock:
bfqd -> ioc:
 put_io_context+0x33/0x90 -> ioc->lock grabbed
 blk_mq_free_request+0x51/0x140
 blk_put_request+0xe/0x10
 blk_attempt_req_merge+0x1d/0x30
 elv_attempt_insert_merge+0x56/0xa0
 blk_mq_sched_try_insert_merge+0x4b/0x60
 bfq_insert_requests+0x9e/0x18c0 -> bfqd->lock grabbed
 blk_mq_sched_insert_requests+0xd6/0x2b0
 blk_mq_flush_plug_list+0x154/0x280
 blk_finish_plug+0x40/0x60
 ext4_writepages+0x696/0x1320
 do_writepages+0x1c/0x80
 __filemap_fdatawrite_range+0xd7/0x120
 sync_file_range+0xac/0xf0

ioc -> bfqd:
 bfq_exit_icq+0xa3/0xe0 -> bfqd->lock grabbed
 put_io_context_active+0x78/0xb0 -> ioc->lock grabbed
 exit_io_context+0x48/0x50
 do_exit+0x7e9/0xdd0
 do_group_exit+0x54/0xc0
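Condensed, the two stacks form a classic ABBA pattern (sketch only,
locking order taken from the traces above):

	/* Path 1: plug flush / request insertion */
	spin_lock(&bfqd->lock);		/* bfq_insert_requests() */
	spin_lock(&ioc->lock);		/* put_io_context() via blk_mq_free_request() */

	/* Path 2: task exit */
	spin_lock(&ioc->lock);		/* put_io_context_active() */
	spin_lock(&bfqd->lock);		/* bfq_exit_icq() */

If the two paths run concurrently, each can end up waiting for the lock
the other already holds.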
To avoid this inversion, we change blk_mq_sched_try_insert_merge() to
not free the merged request but rather leave that up to the caller,
similarly to blk_mq_sched_try_merge(). And in bfq_insert_requests() we
make sure to free all the merged requests after dropping bfqd->lock.
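The resulting calling convention in bfq_insert_request(), condensed from
the diff below:

	LIST_HEAD(free);

	spin_lock_irq(&bfqd->lock);
	if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
		spin_unlock_irq(&bfqd->lock);
		blk_mq_free_requests(&free);	/* may take ioc->lock now */
		return;
	}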
Fixes: aee69d78dec0 ("block, bfq: introduce the BFQ-v0 I/O scheduler as an extra scheduler")
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Acked-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20210623093634.27879-3-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflict:
1. mainline changed include/linux/elevator.h;
2. mainline changed block/mq-deadline-main.c, which corresponds to
   block/mq-deadline.c here;
3. in block/blk-merge.c, blk_attempt_req_merge() in 4.19 differs from
   mainline;
4. in 4.19, elv_attempt_insert_merge() is called from two places, but
   from only one in mainline.

Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 block/bfq-iosched.c      |  6 ++++--
 block/blk-merge.c        | 16 +++++++---------
 block/blk-mq-sched.c     |  5 +++--
 block/blk-mq-sched.h     |  3 ++-
 block/blk-mq.h           | 10 ++++++++++
 block/blk.h              |  4 ++--
 block/elevator.c         | 16 ++++++++++++----
 block/mq-deadline.c      |  5 ++++-
 include/linux/elevator.h |  3 ++-
 9 files changed, 46 insertions(+), 22 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 5452d892480ba..4686ca1e93afe 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1854,9 +1854,9 @@ static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio)
 
 	ret = blk_mq_sched_try_merge(q, bio, &free);
 
+	spin_unlock_irq(&bfqd->lock);
 	if (free)
 		blk_mq_free_request(free);
-	spin_unlock_irq(&bfqd->lock);
 
 	return ret;
 }
@@ -4669,10 +4669,12 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	struct bfq_queue *bfqq;
 	bool idle_timer_disabled = false;
 	unsigned int cmd_flags;
+	LIST_HEAD(free);
 
 	spin_lock_irq(&bfqd->lock);
-	if (blk_mq_sched_try_insert_merge(q, rq)) {
+	if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
 		spin_unlock_irq(&bfqd->lock);
+		blk_mq_free_requests(&free);
 		return;
 	}
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 89a9bec91b8de..4c17c1031e34f 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -822,23 +822,21 @@ struct request *attempt_front_merge(struct request_queue *q, struct request *rq)
 	return NULL;
 }
 
-int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+/*
+ * Try to merge 'next' into 'rq'. Return true if the merge happened, false
+ * otherwise. The caller is responsible for freeing 'next' if the merge
+ * happened.
+ */
+bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 			  struct request *next)
 {
 	struct elevator_queue *e = q->elevator;
-	struct request *free;
 
 	if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
 		if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
 			return 0;
 
-	free = attempt_merge(q, rq, next);
-	if (free) {
-		__blk_put_request(q, free);
-		return 1;
-	}
-
-	return 0;
+	return attempt_merge(q, rq, next);
 }
 
 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 3521eca1b2984..09c1e140195c9 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -359,9 +359,10 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 	return ret;
 }
 
-bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
+bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
+				   struct list_head *free)
 {
-	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq);
+	return rq_mergeable(rq) && elv_attempt_insert_merge(q, rq, free);
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
 
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 011445f7852bf..c81a26e7b6209 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -14,7 +14,8 @@ void blk_mq_sched_request_inserted(struct request *rq);
 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
 				struct request **merged_request);
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio);
-bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
+bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq,
+				   struct list_head *free);
 void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx);
 void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index bbb0c1d8849b4..2e598675953fb 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -220,4 +220,14 @@ static inline void blk_mq_clear_mq_map(struct blk_mq_tag_set *set)
 		set->mq_map[cpu] = 0;
 }
 
+static inline void blk_mq_free_requests(struct list_head *list)
+{
+	while (!list_empty(list)) {
+		struct request *rq = list_entry_rq(list->next);
+
+		list_del_init(&rq->queuelist);
+		blk_mq_free_request(rq);
+	}
+}
+
 #endif
diff --git a/block/blk.h b/block/blk.h
index c2fa239ca78f2..e496e26630f71 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -280,8 +280,8 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 		struct bio *bio);
 struct request *attempt_back_merge(struct request_queue *q, struct request *rq);
 struct request *attempt_front_merge(struct request_queue *q, struct request *rq);
-int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
-			  struct request *next);
+bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
+			   struct request *next);
 void blk_recalc_rq_segments(struct request *rq);
 void blk_rq_set_mixed_merge(struct request *rq);
 bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
diff --git a/block/elevator.c b/block/elevator.c
index ddbcd36616a8d..34ff47fd913e3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -469,9 +469,11 @@ enum elv_merge elv_merge(struct request_queue *q, struct request **req,
  * we can append 'rq' to an existing request, so we can throw 'rq' away
  * afterwards.
  *
- * Returns true if we merged, false otherwise
+ * Returns true if we merged, false otherwise. 'free' will contain all
+ * requests that need to be freed.
  */
-bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
+bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq,
+			      struct list_head *free)
 {
 	struct request *__rq;
 	bool ret;
@@ -482,8 +484,10 @@ bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
 	/*
 	 * First try one-hit cache.
 	 */
-	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
+	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) {
+		list_add(&rq->queuelist, free);
 		return true;
+	}
 
 	if (blk_queue_noxmerges(q))
 		return false;
@@ -497,6 +501,7 @@ bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
 		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
 			break;
 
+		list_add(&rq->queuelist, free);
 		/* The merged request could be merged with others, try again */
 		ret = true;
 		rq = __rq;
@@ -618,6 +623,7 @@ void elv_drain_elevator(struct request_queue *q)
 
 void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 {
+	LIST_HEAD(free);
 	trace_block_rq_insert(q, rq);
 
 	blk_pm_add_request(q, rq);
@@ -665,8 +671,10 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 		 * queue already, we are done - rq has now been freed,
 		 * so no need to do anything further.
 		 */
-		if (elv_attempt_insert_merge(q, rq))
+		if (elv_attempt_insert_merge(q, rq, &free)) {
+			blk_mq_free_requests(&free);
 			break;
+		}
 		/* fall through */
 	case ELEVATOR_INSERT_SORT:
 		BUG_ON(blk_rq_is_passthrough(rq));
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 69094d6410623..7ad8200506753 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -484,6 +484,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	struct request_queue *q = hctx->queue;
 	struct deadline_data *dd = q->elevator->elevator_data;
 	const int data_dir = rq_data_dir(rq);
+	LIST_HEAD(free);
 
 	/*
 	 * This may be a requeue of a write request that has locked its
@@ -491,8 +492,10 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	 * target zone.
 	 */
 	blk_req_zone_write_unlock(rq);
 
-	if (blk_mq_sched_try_insert_merge(q, rq))
+	if (blk_mq_sched_try_insert_merge(q, rq, &free)) {
+		blk_mq_free_requests(&free);
 		return;
+	}
 
 	blk_mq_sched_request_inserted(rq);
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 8524b803528b3..663ce1780c5d4 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -209,7 +209,8 @@ extern void elv_merged_request(struct request_queue *, struct request *,
 		enum elv_merge);
 extern void elv_bio_merged(struct request_queue *q, struct request *,
 			struct bio *);
-extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
+extern bool elv_attempt_insert_merge(struct request_queue *, struct request *,
+				     struct list_head *);
 extern void elv_requeue_request(struct request_queue *, struct request *);
 extern struct request *elv_former_request(struct request_queue *, struct request *);
 extern struct request *elv_latter_request(struct request_queue *, struct request *);