A VM in the cloud environment may use a virutal disk as the backend storage, and there are usually filesystems on the virtual block device. When backend storage is temporarily down, any I/O issued to the virtual block device will cause an error. For example, an error occurred in ext4 filesystem would make the filesystem readonly. In production environment, a cloud backend storage can be soon recovered. For example, an IP-SAN may be down due to network failure and will be online soon after network is recovered. However, the error in the filesystem may not be recovered unless a device reattach or system restart. Thus an I/O retry mechanism is in need to implement a self-healing system.
This patch series propose to extend the werror=/rerror= mechanism to add a 'retry' feature. It can automatically retry failed I/O requests on error without sending error back to guest, and guest can get back running smoothly when I/O is recovred.
Jiahui Cen (9): qapi/block-core: Add retry option for error action block-backend: Introduce retry timer block-backend: Add device specific retry callback block-backend: Enable retry action on errors block-backend: Add timeout support for retry block: Add error retry param setting virtio_blk: Add support for retry on errors scsi-bus: Refactor the code that retries requests scsi-disk: Add support for retry on errors
Sergio Lopez (2): virtio-blk: Refactor the code that processes queued requests virtio-blk: On restart, process queued requests in the proper context
block/block-backend.c | 66 ++++++++++++++++++++ blockdev.c | 52 +++++++++++++++ hw/block/block.c | 10 +++ hw/block/dataplane/virtio-blk.c | 8 +++ hw/block/virtio-blk.c | 51 +++++++++++---- hw/scsi/scsi-bus.c | 16 +++-- hw/scsi/scsi-disk.c | 16 +++++ include/hw/block/block.h | 7 ++- include/hw/scsi/scsi.h | 1 + include/hw/virtio/virtio-blk.h | 1 + include/sysemu/block-backend.h | 10 +++ qapi/block-core.json | 4 +- 12 files changed, 222 insertions(+), 20 deletions(-)
Add a new error action 'retry' to support retry on errors.
Signed-off-by: Jiahui Cen cenjiahui@huawei.com Signed-off-by: Ying Fang fangying1@huawei.com --- blockdev.c | 2 ++ qapi/block-core.json | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/blockdev.c b/blockdev.c index 4d141e9a1f..0f49fd290e 100644 --- a/blockdev.c +++ b/blockdev.c @@ -319,6 +319,8 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp) return BLOCKDEV_ON_ERROR_STOP; } else if (!strcmp(buf, "report")) { return BLOCKDEV_ON_ERROR_REPORT; + } else if (!strcmp(buf, "retry")) { + return BLOCKDEV_ON_ERROR_RETRY; } else { error_setg(errp, "'%s' invalid %s error action", buf, is_read ? "read" : "write"); diff --git a/qapi/block-core.json b/qapi/block-core.json index 0d43d4f37c..db24f0dfe5 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1113,7 +1113,7 @@ # Since: 1.3 ## { 'enum': 'BlockdevOnError', - 'data': ['report', 'ignore', 'enospc', 'stop', 'auto'] } + 'data': ['report', 'ignore', 'enospc', 'stop', 'auto', 'retry'] }
## # @MirrorSyncMode: @@ -4894,7 +4894,7 @@ # Since: 2.1 ## { 'enum': 'BlockErrorAction', - 'data': [ 'ignore', 'report', 'stop' ] } + 'data': [ 'ignore', 'report', 'stop', 'retry' ] }
##
Add a timer to regularly trigger retry on errors.
Signed-off-by: Jiahui Cen cenjiahui@huawei.com Signed-off-by: Ying Fang fangying1@huawei.com --- block/block-backend.c | 21 ++++++++++++++++++++ 1 file changed, 21 insertions(+)
diff --git a/block/block-backend.c b/block/block-backend.c index 0056b526b8..a9a43b1440 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -31,6 +31,9 @@
static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
+/* block backend default retry interval */ +#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 + typedef struct BlockBackendAioNotifier { void (*attached_aio_context)(AioContext *new_context, void *opaque); void (*detach_aio_context)(void *opaque); @@ -88,6 +91,15 @@ struct BlockBackend { * Accessed with atomic ops. */ unsigned int in_flight; + + /* Timer for retry on errors. */ + QEMUTimer *retry_timer; + /* Interval in ms to trigger next retry. */ + int64_t retry_interval; + /* Start time of the first error. Used to check timeout. */ + int64_t retry_start_time; + /* Retry timeout. 0 represents infinite retry. */ + int64_t retry_timeout; };
typedef struct BlockBackendAIOCB { @@ -337,6 +349,11 @@ BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm) blk->on_read_error = BLOCKDEV_ON_ERROR_REPORT; blk->on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
+ blk->retry_timer = NULL; + blk->retry_interval = BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL; + blk->retry_start_time = 0; + blk->retry_timeout = 0; + block_acct_init(&blk->stats);
notifier_list_init(&blk->remove_bs_notifiers); @@ -423,6 +440,10 @@ static void blk_delete(BlockBackend *blk) QTAILQ_REMOVE(&block_backends, blk, link); drive_info_del(blk->legacy_dinfo); block_acct_cleanup(&blk->stats); + if (blk->retry_timer) { + timer_del(blk->retry_timer); + timer_free(blk->retry_timer); + } g_free(blk); }
Add retry_request_cb in BlockDevOps to do device specific retry action. Backend's timer would be registered only when the backend is set 'retry' on errors and the device supports retry action.
Signed-off-by: Jiahui Cen cenjiahui@huawei.com Signed-off-by: Ying Fang fangying1@huawei.com --- block/block-backend.c | 8 ++++++++ include/sysemu/block-backend.h | 4 ++++ 2 files changed, 12 insertions(+)
diff --git a/block/block-backend.c b/block/block-backend.c index a9a43b1440..b8f535a5fd 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -958,6 +958,14 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, blk->dev_ops = ops; blk->dev_opaque = opaque;
+ if ((blk->on_read_error == BLOCKDEV_ON_ERROR_RETRY || + blk->on_write_error == BLOCKDEV_ON_ERROR_RETRY) && + ops->retry_request_cb) { + blk->retry_timer = aio_timer_new(blk->ctx, QEMU_CLOCK_REALTIME, + SCALE_MS, ops->retry_request_cb, + opaque); + } + /* Are we currently quiesced? Should we enforce this right now? */ if (blk->quiesce_counter && ops->drained_begin) { ops->drained_begin(opaque); diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 733c4957eb..b58dc6bde8 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -66,6 +66,10 @@ typedef struct BlockDevOps { * Runs when the backend's last drain request ends. */ void (*drained_end)(void *opaque); + /* + * Runs when retrying failed requests. + */ + void (*retry_request_cb)(void *opaque); } BlockDevOps;
/* This struct is embedded in (the private) BlockBackend struct and contains
Enable retry action when backend's retry timer is available. It would trigger the timer to do device specific retry action.
Signed-off-by: Jiahui Cen cenjiahui@huawei.com Signed-off-by: Ying Fang fangying1@huawei.com --- block/block-backend.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/block/block-backend.c b/block/block-backend.c index b8f535a5fd..11f8ff4301 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1660,6 +1660,9 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, return BLOCK_ERROR_ACTION_REPORT; case BLOCKDEV_ON_ERROR_IGNORE: return BLOCK_ERROR_ACTION_IGNORE; + case BLOCKDEV_ON_ERROR_RETRY: + return (blk->retry_timer) ? + BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; case BLOCKDEV_ON_ERROR_AUTO: default: abort(); @@ -1707,6 +1710,10 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, qemu_system_vmstop_request_prepare(); send_qmp_error_event(blk, action, is_read, error); qemu_system_vmstop_request(RUN_STATE_IO_ERROR); + } else if (action == BLOCK_ERROR_ACTION_RETRY) { + timer_mod(blk->retry_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + + blk->retry_interval); + send_qmp_error_event(blk, action, is_read, error); } else { send_qmp_error_event(blk, action, is_read, error); }
Retry should only be triggered when timeout is not reached, so let's check timeout before retry. Device should also reset retry_start_time after successful retry.
Signed-off-by: Jiahui Cen cenjiahui@huawei.com Signed-off-by: Ying Fang fangying1@huawei.com --- block/block-backend.c | 25 +++++++++++++++++++- include/sysemu/block-backend.h | 1 + 2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/block/block-backend.c b/block/block-backend.c index 11f8ff4301..0fe99ffe52 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1633,6 +1633,29 @@ void blk_drain_all(void) bdrv_drain_all_end(); }
+static bool blk_error_retry_timeout(BlockBackend *blk) +{ + /* No timeout set, infinite retries. */ + if (!blk->retry_timeout) { + return false; + } + + /* The first time an error occurs. */ + if (!blk->retry_start_time) { + blk->retry_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + return false; + } + + return qemu_clock_get_ms(QEMU_CLOCK_REALTIME) > (blk->retry_start_time + + blk->retry_timeout); +} + +void blk_error_retry_reset_timeout(BlockBackend *blk) +{ + if (blk->retry_timer && blk->retry_start_time) + blk->retry_start_time = 0; +} + void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error) { @@ -1661,7 +1684,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, case BLOCKDEV_ON_ERROR_IGNORE: return BLOCK_ERROR_ACTION_IGNORE; case BLOCKDEV_ON_ERROR_RETRY: - return (blk->retry_timer) ? + return (blk->retry_timer && !blk_error_retry_timeout(blk)) ? BLOCK_ERROR_ACTION_RETRY : BLOCK_ERROR_ACTION_REPORT; case BLOCKDEV_ON_ERROR_AUTO: default: diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index b58dc6bde8..58dde446ca 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -184,6 +184,7 @@ void blk_inc_in_flight(BlockBackend *blk); void blk_dec_in_flight(BlockBackend *blk); void blk_drain(BlockBackend *blk); void blk_drain_all(void); +void blk_error_retry_reset_timeout(BlockBackend *blk); void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error); BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
Add "retry_interval" and "retry_timeout" parameter for drive and device option. These parameter are valid only when werror/rerror=retry.
eg. --drive file=image,rerror=retry,retry_interval=1000,retry_timeout=5000
Signed-off-by: Jiahui Cen cenjiahui@huawei.com Signed-off-by: Ying Fang fangying1@huawei.com --- block/block-backend.c | 13 +++-- blockdev.c | 50 ++++++++++++++++++++ hw/block/block.c | 10 ++++ include/hw/block/block.h | 7 ++- include/sysemu/block-backend.h | 5 ++ 5 files changed, 81 insertions(+), 4 deletions(-)
diff --git a/block/block-backend.c b/block/block-backend.c index 0fe99ffe52..2d812e2254 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -31,9 +31,6 @@
static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
-/* block backend default retry interval */ -#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 - typedef struct BlockBackendAioNotifier { void (*attached_aio_context)(AioContext *new_context, void *opaque); void (*detach_aio_context)(void *opaque); @@ -1633,6 +1630,16 @@ void blk_drain_all(void) bdrv_drain_all_end(); }
+void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval) +{ + blk->retry_interval = interval; +} + +void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout) +{ + blk->retry_timeout = timeout; +} + static bool blk_error_retry_timeout(BlockBackend *blk) { /* No timeout set, infinite retries. */ diff --git a/blockdev.c b/blockdev.c index 0f49fd290e..99c92b96d2 100644 --- a/blockdev.c +++ b/blockdev.c @@ -470,6 +470,7 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, const char *buf; int bdrv_flags = 0; int on_read_error, on_write_error; + int64_t retry_interval, retry_timeout; bool account_invalid, account_failed; bool writethrough, read_only; BlockBackend *blk; @@ -565,6 +566,10 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts, } }
+ retry_interval = qemu_opt_get_number(opts, "retry_interval", + BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL); + retry_timeout = qemu_opt_get_number(opts, "retry_timeout", 0); + if (snapshot) { bdrv_flags |= BDRV_O_SNAPSHOT; } @@ -629,6 +634,11 @@ static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
blk_set_enable_write_cache(blk, !writethrough); blk_set_on_error(blk, on_read_error, on_write_error); + if (on_read_error == BLOCKDEV_ON_ERROR_RETRY || + on_write_error == BLOCKDEV_ON_ERROR_RETRY) { + blk_set_on_error_retry_interval(blk, retry_interval); + blk_set_on_error_retry_timeout(blk, retry_timeout); + }
if (!monitor_add_blk(blk, id, errp)) { blk_unref(blk); @@ -754,6 +764,14 @@ QemuOptsList qemu_legacy_drive_opts = { .name = "werror", .type = QEMU_OPT_STRING, .help = "write error action", + },{ + .name = "retry_interval", + .type = QEMU_OPT_NUMBER, + .help = "interval for retry action in millisecond", + },{ + .name = "retry_timeout", + .type = QEMU_OPT_NUMBER, + .help = "timeout for retry action in millisecond", },{ .name = "copy-on-read", .type = QEMU_OPT_BOOL, @@ -776,6 +794,7 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, BlockInterfaceType type; int max_devs, bus_id, unit_id, index; const char *werror, *rerror; + int64_t retry_interval, retry_timeout; bool read_only = false; bool copy_on_read; const char *filename; @@ -992,6 +1011,29 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type, qdict_put_str(bs_opts, "rerror", rerror); }
+ if (qemu_opt_find(legacy_opts, "retry_interval")) { + if ((werror == NULL || strcmp(werror, "retry")) && + (rerror == NULL || strcmp(rerror, "retry"))) { + error_setg(errp, "retry_interval is only supported " + "by werror/rerror=retry"); + goto fail; + } + retry_interval = qemu_opt_get_number(legacy_opts, "retry_interval", + BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL); + qdict_put_int(bs_opts, "retry_interval", retry_interval); + } + + if (qemu_opt_find(legacy_opts, "retry_timeout")) { + if ((werror == NULL || strcmp(werror, "retry")) && + (rerror == NULL || strcmp(rerror, "retry"))) { + error_setg(errp, "retry_timeout is only supported " + "by werror/rerror=retry"); + goto fail; + } + retry_timeout = qemu_opt_get_number(legacy_opts, "retry_timeout", 0); + qdict_put_int(bs_opts, "retry_timeout", retry_timeout); + } + /* Actual block device init: Functionality shared with blockdev-add */ blk = blockdev_init(filename, bs_opts, &local_err); bs_opts = NULL; @@ -4593,6 +4635,14 @@ QemuOptsList qemu_common_drive_opts = { .name = "werror", .type = QEMU_OPT_STRING, .help = "write error action", + },{ + .name = "retry_interval", + .type = QEMU_OPT_NUMBER, + .help = "interval for retry action in millisecond", + },{ + .name = "retry_timeout", + .type = QEMU_OPT_NUMBER, + .help = "timeout for retry action in millisecond", },{ .name = BDRV_OPT_READ_ONLY, .type = QEMU_OPT_BOOL, diff --git a/hw/block/block.c b/hw/block/block.c index bf56c7612b..56141940ca 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -134,6 +134,16 @@ bool blkconf_apply_backend_options(BlockConf *conf, bool readonly, blk_set_enable_write_cache(blk, wce); blk_set_on_error(blk, rerror, werror);
+ if (rerror == BLOCKDEV_ON_ERROR_RETRY || + werror == BLOCKDEV_ON_ERROR_RETRY) { + if (conf->retry_interval >= 0) { + blk_set_on_error_retry_interval(blk, conf->retry_interval); + } + if (conf->retry_timeout >= 0) { + blk_set_on_error_retry_timeout(blk, conf->retry_timeout); + } + } + return true; }
diff --git a/include/hw/block/block.h b/include/hw/block/block.h index 607539057a..d12603aabd 100644 --- a/include/hw/block/block.h +++ b/include/hw/block/block.h @@ -30,6 +30,8 @@ typedef struct BlockConf { bool share_rw; BlockdevOnError rerror; BlockdevOnError werror; + int64_t retry_interval; + int64_t retry_timeout; } BlockConf;
static inline unsigned int get_physical_block_exp(BlockConf *conf) @@ -71,7 +73,10 @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) DEFINE_PROP_BLOCKDEV_ON_ERROR("rerror", _state, _conf.rerror, \ BLOCKDEV_ON_ERROR_AUTO), \ DEFINE_PROP_BLOCKDEV_ON_ERROR("werror", _state, _conf.werror, \ - BLOCKDEV_ON_ERROR_AUTO) + BLOCKDEV_ON_ERROR_AUTO), \ + DEFINE_PROP_INT64("retry_interval", _state, _conf.retry_interval, \ + -1), \ + DEFINE_PROP_INT64("retry_timeout", _state, _conf.retry_timeout, -1)
/* Backend access helpers */
diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 58dde446ca..dc10e507ae 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -25,6 +25,9 @@ */ #include "block/block.h"
+/* block backend default retry interval */ +#define BLOCK_BACKEND_DEFAULT_RETRY_INTERVAL 1000 + /* Callbacks for block device models */ typedef struct BlockDevOps { /* @@ -184,6 +187,8 @@ void blk_inc_in_flight(BlockBackend *blk); void blk_dec_in_flight(BlockBackend *blk); void blk_drain(BlockBackend *blk); void blk_drain_all(void); +void blk_set_on_error_retry_interval(BlockBackend *blk, int64_t interval); +void blk_set_on_error_retry_timeout(BlockBackend *blk, int64_t timeout); void blk_error_retry_reset_timeout(BlockBackend *blk); void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, BlockdevOnError on_write_error);
From: Sergio Lopez slp@redhat.com
Move the code that processes queued requests from virtio_blk_dma_restart_bh() to its own, non-static, function. This will allow us to call it from the virtio_blk_data_plane_start() in a future patch.
Signed-off-by: Sergio Lopez slp@redhat.com Message-Id: 20200603093240.40489-2-slp@redhat.com Signed-off-by: Kevin Wolf kwolf@redhat.com --- hw/block/virtio-blk.c | 16 +++++++++++----- include/hw/virtio/virtio-blk.h | 1 + 2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index 703ed4c93b..cee2c673a5 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -809,15 +809,11 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) virtio_blk_handle_output_do(s, vq); }
-static void virtio_blk_dma_restart_bh(void *opaque) +void virtio_blk_process_queued_requests(VirtIOBlock *s) { - VirtIOBlock *s = opaque; VirtIOBlockReq *req = s->rq; MultiReqBuffer mrb = {};
- qemu_bh_delete(s->bh); - s->bh = NULL; - s->rq = NULL;
aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); @@ -845,6 +841,16 @@ static void virtio_blk_dma_restart_bh(void *opaque) aio_context_release(blk_get_aio_context(s->conf.conf.blk)); }
+static void virtio_blk_dma_restart_bh(void *opaque) +{ + VirtIOBlock *s = opaque; + + qemu_bh_delete(s->bh); + s->bh = NULL; + + virtio_blk_process_queued_requests(s); +} + static void virtio_blk_dma_restart_cb(void *opaque, int running, RunState state) { diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index cddcfbebe9..cf8eea2f58 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -84,5 +84,6 @@ typedef struct MultiReqBuffer { } MultiReqBuffer;
bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); +void virtio_blk_process_queued_requests(VirtIOBlock *s);
#endif
From: Sergio Lopez slp@redhat.com
On restart, we were scheduling a BH to process queued requests, which would run before starting up the data plane, leading to those requests being assigned and started on coroutines on the main context.
This could cause requests to be wrongly processed in parallel from different threads (the main thread and the iothread managing the data plane), potentially leading to multiple issues.
For example, stopping and resuming a VM multiple times while the guest is generating I/O on a virtio_blk device can trigger a crash with a stack tracing looking like this one:
<------> Thread 2 (Thread 0x7ff736765700 (LWP 1062503)): #0 0x00005567a13b99d6 in iov_memset (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:69 #1 0x00005567a13bab73 in qemu_iovec_memset (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 #5 0x00005567a12f43d9 in qemu_laio_process_completions_and_submit (s=0x7ff7182e8420) at block/linux-aio.c:236 #6 0x00005567a12f44c2 in qemu_laio_poll_cb (opaque=0x7ff7182e8430) at block/linux-aio.c:267 #7 0x00005567a13aed83 in run_poll_handlers_once (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) at util/aio-posix.c:520 #8 0x00005567a13aee9f in run_poll_handlers (ctx=0x5567a2b58c70, max_ns=16000, timeout=0x7ff7367645f8) at util/aio-posix.c:562 #9 0x00005567a13aefde in try_poll_mode (ctx=0x5567a2b58c70, timeout=0x7ff7367645f8) at util/aio-posix.c:597 #10 0x00005567a13af115 in aio_poll (ctx=0x5567a2b58c70, blocking=true) at util/aio-posix.c:639 #11 0x00005567a109acca in iothread_run (opaque=0x5567a2b29760) at iothread.c:75 #12 0x00005567a13b2790 in qemu_thread_start (args=0x5567a2b694c0) at util/qemu-thread-posix.c:519 #13 0x00007ff73eedf2de in start_thread () at /lib64/libpthread.so.0 #14 0x00007ff73ec10e83 in clone () at /lib64/libc.so.6
Thread 1 (Thread 0x7ff743986f00 (LWP 1062500)): #0 0x00005567a13b99d6 in iov_memset (iov=0x6563617073206f4e, iov_cnt=1717922848, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:69 #1 0x00005567a13bab73 in qemu_iovec_memset (qiov=0x7ff73ec99748, offset=516096, fillc=0, bytes=7018105756081554803) at util/iov.c:530 #2 0x00005567a12f411c in qemu_laio_process_completion (laiocb=0x7ff6512ee6c0) at block/linux-aio.c:86 #3 0x00005567a12f42ff in qemu_laio_process_completions (s=0x7ff7182e8420) at block/linux-aio.c:217 #4 0x00005567a12f480d in ioq_submit (s=0x7ff7182e8420) at block/linux-aio.c:323 #5 0x00005567a12f4a2f in laio_do_submit (fd=19, laiocb=0x7ff5f4ff9ae0, offset=472363008, type=2) at block/linux-aio.c:375 #6 0x00005567a12f4af2 in laio_co_submit (bs=0x5567a2b8c460, s=0x7ff7182e8420, fd=19, offset=472363008, qiov=0x7ff5f4ff9ca0, type=2) at block/linux-aio.c:394 #7 0x00005567a12f1803 in raw_co_prw (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, type=2) at block/file-posix.c:1892 #8 0x00005567a12f1941 in raw_co_pwritev (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, flags=0) at block/file-posix.c:1925 #9 0x00005567a12fe3e1 in bdrv_driver_pwritev (bs=0x5567a2b8c460, offset=472363008, bytes=20480, qiov=0x7ff5f4ff9ca0, qiov_offset=0, flags=0) at block/io.c:1183 #10 0x00005567a1300340 in bdrv_aligned_pwritev (child=0x5567a2b5b070, req=0x7ff5f4ff9db0, offset=472363008, bytes=20480, align=512, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:1980 #11 0x00005567a1300b29 in bdrv_co_pwritev_part (child=0x5567a2b5b070, offset=472363008, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, flags=0) at block/io.c:2137 #12 0x00005567a12baba1 in qcow2_co_pwritev_task (bs=0x5567a2b92740, file_cluster_offset=472317952, offset=487305216, bytes=20480, qiov=0x7ff72c0425b8, qiov_offset=0, l2meta=0x0) at block/qcow2.c:2444 #13 0x00005567a12bacdb in qcow2_co_pwritev_task_entry (task=0x5567a2b48540) at block/qcow2.c:2475 #14 0x00005567a13167d8 in aio_task_co (opaque=0x5567a2b48540) at block/aio_task.c:45 #15 0x00005567a13cf00c in coroutine_trampoline (i0=738245600, i1=32759) at util/coroutine-ucontext.c:115 #16 0x00007ff73eb622e0 in __start_context () at /lib64/libc.so.6 #17 0x00007ff6626f1350 in () #18 0x0000000000000000 in () <------>
This is also known to cause crashes with this message (assertion failed):
aio_co_schedule: Co-routine was already scheduled in 'aio_co_schedule'
RHBZ: https://bugzilla.redhat.com/show_bug.cgi?id=1812765 Signed-off-by: Sergio Lopez slp@redhat.com Message-Id: 20200603093240.40489-3-slp@redhat.com Signed-off-by: Kevin Wolf kwolf@redhat.com --- hw/block/dataplane/virtio-blk.c | 8 ++++++++ hw/block/virtio-blk.c | 18 ++++++++++++------ include/hw/virtio/virtio-blk.h | 2 +- 3 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 5fea76df85..4476f97960 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -219,6 +219,9 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) goto fail_guest_notifiers; }
+ /* Process queued requests before the ones in vring */ + virtio_blk_process_queued_requests(vblk, false); + /* Kick right away to begin processing requests already in vring */ for (i = 0; i < nvqs; i++) { VirtQueue *vq = virtio_get_queue(s->vdev, i); @@ -238,6 +241,11 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) return 0;
fail_guest_notifiers: + /* + * If we failed to set up the guest notifiers queued requests will be + * processed on the main context. + */ + virtio_blk_process_queued_requests(vblk, false); vblk->dataplane_disabled = true; s->starting = false; vblk->dataplane_started = true; diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index cee2c673a5..ddf525b9d7 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -809,7 +809,7 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq) virtio_blk_handle_output_do(s, vq); }
-void virtio_blk_process_queued_requests(VirtIOBlock *s) +void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) { VirtIOBlockReq *req = s->rq; MultiReqBuffer mrb = {}; @@ -837,7 +837,9 @@ void virtio_blk_process_queued_requests(VirtIOBlock *s) if (mrb.num_reqs) { virtio_blk_submit_multireq(s->blk, &mrb); } - blk_dec_in_flight(s->conf.conf.blk); + if (is_bh) { + blk_dec_in_flight(s->conf.conf.blk); + } aio_context_release(blk_get_aio_context(s->conf.conf.blk)); }
@@ -848,21 +850,25 @@ static void virtio_blk_dma_restart_bh(void *opaque) qemu_bh_delete(s->bh); s->bh = NULL;
- virtio_blk_process_queued_requests(s); + virtio_blk_process_queued_requests(s, true); }
static void virtio_blk_dma_restart_cb(void *opaque, int running, RunState state) { VirtIOBlock *s = opaque; + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s))); + VirtioBusState *bus = VIRTIO_BUS(qbus);
if (!running) { return; }
- if (!s->bh) { - /* FIXME The data plane is not started yet, so these requests are - * processed in the main thread. */ + /* + * If ioeventfd is enabled, don't schedule the BH here as queued + * requests will be processed while starting the data plane. + */ + if (!s->bh && !virtio_bus_ioeventfd_enabled(bus)) { s->bh = aio_bh_new(blk_get_aio_context(s->conf.conf.blk), virtio_blk_dma_restart_bh, s); blk_inc_in_flight(s->conf.conf.blk); diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h index cf8eea2f58..e77f0db3b0 100644 --- a/include/hw/virtio/virtio-blk.h +++ b/include/hw/virtio/virtio-blk.h @@ -84,6 +84,6 @@ typedef struct MultiReqBuffer { } MultiReqBuffer;
bool virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq); -void virtio_blk_process_queued_requests(VirtIOBlock *s); +void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh);
#endif
Insert failed requests into device's list for later retry and handle queued requests to implement retry_request_cb.
Signed-off-by: Jiahui Cen cenjiahui@huawei.com Signed-off-by: Ying Fang fangying1@huawei.com --- hw/block/virtio-blk.c | 21 +++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-)
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index ddf525b9d7..2db9804cfe 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -101,6 +101,10 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, block_acct_failed(blk_get_stats(s->blk), &req->acct); } virtio_blk_free_request(req); + } else if (action == BLOCK_ERROR_ACTION_RETRY) { + req->mr_next = NULL; + req->next = s->rq; + s->rq = req; }
blk_error_action(s->blk, action, is_read, error); @@ -142,6 +146,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) } }
+ blk_error_retry_reset_timeout(s->blk); virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); block_acct_done(blk_get_stats(s->blk), &req->acct); virtio_blk_free_request(req); @@ -161,6 +166,7 @@ static void virtio_blk_flush_complete(void *opaque, int ret) } }
+ blk_error_retry_reset_timeout(s->blk); virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); block_acct_done(blk_get_stats(s->blk), &req->acct); virtio_blk_free_request(req); @@ -183,6 +189,7 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) } }
+ blk_error_retry_reset_timeout(s->blk); virtio_blk_req_complete(req, VIRTIO_BLK_S_OK); if (is_write_zeroes) { block_acct_done(blk_get_stats(s->blk), &req->acct); @@ -811,12 +818,12 @@ static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh) { - VirtIOBlockReq *req = s->rq; + VirtIOBlockReq *req; MultiReqBuffer mrb = {};
- s->rq = NULL; - aio_context_acquire(blk_get_aio_context(s->conf.conf.blk)); + req = s->rq; + s->rq = NULL; while (req) { VirtIOBlockReq *next = req->next; if (virtio_blk_handle_request(req, &mrb)) { @@ -1101,8 +1108,16 @@ static void virtio_blk_resize(void *opaque) virtio_notify_config(vdev); }
+static void virtio_blk_retry_request(void *opaque) +{ + VirtIOBlock *s = VIRTIO_BLK(opaque); + + virtio_blk_process_queued_requests(s, false); +} + static const BlockDevOps virtio_block_ops = { .resize_cb = virtio_blk_resize, + .retry_request_cb = virtio_blk_retry_request, };
static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
Move the code that retries requests from scsi_dma_restart_bh() to its own, non-static, function. This will allow us to call it from the retry_request_cb() of scsi-disk in a future patch.
Signed-off-by: Jiahui Cen cenjiahui@huawei.com Signed-off-by: Ying Fang fangying1@huawei.com --- hw/scsi/scsi-bus.c | 16 +++++++++++----- include/hw/scsi/scsi.h | 1 + 2 files changed, 12 insertions(+), 5 deletions(-)
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c index fdc3a0e4e0..9dc09b5f3e 100644 --- a/hw/scsi/scsi-bus.c +++ b/hw/scsi/scsi-bus.c @@ -99,14 +99,10 @@ void scsi_bus_new(SCSIBus *bus, size_t bus_size, DeviceState *host, qbus_set_bus_hotplug_handler(BUS(bus), &error_abort); }
-static void scsi_dma_restart_bh(void *opaque) +void scsi_retry_requests(SCSIDevice *s) { - SCSIDevice *s = opaque; SCSIRequest *req, *next;
- qemu_bh_delete(s->bh); - s->bh = NULL; - aio_context_acquire(blk_get_aio_context(s->conf.blk)); QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) { scsi_req_ref(req); @@ -128,6 +124,16 @@ static void scsi_dma_restart_bh(void *opaque) aio_context_release(blk_get_aio_context(s->conf.blk)); }
+static void scsi_dma_restart_bh(void *opaque) +{ + SCSIDevice *s = opaque; + + qemu_bh_delete(s->bh); + s->bh = NULL; + + scsi_retry_requests(s); +} + void scsi_req_retry(SCSIRequest *req) { /* No need to save a reference, because scsi_dma_restart_bh just diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h index 426566a5c6..1231d30b35 100644 --- a/include/hw/scsi/scsi.h +++ b/include/hw/scsi/scsi.h @@ -184,6 +184,7 @@ void scsi_req_cancel_complete(SCSIRequest *req); void scsi_req_cancel(SCSIRequest *req); void scsi_req_cancel_async(SCSIRequest *req, Notifier *notifier); void scsi_req_retry(SCSIRequest *req); +void scsi_retry_requests(SCSIDevice *s); void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense); void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense); void scsi_device_report_change(SCSIDevice *dev, SCSISense sense);
Mark failed requests as to be retried and implement retry_request_cb to handle these requests.
Signed-off-by: Jiahui Cen cenjiahui@huawei.com Signed-off-by: Ying Fang fangying1@huawei.com --- hw/scsi/scsi-disk.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+)
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index cd90cd780e..93fdd913fe 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -184,6 +184,8 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) { + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); + if (r->req.io_canceled) { scsi_req_cancel_complete(&r->req); return true; @@ -193,6 +195,7 @@ static bool scsi_disk_req_check_error(SCSIDiskReq *r, int ret, bool acct_failed) return scsi_handle_rw_error(r, -ret, acct_failed); }
+ blk_error_retry_reset_timeout(s->qdev.conf.blk); return false; }
@@ -480,6 +483,10 @@ static bool scsi_handle_rw_error(SCSIDiskReq *r, int error, bool acct_failed) } }
+ if (action == BLOCK_ERROR_ACTION_RETRY) { + scsi_req_retry(&r->req); + } + blk_error_action(s->qdev.conf.blk, action, is_read, error); if (action == BLOCK_ERROR_ACTION_IGNORE) { scsi_req_complete(&r->req, 0); @@ -2252,6 +2259,13 @@ static void scsi_disk_resize_cb(void *opaque) } }
+static void scsi_disk_retry_request(void *opaque) +{ + SCSIDiskState *s = opaque; + + scsi_retry_requests(&s->qdev); +} + static void scsi_cd_change_media_cb(void *opaque, bool load, Error **errp) { SCSIDiskState *s = opaque; @@ -2300,10 +2314,12 @@ static const BlockDevOps scsi_disk_removable_block_ops = { .is_medium_locked = scsi_cd_is_medium_locked,
.resize_cb = scsi_disk_resize_cb, + .retry_request_cb = scsi_disk_retry_request, };
static const BlockDevOps scsi_disk_block_ops = { .resize_cb = scsi_disk_resize_cb, + .retry_request_cb = scsi_disk_retry_request, };
static void scsi_disk_unit_attention_reported(SCSIDevice *dev)