[PATCH 05/17] nvme-fabrics: reject I/O to offline device

8 Dec 2020

From: Victor Gladkov victor.gladkov@kioxia.com
driver inclusion
category: bugfix
bugzilla: NA
CVE: NA
Link: https://gitee.com/openeuler/kernel/issues/I1WGZE
-------------------------------------------------
Commands get stuck while the host NVMe-oF controller is in the reconnect state.
The NVMe controller enters the reconnect state when it loses connection with
the target. It tries to reconnect every 10 seconds until reconnection
succeeds or until the reconnect timeout is reached.
The default reconnect timeout is 10 minutes.
Applications expect commands to complete with success or error
within a certain timeout (30 seconds by default). The NVMe host
enforces that timeout while it is connected; nevertheless, during
reconnection the timeout is not enforced and commands may get stuck for
a long period or even forever.
To fix this long delay due to the default timeout, we introduce a new
session parameter, "fast_io_fail_tmo". The timeout is measured in seconds
from the moment the controller starts reconnecting; any command issued
after that timeout expires is rejected. The new parameter value may be
passed during 'connect'.
A value of -1 means no timeout (similar to current behavior); this patch sets the default to NVMF_DEF_FAIL_FAST_TMO (150 seconds).
We add a new controller flag, NVME_CTRL_FAILFAST_EXPIRED, and a respective
delayed work item that sets the NVME_CTRL_FAILFAST_EXPIRED flag.
When the controller enters the CONNECTING state, we schedule the
delayed work based on the failfast timeout value. If the transition is out
of CONNECTING, we cancel the delayed work item and ensure failfast_expired
is false. If the delayed work item expires, the
"NVME_CTRL_FAILFAST_EXPIRED" flag is set.
We also update the nvmf_fail_nonready_command() and
nvme_available_path() functions to check the
"NVME_CTRL_FAILFAST_EXPIRED" controller flag.
Signed-off-by: Victor Gladkov victor.gladkov@kioxia.com
Signed-off-by: Chaitanya Kulkarni chaitanya.kulkarni@wdc.com
Reviewed-by: Hannes Reinecke hare@suse.de
Reviewed-by: Chao Leng lengchao@huawei.com
Reviewed-by: Jike Cheng chengjike.cheng@huawei.com
Signed-off-by: Ruozhu Li liruozhu@huawei.com
Reviewed-by: Hou Tao houtao1@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 drivers/nvme/host/core.c      | 51 +++++++++++++++++++++++++++++++++--
 drivers/nvme/host/fabrics.c   | 28 ++++++++++++++++---
 drivers/nvme/host/fabrics.h   |  5 ++++
 drivers/nvme/host/multipath.c |  2 ++
 drivers/nvme/host/nvme.h      |  3 +++
 5 files changed, 83 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c9faa824de26..5f500ee424a3 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -129,6 +129,37 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
    	queue_work(nvme_wq, &ctrl->scan_work);
 }
+static void nvme_failfast_work(struct work_struct *work)
+{
+	struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
+			struct nvme_ctrl, failfast_work);
+
+	if (ctrl->state != NVME_CTRL_CONNECTING)
+		return;
+
+	set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+	dev_info(ctrl->device, "failfast expired\n");
+	nvme_kick_requeue_lists(ctrl);
+}
+
+static inline void nvme_start_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (!ctrl->opts || ctrl->opts->fast_io_fail_tmo == -1)
+		return;
+
+	schedule_delayed_work(&ctrl->failfast_work,
+			      ctrl->opts->fast_io_fail_tmo * HZ);
+}
+
+static inline void nvme_stop_failfast_work(struct nvme_ctrl *ctrl)
+{
+	if (!ctrl->opts)
+		return;
+
+	cancel_delayed_work_sync(&ctrl->failfast_work);
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
+}
+
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
    if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -384,8 +415,21 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
    	ctrl->state = new_state;
spin_unlock_irqrestore(&ctrl->lock, flags);
-	if (changed && ctrl->state == NVME_CTRL_LIVE)
-		nvme_kick_requeue_lists(ctrl);
+	if (changed) {
+		switch (ctrl->state) {
+		case NVME_CTRL_LIVE:
+			if (old_state == NVME_CTRL_CONNECTING)
+				nvme_stop_failfast_work(ctrl);
+			nvme_kick_requeue_lists(ctrl);
+			break;
+		case NVME_CTRL_CONNECTING:
+			if (old_state == NVME_CTRL_RESETTING)
+				nvme_start_failfast_work(ctrl);
+			break;
+		default:
+			break;
+		}
+	}
    return changed;
 }
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
@@ -3696,6 +3740,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
    nvme_mpath_stop(ctrl);
    nvme_stop_keep_alive(ctrl);
+	nvme_stop_failfast_work(ctrl);
    flush_work(&ctrl->async_event_work);
    cancel_work_sync(&ctrl->fw_act_work);
    if (ctrl->ops->stop_ctrl)
@@ -3761,6 +3806,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
    int ret;
ctrl->state = NVME_CTRL_NEW;
+	clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
    spin_lock_init(&ctrl->lock);
    mutex_init(&ctrl->scan_lock);
    INIT_LIST_HEAD(&ctrl->namespaces);
@@ -3776,6 +3822,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
    INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
    memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
    ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
+	INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
    		PAGE_SIZE);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index a509d90e520f..86e344b4ee46 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -550,6 +550,7 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
 {
    if (ctrl->state != NVME_CTRL_DELETING &&
        ctrl->state != NVME_CTRL_DEAD &&
+	    !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
        !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
    	return BLK_STS_RESOURCE;
@@ -606,6 +607,7 @@ static const match_table_t opt_tokens = {
    { NVMF_OPT_HOST_TRADDR,		"host_traddr=%s"	},
    { NVMF_OPT_HOST_ID,		"hostid=%s"		},
    { NVMF_OPT_DUP_CONNECT,		"duplicate_connect"	},
+	{ NVMF_OPT_FAIL_FAST_TMO,       "fast_io_fail_tmo=%d"   },
    { NVMF_OPT_ERR,			NULL			}
 };
@@ -625,6 +627,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
    opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
    opts->kato = NVME_DEFAULT_KATO;
    opts->duplicate_connect = false;
+	opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
options = o = kstrdup(buf, GFP_KERNEL);
    if (!options)
@@ -749,6 +752,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
    			pr_warn("ctrl_loss_tmo < 0 will reconnect forever\n");
    		ctrl_loss_tmo = token;
    		break;
+		case NVMF_OPT_FAIL_FAST_TMO:
+			if (match_int(args, &token)) {
+				ret = -EINVAL;
+				goto out;
+			}
+
+			if (token >= 0)
+				pr_warn("I/O will fail on after %d sec reconnect\n",
+					token);
+			opts->fast_io_fail_tmo = token;
+			break;
    	case NVMF_OPT_HOSTNQN:
    		if (opts->host) {
    			pr_err("hostnqn already user-assigned: %s\n",
@@ -829,11 +843,17 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
    	opts->nr_io_queues = 0;
    	opts->duplicate_connect = true;
    }
-	if (ctrl_loss_tmo < 0)
+
+	if (ctrl_loss_tmo < 0) {
    	opts->max_reconnects = -1;
-	else
+	} else {
    	opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo,
    					opts->reconnect_delay);
+		if (ctrl_loss_tmo < opts->fast_io_fail_tmo)
+			pr_warn("failfast tmo (%d) > ctrl_loss_tmo (%d)\n",
+				opts->fast_io_fail_tmo,
+				ctrl_loss_tmo);
+	}
if (!opts->host) {
    	kref_get(&nvmf_default_host->ref);
@@ -902,8 +922,8 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
 #define NVMF_REQUIRED_OPTS	(NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
 #define NVMF_ALLOWED_OPTS	(NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
    			 NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
-				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT)
-
+				 NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT |\
+				 NVMF_OPT_FAIL_FAST_TMO)
 static struct nvme_ctrl *
 nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
 {
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 188ebbeec32c..a7a3100714b1 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -24,6 +24,8 @@
 /* default to 600 seconds of reconnect attempts before giving up */
 #define NVMF_DEF_CTRL_LOSS_TMO		600
 #define NVMF_DEF_RECONNECT_FOREVER	-1
+/* default fail fast timeout is 150 seconds */
+#define NVMF_DEF_FAIL_FAST_TMO		150
/*
  * Define a host as seen by the target.  We allocate one at boot, but also
@@ -59,6 +61,7 @@ enum {
    NVMF_OPT_CTRL_LOSS_TMO	= 1 << 11,
    NVMF_OPT_HOST_ID	= 1 << 12,
    NVMF_OPT_DUP_CONNECT	= 1 << 13,
+	NVMF_OPT_FAIL_FAST_TMO	= 1 << 20,
 };
/**
@@ -86,6 +89,7 @@ enum {
  * @max_reconnects: maximum number of allowed reconnect attempts before removing
  *              the controller, (-1) means reconnect forever, zero means remove
  *              immediately;
+ * @fast_io_fail_tmo: fast I/O fail timeout in seconds, (-1) means no timeout
  */
 struct nvmf_ctrl_options {
    unsigned		mask;
@@ -102,6 +106,7 @@ struct nvmf_ctrl_options {
    unsigned int		kato;
    struct nvmf_host	*host;
    int			max_reconnects;
+	int			fast_io_fail_tmo;
 };
/*
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index e1fc9ffbd3ee..8df0bf238455 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -199,6 +199,8 @@ static bool nvme_available_path(struct nvme_ns_head *head)
    struct nvme_ns *ns;
list_for_each_entry_rcu(ns, &head->list, siblings) {
+		if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
+			continue;
    	switch (ns->ctrl->state) {
    	case NVME_CTRL_LIVE:
    	case NVME_CTRL_RESETTING:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 5699397d3a5d..04c2d9ffd004 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -204,6 +204,7 @@ struct nvme_ctrl {
    struct work_struct scan_work;
    struct work_struct async_event_work;
    struct delayed_work ka_work;
+	struct delayed_work failfast_work;
    struct nvme_command ka_cmd;
    struct work_struct fw_act_work;
    unsigned long events;
@@ -239,6 +240,8 @@ struct nvme_ctrl {
    u16 icdoff;
    u16 maxcmd;
    int nr_reconnects;
+	unsigned long flags;
+#define NVME_CTRL_FAILFAST_EXPIRED	0
    struct nvmf_ctrl_options *opts;
struct page *discard_page;
-- 
2.25.1

    

2024

2023

2022

2021

2020

2019

[PATCH 05/17] nvme-fabrics: reject I/O to offline device