The NetLlink mechanism is used to send unreliable notifications when a task fails.
Bharath Ravi (1): scsi: iscsi: Perform connection failure entirely in kernel space
Gabriel Krisman Bertazi (2): scsi: iscsi: Report connection state in sysfs scsi: iscsi: Fix deadlock on recovery path during GFP_IO reclaim
Gulam Mohamed (1): scsi: iscsi: Fix race condition between login and sync thread
Mike Christie (10): scsi: iscsi: Fix iSCSI cls conn state scsi: iscsi: Force immediate failure during shutdown scsi: iscsi: Rel ref after iscsi_lookup_endpoint() scsi: iscsi: Fix in-kernel conn failure handling scsi: iscsi: Fix set_param() handling scsi: iscsi: Move iscsi_ep_disconnect() scsi: iscsi: Fix offload conn cleanup when iscsid restarts scsi: iscsi: Fix conn cleanup and stop race during iscsid restart scsi: iscsi: Fix endpoint reuse regression scsi: iscsi: Fix unbound endpoint error handling
Zhong Jinghua (1): scsi: iscsi: Fix kabi change in iscsi_cls_conn
drivers/infiniband/ulp/iser/iscsi_iser.c | 1 + drivers/scsi/be2iscsi/be_iscsi.c | 19 +- drivers/scsi/bnx2i/bnx2i_iscsi.c | 23 +- drivers/scsi/cxgbi/libcxgbi.c | 12 +- drivers/scsi/libiscsi.c | 21 +- drivers/scsi/qedi/qedi_iscsi.c | 25 +- drivers/scsi/qla4xxx/ql4_os.c | 1 + drivers/scsi/scsi_transport_iscsi.c | 487 ++++++++++++++++++----- include/scsi/scsi_transport_iscsi.h | 28 ++ 9 files changed, 485 insertions(+), 132 deletions(-)
From: Bharath Ravi rbharath@google.com
mainline inclusion from mainline-v5.7-rc1 commit 0ab710458da113a71c461c4df27e7f1353d9f864 category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
Connection failure processing depends on a daemon being present to (at least) stop the connection and start recovery. This is a problem on a multipath scenario, where if the daemon failed for whatever reason, the SCSI path is never marked as down, multipath won't perform the failover and IO to the device will be forever waiting for that connection to come back.
This patch performs the connection failure entirely inside the kernel. This way, the failover can happen and pending IO can continue even if the daemon is dead. Once the daemon comes alive again, it can execute recovery procedures if applicable.
Cc: Mike Christie mchristi@redhat.com Cc: Lee Duncan LDuncan@suse.com Cc: Bart Van Assche bvanassche@acm.org Link: https://lore.kernel.org/r/20200125061925.191601-1-krisman@collabora.com Co-developed-by: Dave Clausen dclausen@google.com Co-developed-by: Nick Black nlb@google.com Co-developed-by: Vaibhav Nagarnaik vnagarnaik@google.com Co-developed-by: Anatol Pomazau anatol@google.com Co-developed-by: Tahsin Erdogan tahsin@google.com Co-developed-by: Frank Mayhar fmayhar@google.com Co-developed-by: Junho Ryu jayr@google.com Co-developed-by: Khazhismel Kumykov khazhy@google.com Reviewed-by: Reviewed-by: Khazhismel Kumykov khazhy@google.com Co-developed-by: Gabriel Krisman Bertazi krisman@collabora.com Reviewed-by: Lee Duncan lduncan@suse.com Signed-off-by: Bharath Ravi rbharath@google.com Signed-off-by: Dave Clausen dclausen@google.com Signed-off-by: Nick Black nlb@google.com Signed-off-by: Vaibhav Nagarnaik vnagarnaik@google.com Signed-off-by: Anatol Pomazau anatol@google.com Signed-off-by: Tahsin Erdogan tahsin@google.com Signed-off-by: Frank Mayhar fmayhar@google.com Signed-off-by: Junho Ryu jayr@google.com Signed-off-by: Khazhismel Kumykov khazhy@google.com Signed-off-by: Gabriel Krisman Bertazi krisman@collabora.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 68 +++++++++++++++++++++++++++++ include/scsi/scsi_transport_iscsi.h | 1 + 2 files changed, 69 insertions(+)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index e50fbcce55aa..e60c68d8f5ef 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -81,6 +81,12 @@ struct iscsi_internal { struct transport_container session_cont; };
+/* Worker to perform connection failure on unresponsive connections + * completely in kernel space. + */ +static void stop_conn_work_fn(struct work_struct *work); +static DECLARE_WORK(stop_conn_work, stop_conn_work_fn); + static atomic_t iscsi_session_nr; /* sysfs session id for next new session */ static struct workqueue_struct *iscsi_eh_timer_workq;
@@ -1591,6 +1597,7 @@ static DEFINE_MUTEX(rx_queue_mutex); static LIST_HEAD(sesslist); static DEFINE_SPINLOCK(sesslock); static LIST_HEAD(connlist); +static LIST_HEAD(connlist_err); static DEFINE_SPINLOCK(connlock);
static uint32_t iscsi_conn_get_sid(struct iscsi_cls_conn *conn) @@ -2225,6 +2232,7 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
mutex_init(&conn->ep_mutex); INIT_LIST_HEAD(&conn->conn_list); + INIT_LIST_HEAD(&conn->conn_list_err); conn->transport = transport; conn->cid = cid;
@@ -2287,6 +2295,7 @@ void iscsi_remove_conn(struct iscsi_cls_conn *conn)
spin_lock_irqsave(&connlock, flags); list_del(&conn->conn_list); + list_del(&conn->conn_list_err); spin_unlock_irqrestore(&connlock, flags);
transport_unregister_device(&conn->dev); @@ -2497,6 +2506,51 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, } EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
+static void stop_conn_work_fn(struct work_struct *work) +{ + struct iscsi_cls_conn *conn, *tmp; + unsigned long flags; + LIST_HEAD(recovery_list); + + spin_lock_irqsave(&connlock, flags); + if (list_empty(&connlist_err)) { + spin_unlock_irqrestore(&connlock, flags); + return; + } + list_splice_init(&connlist_err, &recovery_list); + spin_unlock_irqrestore(&connlock, flags); + + list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) { + uint32_t sid = iscsi_conn_get_sid(conn); + struct iscsi_cls_session *session; + + mutex_lock(&rx_queue_mutex); + + session = iscsi_session_lookup(sid); + if (session) { + if (system_state != SYSTEM_RUNNING) { + session->recovery_tmo = 0; + conn->transport->stop_conn(conn, + STOP_CONN_TERM); + } else { + conn->transport->stop_conn(conn, + STOP_CONN_RECOVER); + } + } + + list_del_init(&conn->conn_list_err); + + mutex_unlock(&rx_queue_mutex); + + /* we don't want to hold rx_queue_mutex for too long, + * for instance if many conns failed at the same time, + * since this stall other iscsi maintenance operations. + * Give other users a chance to proceed. + */ + cond_resched(); + } +} + void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) { struct nlmsghdr *nlh; @@ -2504,6 +2558,12 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); + unsigned long flags; + + spin_lock_irqsave(&connlock, flags); + list_add(&conn->conn_list_err, &connlist_err); + spin_unlock_irqrestore(&connlock, flags); + queue_work(system_unbound_wq, &stop_conn_work);
priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -2833,11 +2893,19 @@ static int iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct iscsi_cls_conn *conn; + unsigned long flags;
conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid); if (!conn) return -EINVAL;
+ spin_lock_irqsave(&connlock, flags); + if (!list_empty(&conn->conn_list_err)) { + spin_unlock_irqrestore(&connlock, flags); + return -EAGAIN; + } + spin_unlock_irqrestore(&connlock, flags); + ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n"); if (transport->destroy_conn) transport->destroy_conn(conn); diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 105458b20cd8..65b4962d9532 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -203,6 +203,7 @@ extern void iscsi_ping_comp_event(uint32_t host_no,
struct iscsi_cls_conn { struct list_head conn_list; /* item in connlist */ + struct list_head conn_list_err; /* item in connlist_err */ void *dd_data; /* LLD private data */ struct iscsi_transport *transport; uint32_t cid; /* connection id */
From: Gabriel Krisman Bertazi krisman@collabora.com
mainline inclusion from mainline-v5.7-rc1 commit 82b8cf40bfe1077d8d757e223eb7a35c25f650ec category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
If an iSCSI connection happens to fail while the daemon isn't running (due to a crash or for another reason), the kernel failure report is not received. When the daemon restarts, there is insufficient kernel state in sysfs for it to know that this happened. open-iscsi tries to reopen every connection, but on different initiators, we'd like to know which connections have failed.
There is session->state, but that has a different lifetime than an iSCSI connection, so it doesn't directly reflect the connection state.
[mkp: typos]
Link: https://lore.kernel.org/r/20200317233422.532961-1-krisman@collabora.com Cc: Khazhismel Kumykov khazhy@google.com Suggested-by: Junho Ryu jayr@google.com Reviewed-by: Lee Duncan lduncan@suse.com Signed-off-by: Gabriel Krisman Bertazi krisman@collabora.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/libiscsi.c | 7 ++++++- drivers/scsi/scsi_transport_iscsi.c | 29 ++++++++++++++++++++++++++++- include/scsi/scsi_transport_iscsi.h | 8 ++++++++ 3 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 72463874d7b4..f14aaccf255c 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -3378,13 +3378,18 @@ void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
switch (flag) { case STOP_CONN_RECOVER: + cls_conn->state = ISCSI_CONN_FAILED; + break; case STOP_CONN_TERM: - iscsi_start_session_recovery(session, conn, flag); + cls_conn->state = ISCSI_CONN_DOWN; break; default: iscsi_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n", flag); + return; } + + iscsi_start_session_recovery(session, conn, flag); } EXPORT_SYMBOL_GPL(iscsi_conn_stop);
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index e60c68d8f5ef..2f397dc46476 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2235,6 +2235,7 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) INIT_LIST_HEAD(&conn->conn_list_err); conn->transport = transport; conn->cid = cid; + conn->state = ISCSI_CONN_DOWN;
/* this is released in the dev's release function */ if (!get_device(&session->dev)) @@ -3766,8 +3767,11 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) break; case ISCSI_UEVENT_START_CONN: conn = iscsi_conn_lookup(ev->u.start_conn.sid, ev->u.start_conn.cid); - if (conn) + if (conn) { ev->r.retcode = transport->start_conn(conn); + if (!ev->r.retcode) + conn->state = ISCSI_CONN_UP; + } else err = -EINVAL; break; @@ -3972,6 +3976,26 @@ iscsi_conn_attr(tcp_xmit_wsf, ISCSI_PARAM_TCP_XMIT_WSF); iscsi_conn_attr(tcp_recv_wsf, ISCSI_PARAM_TCP_RECV_WSF); iscsi_conn_attr(local_ipaddr, ISCSI_PARAM_LOCAL_IPADDR);
+static const char *const connection_state_names[] = { + [ISCSI_CONN_UP] = "up", + [ISCSI_CONN_DOWN] = "down", + [ISCSI_CONN_FAILED] = "failed" +}; + +static ssize_t show_conn_state(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); + const char *state = "unknown"; + + if (conn->state >= 0 && + conn->state < ARRAY_SIZE(connection_state_names)) + state = connection_state_names[conn->state]; + + return sprintf(buf, "%s\n", state); +} +static ISCSI_CLASS_ATTR(conn, state, S_IRUGO, show_conn_state, + NULL);
#define iscsi_conn_ep_attr_show(param) \ static ssize_t show_conn_ep_param_##param(struct device *dev, \ @@ -4041,6 +4065,7 @@ static struct attribute *iscsi_conn_attrs[] = { &dev_attr_conn_tcp_xmit_wsf.attr, &dev_attr_conn_tcp_recv_wsf.attr, &dev_attr_conn_local_ipaddr.attr, + &dev_attr_conn_state.attr, NULL, };
@@ -4112,6 +4137,8 @@ static umode_t iscsi_conn_attr_is_visible(struct kobject *kobj, param = ISCSI_PARAM_TCP_RECV_WSF; else if (attr == &dev_attr_conn_local_ipaddr.attr) param = ISCSI_PARAM_LOCAL_IPADDR; + else if (attr == &dev_attr_conn_state.attr) + return S_IRUGO; else { WARN_ONCE(1, "Invalid conn attr"); return 0; diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 65b4962d9532..22623621b41b 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -201,6 +201,13 @@ extern void iscsi_ping_comp_event(uint32_t host_no, uint32_t status, uint32_t pid, uint32_t data_size, uint8_t *data);
+/* iscsi class connection state */ +enum iscsi_connection_state { + ISCSI_CONN_UP = 0, + ISCSI_CONN_DOWN, + ISCSI_CONN_FAILED, +}; + struct iscsi_cls_conn { struct list_head conn_list; /* item in connlist */ struct list_head conn_list_err; /* item in connlist_err */ @@ -211,6 +218,7 @@ struct iscsi_cls_conn { struct iscsi_endpoint *ep;
struct device dev; /* sysfs transport/container device */ + enum iscsi_connection_state state; };
#define iscsi_dev_to_conn(_dev) \
From: Gabriel Krisman Bertazi krisman@collabora.com
mainline inclusion from mainline-v5.8-rc1 commit 7e7cd796f2776d055351d80328f45633bbb0aae5 category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
iSCSI suffers from a deadlock in case a management command submitted via the netlink socket sleeps on an allocation while holding the rx_queue_mutex if that allocation causes a memory reclaim that writebacks to a failed iSCSI device. The recovery procedure can never make progress to recover the failed disk or abort outstanding IO operations to complete the reclaim (since rx_queue_mutex is locked), thus locking the system.
Nevertheless, just marking all allocations under rx_queue_mutex as GFP_NOIO (or locking the userspace process with something like PF_MEMALLOC_NOIO) is not enough, since the iSCSI command code relies on other subsystems that try to grab locked mutexes, whose threads are GFP_IO, leading to the same deadlock. One instance where this situation can be observed is in the backtraces below, stitched from multiple bugs reports, involving the kobj uevent sent when a session is created.
The root of the problem is not the fact that iSCSI does GFP_IO allocations, that is acceptable. The actual problem is that rx_queue_mutex has a very large granularity, covering every unrelated netlink command execution at the same time as the error recovery path.
The proposed fix leverages the recently added mechanism to stop failed connections from the kernel, by enabling it to execute even though a management command from the netlink socket is being run (rx_queue_mutex is held), provided that the command is known to be safe. It splits the rx_queue_mutex in two mutexes, one protecting from concurrent command execution from the netlink socket, and one protecting stop_conn from racing with other connection management operations that might conflict with it.
It is not very pretty, but it is the simplest way to resolve the deadlock. I considered making it a lock per connection, but some external mutex would still be needed to deal with iscsi_if_destroy_conn.
The patch was tested by forcing a memory shrinker (unrelated, but used bufio/dm-verity) to reclaim iSCSI pages every time ISCSI_UEVENT_CREATE_SESSION happens, which is reasonable to simulate reclaims that might happen with GFP_KERNEL on that path. Then, a faulty hung target causes a connection to fail during intensive IO, at the same time a new session is added by iscsid.
The following stacktraces are stiches from several bug reports, showing a case where the deadlock can happen.
iSCSI-write holding: rx_queue_mutex waiting: uevent_sock_mutex
kobject_uevent_env+0x1bd/0x419 kobject_uevent+0xb/0xd device_add+0x48a/0x678 scsi_add_host_with_dma+0xc5/0x22d iscsi_host_add+0x53/0x55 iscsi_sw_tcp_session_create+0xa6/0x129 iscsi_if_rx+0x100/0x1247 netlink_unicast+0x213/0x4f0 netlink_sendmsg+0x230/0x3c0
iscsi_fail iscsi_conn_failure waiting: rx_queue_mutex
schedule_preempt_disabled+0x325/0x734 __mutex_lock_slowpath+0x18b/0x230 mutex_lock+0x22/0x40 iscsi_conn_failure+0x42/0x149 worker_thread+0x24a/0xbc0
EventManager_ holding: uevent_sock_mutex waiting: dm_bufio_client->lock
dm_bufio_lock+0xe/0x10 shrink+0x34/0xf7 shrink_slab+0x177/0x5d0 do_try_to_free_pages+0x129/0x470 try_to_free_mem_cgroup_pages+0x14f/0x210 memcg_kmem_newpage_charge+0xa6d/0x13b0 __alloc_pages_nodemask+0x4a3/0x1a70 fallback_alloc+0x1b2/0x36c __kmalloc_node_track_caller+0xb9/0x10d0 __alloc_skb+0x83/0x2f0 kobject_uevent_env+0x26b/0x419 dm_kobject_uevent+0x70/0x79 dev_suspend+0x1a9/0x1e7 ctl_ioctl+0x3e9/0x411 dm_ctl_ioctl+0x13/0x17 do_vfs_ioctl+0xb3/0x460 SyS_ioctl+0x5e/0x90
MemcgReclaimerD" holding: dm_bufio_client->lock waiting: stuck io to finish (needs iscsi_fail thread to progress)
schedule at ffffffffbd603618 io_schedule at ffffffffbd603ba4 do_io_schedule at ffffffffbdaf0d94 __wait_on_bit at ffffffffbd6008a6 out_of_line_wait_on_bit at ffffffffbd600960 wait_on_bit.constprop.10 at ffffffffbdaf0f17 __make_buffer_clean at ffffffffbdaf18ba __cleanup_old_buffer at ffffffffbdaf192f shrink at ffffffffbdaf19fd do_shrink_slab at ffffffffbd6ec000 shrink_slab at ffffffffbd6ec24a do_try_to_free_pages at ffffffffbd6eda09 try_to_free_mem_cgroup_pages at ffffffffbd6ede7e mem_cgroup_resize_limit at ffffffffbd7024c0 mem_cgroup_write at ffffffffbd703149 cgroup_file_write at ffffffffbd6d9c6e sys_write at ffffffffbd6662ea system_call_fastpath at ffffffffbdbc34a2
Link: https://lore.kernel.org/r/20200520022959.1912856-1-krisman@collabora.com Reported-by: Khazhismel Kumykov khazhy@google.com Reviewed-by: Lee Duncan lduncan@suse.com Signed-off-by: Gabriel Krisman Bertazi krisman@collabora.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com conflict: drivers/scsi/scsi_transport_iscsi.c Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 64 +++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 17 deletions(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 2f397dc46476..a00ae6c8188a 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1594,6 +1594,12 @@ static DECLARE_TRANSPORT_CLASS(iscsi_connection_class, static struct sock *nls; static DEFINE_MUTEX(rx_queue_mutex);
+/* + * conn_mutex protects the {start,bind,stop,destroy}_conn from racing + * against the kernel stop_connection recovery mechanism + */ +static DEFINE_MUTEX(conn_mutex); + static LIST_HEAD(sesslist); static DEFINE_SPINLOCK(sesslock); static LIST_HEAD(connlist); @@ -2507,6 +2513,32 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, } EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
+/* + * This can be called without the rx_queue_mutex, if invoked by the kernel + * stop work. But, in that case, it is guaranteed not to race with + * iscsi_destroy by conn_mutex. + */ +static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) +{ + /* + * It is important that this path doesn't rely on + * rx_queue_mutex, otherwise, a thread doing allocation on a + * start_session/start_connection could sleep waiting on a + * writeback to a failed iscsi device, that cannot be recovered + * because the lock is held. If we don't hold it here, the + * kernel stop_conn_work_fn has a chance to stop the broken + * session and resolve the allocation. + * + * Still, the user invoked .stop_conn() needs to be serialized + * with stop_conn_work_fn by a private mutex. Not pretty, but + * it works. + */ + mutex_lock(&conn_mutex); + conn->transport->stop_conn(conn, flag); + mutex_unlock(&conn_mutex); + +} + static void stop_conn_work_fn(struct work_struct *work) { struct iscsi_cls_conn *conn, *tmp; @@ -2525,30 +2557,17 @@ static void stop_conn_work_fn(struct work_struct *work) uint32_t sid = iscsi_conn_get_sid(conn); struct iscsi_cls_session *session;
- mutex_lock(&rx_queue_mutex); - session = iscsi_session_lookup(sid); if (session) { if (system_state != SYSTEM_RUNNING) { session->recovery_tmo = 0; - conn->transport->stop_conn(conn, - STOP_CONN_TERM); + iscsi_if_stop_conn(conn, STOP_CONN_TERM); } else { - conn->transport->stop_conn(conn, - STOP_CONN_RECOVER); + iscsi_if_stop_conn(conn, STOP_CONN_RECOVER); } }
list_del_init(&conn->conn_list_err); - - mutex_unlock(&rx_queue_mutex); - - /* we don't want to hold rx_queue_mutex for too long, - * for instance if many conns failed at the same time, - * since this stall other iscsi maintenance operations. - * Give other users a chance to proceed. - */ - cond_resched(); } }
@@ -2908,8 +2927,11 @@ iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev spin_unlock_irqrestore(&connlock, flags);
ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n"); + + mutex_lock(&conn_mutex); if (transport->destroy_conn) transport->destroy_conn(conn); + mutex_unlock(&conn_mutex);
return 0; } @@ -3744,9 +3766,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) break; }
+ mutex_lock(&conn_mutex); ev->r.retcode = transport->bind_conn(session, conn, ev->u.b_conn.transport_eph, ev->u.b_conn.is_leading); + mutex_unlock(&conn_mutex); + if (ev->r.retcode || !transport->ep_connect) break;
@@ -3768,9 +3793,11 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) case ISCSI_UEVENT_START_CONN: conn = iscsi_conn_lookup(ev->u.start_conn.sid, ev->u.start_conn.cid); if (conn) { + mutex_lock(&conn_mutex); ev->r.retcode = transport->start_conn(conn); if (!ev->r.retcode) conn->state = ISCSI_CONN_UP; + mutex_unlock(&conn_mutex); } else err = -EINVAL; @@ -3778,7 +3805,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) case ISCSI_UEVENT_STOP_CONN: conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid); if (conn) - transport->stop_conn(conn, ev->u.stop_conn.flag); + iscsi_if_stop_conn(conn, ev->u.stop_conn.flag); else err = -EINVAL; break; @@ -3792,11 +3819,14 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) }
conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid); - if (conn) + if (conn) { + mutex_lock(&conn_mutex); ev->r.retcode = transport->send_pdu(conn, (struct iscsi_hdr*)((char*)ev + sizeof(*ev)), (char*)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size, ev->u.send_pdu.data_size); + mutex_unlock(&conn_mutex); + } else err = -EINVAL; break;
From: Gulam Mohamed gulam.mohamed@oracle.com
mainline inclusion from mainline-v5.12-rc6 commit 9e67600ed6b8565da4b85698ec659b5879a6c1c6 category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
A kernel panic was observed due to a timing issue between the sync thread and the initiator processing a login response from the target. The session reopen can be invoked both from the session sync thread when iscsid restarts and from iscsid through the error handler. Before the initiator receives the response to a login, another reopen request can be sent from the error handler/sync session. When the initial login response is subsequently processed, the connection has been closed and the socket has been released.
To fix this a new connection state, ISCSI_CONN_BOUND, is added:
- Set the connection state value to ISCSI_CONN_DOWN upon iscsi_if_ep_disconnect() and iscsi_if_stop_conn()
- Set the connection state to the newly created value ISCSI_CONN_BOUND after bind connection (transport->bind_conn())
- In iscsi_set_param(), return -ENOTCONN if the connection state is not either ISCSI_CONN_BOUND or ISCSI_CONN_UP
Link: https://lore.kernel.org/r/20210325093248.284678-1-gulam.mohamed@oracle.com Reviewed-by: Mike Christie michael.christie@oracle.com Signed-off-by: Gulam Mohamed gulam.mohamed@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com
index 91074fd97f64..f4bf62b007a0 100644
Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 14 +++++++++++++- include/scsi/scsi_transport_iscsi.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index a00ae6c8188a..9b671326f2cc 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2535,6 +2535,7 @@ static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) */ mutex_lock(&conn_mutex); conn->transport->stop_conn(conn, flag); + conn->state = ISCSI_CONN_DOWN; mutex_unlock(&conn_mutex);
} @@ -2961,6 +2962,13 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) default: err = transport->set_param(conn, ev->u.set_param.param, data, ev->u.set_param.len); + if ((conn->state == ISCSI_CONN_BOUND) || + (conn->state == ISCSI_CONN_UP)) { + err = transport->set_param(conn, ev->u.set_param.param, + data, ev->u.set_param.len); + } else { + return -ENOTCONN; + } }
return err; @@ -3020,6 +3028,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, mutex_lock(&conn->ep_mutex); conn->ep = NULL; mutex_unlock(&conn->ep_mutex); + conn->state = ISCSI_CONN_DOWN; }
transport->ep_disconnect(ep); @@ -3770,6 +3779,8 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) ev->r.retcode = transport->bind_conn(session, conn, ev->u.b_conn.transport_eph, ev->u.b_conn.is_leading); + if (!ev->r.retcode) + conn->state = ISCSI_CONN_BOUND; mutex_unlock(&conn_mutex);
if (ev->r.retcode || !transport->ep_connect) @@ -4009,7 +4020,8 @@ iscsi_conn_attr(local_ipaddr, ISCSI_PARAM_LOCAL_IPADDR); static const char *const connection_state_names[] = { [ISCSI_CONN_UP] = "up", [ISCSI_CONN_DOWN] = "down", - [ISCSI_CONN_FAILED] = "failed" + [ISCSI_CONN_FAILED] = "failed", + [ISCSI_CONN_BOUND] = "bound" };
static ssize_t show_conn_state(struct device *dev, diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 22623621b41b..e8656e08a1bb 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -206,6 +206,7 @@ enum iscsi_connection_state { ISCSI_CONN_UP = 0, ISCSI_CONN_DOWN, ISCSI_CONN_FAILED, + ISCSI_CONN_BOUND, };
struct iscsi_cls_conn {
From: Mike Christie michael.christie@oracle.com
mainline inclusion from mainline-v5.12-rc8 commit 0dcf8febcb7b9d42bec98bc068e01d1a6ea578b8 category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
In commit 9e67600ed6b8 ("scsi: iscsi: Fix race condition between login and sync thread") I missed that libiscsi was now setting the iSCSI class state, and that patch ended up resetting the state during conn stoppage and using the wrong state value during ep_disconnect. This patch moves the setting of the class state to the class module and then fixes the two issues above.
Link: https://lore.kernel.org/r/20210406171746.5016-1-michael.christie@oracle.com Fixes: 9e67600ed6b8 ("scsi: iscsi: Fix race condition between login and sync thread") Cc: Gulam Mohamed gulam.mohamed@oracle.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com conflict: drivers/scsi/libiscsi.c Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/libiscsi.c | 26 +++----------------------- drivers/scsi/scsi_transport_iscsi.c | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 26 deletions(-)
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index f14aaccf255c..d646eacb5869 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -3308,9 +3308,10 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn) } }
-static void iscsi_start_session_recovery(struct iscsi_session *session, - struct iscsi_conn *conn, int flag) +void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) { + struct iscsi_conn *conn = cls_conn->dd_data; + struct iscsi_session *session = conn->session; int old_stop_stage; struct iscsi_cls_session_wrapper *cls_session = iscsi_cls_session_to_wrapper(session->cls_session); @@ -3370,27 +3371,6 @@ static void iscsi_start_session_recovery(struct iscsi_session *session, spin_unlock_bh(&session->frwd_lock); mutex_unlock(&session->eh_mutex); } - -void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) -{ - struct iscsi_conn *conn = cls_conn->dd_data; - struct iscsi_session *session = conn->session; - - switch (flag) { - case STOP_CONN_RECOVER: - cls_conn->state = ISCSI_CONN_FAILED; - break; - case STOP_CONN_TERM: - cls_conn->state = ISCSI_CONN_DOWN; - break; - default: - iscsi_conn_printk(KERN_ERR, conn, - "invalid stop flag %d\n", flag); - return; - } - - iscsi_start_session_recovery(session, conn, flag); -} EXPORT_SYMBOL_GPL(iscsi_conn_stop);
int iscsi_conn_bind(struct iscsi_cls_session *cls_session, diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 9b671326f2cc..f372a2cd82be 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2534,10 +2534,22 @@ static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) * it works. */ mutex_lock(&conn_mutex); + switch (flag) { + case STOP_CONN_RECOVER: + conn->state = ISCSI_CONN_FAILED; + break; + case STOP_CONN_TERM: + conn->state = ISCSI_CONN_DOWN; + break; + default: + iscsi_cls_conn_printk(KERN_ERR, conn, + "invalid stop flag %d\n", flag); + goto unlock; + } + conn->transport->stop_conn(conn, flag); - conn->state = ISCSI_CONN_DOWN; +unlock: mutex_unlock(&conn_mutex); - }
static void stop_conn_work_fn(struct work_struct *work) @@ -3028,7 +3040,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, mutex_lock(&conn->ep_mutex); conn->ep = NULL; mutex_unlock(&conn->ep_mutex); - conn->state = ISCSI_CONN_DOWN; + conn->state = ISCSI_CONN_FAILED; }
transport->ep_disconnect(ep);
From: Mike Christie michael.christie@oracle.com
stable inclusion from stable-v5.10.112 commit 4029a1e992fc6a0e29ff56c74a9632317fe76022 category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
[ Upstream commit 06c203a5566beecebb1f8838d026de8a61c8df71 ]
If the system is not up, we can just fail immediately since iscsid is not going to ever answer our netlink events. We are already setting the recovery_tmo to 0, but by passing stop_conn STOP_CONN_TERM we never will block the session and start the recovery timer, because for that flag userspace will do the unbind and destroy events which would remove the devices and wake up and kill the eh.
Since the conn is dead and the system is going dowm this just has us use STOP_CONN_RECOVER with recovery_tmo=0 so we fail immediately. However, if the user has set the recovery_tmo=-1 we let the system hang like they requested since they might have used that setting for specific reasons (one known reason is for buggy cluster software).
Link: https://lore.kernel.org/r/20210525181821.7617-5-michael.christie@oracle.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index f372a2cd82be..db89dfe51dec 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2573,11 +2573,17 @@ static void stop_conn_work_fn(struct work_struct *work) session = iscsi_session_lookup(sid); if (session) { if (system_state != SYSTEM_RUNNING) { - session->recovery_tmo = 0; - iscsi_if_stop_conn(conn, STOP_CONN_TERM); - } else { - iscsi_if_stop_conn(conn, STOP_CONN_RECOVER); + /* + * If the user has set up for the session to + * never timeout then hang like they wanted. + * For all other cases fail right away since + * userspace is not going to relogin. + */ + if (session->recovery_tmo > 0) + session->recovery_tmo = 0; } + + iscsi_if_stop_conn(conn, STOP_CONN_RECOVER); }
list_del_init(&conn->conn_list_err);
From: Mike Christie michael.christie@oracle.com
stable inclusion from stable-v5.10.112 commit 812573896711f3ffb083c374f26f5807efca3b2f category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
[ Upstream commit 9e5fe1700896c85040943fdc0d3fee0dd3e0d36f ]
Subsequent commits allow the kernel to do ep_disconnect. In that case we will have to get a proper refcount on the ep so one thread does not delete it from under another.
Link: https://lore.kernel.org/r/20210525181821.7617-7-michael.christie@oracle.com Reviewed-by: Lee Duncan lduncan@suse.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/infiniband/ulp/iser/iscsi_iser.c | 1 + drivers/scsi/be2iscsi/be_iscsi.c | 19 ++++++++++++------ drivers/scsi/bnx2i/bnx2i_iscsi.c | 23 +++++++++++++++------- drivers/scsi/cxgbi/libcxgbi.c | 12 ++++++++---- drivers/scsi/qedi/qedi_iscsi.c | 25 +++++++++++++++++------- drivers/scsi/qla4xxx/ql4_os.c | 1 + drivers/scsi/scsi_transport_iscsi.c | 25 ++++++++++++++++-------- include/scsi/scsi_transport_iscsi.h | 1 + 8 files changed, 75 insertions(+), 32 deletions(-)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index b4e0ae024575..6c97c2230a83 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -501,6 +501,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, iser_conn->iscsi_conn = conn;
out: + iscsi_put_endpoint(ep); mutex_unlock(&iser_conn->state_mutex); return error; } diff --git a/drivers/scsi/be2iscsi/be_iscsi.c b/drivers/scsi/be2iscsi/be_iscsi.c index c8f0a2144b44..7d5fa3246b5f 100644 --- a/drivers/scsi/be2iscsi/be_iscsi.c +++ b/drivers/scsi/be2iscsi/be_iscsi.c @@ -184,6 +184,7 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session, struct beiscsi_endpoint *beiscsi_ep; struct iscsi_endpoint *ep; uint16_t cri_index; + int rc = 0;
ep = iscsi_lookup_endpoint(transport_fd); if (!ep) @@ -191,15 +192,17 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session,
beiscsi_ep = ep->dd_data;
- if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) - return -EINVAL; + if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) { + rc = -EINVAL; + goto put_ep; + }
if (beiscsi_ep->phba != phba) { beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG, "BS_%d : beiscsi_ep->hba=%p not equal to phba=%p\n", beiscsi_ep->phba, phba); - - return -EEXIST; + rc = -EEXIST; + goto put_ep; } cri_index = BE_GET_CRI_FROM_CID(beiscsi_ep->ep_cid); if (phba->conn_table[cri_index]) { @@ -211,7 +214,8 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session, beiscsi_ep->ep_cid, beiscsi_conn, phba->conn_table[cri_index]); - return -EINVAL; + rc = -EINVAL; + goto put_ep; } }
@@ -228,7 +232,10 @@ int beiscsi_conn_bind(struct iscsi_cls_session *cls_session, "BS_%d : cid %d phba->conn_table[%u]=%p\n", beiscsi_ep->ep_cid, cri_index, beiscsi_conn); phba->conn_table[cri_index] = beiscsi_conn; - return 0; + +put_ep: + iscsi_put_endpoint(ep); + return rc; }
static int beiscsi_iface_create_ipv4(struct beiscsi_hba *phba) diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index f2f415021770..a3a9c2258fcd 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -1421,17 +1421,23 @@ static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session, * Forcefully terminate all in progress connection recovery at the * earliest, either in bind(), send_pdu(LOGIN), or conn_start() */ - if (bnx2i_adapter_ready(hba)) - return -EIO; + if (bnx2i_adapter_ready(hba)) { + ret_code = -EIO; + goto put_ep; + }
bnx2i_ep = ep->dd_data; if ((bnx2i_ep->state == EP_STATE_TCP_FIN_RCVD) || - (bnx2i_ep->state == EP_STATE_TCP_RST_RCVD)) + (bnx2i_ep->state == EP_STATE_TCP_RST_RCVD)) { /* Peer disconnect via' FIN or RST */ - return -EINVAL; + ret_code = -EINVAL; + goto put_ep; + }
- if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) - return -EINVAL; + if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) { + ret_code = -EINVAL; + goto put_ep; + }
if (bnx2i_ep->hba != hba) { /* Error - TCP connection does not belong to this device @@ -1442,7 +1448,8 @@ static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session, iscsi_conn_printk(KERN_ALERT, cls_conn->dd_data, "belong to hba (%s)\n", hba->netdev->name); - return -EEXIST; + ret_code = -EEXIST; + goto put_ep; } bnx2i_ep->conn = bnx2i_conn; bnx2i_conn->ep = bnx2i_ep; @@ -1459,6 +1466,8 @@ static int bnx2i_conn_bind(struct iscsi_cls_session *cls_session, bnx2i_put_rq_buf(bnx2i_conn, 0);
bnx2i_arm_cq_event_coalescing(bnx2i_conn->ep, CNIC_ARM_CQE); +put_ep: + iscsi_put_endpoint(ep); return ret_code; }
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index dcb148a09b06..a319501b39ec 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -2392,11 +2392,13 @@ int cxgbi_bind_conn(struct iscsi_cls_session *cls_session, err = csk->cdev->csk_ddp_setup_pgidx(csk, csk->tid, ppm->tformat.pgsz_idx_dflt); if (err < 0) - return err; + goto put_ep;
err = iscsi_conn_bind(cls_session, cls_conn, is_leading); - if (err) - return -EINVAL; + if (err) { + err = -EINVAL; + goto put_ep; + }
/* calculate the tag idx bits needed for this conn based on cmds_max */ cconn->task_idx_bits = (__ilog2_u32(conn->session->cmds_max - 1)) + 1; @@ -2417,7 +2419,9 @@ int cxgbi_bind_conn(struct iscsi_cls_session *cls_session, /* init recv engine */ iscsi_tcp_hdr_recv_prep(tcp_conn);
- return 0; +put_ep: + iscsi_put_endpoint(ep); + return err; } EXPORT_SYMBOL_GPL(cxgbi_bind_conn);
diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c index 1b7049dce169..9447e7176752 100644 --- a/drivers/scsi/qedi/qedi_iscsi.c +++ b/drivers/scsi/qedi/qedi_iscsi.c @@ -381,6 +381,7 @@ static int qedi_conn_bind(struct iscsi_cls_session *cls_session, struct qedi_ctx *qedi = iscsi_host_priv(shost); struct qedi_endpoint *qedi_ep; struct iscsi_endpoint *ep; + int rc = 0;
ep = iscsi_lookup_endpoint(transport_fd); if (!ep) @@ -388,11 +389,16 @@ static int qedi_conn_bind(struct iscsi_cls_session *cls_session,
qedi_ep = ep->dd_data; if ((qedi_ep->state == EP_STATE_TCP_FIN_RCVD) || - (qedi_ep->state == EP_STATE_TCP_RST_RCVD)) - return -EINVAL; + (qedi_ep->state == EP_STATE_TCP_RST_RCVD)) { + rc = -EINVAL; + goto put_ep; + } + + if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) { + rc = -EINVAL; + goto put_ep; + }
- if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) - return -EINVAL;
qedi_ep->conn = qedi_conn; qedi_conn->ep = qedi_ep; @@ -401,13 +407,18 @@ static int qedi_conn_bind(struct iscsi_cls_session *cls_session, qedi_conn->cmd_cleanup_req = 0; qedi_conn->cmd_cleanup_cmpl = 0;
- if (qedi_bind_conn_to_iscsi_cid(qedi, qedi_conn)) - return -EINVAL; + if (qedi_bind_conn_to_iscsi_cid(qedi, qedi_conn)) { + rc = -EINVAL; + goto put_ep; + } +
spin_lock_init(&qedi_conn->tmf_work_lock); INIT_LIST_HEAD(&qedi_conn->tmf_work_list); init_waitqueue_head(&qedi_conn->wait_queue); - return 0; +put_ep: + iscsi_put_endpoint(ep); + return rc; }
static int qedi_iscsi_update_conn(struct qedi_ctx *qedi, diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index f8acf101af3d..4b93255699b9 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -3209,6 +3209,7 @@ static int qla4xxx_conn_bind(struct iscsi_cls_session *cls_session, conn = cls_conn->dd_data; qla_conn = conn->dd_data; qla_conn->qla_ep = ep->dd_data; + iscsi_put_endpoint(ep); return 0; }
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index db89dfe51dec..cd92fa4dcca1 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -261,9 +261,20 @@ void iscsi_destroy_endpoint(struct iscsi_endpoint *ep) } EXPORT_SYMBOL_GPL(iscsi_destroy_endpoint);
+void iscsi_put_endpoint(struct iscsi_endpoint *ep) +{ + put_device(&ep->dev); +} +EXPORT_SYMBOL_GPL(iscsi_put_endpoint); + +/** + * iscsi_lookup_endpoint - get ep from handle + * @handle: endpoint handle + * + * Caller must do a iscsi_put_endpoint. + */ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle) { - struct iscsi_endpoint *ep; struct device *dev;
dev = class_find_device(&iscsi_endpoint_class, NULL, &handle, @@ -271,13 +282,7 @@ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle) if (!dev) return NULL;
- ep = iscsi_dev_to_endpoint(dev); - /* - * we can drop this now because the interface will prevent - * removals and lookups from racing. - */ - put_device(dev); - return ep; + return iscsi_dev_to_endpoint(dev); } EXPORT_SYMBOL_GPL(iscsi_lookup_endpoint);
@@ -3050,6 +3055,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, }
transport->ep_disconnect(ep); + iscsi_put_endpoint(ep); return 0; }
@@ -3075,6 +3081,7 @@ iscsi_if_transport_ep(struct iscsi_transport *transport,
ev->r.retcode = transport->ep_poll(ep, ev->u.ep_poll.timeout_ms); + iscsi_put_endpoint(ep); break; case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT: rc = iscsi_if_ep_disconnect(transport, @@ -3757,6 +3764,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) ev->u.c_bound_session.initial_cmdsn, ev->u.c_bound_session.cmds_max, ev->u.c_bound_session.queue_depth); + iscsi_put_endpoint(ep); break; case ISCSI_UEVENT_DESTROY_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); @@ -3811,6 +3819,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) mutex_lock(&conn->ep_mutex); conn->ep = ep; mutex_unlock(&conn->ep_mutex); + iscsi_put_endpoint(ep); } else iscsi_cls_conn_printk(KERN_ERR, conn, "Could not set ep conn " diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index e8656e08a1bb..e30aad182f5b 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -471,6 +471,7 @@ extern int iscsi_scan_finished(struct Scsi_Host *shost, unsigned long time); extern struct iscsi_endpoint *iscsi_create_endpoint(int dd_size); extern void iscsi_destroy_endpoint(struct iscsi_endpoint *ep); extern struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle); +extern void iscsi_put_endpoint(struct iscsi_endpoint *ep); extern int iscsi_block_scsi_eh(struct scsi_cmnd *cmd); extern struct iscsi_iface *iscsi_create_iface(struct Scsi_Host *shost, struct iscsi_transport *t,
From: Mike Christie michael.christie@oracle.com
stable inclusion from stable-v5.10.112 commit 46f37a34a53d4c12fa413b7aec7fe7c3e5cece51 category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
[ Upstream commit 23d6fefbb3f6b1cc29794427588b470ed06ff64e ]
Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space") has the following regressions/bugs that this patch fixes:
1. It can return cmds to upper layers like dm-multipath where that can retry them. After they are successful the fs/app can send new I/O to the same sectors, but we've left the cmds running in FW or in the net layer. We need to be calling ep_disconnect if userspace is not up.
This patch only fixes the issue for offload drivers. iscsi_tcp will be fixed in separate commit because it doesn't have a ep_disconnect call.
2. The drivers that implement ep_disconnect expect that it's called before conn_stop. Besides crashes, if the cleanup_task callout is called before ep_disconnect it might free up driver/card resources for session1 then they could be allocated for session2. But because the driver's ep_disconnect is not called it has not cleaned up the firmware so the card is still using the resources for the original cmd.
3. The stop_conn_work_fn can run after userspace has done its recovery and we are happily using the session. We will then end up with various bugs depending on what is going on at the time.
We may also run stop_conn_work_fn late after userspace has called stop_conn and ep_disconnect and is now going to call start/bind conn. If stop_conn_work_fn runs after bind but before start, we would leave the conn in a unbound but sort of started state where IO might be allowed even though the drivers have been set in a state where they no longer expect I/O.
4. Returning -EAGAIN in iscsi_if_destroy_conn if we haven't yet run the in kernel stop_conn function is breaking userspace. We should have been doing this for the caller.
Link: https://lore.kernel.org/r/20210525181821.7617-8-michael.christie@oracle.com Fixes: 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space") Reviewed-by: Lee Duncan lduncan@suse.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org conflict: drivers/scsi/scsi_transport_iscsi.c Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 465 ++++++++++++++++------------ include/scsi/scsi_transport_iscsi.h | 10 +- 2 files changed, 281 insertions(+), 194 deletions(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index cd92fa4dcca1..06fa9e5d7cda 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -81,15 +81,11 @@ struct iscsi_internal { struct transport_container session_cont; };
-/* Worker to perform connection failure on unresponsive connections - * completely in kernel space. - */ -static void stop_conn_work_fn(struct work_struct *work); -static DECLARE_WORK(stop_conn_work, stop_conn_work_fn); - static atomic_t iscsi_session_nr; /* sysfs session id for next new session */ static struct workqueue_struct *iscsi_eh_timer_workq;
+static struct workqueue_struct *iscsi_conn_cleanup_workq; + static DEFINE_IDA(iscsi_sess_ida); /* * list of registered transports and lock that must @@ -1599,12 +1595,6 @@ static DECLARE_TRANSPORT_CLASS(iscsi_connection_class, static struct sock *nls; static DEFINE_MUTEX(rx_queue_mutex);
-/* - * conn_mutex protects the {start,bind,stop,destroy}_conn from racing - * against the kernel stop_connection recovery mechanism - */ -static DEFINE_MUTEX(conn_mutex); - static LIST_HEAD(sesslist); static DEFINE_SPINLOCK(sesslock); static LIST_HEAD(connlist); @@ -2215,6 +2205,122 @@ void iscsi_remove_session(struct iscsi_cls_session *session) } EXPORT_SYMBOL_GPL(iscsi_remove_session);
+static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) +{ + ISCSI_DBG_TRANS_CONN(conn, "Stopping conn.\n"); + + switch (flag) { + case STOP_CONN_RECOVER: + conn->state = ISCSI_CONN_FAILED; + break; + case STOP_CONN_TERM: + conn->state = ISCSI_CONN_DOWN; + break; + default: + iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n", + flag); + return; + } + + conn->transport->stop_conn(conn, flag); + ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n"); +} + +static int iscsi_if_stop_conn(struct iscsi_transport *transport, + struct iscsi_uevent *ev) +{ + int flag = ev->u.stop_conn.flag; + struct iscsi_cls_conn *conn; + + conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid); + if (!conn) + return -EINVAL; + + ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop.\n"); + /* + * If this is a termination we have to call stop_conn with that flag + * so the correct states get set. If we haven't run the work yet try to + * avoid the extra run. + */ + if (flag == STOP_CONN_TERM) { + cancel_work_sync(&conn->cleanup_work); + iscsi_stop_conn(conn, flag); + } else { + /* + * Figure out if it was the kernel or userspace initiating this. + */ + if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + iscsi_stop_conn(conn, flag); + } else { + ISCSI_DBG_TRANS_CONN(conn, + "flush kernel conn cleanup.\n"); + flush_work(&conn->cleanup_work); + } + /* + * Only clear for recovery to avoid extra cleanup runs during + * termination. + */ + clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); + } + ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n"); + return 0; +} + +static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) +{ + struct iscsi_cls_session *session = iscsi_conn_to_session(conn); + struct iscsi_endpoint *ep; + + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); + conn->state = ISCSI_CONN_FAILED; + + if (!conn->ep || !session->transport->ep_disconnect) + return; + + ep = conn->ep; + conn->ep = NULL; + + session->transport->ep_disconnect(ep); + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); +} + +static void iscsi_cleanup_conn_work_fn(struct work_struct *work) +{ + struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn, + cleanup_work); + struct iscsi_cls_session *session = iscsi_conn_to_session(conn); + + mutex_lock(&conn->ep_mutex); + /* + * If we are not at least bound there is nothing for us to do. Userspace + * will do a ep_disconnect call if offload is used, but will not be + * doing a stop since there is nothing to clean up, so we have to clear + * the cleanup bit here. + */ + if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) { + ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n"); + clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); + mutex_unlock(&conn->ep_mutex); + return; + } + + iscsi_ep_disconnect(conn, false); + + if (system_state != SYSTEM_RUNNING) { + /* + * If the user has set up for the session to never timeout + * then hang like they wanted. For all other cases fail right + * away since userspace is not going to relogin. + */ + if (session->recovery_tmo > 0) + session->recovery_tmo = 0; + } + + iscsi_stop_conn(conn, STOP_CONN_RECOVER); + mutex_unlock(&conn->ep_mutex); + ISCSI_DBG_TRANS_CONN(conn, "cleanup done.\n"); +} + void iscsi_free_session(struct iscsi_cls_session *session) { ISCSI_DBG_TRANS_SESSION(session, "Freeing session\n"); @@ -2243,7 +2349,7 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
mutex_init(&conn->ep_mutex); INIT_LIST_HEAD(&conn->conn_list); - INIT_LIST_HEAD(&conn->conn_list_err); + INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn); conn->transport = transport; conn->cid = cid; conn->state = ISCSI_CONN_DOWN; @@ -2307,7 +2413,6 @@ void iscsi_remove_conn(struct iscsi_cls_conn *conn)
spin_lock_irqsave(&connlock, flags); list_del(&conn->conn_list); - list_del(&conn->conn_list_err); spin_unlock_irqrestore(&connlock, flags);
transport_unregister_device(&conn->dev); @@ -2518,83 +2623,6 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, } EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
-/* - * This can be called without the rx_queue_mutex, if invoked by the kernel - * stop work. But, in that case, it is guaranteed not to race with - * iscsi_destroy by conn_mutex. - */ -static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) -{ - /* - * It is important that this path doesn't rely on - * rx_queue_mutex, otherwise, a thread doing allocation on a - * start_session/start_connection could sleep waiting on a - * writeback to a failed iscsi device, that cannot be recovered - * because the lock is held. If we don't hold it here, the - * kernel stop_conn_work_fn has a chance to stop the broken - * session and resolve the allocation. - * - * Still, the user invoked .stop_conn() needs to be serialized - * with stop_conn_work_fn by a private mutex. Not pretty, but - * it works. - */ - mutex_lock(&conn_mutex); - switch (flag) { - case STOP_CONN_RECOVER: - conn->state = ISCSI_CONN_FAILED; - break; - case STOP_CONN_TERM: - conn->state = ISCSI_CONN_DOWN; - break; - default: - iscsi_cls_conn_printk(KERN_ERR, conn, - "invalid stop flag %d\n", flag); - goto unlock; - } - - conn->transport->stop_conn(conn, flag); -unlock: - mutex_unlock(&conn_mutex); -} - -static void stop_conn_work_fn(struct work_struct *work) -{ - struct iscsi_cls_conn *conn, *tmp; - unsigned long flags; - LIST_HEAD(recovery_list); - - spin_lock_irqsave(&connlock, flags); - if (list_empty(&connlist_err)) { - spin_unlock_irqrestore(&connlock, flags); - return; - } - list_splice_init(&connlist_err, &recovery_list); - spin_unlock_irqrestore(&connlock, flags); - - list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) { - uint32_t sid = iscsi_conn_get_sid(conn); - struct iscsi_cls_session *session; - - session = iscsi_session_lookup(sid); - if (session) { - if (system_state != SYSTEM_RUNNING) { - /* - * If the user has set up for the session to - * never timeout then hang like they wanted. - * For all other cases fail right away since - * userspace is not going to relogin. - */ - if (session->recovery_tmo > 0) - session->recovery_tmo = 0; - } - - iscsi_if_stop_conn(conn, STOP_CONN_RECOVER); - } - - list_del_init(&conn->conn_list_err); - } -} - void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) { struct nlmsghdr *nlh; @@ -2602,12 +2630,9 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); - unsigned long flags;
- spin_lock_irqsave(&connlock, flags); - list_add(&conn->conn_list_err, &connlist_err); - spin_unlock_irqrestore(&connlock, flags); - queue_work(system_unbound_wq, &stop_conn_work); + if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) + queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work);
priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -2937,26 +2962,17 @@ static int iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct iscsi_cls_conn *conn; - unsigned long flags;
conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid); if (!conn) return -EINVAL;
- spin_lock_irqsave(&connlock, flags); - if (!list_empty(&conn->conn_list_err)) { - spin_unlock_irqrestore(&connlock, flags); - return -EAGAIN; - } - spin_unlock_irqrestore(&connlock, flags); - + ISCSI_DBG_TRANS_CONN(conn, "Flushing cleanup during destruction\n"); + flush_work(&conn->cleanup_work); ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n");
- mutex_lock(&conn_mutex); if (transport->destroy_conn) transport->destroy_conn(conn); - mutex_unlock(&conn_mutex); - return 0; }
@@ -3046,15 +3062,30 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, ep = iscsi_lookup_endpoint(ep_handle); if (!ep) return -EINVAL; + conn = ep->conn; - if (conn) { - mutex_lock(&conn->ep_mutex); - conn->ep = NULL; + if (!conn) { + /* + * conn was not even bound yet, so we can't get iscsi conn + * failures yet. + */ + transport->ep_disconnect(ep); + goto put_ep; + } + + mutex_lock(&conn->ep_mutex); + /* Check if this was a conn error and the kernel took ownership */ + if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); mutex_unlock(&conn->ep_mutex); - conn->state = ISCSI_CONN_FAILED; + + flush_work(&conn->cleanup_work); + goto put_ep; }
- transport->ep_disconnect(ep); + iscsi_ep_disconnect(conn, false); + mutex_unlock(&conn->ep_mutex); +put_ep: iscsi_put_endpoint(ep); return 0; } @@ -3712,18 +3743,129 @@ iscsi_get_host_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) return err; }
+static int iscsi_if_transport_conn(struct iscsi_transport *transport, + struct nlmsghdr *nlh) +{ + struct iscsi_uevent *ev = nlmsg_data(nlh); + struct iscsi_cls_session *session; + struct iscsi_cls_conn *conn = NULL; + struct iscsi_endpoint *ep; + uint32_t pdu_len; + int err = 0; + + switch (nlh->nlmsg_type) { + case ISCSI_UEVENT_CREATE_CONN: + return iscsi_if_create_conn(transport, ev); + case ISCSI_UEVENT_DESTROY_CONN: + return iscsi_if_destroy_conn(transport, ev); + case ISCSI_UEVENT_STOP_CONN: + return iscsi_if_stop_conn(transport, ev); + } + + /* + * The following cmds need to be run under the ep_mutex so in kernel + * conn cleanup (ep_disconnect + unbind and conn) is not done while + * these are running. They also must not run if we have just run a conn + * cleanup because they would set the state in a way that might allow + * IO or send IO themselves. + */ + switch (nlh->nlmsg_type) { + case ISCSI_UEVENT_START_CONN: + conn = iscsi_conn_lookup(ev->u.start_conn.sid, + ev->u.start_conn.cid); + break; + case ISCSI_UEVENT_BIND_CONN: + conn = iscsi_conn_lookup(ev->u.b_conn.sid, ev->u.b_conn.cid); + break; + case ISCSI_UEVENT_SEND_PDU: + conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid); + break; + } + + if (!conn) + return -EINVAL; + + mutex_lock(&conn->ep_mutex); + if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + mutex_unlock(&conn->ep_mutex); + ev->r.retcode = -ENOTCONN; + return 0; + } + + switch (nlh->nlmsg_type) { + case ISCSI_UEVENT_BIND_CONN: + if (conn->ep) { + /* + * For offload boot support where iscsid is restarted + * during the pivot root stage, the ep will be intact + * here when the new iscsid instance starts up and + * reconnects. + */ + iscsi_ep_disconnect(conn, true); + } + + session = iscsi_session_lookup(ev->u.b_conn.sid); + if (!session) { + err = -EINVAL; + break; + } + + ev->r.retcode = transport->bind_conn(session, conn, + ev->u.b_conn.transport_eph, + ev->u.b_conn.is_leading); + if (!ev->r.retcode) + conn->state = ISCSI_CONN_BOUND; + + if (ev->r.retcode || !transport->ep_connect) + break; + + ep = iscsi_lookup_endpoint(ev->u.b_conn.transport_eph); + if (ep) { + ep->conn = conn; + conn->ep = ep; + iscsi_put_endpoint(ep); + } else { + err = -ENOTCONN; + iscsi_cls_conn_printk(KERN_ERR, conn, + "Could not set ep conn binding\n"); + } + break; + case ISCSI_UEVENT_START_CONN: + ev->r.retcode = transport->start_conn(conn); + if (!ev->r.retcode) + conn->state = ISCSI_CONN_UP; + break; + case ISCSI_UEVENT_SEND_PDU: + pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); + + if ((ev->u.send_pdu.hdr_size > pdu_len) || + (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) { + err = -EINVAL; + break; + } + + ev->r.retcode = transport->send_pdu(conn, + (struct iscsi_hdr *)((char *)ev + sizeof(*ev)), + (char *)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size, + ev->u.send_pdu.data_size); + break; + default: + err = -ENOSYS; + } + + mutex_unlock(&conn->ep_mutex); + return err; +}
static int iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) { int err = 0; u32 portid; - u32 pdu_len; struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_transport *transport = NULL; struct iscsi_internal *priv; struct iscsi_cls_session *session; - struct iscsi_cls_conn *conn; struct iscsi_endpoint *ep = NULL;
if (!netlink_capable(skb, CAP_SYS_ADMIN)) @@ -3783,90 +3925,16 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) else err = -EINVAL; break; - case ISCSI_UEVENT_CREATE_CONN: - err = iscsi_if_create_conn(transport, ev); - break; - case ISCSI_UEVENT_DESTROY_CONN: - err = iscsi_if_destroy_conn(transport, ev); - break; - case ISCSI_UEVENT_BIND_CONN: - session = iscsi_session_lookup(ev->u.b_conn.sid); - conn = iscsi_conn_lookup(ev->u.b_conn.sid, ev->u.b_conn.cid); - - if (conn && conn->ep) - iscsi_if_ep_disconnect(transport, conn->ep->id); - - if (!session || !conn) { - err = -EINVAL; - break; - } - - mutex_lock(&conn_mutex); - ev->r.retcode = transport->bind_conn(session, conn, - ev->u.b_conn.transport_eph, - ev->u.b_conn.is_leading); - if (!ev->r.retcode) - conn->state = ISCSI_CONN_BOUND; - mutex_unlock(&conn_mutex); - - if (ev->r.retcode || !transport->ep_connect) - break; - - ep = iscsi_lookup_endpoint(ev->u.b_conn.transport_eph); - if (ep) { - ep->conn = conn; - - mutex_lock(&conn->ep_mutex); - conn->ep = ep; - mutex_unlock(&conn->ep_mutex); - iscsi_put_endpoint(ep); - } else - iscsi_cls_conn_printk(KERN_ERR, conn, - "Could not set ep conn " - "binding\n"); - break; case ISCSI_UEVENT_SET_PARAM: err = iscsi_set_param(transport, ev); break; - case ISCSI_UEVENT_START_CONN: - conn = iscsi_conn_lookup(ev->u.start_conn.sid, ev->u.start_conn.cid); - if (conn) { - mutex_lock(&conn_mutex); - ev->r.retcode = transport->start_conn(conn); - if (!ev->r.retcode) - conn->state = ISCSI_CONN_UP; - mutex_unlock(&conn_mutex); - } - else - err = -EINVAL; - break; + case ISCSI_UEVENT_CREATE_CONN: + case ISCSI_UEVENT_DESTROY_CONN: case ISCSI_UEVENT_STOP_CONN: - conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid); - if (conn) - iscsi_if_stop_conn(conn, ev->u.stop_conn.flag); - else - err = -EINVAL; - break; + case ISCSI_UEVENT_START_CONN: + case ISCSI_UEVENT_BIND_CONN: case ISCSI_UEVENT_SEND_PDU: - pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); - - if ((ev->u.send_pdu.hdr_size > pdu_len) || - (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) { - err = -EINVAL; - break; - } - - conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid); - if (conn) { - mutex_lock(&conn_mutex); - ev->r.retcode = transport->send_pdu(conn, - (struct iscsi_hdr*)((char*)ev + sizeof(*ev)), - (char*)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size, - ev->u.send_pdu.data_size); - mutex_unlock(&conn_mutex); - } - else - err = -EINVAL; + err = iscsi_if_transport_conn(transport, nlh); break; case ISCSI_UEVENT_GET_STATS: err = iscsi_if_get_stats(transport, nlh); @@ -4852,8 +4920,18 @@ static __init int iscsi_transport_init(void) goto release_nls; }
+ iscsi_conn_cleanup_workq = alloc_workqueue("%s", + WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, 0, + "iscsi_conn_cleanup"); + if (!iscsi_conn_cleanup_workq) { + err = -ENOMEM; + goto destroy_wq; + } + return 0;
+destroy_wq: + destroy_workqueue(iscsi_eh_timer_workq); release_nls: netlink_kernel_release(nls); unregister_flashnode_bus: @@ -4875,6 +4953,7 @@ static __init int iscsi_transport_init(void)
static void __exit iscsi_transport_exit(void) { + destroy_workqueue(iscsi_conn_cleanup_workq); destroy_workqueue(iscsi_eh_timer_workq); netlink_kernel_release(nls); bus_unregister(&iscsi_flashnode_bus); diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index e30aad182f5b..be0423b5543f 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -209,15 +209,23 @@ enum iscsi_connection_state { ISCSI_CONN_BOUND, };
+#define ISCSI_CLS_CONN_BIT_CLEANUP 1 + struct iscsi_cls_conn { struct list_head conn_list; /* item in connlist */ - struct list_head conn_list_err; /* item in connlist_err */ void *dd_data; /* LLD private data */ struct iscsi_transport *transport; uint32_t cid; /* connection id */ + /* + * This protects the conn startup and binding/unbinding of the ep to + * the conn. Unbinding includes ep_disconnect and stop_conn. + */ struct mutex ep_mutex; struct iscsi_endpoint *ep;
+ unsigned long flags; + struct work_struct cleanup_work; + struct device dev; /* sysfs transport/container device */ enum iscsi_connection_state state; };
From: Mike Christie michael.christie@oracle.com
stable inclusion from stable-v5.10.76 commit 90c8e8c0829b78518cf9dc4f6fca286848ce4c5a category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
[ Upstream commit 187a580c9e7895978dcd1e627b9c9e7e3d13ca96 ]
In commit 9e67600ed6b8 ("scsi: iscsi: Fix race condition between login and sync thread") we meant to add a check where before we call ->set_param() we make sure the iscsi_cls_connection is bound. The problem is that between versions 4 and 5 of the patch the deletion of the unchecked set_param() call was dropped so we ended up with 2 calls. As a result we can still hit a crash where we access the unbound connection on the first call.
This patch removes that first call.
Fixes: 9e67600ed6b8 ("scsi: iscsi: Fix race condition between login and sync thread") Link: https://lore.kernel.org/r/20211010161904.60471-1-michael.christie@oracle.com Reviewed-by: Lee Duncan lduncan@suse.com Reviewed-by: Li Feng fengli@smartx.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 06fa9e5d7cda..fa2f5f7d2b89 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2999,8 +2999,6 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: - err = transport->set_param(conn, ev->u.set_param.param, - data, ev->u.set_param.len); if ((conn->state == ISCSI_CONN_BOUND) || (conn->state == ISCSI_CONN_UP)) { err = transport->set_param(conn, ev->u.set_param.param,
From: Mike Christie michael.christie@oracle.com
stable inclusion from stable-v5.10.112 commit 699bd835c36e095dc80bea21d9a15cd7f7bc8b9f category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
[ Upstream commit c34f95e98d8fb750eefd4f3fe58b4f8b5e89253b ]
This patch moves iscsi_ep_disconnect() so it can be called earlier in the next patch.
Link: https://lore.kernel.org/r/20220408001314.5014-2-michael.christie@oracle.com Tested-by: Manish Rangankar mrangankar@marvell.com Reviewed-by: Lee Duncan lduncan@suse.com Reviewed-by: Chris Leech cleech@redhat.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org conflict: drivers/scsi/scsi_transport_iscsi.c Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 36 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index fa2f5f7d2b89..fafff6304e64 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2226,6 +2226,24 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n"); }
+static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) +{ + struct iscsi_cls_session *session = iscsi_conn_to_session(conn); + struct iscsi_endpoint *ep; + + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); + conn->state = ISCSI_CONN_FAILED; + + if (!conn->ep || !session->transport->ep_disconnect) + return; + + ep = conn->ep; + conn->ep = NULL; + + session->transport->ep_disconnect(ep); + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); +} + static int iscsi_if_stop_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { @@ -2266,24 +2284,6 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, return 0; }
-static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) -{ - struct iscsi_cls_session *session = iscsi_conn_to_session(conn); - struct iscsi_endpoint *ep; - - ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); - conn->state = ISCSI_CONN_FAILED; - - if (!conn->ep || !session->transport->ep_disconnect) - return; - - ep = conn->ep; - conn->ep = NULL; - - session->transport->ep_disconnect(ep); - ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); -} - static void iscsi_cleanup_conn_work_fn(struct work_struct *work) { struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn,
From: Mike Christie michael.christie@oracle.com
stable inclusion from stable-v5.10.112 commit 73805795c99ff32154c9e37c4334968c037c0f7f category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
[ Upstream commit cbd2283aaf47fef4ded4b29124b1ef3beb515f3a ]
When userspace restarts during boot or upgrades it won't know about the offload driver's endpoint and connection mappings. iscsid will start by cleaning up the old session by doing a stop_conn call. Later, if we are able to create a new connection, we clean up the old endpoint during the binding stage. The problem is that if we do stop_conn before doing the ep_disconnect call offload, drivers can still be executing I/O. We then might free tasks from the under the card/driver.
This moves the ep_disconnect call to before we do the stop_conn call for this case. It will then work and look like a normal recovery/cleanup procedure from the driver's point of view.
Link: https://lore.kernel.org/r/20220408001314.5014-3-michael.christie@oracle.com Tested-by: Manish Rangankar mrangankar@marvell.com Reviewed-by: Lee Duncan lduncan@suse.com Reviewed-by: Chris Leech cleech@redhat.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org conflict: drivers/scsi/scsi_transport_iscsi.c Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 48 +++++++++++++++++------------ 1 file changed, 28 insertions(+), 20 deletions(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index fafff6304e64..1c531f4a6ab9 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2244,6 +2244,23 @@ static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); }
+static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, + struct iscsi_endpoint *ep, + bool is_active) +{ + /* Check if this was a conn error and the kernel took ownership */ + if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + iscsi_ep_disconnect(conn, is_active); + } else { + ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); + mutex_unlock(&conn->ep_mutex); + + flush_work(&conn->cleanup_work); + + mutex_lock(&conn->ep_mutex); + } +} + static int iscsi_if_stop_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { @@ -2264,6 +2281,16 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, cancel_work_sync(&conn->cleanup_work); iscsi_stop_conn(conn, flag); } else { + /* + * For offload, when iscsid is restarted it won't know about + * existing endpoints so it can't do a ep_disconnect. We clean + * it up here for userspace. + */ + mutex_lock(&conn->ep_mutex); + if (conn->ep) + iscsi_if_disconnect_bound_ep(conn, conn->ep, true); + mutex_unlock(&conn->ep_mutex); + /* * Figure out if it was the kernel or userspace initiating this. */ @@ -3072,16 +3099,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, }
mutex_lock(&conn->ep_mutex); - /* Check if this was a conn error and the kernel took ownership */ - if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { - ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); - mutex_unlock(&conn->ep_mutex); - - flush_work(&conn->cleanup_work); - goto put_ep; - } - - iscsi_ep_disconnect(conn, false); + iscsi_if_disconnect_bound_ep(conn, ep, false); mutex_unlock(&conn->ep_mutex); put_ep: iscsi_put_endpoint(ep); @@ -3792,16 +3810,6 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport,
switch (nlh->nlmsg_type) { case ISCSI_UEVENT_BIND_CONN: - if (conn->ep) { - /* - * For offload boot support where iscsid is restarted - * during the pivot root stage, the ep will be intact - * here when the new iscsid instance starts up and - * reconnects. - */ - iscsi_ep_disconnect(conn, true); - } - session = iscsi_session_lookup(ev->u.b_conn.sid); if (!session) { err = -EINVAL;
From: Mike Christie michael.christie@oracle.com
stable inclusion from stable-v5.10.112 commit 45226fac4d316fea68357b307cb46a4aac764ec6 category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
[ Upstream commit 7c6e99c18167ed89729bf167ccb4a7e3ab3115ba ]
If iscsid is doing a stop_conn at the same time the kernel is starting error recovery we can hit a race that allows the cleanup work to run on a valid connection. In the race, iscsi_if_stop_conn sees the cleanup bit set, but it calls flush_work on the clean_work before iscsi_conn_error_event has queued it. The flush then returns before the queueing and so the cleanup_work can run later and disconnect/stop a conn while it's in a connected state.
The patch:
Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space")
added the late stop_conn call bug originally, and the patch:
Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
attempted to fix it but only fixed the normal EH case and left the above race for the iscsid restart case. For the normal EH case we don't hit the race because we only signal userspace to start recovery after we have done the queueing, so the flush will always catch the queued work or see it completed.
For iscsid restart cases like boot, we can hit the race because iscsid will call down to the kernel before the kernel has signaled any error, so both code paths can be running at the same time. This adds a lock around the setting of the cleanup bit and queueing so they happen together.
Link: https://lore.kernel.org/r/20220408001314.5014-6-michael.christie@oracle.com Fixes: 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space") Tested-by: Manish Rangankar mrangankar@marvell.com Reviewed-by: Lee Duncan lduncan@suse.com Reviewed-by: Chris Leech cleech@redhat.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 17 +++++++++++++++++ include/scsi/scsi_transport_iscsi.h | 2 ++ 2 files changed, 19 insertions(+)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 1c531f4a6ab9..f0d0fd0e69ea 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2249,9 +2249,12 @@ static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, bool is_active) { /* Check if this was a conn error and the kernel took ownership */ + spin_lock_irq(&conn->lock); if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); iscsi_ep_disconnect(conn, is_active); } else { + spin_unlock_irq(&conn->lock); ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); mutex_unlock(&conn->ep_mutex);
@@ -2294,9 +2297,12 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, /* * Figure out if it was the kernel or userspace initiating this. */ + spin_lock_irq(&conn->lock); if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); iscsi_stop_conn(conn, flag); } else { + spin_unlock_irq(&conn->lock); ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); flush_work(&conn->cleanup_work); @@ -2305,7 +2311,9 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, * Only clear for recovery to avoid extra cleanup runs during * termination. */ + spin_lock_irq(&conn->lock); clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); + spin_unlock_irq(&conn->lock); } ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n"); return 0; @@ -2326,7 +2334,9 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) */ if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) { ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n"); + spin_lock_irq(&conn->lock); clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); + spin_unlock_irq(&conn->lock); mutex_unlock(&conn->ep_mutex); return; } @@ -2375,6 +2385,7 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) conn->dd_data = &conn[1];
mutex_init(&conn->ep_mutex); + spin_lock_init(&conn->lock); INIT_LIST_HEAD(&conn->conn_list); INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn); conn->transport = transport; @@ -2657,9 +2668,12 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); + unsigned long flags;
+ spin_lock_irqsave(&conn->lock, flags); if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work); + spin_unlock_irqrestore(&conn->lock, flags);
priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -3802,11 +3816,14 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, return -EINVAL;
mutex_lock(&conn->ep_mutex); + spin_lock_irq(&conn->lock); if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { + spin_unlock_irq(&conn->lock); mutex_unlock(&conn->ep_mutex); ev->r.retcode = -ENOTCONN; return 0; } + spin_unlock_irq(&conn->lock);
switch (nlh->nlmsg_type) { case ISCSI_UEVENT_BIND_CONN: diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index be0423b5543f..584b3eaa9936 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -223,6 +223,8 @@ struct iscsi_cls_conn { struct mutex ep_mutex; struct iscsi_endpoint *ep;
+ /* Used when accessing flags and queueing work. */ + spinlock_t lock; unsigned long flags; struct work_struct cleanup_work;
From: Mike Christie michael.christie@oracle.com
stable inclusion from stable-v5.10.112 commit 129db30599bc8b4a7cd0f219feb1e0154802e3d2 category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
commit 0aadafb5c34403a7cced1a8d61877048dc059f70 upstream.
This patch fixes a bug where when using iSCSI offload we can free an endpoint while userspace still thinks it's active. That then causes the endpoint ID to be reused for a new connection's endpoint while userspace still thinks the ID is for the original connection. Userspace will then end up disconnecting a running connection's endpoint or trying to bind to another connection's endpoint.
This bug is a regression added in:
Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
where we added a in kernel ep_disconnect call to fix a bug in:
Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space")
where we would call stop_conn without having done ep_disconnect. This early ep_disconnect call will then free the endpoint and it's ID while userspace still thinks the ID is valid.
Fix the early release of the ID by having the in kernel recovery code keep a reference to the endpoint until userspace has called into the kernel to finish cleaning up the endpoint/connection. It requires the previous commit "scsi: iscsi: Release endpoint ID when its freed" which moved the freeing of the ID until when the endpoint is released.
Link: https://lore.kernel.org/r/20220408001314.5014-5-michael.christie@oracle.com Fixes: 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling") Tested-by: Manish Rangankar mrangankar@marvell.com Reviewed-by: Lee Duncan lduncan@suse.com Reviewed-by: Chris Leech cleech@redhat.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index f0d0fd0e69ea..89a23983e57c 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2259,7 +2259,11 @@ static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, mutex_unlock(&conn->ep_mutex);
flush_work(&conn->cleanup_work); - + /* + * Userspace is now done with the EP so we can release the ref + * iscsi_cleanup_conn_work_fn took. + */ + iscsi_put_endpoint(ep); mutex_lock(&conn->ep_mutex); } } @@ -2341,6 +2345,12 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) return; }
+ /* + * Get a ref to the ep, so we don't release its ID until after + * userspace is done referencing it in iscsi_if_disconnect_bound_ep. + */ + if (conn->ep) + get_device(&conn->ep->dev); iscsi_ep_disconnect(conn, false);
if (system_state != SYSTEM_RUNNING) {
From: Mike Christie michael.christie@oracle.com
stable inclusion from stable-v5.10.112 commit 361288633bfa0927b936ed3e1f01b605fb239994 category: bugfix bugzilla: 188286, https://gitee.com/openeuler/kernel/issues/I67L59 CVE: NA
----------------------------------------
commit 03690d81974535f228e892a14f0d2d44404fe555 upstream.
If a driver raises a connection error before the connection is bound, we can leave a cleanup_work queued that can later run and disconnect/stop a connection that is logged in. The problem is that drivers can call iscsi_conn_error_event for endpoints that are connected but not yet bound when something like the network port they are using is brought down. iscsi_cleanup_conn_work_fn will check for this and exit early, but if the cleanup_work is stuck behind other works, it might not get run until after userspace has done ep_disconnect. Because the endpoint is not yet bound there was no way for ep_disconnect to flush the work.
The bug of leaving stop_conns queued was added in:
Commit 23d6fefbb3f6 ("scsi: iscsi: Fix in-kernel conn failure handling")
and:
Commit 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space")
was supposed to fix it, but left this case.
This patch moves the conn state check to before we even queue the work so we can avoid queueing.
Link: https://lore.kernel.org/r/20220408001314.5014-7-michael.christie@oracle.com Fixes: 0ab710458da1 ("scsi: iscsi: Perform connection failure entirely in kernel space") Tested-by: Manish Rangankar mrangankar@marvell.com Reviewed-by: Lee Duncan <lduncan@@suse.com> Reviewed-by: Chris Leech cleech@redhat.com Signed-off-by: Mike Christie michael.christie@oracle.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org conflict: drivers/scsi/scsi_transport_iscsi.c Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 65 ++++++++++++++++------------- 1 file changed, 36 insertions(+), 29 deletions(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 89a23983e57c..b52ada64e692 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2211,10 +2211,10 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag)
switch (flag) { case STOP_CONN_RECOVER: - conn->state = ISCSI_CONN_FAILED; + WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); break; case STOP_CONN_TERM: - conn->state = ISCSI_CONN_DOWN; + WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); break; default: iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n", @@ -2232,7 +2232,7 @@ static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) struct iscsi_endpoint *ep;
ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); - conn->state = ISCSI_CONN_FAILED; + WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
if (!conn->ep || !session->transport->ep_disconnect) return; @@ -2330,21 +2330,6 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
mutex_lock(&conn->ep_mutex); - /* - * If we are not at least bound there is nothing for us to do. Userspace - * will do a ep_disconnect call if offload is used, but will not be - * doing a stop since there is nothing to clean up, so we have to clear - * the cleanup bit here. - */ - if (conn->state != ISCSI_CONN_BOUND && conn->state != ISCSI_CONN_UP) { - ISCSI_DBG_TRANS_CONN(conn, "Got error while conn is already failed. Ignoring.\n"); - spin_lock_irq(&conn->lock); - clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); - spin_unlock_irq(&conn->lock); - mutex_unlock(&conn->ep_mutex); - return; - } - /* * Get a ref to the ep, so we don't release its ID until after * userspace is done referencing it in iscsi_if_disconnect_bound_ep. @@ -2400,7 +2385,7 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn); conn->transport = transport; conn->cid = cid; - conn->state = ISCSI_CONN_DOWN; + WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
/* this is released in the dev's release function */ if (!get_device(&session->dev)) @@ -2679,10 +2664,30 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); unsigned long flags; + int state;
spin_lock_irqsave(&conn->lock, flags); - if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) - queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work); + /* + * Userspace will only do a stop call if we are at least bound. And, we + * only need to do the in kernel cleanup if in the UP state so cmds can + * be released to upper layers. If in other states just wait for + * userspace to avoid races that can leave the cleanup_work queued. + */ + state = READ_ONCE(conn->state); + switch (state) { + case ISCSI_CONN_BOUND: + case ISCSI_CONN_UP: + if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, + &conn->flags)) { + queue_work(iscsi_conn_cleanup_workq, + &conn->cleanup_work); + } + break; + default: + ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n", + state); + break; + } spin_unlock_irqrestore(&conn->lock, flags);
priv = iscsi_if_transport_lookup(conn->transport); @@ -3033,7 +3038,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) char *data = (char*)ev + sizeof(*ev); struct iscsi_cls_conn *conn; struct iscsi_cls_session *session; - int err = 0, value = 0; + int err = 0, value = 0, state;
if (ev->u.set_param.len > PAGE_SIZE) return -EINVAL; @@ -3050,8 +3055,8 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: - if ((conn->state == ISCSI_CONN_BOUND) || - (conn->state == ISCSI_CONN_UP)) { + state = READ_ONCE(conn->state); + if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) { err = transport->set_param(conn, ev->u.set_param.param, data, ev->u.set_param.len); } else { @@ -3847,7 +3852,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, ev->u.b_conn.transport_eph, ev->u.b_conn.is_leading); if (!ev->r.retcode) - conn->state = ISCSI_CONN_BOUND; + WRITE_ONCE(conn->state, ISCSI_CONN_BOUND);
if (ev->r.retcode || !transport->ep_connect) break; @@ -3866,7 +3871,8 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, case ISCSI_UEVENT_START_CONN: ev->r.retcode = transport->start_conn(conn); if (!ev->r.retcode) - conn->state = ISCSI_CONN_UP; + WRITE_ONCE(conn->state, ISCSI_CONN_UP); + break; case ISCSI_UEVENT_SEND_PDU: pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); @@ -4157,10 +4163,11 @@ static ssize_t show_conn_state(struct device *dev, { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); const char *state = "unknown"; + int conn_state = READ_ONCE(conn->state);
- if (conn->state >= 0 && - conn->state < ARRAY_SIZE(connection_state_names)) - state = connection_state_names[conn->state]; + if (conn_state >= 0 && + conn_state < ARRAY_SIZE(connection_state_names)) + state = connection_state_names[conn_state];
return sprintf(buf, "%s\n", state); }
hulk inclusion category: bugfix bugzilla: 188286 CVE: NA, https://gitee.com/openeuler/kernel/issues/I67L59
----------------------------------------
iscsi_cls_conn introduces four members: lock, flags, cleanup_work, and state. Use iscsi_cls_conn_wrapper to avoid kabi changes.
Signed-off-by: Zhong Jinghua zhongjinghua@huawei.com --- drivers/scsi/scsi_transport_iscsi.c | 108 +++++++++++++++++----------- include/scsi/scsi_transport_iscsi.h | 11 ++- 2 files changed, 75 insertions(+), 44 deletions(-)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index b52ada64e692..42c129d06322 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2207,14 +2207,16 @@ EXPORT_SYMBOL_GPL(iscsi_remove_session);
static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) { + struct iscsi_cls_conn_wrapper *conn_wrapper = conn_to_wrapper(conn); + ISCSI_DBG_TRANS_CONN(conn, "Stopping conn.\n");
switch (flag) { case STOP_CONN_RECOVER: - WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); + WRITE_ONCE(conn_wrapper->state, ISCSI_CONN_FAILED); break; case STOP_CONN_TERM: - WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); + WRITE_ONCE(conn_wrapper->state, ISCSI_CONN_DOWN); break; default: iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n", @@ -2229,10 +2231,11 @@ static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag) static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) { struct iscsi_cls_session *session = iscsi_conn_to_session(conn); + struct iscsi_cls_conn_wrapper *conn_wrapper = conn_to_wrapper(conn); struct iscsi_endpoint *ep;
ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); - WRITE_ONCE(conn->state, ISCSI_CONN_FAILED); + WRITE_ONCE(conn_wrapper->state, ISCSI_CONN_FAILED);
if (!conn->ep || !session->transport->ep_disconnect) return; @@ -2248,17 +2251,19 @@ static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, struct iscsi_endpoint *ep, bool is_active) { + struct iscsi_cls_conn_wrapper *conn_wrapper = conn_to_wrapper(conn); + /* Check if this was a conn error and the kernel took ownership */ - spin_lock_irq(&conn->lock); - if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { - spin_unlock_irq(&conn->lock); + spin_lock_irq(&conn_wrapper->lock); + if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags)) { + spin_unlock_irq(&conn_wrapper->lock); iscsi_ep_disconnect(conn, is_active); } else { - spin_unlock_irq(&conn->lock); + spin_unlock_irq(&conn_wrapper->lock); ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); mutex_unlock(&conn->ep_mutex);
- flush_work(&conn->cleanup_work); + flush_work(&conn_wrapper->cleanup_work); /* * Userspace is now done with the EP so we can release the ref * iscsi_cleanup_conn_work_fn took. @@ -2273,11 +2278,13 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, { int flag = ev->u.stop_conn.flag; struct iscsi_cls_conn *conn; + struct iscsi_cls_conn_wrapper *conn_wrapper;
conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid); if (!conn) return -EINVAL;
+ conn_wrapper = conn_to_wrapper(conn); ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop.\n"); /* * If this is a termination we have to call stop_conn with that flag @@ -2285,7 +2292,7 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, * avoid the extra run. */ if (flag == STOP_CONN_TERM) { - cancel_work_sync(&conn->cleanup_work); + cancel_work_sync(&conn_wrapper->cleanup_work); iscsi_stop_conn(conn, flag); } else { /* @@ -2301,23 +2308,24 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport, /* * Figure out if it was the kernel or userspace initiating this. */ - spin_lock_irq(&conn->lock); - if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { - spin_unlock_irq(&conn->lock); + spin_lock_irq(&conn_wrapper->lock); + if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, + &conn_wrapper->flags)) { + spin_unlock_irq(&conn_wrapper->lock); iscsi_stop_conn(conn, flag); } else { - spin_unlock_irq(&conn->lock); + spin_unlock_irq(&conn_wrapper->lock); ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); - flush_work(&conn->cleanup_work); + flush_work(&conn_wrapper->cleanup_work); } /* * Only clear for recovery to avoid extra cleanup runs during * termination. */ - spin_lock_irq(&conn->lock); - clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags); - spin_unlock_irq(&conn->lock); + spin_lock_irq(&conn_wrapper->lock); + clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags); + spin_unlock_irq(&conn_wrapper->lock); } ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n"); return 0; @@ -2325,8 +2333,10 @@ static int iscsi_if_stop_conn(struct iscsi_transport *transport,
static void iscsi_cleanup_conn_work_fn(struct work_struct *work) { - struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn, - cleanup_work); + struct iscsi_cls_conn_wrapper *conn_wrapper = container_of(work, + struct iscsi_cls_conn_wrapper, + cleanup_work); + struct iscsi_cls_conn *conn = &conn_wrapper->conn; struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
mutex_lock(&conn->ep_mutex); @@ -2372,20 +2382,23 @@ iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) { struct iscsi_transport *transport = session->transport; struct iscsi_cls_conn *conn; + struct iscsi_cls_conn_wrapper *conn_wrapper;
- conn = kzalloc(sizeof(*conn) + dd_size, GFP_KERNEL); - if (!conn) + conn_wrapper = kzalloc(sizeof(*conn_wrapper) + dd_size, GFP_KERNEL); + if (!conn_wrapper) return NULL; + + conn = &conn_wrapper->conn; if (dd_size) - conn->dd_data = &conn[1]; + conn->dd_data = &conn_wrapper[1];
mutex_init(&conn->ep_mutex); - spin_lock_init(&conn->lock); + spin_lock_init(&conn_wrapper->lock); INIT_LIST_HEAD(&conn->conn_list); - INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn); + INIT_WORK(&conn_wrapper->cleanup_work, iscsi_cleanup_conn_work_fn); conn->transport = transport; conn->cid = cid; - WRITE_ONCE(conn->state, ISCSI_CONN_DOWN); + WRITE_ONCE(conn_wrapper->state, ISCSI_CONN_DOWN);
/* this is released in the dev's release function */ if (!get_device(&session->dev)) @@ -2473,14 +2486,17 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) { struct iscsi_transport *transport = session->transport; struct iscsi_cls_conn *conn; + struct iscsi_cls_conn_wrapper *conn_wrapper; unsigned long flags; int err;
- conn = kzalloc(sizeof(*conn) + dd_size, GFP_KERNEL); - if (!conn) + conn_wrapper = kzalloc(sizeof(*conn_wrapper) + dd_size, GFP_KERNEL); + if (!conn_wrapper) return NULL; + + conn = &conn_wrapper->conn; if (dd_size) - conn->dd_data = &conn[1]; + conn->dd_data = &conn_wrapper[1];
mutex_init(&conn->ep_mutex); INIT_LIST_HEAD(&conn->conn_list); @@ -2662,25 +2678,26 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) struct sk_buff *skb; struct iscsi_uevent *ev; struct iscsi_internal *priv; + struct iscsi_cls_conn_wrapper *conn_wrapper = conn_to_wrapper(conn); int len = nlmsg_total_size(sizeof(*ev)); unsigned long flags; int state;
- spin_lock_irqsave(&conn->lock, flags); + spin_lock_irqsave(&conn_wrapper->lock, flags); /* * Userspace will only do a stop call if we are at least bound. And, we * only need to do the in kernel cleanup if in the UP state so cmds can * be released to upper layers. If in other states just wait for * userspace to avoid races that can leave the cleanup_work queued. */ - state = READ_ONCE(conn->state); + state = READ_ONCE(conn_wrapper->state); switch (state) { case ISCSI_CONN_BOUND: case ISCSI_CONN_UP: if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, - &conn->flags)) { + &conn_wrapper->flags)) { queue_work(iscsi_conn_cleanup_workq, - &conn->cleanup_work); + &conn_wrapper->cleanup_work); } break; default: @@ -2688,7 +2705,7 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) state); break; } - spin_unlock_irqrestore(&conn->lock, flags); + spin_unlock_irqrestore(&conn_wrapper->lock, flags);
priv = iscsi_if_transport_lookup(conn->transport); if (!priv) @@ -3018,13 +3035,15 @@ static int iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct iscsi_cls_conn *conn; + struct iscsi_cls_conn_wrapper *conn_wrapper;
conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid); if (!conn) return -EINVAL;
+ conn_wrapper = conn_to_wrapper(conn); ISCSI_DBG_TRANS_CONN(conn, "Flushing cleanup during destruction\n"); - flush_work(&conn->cleanup_work); + flush_work(&conn_wrapper->cleanup_work); ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n");
if (transport->destroy_conn) @@ -3037,6 +3056,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) { char *data = (char*)ev + sizeof(*ev); struct iscsi_cls_conn *conn; + struct iscsi_cls_conn_wrapper *conn_wrapper; struct iscsi_cls_session *session; int err = 0, value = 0, state;
@@ -3048,6 +3068,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) if (!conn || !session) return -EINVAL;
+ conn_wrapper = conn_to_wrapper(conn); switch (ev->u.set_param.param) { case ISCSI_PARAM_SESS_RECOVERY_TMO: sscanf(data, "%d", &value); @@ -3055,7 +3076,7 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: - state = READ_ONCE(conn->state); + state = READ_ONCE(conn_wrapper->state); if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) { err = transport->set_param(conn, ev->u.set_param.param, data, ev->u.set_param.len); @@ -3794,6 +3815,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_cls_session *session; struct iscsi_cls_conn *conn = NULL; + struct iscsi_cls_conn_wrapper *conn_wrapper; struct iscsi_endpoint *ep; uint32_t pdu_len; int err = 0; @@ -3830,15 +3852,16 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, if (!conn) return -EINVAL;
+ conn_wrapper = conn_to_wrapper(conn); mutex_lock(&conn->ep_mutex); - spin_lock_irq(&conn->lock); - if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { - spin_unlock_irq(&conn->lock); + spin_lock_irq(&conn_wrapper->lock); + if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn_wrapper->flags)) { + spin_unlock_irq(&conn_wrapper->lock); mutex_unlock(&conn->ep_mutex); ev->r.retcode = -ENOTCONN; return 0; } - spin_unlock_irq(&conn->lock); + spin_unlock_irq(&conn_wrapper->lock);
switch (nlh->nlmsg_type) { case ISCSI_UEVENT_BIND_CONN: @@ -3852,7 +3875,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, ev->u.b_conn.transport_eph, ev->u.b_conn.is_leading); if (!ev->r.retcode) - WRITE_ONCE(conn->state, ISCSI_CONN_BOUND); + WRITE_ONCE(conn_wrapper->state, ISCSI_CONN_BOUND);
if (ev->r.retcode || !transport->ep_connect) break; @@ -3871,7 +3894,7 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, case ISCSI_UEVENT_START_CONN: ev->r.retcode = transport->start_conn(conn); if (!ev->r.retcode) - WRITE_ONCE(conn->state, ISCSI_CONN_UP); + WRITE_ONCE(conn_wrapper->state, ISCSI_CONN_UP);
break; case ISCSI_UEVENT_SEND_PDU: @@ -4162,8 +4185,9 @@ static ssize_t show_conn_state(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); + struct iscsi_cls_conn_wrapper *conn_wrapper = conn_to_wrapper(conn); const char *state = "unknown"; - int conn_state = READ_ONCE(conn->state); + int conn_state = READ_ONCE(conn_wrapper->state);
if (conn_state >= 0 && conn_state < ARRAY_SIZE(connection_state_names)) diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 584b3eaa9936..7b9b2b46820e 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -223,15 +223,22 @@ struct iscsi_cls_conn { struct mutex ep_mutex; struct iscsi_endpoint *ep;
+ struct device dev; /* sysfs transport/container device */ +}; + +struct iscsi_cls_conn_wrapper { + struct iscsi_cls_conn conn; + /* Used when accessing flags and queueing work. */ spinlock_t lock; unsigned long flags; struct work_struct cleanup_work; - - struct device dev; /* sysfs transport/container device */ enum iscsi_connection_state state; };
+#define conn_to_wrapper(ic_conn) \ + container_of(ic_conn, struct iscsi_cls_conn_wrapper, conn) + #define iscsi_dev_to_conn(_dev) \ container_of(_dev, struct iscsi_cls_conn, dev)