driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAGSQ9
----------------------------------------------------------------------
Currently the notifier block for bonding events lives in the hr_dev structure, and bond_grp is dynamically allocated in the event handler. Since every hr_dev responds to bonding events, we had to add complicated filters to choose a suitable hr_dev to handle each event. Besides, we also had to worry about the validity of bond_grp pointers in many concurrency cases, as they may already have been freed.
Refactor the bonding event handler by:
1. allocating/deallocating bond_grp structures when the driver inits/exits;
2. registering the notifier block of bonding events in bond_grp.
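A minimal user-space sketch of the dispatch model this enables follows (the notifier_block stub and the main() harness are hypothetical and exist only so the snippet compiles standalone; only the container_of() pattern mirrors the handler in the diff below):

  #include <stdio.h>
  #include <stddef.h>

  /* Stub of the kernel's notifier_block, just enough for the example. */
  struct notifier_block {
          int (*notifier_call)(struct notifier_block *nb,
                               unsigned long event, void *ptr);
  };

  #define container_of(ptr, type, member) \
          ((type *)((char *)(ptr) - offsetof(type, member)))

  struct hns_roce_bond_group {
          int bond_id;
          struct notifier_block bond_nb;  /* embedded, registered at init */
  };

  static int hns_roce_bond_event(struct notifier_block *self,
                                 unsigned long event, void *ptr)
  {
          /* The registering bond_grp is recovered directly from the
           * embedded notifier block; no hr_dev filtering is needed.
           */
          struct hns_roce_bond_group *bond_grp =
                  container_of(self, struct hns_roce_bond_group, bond_nb);

          printf("bond_grp %d received event %lu\n",
                 bond_grp->bond_id, event);
          return 0;
  }

  int main(void)
  {
          struct hns_roce_bond_group grp = { .bond_id = 0 };

          grp.bond_nb.notifier_call = hns_roce_bond_event;
          /* In the driver this is register_netdevice_notifier(&grp.bond_nb). */
          grp.bond_nb.notifier_call(&grp.bond_nb, 1, NULL);
          return 0;
  }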
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com>
---
 drivers/infiniband/hw/hns/hns_roce_bond.c   | 397 +++++++++++---------
 drivers/infiniband/hw/hns/hns_roce_bond.h   |   3 +
 drivers/infiniband/hw/hns/hns_roce_device.h |   1 -
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |   9 +-
 drivers/infiniband/hw/hns/hns_roce_main.c   |   2 +-
 5 files changed, 218 insertions(+), 194 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index 238914c58869..a15c7b26551a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -52,32 +52,6 @@ static int get_netdev_bond_slave_id(struct net_device *net_dev, return -ENOENT; }
-static bool is_hrdev_bond_slave(struct hns_roce_dev *hr_dev, - struct net_device *upper_dev) -{ - struct hns_roce_bond_group *bond_grp; - struct net_device *net_dev; - u8 bus_num; - - if (!hr_dev || !upper_dev) - return false; - - if (!netif_is_lag_master(upper_dev)) - return false; - - net_dev = get_hr_netdev(hr_dev, 0); - bus_num = get_hr_bus_num(hr_dev); - - if (upper_dev == get_upper_dev_from_ndev(net_dev)) - return true; - - bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); - if (bond_grp && upper_dev == bond_grp->upper_dev) - return true; - - return false; -} - struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, u8 bus_num) { @@ -92,8 +66,10 @@ struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, bond_grp = die_info->bgrps[i]; if (!bond_grp) continue; - if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0 || - (bond_grp->upper_dev == get_upper_dev_from_ndev(net_dev))) + if (get_netdev_bond_slave_id(net_dev, bond_grp) >= 0) + return bond_grp; + if (bond_grp->upper_dev && + bond_grp->upper_dev == get_upper_dev_from_ndev(net_dev)) return bond_grp; }
@@ -107,8 +83,8 @@ bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev) u8 bus_num = get_hr_bus_num(hr_dev);
bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); - - if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED) + if (bond_grp && bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED && + bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED) return true;
return false; @@ -508,39 +484,6 @@ static void hns_roce_do_bond_work(struct work_struct *work) hns_roce_queue_bond_work(bond_grp, HZ); }
-int hns_roce_bond_init(struct hns_roce_dev *hr_dev) -{ - struct net_device *net_dev = get_hr_netdev(hr_dev, 0); - struct hns_roce_v2_priv *priv = hr_dev->priv; - struct hns_roce_bond_group *bond_grp; - u8 bus_num = get_hr_bus_num(hr_dev); - int ret; - - bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); - if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT && - bond_grp) { - bond_grp->main_hr_dev = hr_dev; - ret = hns_roce_recover_bond(bond_grp); - if (ret) { - ibdev_err(&hr_dev->ib_dev, - "failed to recover RoCE bond, ret = %d.\n", - ret); - return ret; - } - } - - hr_dev->bond_nb.notifier_call = hns_roce_bond_event; - ret = register_netdevice_notifier(&hr_dev->bond_nb); - if (ret) { - ibdev_err(&hr_dev->ib_dev, - "failed to register notifier for RoCE bond, ret = %d.\n", - ret); - hr_dev->bond_nb.notifier_call = NULL; - } - - return ret; -} - static struct hns_roce_die_info *alloc_die_info(int bus_num) { struct hns_roce_die_info *die_info; @@ -612,9 +555,143 @@ static int remove_bond_id(int bus_num, u8 bond_id) return 0; }
+static int hns_roce_alloc_bond_grp(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_bond_group *bgrps[ROCE_BOND_NUM_MAX]; + struct hns_roce_bond_group *bond_grp; + int ret; + int i; + + for (i = 0; i < ROCE_BOND_NUM_MAX; i++) { + bond_grp = kvzalloc(sizeof(*bond_grp), GFP_KERNEL); + if (!bond_grp) { + ret = -ENOMEM; + goto mem_err; + } + + mutex_init(&bond_grp->bond_mutex); + INIT_DELAYED_WORK(&bond_grp->bond_work, hns_roce_do_bond_work); + init_completion(&bond_grp->bond_work_done); + + bond_grp->bond_ready = false; + bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED; + bond_grp->bus_num = get_hr_bus_num(hr_dev); + + ret = alloc_bond_id(bond_grp); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to alloc bond ID, ret = %d.\n", ret); + goto alloc_id_err; + } + + bond_grp->bond_nb.notifier_call = hns_roce_bond_event; + ret = register_netdevice_notifier(&bond_grp->bond_nb); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to register bond nb, ret = %d.\n", ret); + goto register_nb_err; + } + bgrps[i] = bond_grp; + } + + return 0; + +register_nb_err: + remove_bond_id(bond_grp->bus_num, bond_grp->bond_id); +alloc_id_err: + mutex_destroy(&bond_grp->bond_mutex); + kvfree(bond_grp); +mem_err: + for (i--; i >= 0; i--) { + unregister_netdevice_notifier(&bgrps[i]->bond_nb); + cancel_delayed_work_sync(&bgrps[i]->bond_work); + complete(&bgrps[i]->bond_work_done); + remove_bond_id(bgrps[i]->bus_num, bgrps[i]->bond_id); + mutex_destroy(&bgrps[i]->bond_mutex); + kvfree(bgrps[i]); + } + return ret; +} + +void hns_roce_dealloc_bond_grp(void) +{ + struct hns_roce_bond_group *bond_grp; + struct hns_roce_die_info *die_info; + unsigned long id; + int i; + + xa_for_each(&roce_bond_xa, id, die_info) { + for (i = 0; i < ROCE_BOND_NUM_MAX; i++) { + bond_grp = die_info->bgrps[i]; + if (!bond_grp) + continue; + unregister_netdevice_notifier(&bond_grp->bond_nb); + cancel_delayed_work_sync(&bond_grp->bond_work); + remove_bond_id(bond_grp->bus_num, bond_grp->bond_id); + mutex_destroy(&bond_grp->bond_mutex); + kvfree(bond_grp); + } + } +} + +int hns_roce_bond_init(struct hns_roce_dev *hr_dev) +{ + struct net_device *net_dev = get_hr_netdev(hr_dev, 0); + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_bond_group *bond_grp; + u8 bus_num = get_hr_bus_num(hr_dev); + int ret = 0; + + if (priv->handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT) { + bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); + if (!bond_grp) + return 0; + + bond_grp->main_hr_dev = hr_dev; + ret = hns_roce_recover_bond(bond_grp); + if (ret) + ibdev_err(&hr_dev->ib_dev, + "failed to recover RoCE bond, ret = %d.\n", + ret); + return ret; + } + + if (!xa_load(&roce_bond_xa, bus_num)) { + ret = hns_roce_alloc_bond_grp(hr_dev); + if (ret) + ibdev_err(&hr_dev->ib_dev, + "failed to alloc RoCE bond, ret = %d.\n", + ret); + } + + return ret; +} + +static void hns_roce_attach_bond_grp(struct hns_roce_bond_group *bond_grp, + struct hns_roce_dev *hr_dev, + struct net_device *upper_dev) +{ + bond_grp->upper_dev = upper_dev; + bond_grp->main_hr_dev = hr_dev; + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + bond_grp->bond_ready = false; + hns_roce_bond_info_update(bond_grp, upper_dev, true); +} + +static void hns_roce_detach_bond_grp(struct hns_roce_bond_group *bond_grp) +{ + cancel_delayed_work(&bond_grp->bond_work); + bond_grp->upper_dev = NULL; + bond_grp->main_hr_dev = NULL; + bond_grp->bond_ready = false; + bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED; + bond_grp->slave_map = 0; + bond_grp->slave_map_diff = 0; + 
memset(bond_grp->bond_func_info, 0, sizeof(bond_grp->bond_func_info)); +} + int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) { - bool completion_no_waiter; int ret;
ret = bond_grp->main_hr_dev ? @@ -622,29 +699,19 @@ int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) if (ret) BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", ret);
- cancel_delayed_work(&bond_grp->bond_work); - ret = remove_bond_id(bond_grp->bus_num, bond_grp->bond_id); - if (ret) - BOND_ERR_LOG("failed to remove bond id %u, ret = %d.\n", - bond_grp->bond_id, ret); - - completion_no_waiter = completion_done(&bond_grp->bond_work_done); + hns_roce_detach_bond_grp(bond_grp); complete(&bond_grp->bond_work_done); - if (completion_no_waiter) - kfree(bond_grp);
return ret; }
-static bool hns_roce_bond_lowerstate_event(struct hns_roce_dev *hr_dev, - struct hns_roce_bond_group *bond_grp, +static bool hns_roce_bond_lowerstate_event(struct hns_roce_bond_group *bond_grp, struct netdev_notifier_changelowerstate_info *info) { struct net_device *net_dev = netdev_notifier_info_to_dev((struct netdev_notifier_info *)info);
- if (!netif_is_lag_port(net_dev) || - (!bond_grp || hr_dev != bond_grp->main_hr_dev)) + if (!netif_is_lag_port(net_dev)) return false;
mutex_lock(&bond_grp->bond_mutex); @@ -674,48 +741,6 @@ static bool is_bond_setting_supported(struct netdev_lag_upper_info *bond_info) return true; }
-static void hns_roce_bond_info_update(struct hns_roce_bond_group *bond_grp, - struct net_device *upper_dev, - bool slave_inc) -{ - struct hns_roce_v2_priv *priv; - struct hns_roce_dev *hr_dev; - struct net_device *net_dev; - u8 func_idx, i; - - if (!slave_inc) { - for (i = 0; i < ROCE_BOND_FUNC_MAX; ++i) { - net_dev = bond_grp->bond_func_info[i].net_dev; - if (net_dev && upper_dev != - get_upper_dev_from_ndev(net_dev)) { - bond_grp->slave_map_diff |= (1U << i); - bond_grp->slave_map &= ~(1U << i); - } - } - return; - } - - rcu_read_lock(); - for_each_netdev_in_bond_rcu(upper_dev, net_dev) { - hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); - if (hr_dev) { - func_idx = PCI_FUNC(hr_dev->pci_dev->devfn); - if (!bond_grp->bond_func_info[func_idx].net_dev) { - bond_grp->slave_map_diff |= (1U << func_idx); - bond_grp->slave_map |= (1U << func_idx); - priv = hr_dev->priv; - - bond_grp->bond_func_info[func_idx].net_dev = - net_dev; - - bond_grp->bond_func_info[func_idx].handle = - priv->handle; - } - } - } - rcu_read_unlock(); -} - static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp, struct netdev_notifier_changeupper_info *info) { @@ -755,43 +780,8 @@ static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp, return changed; }
-static struct hns_roce_bond_group *hns_roce_alloc_bond_grp(struct hns_roce_dev *main_hr_dev, - struct net_device *upper_dev) -{ - struct hns_roce_bond_group *bond_grp; - int ret; - - bond_grp = kzalloc(sizeof(*bond_grp), GFP_KERNEL); - if (!bond_grp) - return NULL; - - mutex_init(&bond_grp->bond_mutex); - - INIT_DELAYED_WORK(&bond_grp->bond_work, hns_roce_do_bond_work); - - init_completion(&bond_grp->bond_work_done); - - bond_grp->upper_dev = upper_dev; - bond_grp->main_hr_dev = main_hr_dev; - bond_grp->bond_ready = false; - bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; - bond_grp->bus_num = main_hr_dev->pci_dev->bus->number; - - ret = alloc_bond_id(bond_grp); - if (ret) { - ibdev_err(&main_hr_dev->ib_dev, - "failed to alloc bond ID, ret = %d.\n", ret); - kfree(bond_grp); - return NULL; - } - - hns_roce_bond_info_update(bond_grp, upper_dev, true); - - return bond_grp; -} - static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp, - struct net_device *net_dev, int bus_num) + struct net_device *net_dev) { struct hns_roce_dev *hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
@@ -809,7 +799,7 @@ static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp, if (hr_dev->is_vf || pci_num_vf(hr_dev->pci_dev) > 0) return false;
- if (bus_num != get_hr_bus_num(hr_dev)) + if (bond_grp->bus_num != get_hr_bus_num(hr_dev)) return false;
return true; @@ -832,8 +822,7 @@ static bool check_unlinking_bond_support(struct hns_roce_bond_group *bond_grp)
static bool check_linking_bond_support(struct netdev_lag_upper_info *bond_info, struct hns_roce_bond_group *bond_grp, - struct net_device *upper_dev, - int bus_num) + struct net_device *upper_dev) { struct net_device *net_dev; u8 slave_num = 0; @@ -843,7 +832,7 @@ static bool check_linking_bond_support(struct netdev_lag_upper_info *bond_info,
rcu_read_lock(); for_each_netdev_in_bond_rcu(upper_dev, net_dev) { - if (is_dev_bond_supported(bond_grp, net_dev, bus_num)) { + if (is_dev_bond_supported(bond_grp, net_dev)) { slave_num++; } else { rcu_read_unlock(); @@ -856,19 +845,14 @@ static bool check_linking_bond_support(struct netdev_lag_upper_info *bond_info, }
static enum bond_support_type - check_bond_support(struct hns_roce_dev *hr_dev, - struct net_device **upper_dev, + check_bond_support(struct hns_roce_bond_group *bond_grp, + struct net_device *upper_dev, struct netdev_notifier_changeupper_info *info) { - struct net_device *net_dev = get_hr_netdev(hr_dev, 0); - struct hns_roce_bond_group *bond_grp; - int bus_num = get_hr_bus_num(hr_dev); bool bond_grp_exist = false; bool support;
- *upper_dev = info->upper_dev; - bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); - if (bond_grp && *upper_dev == bond_grp->upper_dev) + if (upper_dev == bond_grp->upper_dev) bond_grp_exist = true;
if (!info->linking && !bond_grp_exist) @@ -876,7 +860,7 @@ static enum bond_support_type
if (info->linking) support = check_linking_bond_support(info->upper_info, bond_grp, - *upper_dev, bus_num); + upper_dev); else support = check_unlinking_bond_support(bond_grp); if (support) @@ -885,16 +869,56 @@ static enum bond_support_type return bond_grp_exist ? BOND_EXISTING_NOT_SUPPORT : BOND_NOT_SUPPORT; }
+static bool upper_event_filter(struct netdev_notifier_changeupper_info *info, + struct hns_roce_bond_group *bond_grp, + struct net_device *net_dev) +{ + struct net_device *upper_dev = info->upper_dev; + struct hns_roce_bond_group *bond_grp_tmp; + struct hns_roce_dev *hr_dev; + u8 bus_num; + + if (!info->linking) + return bond_grp->upper_dev == upper_dev; + + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); + if (!hr_dev) + return false; + + bus_num = get_hr_bus_num(hr_dev); + if (bond_grp->bus_num != bus_num) + return false; + + bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bus_num); + if (bond_grp_tmp && bond_grp_tmp != bond_grp) + return false; + + if (bond_grp->bond_state != HNS_ROCE_BOND_NOT_ATTACHED && + bond_grp->upper_dev != upper_dev) + return false; + + return true; +} + +static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp, + struct net_device *net_dev) +{ + struct hns_roce_bond_group *bond_grp_tmp; + + bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bond_grp->bus_num); + return bond_grp_tmp == bond_grp; +} + int hns_roce_bond_event(struct notifier_block *self, unsigned long event, void *ptr) { + struct hns_roce_bond_group *bond_grp = + container_of(self, struct hns_roce_bond_group, bond_nb); struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); - struct hns_roce_dev *hr_dev = - container_of(self, struct hns_roce_dev, bond_nb); + struct netdev_notifier_changeupper_info *info; enum bond_support_type support = BOND_SUPPORT; - struct hns_roce_bond_group *bond_grp; - u8 bus_num = get_hr_bus_num(hr_dev); struct net_device *upper_dev; + struct hns_roce_dev *hr_dev; bool changed; int slave_id;
@@ -902,30 +926,27 @@ int hns_roce_bond_event(struct notifier_block *self, return NOTIFY_DONE;
if (event == NETDEV_CHANGEUPPER) { - support = check_bond_support(hr_dev, &upper_dev, ptr); + if (!upper_event_filter(ptr, bond_grp, net_dev)) + return NOTIFY_DONE; + info = (struct netdev_notifier_changeupper_info *)ptr; + upper_dev = info->upper_dev; + support = check_bond_support(bond_grp, upper_dev, ptr); if (support == BOND_NOT_SUPPORT) return NOTIFY_DONE; } else { + if (!lowerstate_event_filter(bond_grp, net_dev)) + return NOTIFY_DONE; upper_dev = get_upper_dev_from_ndev(net_dev); }
- if (upper_dev && !is_hrdev_bond_slave(hr_dev, upper_dev)) - return NOTIFY_DONE; - else if (!upper_dev && hr_dev != hns_roce_get_hrdev_by_netdev(net_dev)) - return NOTIFY_DONE; - - bond_grp = hns_roce_get_bond_grp(get_hr_netdev(hr_dev, 0), bus_num); if (event == NETDEV_CHANGEUPPER) { - if (!bond_grp) { - bond_grp = hns_roce_alloc_bond_grp(hr_dev, upper_dev); - if (!bond_grp) { - ibdev_err(&hr_dev->ib_dev, - "failed to alloc RoCE bond_grp!\n"); + if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_ATTACHED) { + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); + if (!hr_dev) return NOTIFY_DONE; - } - } else if (hr_dev != bond_grp->main_hr_dev) { - return NOTIFY_DONE; + hns_roce_attach_bond_grp(bond_grp, hr_dev, upper_dev); } + /* In the case of netdev being unregistered, the roce * instance shouldn't be inited. */ @@ -942,7 +963,7 @@ int hns_roce_bond_event(struct notifier_block *self, } changed = hns_roce_bond_upper_event(bond_grp, ptr); } else { - changed = hns_roce_bond_lowerstate_event(hr_dev, bond_grp, ptr); + changed = hns_roce_bond_lowerstate_event(bond_grp, ptr); } if (changed) hns_roce_queue_bond_work(bond_grp, HZ); diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h index e75fe75f7f4f..84cd243403ef 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.h +++ b/drivers/infiniband/hw/hns/hns_roce_bond.h @@ -33,6 +33,7 @@ enum bond_support_type { };
enum hns_roce_bond_state { + HNS_ROCE_BOND_NOT_ATTACHED, HNS_ROCE_BOND_NOT_BONDED, HNS_ROCE_BOND_IS_BONDED, HNS_ROCE_BOND_REGISTERING, @@ -72,6 +73,7 @@ struct hns_roce_bond_group { struct hns_roce_func_info bond_func_info[ROCE_BOND_FUNC_MAX]; struct delayed_work bond_work; struct completion bond_work_done; + struct notifier_block bond_nb; };
struct hns_roce_die_info { @@ -88,5 +90,6 @@ struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev); struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev, u8 bus_num); bool is_bond_slave_in_reset(struct hns_roce_bond_group *bond_grp); +void hns_roce_dealloc_bond_grp(void);
#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 0bad0165aa21..499c6cee27ef 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1202,7 +1202,6 @@ struct hns_roce_dev { u32 congest_algo_tmpl_id; u64 dwqe_page;
- struct notifier_block bond_nb; struct hns_roce_port port_data[HNS_ROCE_MAX_PORTS]; atomic64_t *dfx_cnt; struct hns_roce_poe_ctx poe_ctx; /* poe ch array */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d7fba8f7ceb4..7fd688def734 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2536,6 +2536,9 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) caps->flags |= le16_to_cpu(resp_d->cap_flags_ex) << HNS_ROCE_CAP_FLAGS_EX_SHIFT;
+ if (hr_dev->is_vf) + caps->flags &= ~HNS_ROCE_CAP_FLAG_BOND; + caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS); caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID); caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH); @@ -7803,11 +7806,8 @@ static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, if (handle->rinfo.instance_state == HNS_ROCE_STATE_BOND_UNINIT) { bond_grp = hns_roce_get_bond_grp(handle->rinfo.netdev, handle->pdev->bus->number); - if (bond_grp) { + if (bond_grp) wait_for_completion(&bond_grp->bond_work_done); - if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) - kfree(bond_grp); - } }
if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) @@ -8031,6 +8031,7 @@ static int __init hns_roce_hw_v2_init(void)
static void __exit hns_roce_hw_v2_exit(void) { + hns_roce_dealloc_bond_grp(); hnae3_unregister_client(&hns_roce_hw_v2_client); hns_roce_cleanup_debugfs(); } diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 2e005edd0eb2..c0944dafed0b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -880,7 +880,6 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev, if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) goto normal_unregister;
- unregister_netdevice_notifier(&hr_dev->bond_nb); bond_grp = hns_roce_get_bond_grp(net_dev, bus_num); if (!bond_grp) goto normal_unregister; @@ -890,6 +889,7 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev, * is unregistered, re-initialized the remaining slaves before * the bond resources cleanup. */ + cancel_delayed_work_sync(&bond_grp->bond_work); bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev;
driver inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAGSQ9
----------------------------------------------------------------------
Encapsulate upper event and lowerstate event handlers to improve readability and maintainability.
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com>
---
 drivers/infiniband/hw/hns/hns_roce_bond.c | 135 +++++++++++-----------
 1 file changed, 70 insertions(+), 65 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index a15c7b26551a..da509debd90f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -705,15 +705,18 @@ int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) return ret; }
-static bool hns_roce_bond_lowerstate_event(struct hns_roce_bond_group *bond_grp, - struct netdev_notifier_changelowerstate_info *info) +static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp, + struct net_device *net_dev) { - struct net_device *net_dev = - netdev_notifier_info_to_dev((struct netdev_notifier_info *)info); + struct hns_roce_bond_group *bond_grp_tmp;
- if (!netif_is_lag_port(net_dev)) - return false; + bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bond_grp->bus_num); + return bond_grp_tmp == bond_grp; +}
+static void lowerstate_event_setting(struct hns_roce_bond_group *bond_grp, + struct netdev_notifier_changelowerstate_info *info) +{ mutex_lock(&bond_grp->bond_mutex);
if (bond_grp->bond_ready && @@ -721,6 +724,21 @@ static bool hns_roce_bond_lowerstate_event(struct hns_roce_bond_group *bond_grp, bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGESTATE;
mutex_unlock(&bond_grp->bond_mutex); +} + +static bool hns_roce_bond_lowerstate_event(struct hns_roce_bond_group *bond_grp, + struct netdev_notifier_changelowerstate_info *info) +{ + struct net_device *net_dev = + netdev_notifier_info_to_dev((struct netdev_notifier_info *)info); + + if (!netif_is_lag_port(net_dev)) + return false; + + if (!lowerstate_event_filter(bond_grp, net_dev)) + return false; + + lowerstate_event_setting(bond_grp, info);
return true; } @@ -741,16 +759,12 @@ static bool is_bond_setting_supported(struct netdev_lag_upper_info *bond_info) return true; }
-static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp, - struct netdev_notifier_changeupper_info *info) +static void upper_event_setting(struct hns_roce_bond_group *bond_grp, + struct netdev_notifier_changeupper_info *info) { struct netdev_lag_upper_info *bond_upper_info = NULL; struct net_device *upper_dev = info->upper_dev; bool slave_inc = info->linking; - bool changed = false; - - if (!bond_grp || !upper_dev || !netif_is_lag_master(upper_dev)) - return false;
if (slave_inc) bond_upper_info = info->upper_info; @@ -766,18 +780,14 @@ static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp,
if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) { bond_grp->bond_ready = true; - changed = true; } else { bond_grp->bond_state = slave_inc ? HNS_ROCE_BOND_SLAVE_INC : HNS_ROCE_BOND_SLAVE_DEC; bond_grp->bond_ready = true; - changed = true; }
mutex_unlock(&bond_grp->bond_mutex); - - return changed; }
static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp, @@ -900,13 +910,50 @@ static bool upper_event_filter(struct netdev_notifier_changeupper_info *info, return true; }
-static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp, - struct net_device *net_dev) +static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp, + struct netdev_notifier_changeupper_info *info) { - struct hns_roce_bond_group *bond_grp_tmp; + struct net_device *net_dev = + netdev_notifier_info_to_dev((struct netdev_notifier_info *)info); + struct net_device *upper_dev = info->upper_dev; + enum bond_support_type support = BOND_SUPPORT; + struct hns_roce_dev *hr_dev; + int slave_id;
- bond_grp_tmp = hns_roce_get_bond_grp(net_dev, bond_grp->bus_num); - return bond_grp_tmp == bond_grp; + if (!upper_dev || !netif_is_lag_master(upper_dev)) + return false; + + if (!upper_event_filter(info, bond_grp, net_dev)) + return false; + + support = check_bond_support(bond_grp, upper_dev, info); + if (support == BOND_NOT_SUPPORT) + return false; + + if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_ATTACHED) { + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); + if (!hr_dev) + return false; + hns_roce_attach_bond_grp(bond_grp, hr_dev, upper_dev); + } + + /* In the case of netdev being unregistered, the roce + * instance shouldn't be inited. + */ + if (net_dev->reg_state >= NETREG_UNREGISTERING) { + slave_id = get_netdev_bond_slave_id(net_dev, bond_grp); + if (slave_id >= 0) + bond_grp->bond_func_info[slave_id].handle = NULL; + } + + if (support == BOND_EXISTING_NOT_SUPPORT) { + bond_grp->bond_ready = false; + return true; + } + + upper_event_setting(bond_grp, info); + + return true; }
int hns_roce_bond_event(struct notifier_block *self, @@ -914,57 +961,15 @@ int hns_roce_bond_event(struct notifier_block *self, { struct hns_roce_bond_group *bond_grp = container_of(self, struct hns_roce_bond_group, bond_nb); - struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info; - enum bond_support_type support = BOND_SUPPORT; - struct net_device *upper_dev; - struct hns_roce_dev *hr_dev; bool changed; - int slave_id;
if (event != NETDEV_CHANGEUPPER && event != NETDEV_CHANGELOWERSTATE) return NOTIFY_DONE;
- if (event == NETDEV_CHANGEUPPER) { - if (!upper_event_filter(ptr, bond_grp, net_dev)) - return NOTIFY_DONE; - info = (struct netdev_notifier_changeupper_info *)ptr; - upper_dev = info->upper_dev; - support = check_bond_support(bond_grp, upper_dev, ptr); - if (support == BOND_NOT_SUPPORT) - return NOTIFY_DONE; - } else { - if (!lowerstate_event_filter(bond_grp, net_dev)) - return NOTIFY_DONE; - upper_dev = get_upper_dev_from_ndev(net_dev); - } - - if (event == NETDEV_CHANGEUPPER) { - if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_ATTACHED) { - hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); - if (!hr_dev) - return NOTIFY_DONE; - hns_roce_attach_bond_grp(bond_grp, hr_dev, upper_dev); - } - - /* In the case of netdev being unregistered, the roce - * instance shouldn't be inited. - */ - if (net_dev->reg_state >= NETREG_UNREGISTERING) { - slave_id = get_netdev_bond_slave_id(net_dev, bond_grp); - if (slave_id >= 0) - bond_grp->bond_func_info[slave_id].handle = NULL; - } - - if (support == BOND_EXISTING_NOT_SUPPORT) { - bond_grp->bond_ready = false; - hns_roce_queue_bond_work(bond_grp, HZ); - return NOTIFY_DONE; - } + if (event == NETDEV_CHANGEUPPER) changed = hns_roce_bond_upper_event(bond_grp, ptr); - } else { + else changed = hns_roce_bond_lowerstate_event(bond_grp, ptr); - } if (changed) hns_roce_queue_bond_work(bond_grp, HZ);
Upstream: Yes AR20220107959359
bond_grp is freed after hns_roce_cleanup_bond(). Move the "clear bond" log message into hns_roce_cleanup_bond() to avoid a UAF. Also, drop the return value of hns_roce_cleanup_bond() since it is no longer used.
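For illustration only, a tiny user-space model of the ordering issue (the types and names here are hypothetical, not driver code): the message that dereferences the group must be printed inside the cleanup routine, while the object is still valid, rather than by the caller afterwards.

  #include <stdio.h>
  #include <stdlib.h>

  struct grp {
          const char *name;
  };

  static void cleanup(struct grp *g)
  {
          /* Log while g is still valid ... */
          printf("%s: clear bond finished\n", g->name);
          /* ... then release it. */
          free(g);
  }

  int main(void)
  {
          struct grp *g = malloc(sizeof(*g));

          if (!g)
                  return 1;
          g->name = "bond0";
          cleanup(g);
          /* Printing g->name here, after cleanup(), would be the kind of
           * use-after-free the patch removes.
           */
          return 0;
  }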
Fixes: 991fd7d3bc75 ("RDMA/hns: Fix several memory issues in roce bonding")
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
 drivers/infiniband/hw/hns/hns_roce_bond.c | 12 +++++-------
 drivers/infiniband/hw/hns/hns_roce_bond.h |  2 +-
 2 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index da509debd90f..e7103943050c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -258,10 +258,7 @@ static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) }
out: - ret = hns_roce_cleanup_bond(bond_grp); - if (!ret) - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE clear bond finished!\n"); + hns_roce_cleanup_bond(bond_grp); }
static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp) @@ -690,7 +687,7 @@ static void hns_roce_detach_bond_grp(struct hns_roce_bond_group *bond_grp) memset(bond_grp->bond_func_info, 0, sizeof(bond_grp->bond_func_info)); }
-int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) +void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) { int ret;
@@ -698,11 +695,12 @@ int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) hns_roce_cmd_bond(bond_grp, HNS_ROCE_CLEAR_BOND) : -EIO; if (ret) BOND_ERR_LOG("failed to clear RoCE bond, ret = %d.\n", ret); + else + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE clear bond finished!\n");
hns_roce_detach_bond_grp(bond_grp); complete(&bond_grp->bond_work_done); - - return ret; }
static bool lowerstate_event_filter(struct hns_roce_bond_group *bond_grp, diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h index 84cd243403ef..160657a32bfd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.h +++ b/drivers/infiniband/hw/hns/hns_roce_bond.h @@ -84,7 +84,7 @@ struct hns_roce_die_info { int hns_roce_bond_init(struct hns_roce_dev *hr_dev); int hns_roce_bond_event(struct notifier_block *self, unsigned long event, void *ptr); -int hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp); +void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp); bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev); struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev); struct hns_roce_bond_group *hns_roce_get_bond_grp(struct net_device *net_dev,
driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAGSQ9
----------------------------------------------------------------------
Currently the complete process of a single RoCE bonding operation is:
* check the constraints of RoCE bonding in the bond event
* update the bond_grp information in the bond event
* queue the delayed bond work
* init/uninit slaves according to the bond information and bond state in
  the bond work
This process works fine when there is only a single bonding operation. But when there is concurrency between the bond work and a bond event, some unexpected outcomes may occur.

To fix the concurrency issues, the bond event now just checks the constraints and queues the bond work. The bond work checks the constraints of the slaves again (as they may have changed between the time the work was queued and the time it is scheduled) and updates the information while holding a mutex.

It is possible that new bond events occur after the bond work unlocks, making the information in the bond_grp structure outdated. In this case the bond work will finish its job with the outdated information, and the bond event will queue a new work to update it again.
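A rough user-space model of this scheme is sketched below (the names, the pending flag and the pthread mutex are stand-ins for the driver's bond_mutex and delayed workqueue; this illustrates the pattern only and is not driver code):

  #include <pthread.h>
  #include <stdbool.h>
  #include <stdio.h>

  static pthread_mutex_t bond_mutex = PTHREAD_MUTEX_INITIALIZER;
  static unsigned int slave_map;     /* shared bonding info */
  static bool work_pending;

  /* Event path: only record the change and "queue" the work. */
  static void bond_event(unsigned int new_map)
  {
          pthread_mutex_lock(&bond_mutex);
          slave_map = new_map;
          work_pending = true;       /* stands in for queue_delayed_work() */
          pthread_mutex_unlock(&bond_mutex);
  }

  /* Work path: re-check the constraints on a consistent snapshot. */
  static void bond_work(void)
  {
          unsigned int snapshot;

          pthread_mutex_lock(&bond_mutex);
          snapshot = slave_map;      /* may already be outdated ... */
          work_pending = false;
          pthread_mutex_unlock(&bond_mutex);

          /* ... but a later event queues the work again with fresh info. */
          printf("apply bond config for slave_map=0x%x\n", snapshot);
  }

  int main(void)
  {
          bond_event(0x3);
          while (work_pending)
                  bond_work();
          return 0;
  }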
Fixes: e62a20278f18 ("RDMA/hns: support RoCE bonding")
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com>
---
 drivers/infiniband/hw/hns/hns_roce_bond.c | 449 ++++++++++++----------
 drivers/infiniband/hw/hns/hns_roce_bond.h |   5 +-
 drivers/infiniband/hw/hns/hns_roce_main.c |   2 +
 3 files changed, 254 insertions(+), 202 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index e7103943050c..1f3093b40a91 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -156,7 +156,7 @@ static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp)
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; - if (!net_dev) + if (!net_dev || !(bond_grp->slave_map & (1U << i))) continue;
active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ? @@ -179,29 +179,124 @@ static int hns_roce_recover_bond(struct hns_roce_bond_group *bond_grp) return hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND); }
-static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) +static int hns_roce_slave_uninit(struct hns_roce_bond_group *bond_grp, + u8 func_idx) +{ + struct net_device *net_dev; + int ret = 0; + + net_dev = bond_grp->bond_func_info[func_idx].net_dev; + if (hns_roce_get_hrdev_by_netdev(net_dev)) { + ret = hns_roce_bond_uninit_client(bond_grp, func_idx); + if (ret) { + BOND_ERR_LOG("failed to uninit slave %u, ret = %d.\n", + func_idx, ret); + bond_grp->bond_func_info[func_idx].net_dev = NULL; + } + } + + return ret; +} + +static struct hns_roce_dev + *hns_roce_slave_init(struct hns_roce_bond_group *bond_grp, + u8 func_idx, bool need_switch); + +static int switch_main_dev(struct hns_roce_bond_group *bond_grp, + u8 main_func_idx) +{ + struct hns_roce_dev *hr_dev; + struct net_device *net_dev; + int ret; + u8 i; + + bond_grp->main_hr_dev = NULL; + ret = hns_roce_bond_uninit_client(bond_grp, main_func_idx); + if (ret) { + BOND_ERR_LOG("failed to uninit main dev %u, ret = %d.\n", + main_func_idx, ret); + return ret; + } + + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if ((bond_grp->slave_map & (1U << i)) && net_dev) { + /* In case this slave is still being registered as + * a non-bonded PF, uninit it first and then re-init + * it as the main device. + */ + hns_roce_slave_uninit(bond_grp, i); + hr_dev = hns_roce_slave_init(bond_grp, i, false); + if (hr_dev) { + bond_grp->main_hr_dev = hr_dev; + break; + } + } + } + + if (!bond_grp->main_hr_dev) + return -ENODEV; + + return 0; +} + +static struct hns_roce_dev + *hns_roce_slave_init(struct hns_roce_bond_group *bond_grp, + u8 func_idx, bool need_switch) { struct hns_roce_dev *hr_dev = NULL; struct net_device *net_dev; + u8 main_func_idx; + int ret; + + if (need_switch) { + main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); + if (func_idx == main_func_idx) { + ret = switch_main_dev(bond_grp, main_func_idx); + if (ret == -ENODEV) + return NULL; + } + } + + net_dev = bond_grp->bond_func_info[func_idx].net_dev; + if (net_dev) { + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); + if (hr_dev) + return hr_dev; + if (need_switch) + bond_grp->bond_func_info[func_idx].net_dev = NULL; + hr_dev = hns_roce_bond_init_client(bond_grp, func_idx); + if (!hr_dev) { + BOND_ERR_LOG("failed to init slave %u.\n", func_idx); + bond_grp->bond_func_info[func_idx].net_dev = net_dev; + } + } + + return hr_dev; +} + +static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) +{ + struct hns_roce_dev *hr_dev; int ret; int i;
for (i = ROCE_BOND_FUNC_MAX - 1; i >= 0; i--) { - net_dev = bond_grp->bond_func_info[i].net_dev; - if (net_dev) { - ret = hns_roce_bond_uninit_client(bond_grp, i); + if (bond_grp->slave_map & (1 << i)) { + ret = hns_roce_slave_uninit(bond_grp, i); if (ret) - goto set_err; + goto out; } }
- bond_grp->bond_state = HNS_ROCE_BOND_REGISTERING; + mutex_lock(&bond_grp->bond_mutex); + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; + mutex_unlock(&bond_grp->bond_mutex); bond_grp->main_hr_dev = NULL;
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { - net_dev = bond_grp->bond_func_info[i].net_dev; - if (net_dev) { - hr_dev = hns_roce_bond_init_client(bond_grp, i); + if (bond_grp->slave_map & (1 << i)) { + hr_dev = hns_roce_slave_init(bond_grp, i, false); if (hr_dev) { bond_grp->main_hr_dev = hr_dev; break; @@ -209,32 +304,32 @@ static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) } }
- bond_grp->slave_map_diff = 0; - hns_roce_bond_get_active_slave(bond_grp); - - ret = bond_grp->main_hr_dev ? - hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND) : -EIO; - if (ret) - goto set_err; + if (!bond_grp->main_hr_dev) { + ret = -ENODEV; + goto out; + }
- bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - complete(&bond_grp->bond_work_done); - ibdev_info(&bond_grp->main_hr_dev->ib_dev, "RoCE set bond finished!\n"); + hns_roce_bond_get_active_slave(bond_grp);
- return; + ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_SET_BOND);
-set_err: - bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; - BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret); - hns_roce_cleanup_bond(bond_grp); +out: + if (ret) { + BOND_ERR_LOG("failed to set RoCE bond, ret = %d.\n", ret); + hns_roce_cleanup_bond(bond_grp); + } else { + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE set bond finished!\n"); + complete(&bond_grp->bond_work_done); + } }
static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) { u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); - struct hns_roce_dev *hr_dev = NULL; - struct net_device *net_dev; - int i, ret; + struct hns_roce_dev *hr_dev; + int ret; + u8 i;
if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) goto out; @@ -242,19 +337,16 @@ static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; bond_grp->main_hr_dev = NULL;
- ret = hns_roce_bond_uninit_client(bond_grp, main_func_idx); + ret = hns_roce_slave_uninit(bond_grp, main_func_idx); if (ret) { BOND_ERR_LOG("failed to uninit bond, ret = %d.\n", ret); return; }
for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { - net_dev = bond_grp->bond_func_info[i].net_dev; - if (net_dev) { - hr_dev = hns_roce_bond_init_client(bond_grp, i); - if (hr_dev) - bond_grp->main_hr_dev = hr_dev; - } + hr_dev = hns_roce_slave_init(bond_grp, i, false); + if (hr_dev) + bond_grp->main_hr_dev = hr_dev; }
out: @@ -269,7 +361,10 @@ static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp)
ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND);
- bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; + mutex_lock(&bond_grp->bond_mutex); + if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE) + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; + mutex_unlock(&bond_grp->bond_mutex); complete(&bond_grp->bond_work_done);
if (ret) @@ -281,139 +376,142 @@ static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp) "RoCE slave changestate finished!\n"); }
-static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp) +static void hns_roce_slave_change_num(struct hns_roce_bond_group *bond_grp) { - u32 inc_slave_map = bond_grp->slave_map_diff; - u8 inc_func_idx = 0; int ret; + u8 i;
- while (inc_slave_map > 0) { - if (inc_slave_map & 1) { - ret = hns_roce_bond_uninit_client(bond_grp, inc_func_idx); - if (ret) { - BOND_ERR_LOG("failed to uninit slave %u, ret = %d.\n", - inc_func_idx, ret); - bond_grp->bond_func_info[inc_func_idx].net_dev = NULL; - bond_grp->slave_map &= ~(1U << inc_func_idx); + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + if (bond_grp->slave_map & (1U << i)) { + if (i == PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn)) + continue; + ret = hns_roce_slave_uninit(bond_grp, i); + if (ret) + goto out; + } else { + hns_roce_slave_init(bond_grp, i, true); + if (!bond_grp->main_hr_dev) { + ret = -ENODEV; + goto out; } } - inc_slave_map >>= 1; - inc_func_idx++; }
- bond_grp->slave_map_diff = 0; hns_roce_bond_get_active_slave(bond_grp);
ret = hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND);
- bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - complete(&bond_grp->bond_work_done); - - if (ret) - ibdev_err(&bond_grp->main_hr_dev->ib_dev, - "failed to increase slave, ret = %d.\n", ret); - else +out: + if (ret) { + BOND_ERR_LOG("failed to change RoCE bond slave num, ret = %d.\n", ret); + hns_roce_cleanup_bond(bond_grp); + } else { + mutex_lock(&bond_grp->bond_mutex); + if (bond_grp->bond_state == HNS_ROCE_BOND_SLAVE_CHANGE_NUM) + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; + mutex_unlock(&bond_grp->bond_mutex); ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE slave increase finished!\n"); + "RoCE slave change num finished!\n"); + complete(&bond_grp->bond_work_done); + } }
-static int switch_main_dev(struct hns_roce_bond_group *bond_grp, - u32 *dec_slave_map, u8 main_func_idx) +static void hns_roce_bond_info_update_nolock(struct hns_roce_bond_group *bond_grp, + struct net_device *upper_dev) { + struct hns_roce_v2_priv *priv; struct hns_roce_dev *hr_dev; struct net_device *net_dev; - int ret; - int i; + int func_idx;
- bond_grp->main_hr_dev = NULL; - ret = hns_roce_bond_uninit_client(bond_grp, main_func_idx); - if (ret) { - BOND_ERR_LOG("failed to uninit main dev %u, ret = %d.\n", - main_func_idx, ret); - *dec_slave_map &= ~(1U << main_func_idx); - bond_grp->slave_map |= (1U << main_func_idx); - return ret; - } - - for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { - net_dev = bond_grp->bond_func_info[i].net_dev; - if (!(*dec_slave_map & (1 << i)) && net_dev) { - bond_grp->bond_state = HNS_ROCE_BOND_REGISTERING; - hr_dev = hns_roce_bond_init_client(bond_grp, i); - if (hr_dev) { - bond_grp->main_hr_dev = hr_dev; - break; + bond_grp->slave_map = 0; + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper_dev, net_dev) { + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); + if (hr_dev) { + func_idx = PCI_FUNC(hr_dev->pci_dev->devfn); + if (!bond_grp->bond_func_info[func_idx].net_dev) { + priv = hr_dev->priv; + bond_grp->bond_func_info[func_idx].net_dev = + net_dev; + + bond_grp->bond_func_info[func_idx].handle = + priv->handle; } + } else { + func_idx = get_netdev_bond_slave_id(net_dev, bond_grp); + if (func_idx < 0) + continue; } + bond_grp->slave_map |= (1 << func_idx); } - - if (!bond_grp->main_hr_dev) - return -ENODEV; - - return 0; + rcu_read_unlock(); }
-static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp) +static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp, + struct net_device *net_dev) { - u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); - u32 dec_slave_map = bond_grp->slave_map_diff; - struct net_device *net_dev; - u8 dec_func_idx = 0; - int ret; + struct hns_roce_dev *hr_dev = hns_roce_get_hrdev_by_netdev(net_dev);
- if (dec_slave_map & (1 << main_func_idx)) { - ret = switch_main_dev(bond_grp, &dec_slave_map, main_func_idx); - if (ret == -ENODEV) - goto dec_err; - } - - while (dec_slave_map > 0) { - if (dec_slave_map & 1) { - net_dev = bond_grp->bond_func_info[dec_func_idx].net_dev; - bond_grp->bond_func_info[dec_func_idx].net_dev = NULL; - if (!hns_roce_bond_init_client(bond_grp, dec_func_idx)) { - BOND_ERR_LOG("failed to re-init slave %u.\n", - dec_func_idx); - bond_grp->slave_map |= (1U << dec_func_idx); - bond_grp->bond_func_info[dec_func_idx].net_dev = net_dev; - } - } - dec_slave_map >>= 1; - dec_func_idx++; + if (!hr_dev) { + if (bond_grp && + get_netdev_bond_slave_id(net_dev, bond_grp) >= 0) + return true; + else + return false; }
- bond_grp->slave_map_diff = 0; - hns_roce_bond_get_active_slave(bond_grp); + if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) + return false;
- ret = bond_grp->main_hr_dev ? - hns_roce_cmd_bond(bond_grp, HNS_ROCE_CHANGE_BOND) : -EIO; - if (ret) - goto dec_err; + if (hr_dev->is_vf || pci_num_vf(hr_dev->pci_dev) > 0) + return false;
- bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; - complete(&bond_grp->bond_work_done); - ibdev_info(&bond_grp->main_hr_dev->ib_dev, - "RoCE slave decrease finished!\n"); + if (bond_grp->bus_num != get_hr_bus_num(hr_dev)) + return false;
- return; + return true; +}
-dec_err: - bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; - BOND_ERR_LOG("failed to decrease RoCE bond slave, ret = %d.\n", ret); - hns_roce_cleanup_bond(bond_grp); +static bool check_slave_support(struct hns_roce_bond_group *bond_grp, + struct net_device *upper_dev) +{ + struct net_device *net_dev; + u8 slave_num = 0; + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper_dev, net_dev) { + if (is_dev_bond_supported(bond_grp, net_dev)) { + slave_num++; + continue; + } + rcu_read_unlock(); + return false; + } + rcu_read_unlock(); + + return (slave_num > 1 && slave_num <= ROCE_BOND_FUNC_MAX); }
static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) { - enum hns_roce_bond_state bond_state = bond_grp->bond_state; - bool bond_ready = bond_grp->bond_ready; + enum hns_roce_bond_state bond_state; + bool bond_ready;
if (!bond_grp->main_hr_dev) return;
+ bond_ready = check_slave_support(bond_grp, bond_grp->upper_dev); + + mutex_lock(&bond_grp->bond_mutex); + hns_roce_bond_info_update_nolock(bond_grp, bond_grp->upper_dev); + bond_state = bond_grp->bond_state; + bond_grp->bond_ready = bond_ready; + mutex_unlock(&bond_grp->bond_mutex); + ibdev_info(&bond_grp->main_hr_dev->ib_dev, "do_bond: bond_ready - %d, bond_state - %d.\n", - bond_ready, bond_grp->bond_state); + bond_ready, bond_state);
reinit_completion(&bond_grp->bond_work_done);
@@ -429,11 +527,8 @@ static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) case HNS_ROCE_BOND_SLAVE_CHANGESTATE: hns_roce_slave_changestate(bond_grp); return; - case HNS_ROCE_BOND_SLAVE_INC: - hns_roce_slave_inc(bond_grp); - return; - case HNS_ROCE_BOND_SLAVE_DEC: - hns_roce_slave_dec(bond_grp); + case HNS_ROCE_BOND_SLAVE_CHANGE_NUM: + hns_roce_slave_change_num(bond_grp); return; default: return; @@ -672,19 +767,21 @@ static void hns_roce_attach_bond_grp(struct hns_roce_bond_group *bond_grp, bond_grp->main_hr_dev = hr_dev; bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; bond_grp->bond_ready = false; - hns_roce_bond_info_update(bond_grp, upper_dev, true); }
static void hns_roce_detach_bond_grp(struct hns_roce_bond_group *bond_grp) { + mutex_lock(&bond_grp->bond_mutex); + cancel_delayed_work(&bond_grp->bond_work); bond_grp->upper_dev = NULL; bond_grp->main_hr_dev = NULL; bond_grp->bond_ready = false; bond_grp->bond_state = HNS_ROCE_BOND_NOT_ATTACHED; bond_grp->slave_map = 0; - bond_grp->slave_map_diff = 0; memset(bond_grp->bond_func_info, 0, sizeof(bond_grp->bond_func_info)); + + mutex_unlock(&bond_grp->bond_mutex); }
void hns_roce_cleanup_bond(struct hns_roce_bond_group *bond_grp) @@ -767,50 +864,10 @@ static void upper_event_setting(struct hns_roce_bond_group *bond_grp, if (slave_inc) bond_upper_info = info->upper_info;
- mutex_lock(&bond_grp->bond_mutex); - if (bond_upper_info) bond_grp->tx_type = bond_upper_info->tx_type;
- hns_roce_bond_info_update(bond_grp, upper_dev, slave_inc); - bond_grp->bond = netdev_priv(upper_dev); - - if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) { - bond_grp->bond_ready = true; - } else { - bond_grp->bond_state = slave_inc ? - HNS_ROCE_BOND_SLAVE_INC : - HNS_ROCE_BOND_SLAVE_DEC; - bond_grp->bond_ready = true; - } - - mutex_unlock(&bond_grp->bond_mutex); -} - -static bool is_dev_bond_supported(struct hns_roce_bond_group *bond_grp, - struct net_device *net_dev) -{ - struct hns_roce_dev *hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); - - if (!hr_dev) { - if (bond_grp && - get_netdev_bond_slave_id(net_dev, bond_grp) >= 0) - return true; - else - return false; - } - - if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) - return false; - - if (hr_dev->is_vf || pci_num_vf(hr_dev->pci_dev) > 0) - return false; - - if (bond_grp->bus_num != get_hr_bus_num(hr_dev)) - return false; - - return true; }
static bool check_unlinking_bond_support(struct hns_roce_bond_group *bond_grp) @@ -832,24 +889,10 @@ static bool check_linking_bond_support(struct netdev_lag_upper_info *bond_info, struct hns_roce_bond_group *bond_grp, struct net_device *upper_dev) { - struct net_device *net_dev; - u8 slave_num = 0; - if (!is_bond_setting_supported(bond_info)) return false;
- rcu_read_lock(); - for_each_netdev_in_bond_rcu(upper_dev, net_dev) { - if (is_dev_bond_supported(bond_grp, net_dev)) { - slave_num++; - } else { - rcu_read_unlock(); - return false; - } - } - rcu_read_unlock(); - - return (slave_num > 1 && slave_num <= ROCE_BOND_FUNC_MAX); + return check_slave_support(bond_grp, upper_dev); }
static enum bond_support_type @@ -924,14 +967,19 @@ static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp, if (!upper_event_filter(info, bond_grp, net_dev)) return false;
+ mutex_lock(&bond_grp->bond_mutex); support = check_bond_support(bond_grp, upper_dev, info); - if (support == BOND_NOT_SUPPORT) + if (support == BOND_NOT_SUPPORT) { + mutex_unlock(&bond_grp->bond_mutex); return false; + }
if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_ATTACHED) { hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); - if (!hr_dev) + if (!hr_dev) { + mutex_unlock(&bond_grp->bond_mutex); return false; + } hns_roce_attach_bond_grp(bond_grp, hr_dev, upper_dev); }
@@ -940,16 +988,20 @@ static bool hns_roce_bond_upper_event(struct hns_roce_bond_group *bond_grp, */ if (net_dev->reg_state >= NETREG_UNREGISTERING) { slave_id = get_netdev_bond_slave_id(net_dev, bond_grp); - if (slave_id >= 0) + if (slave_id >= 0) { + bond_grp->bond_func_info[slave_id].net_dev = NULL; bond_grp->bond_func_info[slave_id].handle = NULL; + } }
- if (support == BOND_EXISTING_NOT_SUPPORT) { - bond_grp->bond_ready = false; - return true; + if (support == BOND_SUPPORT) { + bond_grp->bond_ready = true; + if (bond_grp->bond_state != HNS_ROCE_BOND_NOT_BONDED) + bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGE_NUM; } - - upper_event_setting(bond_grp, info); + mutex_unlock(&bond_grp->bond_mutex); + if (support == BOND_SUPPORT) + upper_event_setting(bond_grp, info);
return true; } @@ -968,6 +1020,7 @@ int hns_roce_bond_event(struct notifier_block *self, changed = hns_roce_bond_upper_event(bond_grp, ptr); else changed = hns_roce_bond_lowerstate_event(bond_grp, ptr); + if (changed) hns_roce_queue_bond_work(bond_grp, HZ);
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h index 160657a32bfd..41fc76e5c98a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.h +++ b/drivers/infiniband/hw/hns/hns_roce_bond.h @@ -36,9 +36,7 @@ enum hns_roce_bond_state { HNS_ROCE_BOND_NOT_ATTACHED, HNS_ROCE_BOND_NOT_BONDED, HNS_ROCE_BOND_IS_BONDED, - HNS_ROCE_BOND_REGISTERING, - HNS_ROCE_BOND_SLAVE_INC, - HNS_ROCE_BOND_SLAVE_DEC, + HNS_ROCE_BOND_SLAVE_CHANGE_NUM, HNS_ROCE_BOND_SLAVE_CHANGESTATE, };
@@ -59,7 +57,6 @@ struct hns_roce_bond_group { u8 active_slave_num; u32 slave_map; u32 active_slave_map; - u32 slave_map_diff; u8 bond_id; u8 bus_num; struct bonding *bond; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index c0944dafed0b..76989bc0e02a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -890,7 +890,9 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev, * the bond resources cleanup. */ cancel_delayed_work_sync(&bond_grp->bond_work); + mutex_lock(&bond_grp->bond_mutex); bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + mutex_unlock(&bond_grp->bond_mutex); for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { net_dev = bond_grp->bond_func_info[i].net_dev; if (net_dev && net_dev != iboe->netdevs[0])
Feedback: The patch(es) you sent to kernel@openeuler.org could not be converted to a PR!
Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Y...
Failed Reason: the cover letter of the patch series is missing
Suggested Solution: please add the cover letter and resend the patch series to the mailing list