From: Junxian Huang <huangjunxian6@hisilicon.com>
driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5Z6L8
----------------------------------------------------------
Support hns roce bonding
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Signed-off-by: ChunZhi Hu <huchunzhi@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 drivers/infiniband/hw/hns/Makefile          |   3 +-
 drivers/infiniband/hw/hns/hns_roce_bond.c   | 670 ++++++++++++++++++++
 drivers/infiniband/hw/hns/hns_roce_bond.h   |  64 ++
 drivers/infiniband/hw/hns/hns_roce_device.h |  10 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  63 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  17 +
 drivers/infiniband/hw/hns/hns_roce_main.c   |  26 +-
 7 files changed, 846 insertions(+), 7 deletions(-)
 create mode 100644 drivers/infiniband/hw/hns/hns_roce_bond.c
 create mode 100644 drivers/infiniband/hw/hns/hns_roce_bond.h
diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile index a7d259238305..8ffbf009b948 100644 --- a/drivers/infiniband/hw/hns/Makefile +++ b/drivers/infiniband/hw/hns/Makefile @@ -7,7 +7,8 @@ ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \ hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ - hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o + hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o \ + hns_roce_bond.o
ifdef CONFIG_INFINIBAND_HNS_HIP08 hns-roce-hw-v2-objs := hns_roce_hw_v2.o $(hns-roce-objs) diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c new file mode 100644 index 000000000000..14255685a59f --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -0,0 +1,670 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016-2022 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/pci.h> +#include "hnae3.h" +#include "hns_roce_device.h" +#include "hns_roce_hw_v2.h" +#include "hns_roce_bond.h" + +static DEFINE_MUTEX(roce_bond_mutex); + +static struct hns_roce_dev *hns_roce_get_hrdev_by_netdev(struct net_device *net_dev) +{ + struct hns_roce_dev *hr_dev; + struct ib_device *ibdev; + + ibdev = ib_device_get_by_netdev(net_dev, RDMA_DRIVER_HNS); + if (!ibdev) + return NULL; + + hr_dev = container_of(ibdev, struct hns_roce_dev, ib_dev); + ib_device_put(ibdev); + + return hr_dev; +} + +bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev) +{ + struct net_device *upper_dev; + struct net_device *net_dev; + + if (!netif_is_lag_port(hr_dev->iboe.netdevs[0])) + return false; + + rcu_read_lock(); + upper_dev = netdev_master_upper_dev_get_rcu(hr_dev->iboe.netdevs[0]); + for_each_netdev_in_bond_rcu(upper_dev, net_dev) { + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); + if (hr_dev && hr_dev->bond_grp && + hr_dev->bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} + +struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_bond_group *bond_grp = hr_dev->bond_grp; + struct net_device *net_dev = NULL; + int i; + + if (!(hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND)) + return NULL; + + if (!netif_is_lag_port(hr_dev->iboe.netdevs[0])) + return NULL; + + if (!bond_grp) + return NULL; + + mutex_lock(&bond_grp->bond_mutex); + + if (bond_grp->bond_state != HNS_ROCE_BOND_IS_BONDED) + goto out; + + if (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (net_dev && + bond_grp->bond_func_info[i].state.tx_enabled) + break; + } + } else { + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (net_dev && get_port_state(net_dev) == IB_PORT_ACTIVE) + break; + } + } + +out: + 
mutex_unlock(&bond_grp->bond_mutex); + + return net_dev; +} + +static void hns_roce_queue_bond_work(struct hns_roce_dev *hr_dev, + unsigned long delay) +{ + schedule_delayed_work(&hr_dev->bond_work, delay); +} + +static void hns_roce_bond_get_active_slave(struct hns_roce_bond_group *bond_grp) +{ + struct net_device *net_dev; + u32 active_slave_map = 0; + u8 active_slave_num = 0; + bool active; + u8 i; + + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (net_dev) { + active = (bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) ? + bond_grp->bond_func_info[i].state.tx_enabled : + bond_grp->bond_func_info[i].state.link_up; + if (active) { + active_slave_num++; + active_slave_map |= (1 << i); + } + } + } + + bond_grp->active_slave_num = active_slave_num; + bond_grp->active_slave_map = active_slave_map; +} + +static struct hns_roce_dev + *hns_roce_bond_init_client(struct hns_roce_bond_group *bond_grp, + int func_idx) +{ + struct hnae3_handle *handle; + int ret; + + handle = bond_grp->bond_func_info[func_idx].handle; + ret = hns_roce_hw_v2_init_instance(handle); + if (ret) + return NULL; + + return handle->priv; +} + +static void hns_roce_bond_uninit_client(struct hns_roce_bond_group *bond_grp, + int func_idx) +{ + struct hnae3_handle *handle; + + handle = bond_grp->bond_func_info[func_idx].handle; + hns_roce_hw_v2_uninit_instance(handle, 0); +} + +static void hns_roce_set_bond(struct hns_roce_bond_group *bond_grp) +{ + u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); + struct net_device *main_net_dev = bond_grp->main_net_dev; + struct hns_roce_dev *hr_dev; + struct net_device *net_dev; + int ret; + int i; + + hns_roce_bond_get_active_slave(bond_grp); + /* bond_grp will be kfree during uninit_instance of main_hr_dev. + * Thus the main_hr_dev is switched before the uninit_instance + * of the previous main_hr_dev. 
+ */ + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (net_dev && net_dev != main_net_dev) + hns_roce_bond_uninit_client(bond_grp, i); + } + + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; + + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (net_dev && net_dev != main_net_dev) { + hr_dev = hns_roce_bond_init_client(bond_grp, i); + if (hr_dev) { + bond_grp->bond_id = + hr_dev->ib_dev.name[ROCE_BOND_NAME_ID_IDX] + - '0'; + bond_grp->main_hr_dev->bond_grp = NULL; + bond_grp->main_hr_dev = hr_dev; + bond_grp->main_net_dev = net_dev; + hr_dev->bond_grp = bond_grp; + break; + } + } + } + + if (!hr_dev) + return; + + hns_roce_bond_uninit_client(bond_grp, main_func_idx); + ret = hns_roce_cmd_bond(hr_dev, HNS_ROCE_SET_BOND); + if (ret) { + ibdev_err(&hr_dev->ib_dev, "failed to set RoCE bond!\n"); + return; + } + + ibdev_info(&hr_dev->ib_dev, "RoCE set bond finished!\n"); +} + +static void hns_roce_clear_bond(struct hns_roce_bond_group *bond_grp) +{ + u8 main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); + struct net_device *main_net_dev = bond_grp->main_net_dev; + struct hnae3_handle *handle; + struct hns_roce_dev *hr_dev; + struct net_device *net_dev; + int ret; + int i; + + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (net_dev && net_dev != main_net_dev) + hns_roce_bond_init_client(bond_grp, i); + } + + ret = hns_roce_cmd_bond(bond_grp->main_hr_dev, HNS_ROCE_CLEAR_BOND); + if (ret) + return; + handle = bond_grp->bond_func_info[main_func_idx].handle; + + /* bond_grp will be freed in uninit_instance(main_net_dev) */ + hns_roce_bond_uninit_client(bond_grp, main_func_idx); + + ret = hns_roce_hw_v2_init_instance(handle); + if (ret) { + ibdev_err(&hr_dev->ib_dev, "failed to clear RoCE bond!\n"); + return; + } + + hr_dev = handle->priv; + + 
ibdev_info(&hr_dev->ib_dev, "RoCE clear bond finished!\n"); +} + +static void hns_roce_slave_changestate(struct hns_roce_bond_group *bond_grp) +{ + int ret; + + hns_roce_bond_get_active_slave(bond_grp); + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; + + ret = hns_roce_cmd_bond(bond_grp->main_hr_dev, HNS_ROCE_CHANGE_BOND); + if (ret) { + ibdev_err(&bond_grp->main_hr_dev->ib_dev, + "failed to change RoCE bond slave state!\n"); + return; + } + + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave changestate finished!\n"); +} + +static void hns_roce_slave_inc(struct hns_roce_bond_group *bond_grp) +{ + u32 inc_slave_map = bond_grp->slave_map_diff; + u8 inc_func_idx = 0; + int ret; + + hns_roce_bond_get_active_slave(bond_grp); + + while (inc_slave_map > 0) { + if (inc_slave_map & 1) + hns_roce_bond_uninit_client(bond_grp, inc_func_idx); + inc_slave_map >>= 1; + inc_func_idx++; + } + + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; + + ret = hns_roce_cmd_bond(bond_grp->main_hr_dev, HNS_ROCE_CHANGE_BOND); + if (ret) { + ibdev_err(&bond_grp->main_hr_dev->ib_dev, + "failed to increase RoCE bond slave!\n"); + return; + } + + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave increase finished!\n"); +} + +static void hns_roce_slave_dec(struct hns_roce_bond_group *bond_grp) +{ + u32 dec_slave_map = bond_grp->slave_map_diff; + struct hns_roce_dev *hr_dev; + struct net_device *net_dev; + u8 main_func_idx = 0; + u8 dec_func_idx = 0; + int ret; + int i; + + hns_roce_bond_get_active_slave(bond_grp); + + bond_grp->bond_state = HNS_ROCE_BOND_IS_BONDED; + + main_func_idx = PCI_FUNC(bond_grp->main_hr_dev->pci_dev->devfn); + if (dec_slave_map & (1 << main_func_idx)) { + hns_roce_cmd_bond(hr_dev, HNS_ROCE_CLEAR_BOND); + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + net_dev = bond_grp->bond_func_info[i].net_dev; + if (!(dec_slave_map & (1 << i)) && net_dev) { + hr_dev = hns_roce_bond_init_client(bond_grp, i); + if (hr_dev) { + bond_grp->main_hr_dev = hr_dev; + 
bond_grp->main_net_dev = net_dev; + hr_dev->bond_grp = bond_grp; + break; + } + } + } + hns_roce_bond_uninit_client(bond_grp, main_func_idx); + } + + while (dec_slave_map > 0) { + if (dec_slave_map & 1) { + hns_roce_bond_init_client(bond_grp, dec_func_idx); + bond_grp->bond_func_info[dec_func_idx].net_dev = NULL; + } + dec_slave_map >>= 1; + dec_func_idx++; + } + + if (bond_grp->slave_map_diff & (1 << main_func_idx)) + ret = hns_roce_cmd_bond(hr_dev, HNS_ROCE_SET_BOND); + else + ret = hns_roce_cmd_bond(bond_grp->main_hr_dev, + HNS_ROCE_CHANGE_BOND); + if (ret) { + ibdev_err(&bond_grp->main_hr_dev->ib_dev, + "failed to decrease RoCE bond slave!\n"); + return; + } + + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "RoCE slave decrease finished!\n"); +} + +static void hns_roce_do_bond(struct hns_roce_bond_group *bond_grp) +{ + enum hns_roce_bond_state bond_state; + bool bond_ready; + + bond_ready = bond_grp->bond_ready; + bond_state = bond_grp->bond_state; + ibdev_info(&bond_grp->main_hr_dev->ib_dev, + "do_bond: bond_ready - %d, bond_state - %d.\n", + bond_ready, bond_grp->bond_state); + + if (bond_ready && bond_state == HNS_ROCE_BOND_NOT_BONDED) + hns_roce_set_bond(bond_grp); + else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_CHANGESTATE) + hns_roce_slave_changestate(bond_grp); + else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_INC) + hns_roce_slave_inc(bond_grp); + else if (bond_ready && bond_state == HNS_ROCE_BOND_SLAVE_DEC) + hns_roce_slave_dec(bond_grp); + else if (!bond_ready && bond_state != HNS_ROCE_BOND_NOT_BONDED) + hns_roce_clear_bond(bond_grp); +} + +void hns_roce_do_bond_work(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct hns_roce_dev *hr_dev; + int status; + + delayed_work = to_delayed_work(work); + hr_dev = container_of(delayed_work, struct hns_roce_dev, bond_work); + status = mutex_trylock(&roce_bond_mutex); + if (!status) { + /* delay 1 sec */ + hns_roce_queue_bond_work(hr_dev, HZ); + return; + } + + 
hns_roce_do_bond(hr_dev->bond_grp); + mutex_unlock(&roce_bond_mutex); +} + +int hns_roce_bond_init(struct hns_roce_dev *hr_dev) +{ + int ret; + + INIT_DELAYED_WORK(&hr_dev->bond_work, hns_roce_do_bond_work); + + hr_dev->bond_nb.notifier_call = hns_roce_bond_event; + ret = register_netdevice_notifier(&hr_dev->bond_nb); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to register notifier for RoCE bond!\n"); + hr_dev->bond_nb.notifier_call = NULL; + } + + return ret; +} + +void hns_roce_cleanup_bond(struct hns_roce_dev *hr_dev) +{ + unregister_netdevice_notifier(&hr_dev->bond_nb); + cancel_delayed_work(&hr_dev->bond_work); + + if (hr_dev->bond_grp && hr_dev == hr_dev->bond_grp->main_hr_dev) + kfree(hr_dev->bond_grp); + + hr_dev->bond_grp = NULL; +} + +static bool hns_roce_bond_lowerstate_event(struct hns_roce_dev *hr_dev, + struct netdev_notifier_changelowerstate_info *info) +{ + struct hns_roce_bond_group *bond_grp = hr_dev->bond_grp; + struct netdev_lag_lower_state_info *bond_lower_info; + struct net_device *net_dev; + int i; + + net_dev = netdev_notifier_info_to_dev((struct netdev_notifier_info *)info); + if (!netif_is_lag_port(net_dev)) + return false; + + bond_lower_info = info->lower_state_info; + if (!bond_lower_info) + return false; + + if (!bond_grp) { + hr_dev->slave_state = *bond_lower_info; + return false; + } + + mutex_lock(&bond_grp->bond_mutex); + + for (i = 0; i < ROCE_BOND_FUNC_MAX; i++) { + if (net_dev == bond_grp->bond_func_info[i].net_dev) { + bond_grp->bond_func_info[i].state = *bond_lower_info; + break; + } + } + + if (bond_grp->bond_ready && + bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED) + bond_grp->bond_state = HNS_ROCE_BOND_SLAVE_CHANGESTATE; + + mutex_unlock(&bond_grp->bond_mutex); + + return true; +} + +static inline bool hns_roce_bond_mode_is_supported(enum netdev_lag_tx_type tx_type) +{ + if (tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && + tx_type != NETDEV_LAG_TX_TYPE_HASH) + return false; + + return true; +} + +static void 
hns_roce_bond_info_record(struct hns_roce_bond_group *bond_grp, + struct net_device *upper_dev) +{ + struct hns_roce_v2_priv *priv; + struct hns_roce_dev *hr_dev; + struct net_device *net_dev; + u8 func_idx; + + bond_grp->slave_num = 0; + bond_grp->slave_map = 0; + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper_dev, net_dev) { + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); + if (hr_dev) { + func_idx = PCI_FUNC(hr_dev->pci_dev->devfn); + bond_grp->slave_map |= (1 << func_idx); + bond_grp->slave_num++; + if (!bond_grp->bond_func_info[func_idx].net_dev) { + priv = hr_dev->priv; + + bond_grp->bond_func_info[func_idx].net_dev = + net_dev; + + bond_grp->bond_func_info[func_idx].handle = + priv->handle; + + bond_grp->bond_func_info[func_idx].state = + hr_dev->slave_state; + } + } + } + rcu_read_unlock(); +} + +static bool hns_roce_bond_upper_event(struct hns_roce_dev *hr_dev, + struct netdev_notifier_changeupper_info *info) +{ + struct hns_roce_bond_group *bond_grp = hr_dev->bond_grp; + struct net_device *upper_dev = info->upper_dev; + struct netdev_lag_upper_info *bond_upper_info; + u32 pre_slave_map = bond_grp->slave_map; + u8 pre_slave_num = bond_grp->slave_num; + bool changed = false; + + if (!upper_dev || !netif_is_lag_master(upper_dev)) + return false; + + if (info->linking) + bond_upper_info = info->upper_info; + + mutex_lock(&bond_grp->bond_mutex); + + if (bond_upper_info) + bond_grp->tx_type = bond_upper_info->tx_type; + + hns_roce_bond_info_record(bond_grp, upper_dev); + + bond_grp->bond = netdev_priv(upper_dev); + if (!hns_roce_bond_mode_is_supported(bond_grp->tx_type) || + bond_grp->slave_num <= 1) { + changed = bond_grp->bond_ready; + bond_grp->bond_ready = false; + goto out; + } + + if (bond_grp->bond_state == HNS_ROCE_BOND_NOT_BONDED) { + bond_grp->bond_ready = true; + changed = true; + } else if (bond_grp->bond_state == HNS_ROCE_BOND_IS_BONDED && + bond_grp->slave_num != pre_slave_num) { + bond_grp->bond_state = bond_grp->slave_num > 
pre_slave_num ? + HNS_ROCE_BOND_SLAVE_INC : + HNS_ROCE_BOND_SLAVE_DEC; + bond_grp->slave_map_diff = pre_slave_map ^ bond_grp->slave_map; + bond_grp->bond_ready = true; + changed = true; + } + +out: + mutex_unlock(&bond_grp->bond_mutex); + + return changed; +} + +static struct hns_roce_bond_group *hns_roce_alloc_bond_grp(struct hns_roce_dev *main_hr_dev, + struct net_device *upper_dev) +{ + struct hns_roce_bond_group *bond_grp; + + bond_grp = kzalloc(sizeof(*bond_grp), GFP_KERNEL); + if (!bond_grp) + return NULL; + + mutex_init(&bond_grp->bond_mutex); + bond_grp->upper_dev = upper_dev; + bond_grp->main_hr_dev = main_hr_dev; + bond_grp->main_net_dev = main_hr_dev->iboe.netdevs[0]; + bond_grp->bond_ready = false; + bond_grp->bond_state = HNS_ROCE_BOND_NOT_BONDED; + + hns_roce_bond_info_record(bond_grp, upper_dev); + + return bond_grp; +} + +static bool hns_roce_is_slave(struct net_device *bond, + struct net_device *net_dev) +{ + struct net_device *upper_dev; + + rcu_read_lock(); + upper_dev = netdev_master_upper_dev_get_rcu(net_dev); + rcu_read_unlock(); + + return bond == upper_dev; +} + +static bool hns_roce_is_bond_grp_exist(struct net_device *upper_dev) +{ + struct hns_roce_dev *hr_dev; + struct net_device *net_dev; + + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper_dev, net_dev) { + hr_dev = hns_roce_get_hrdev_by_netdev(net_dev); + if (hr_dev && hr_dev->bond_grp) { + rcu_read_unlock(); + return true; + } + } + rcu_read_unlock(); + + return false; +} + +int hns_roce_bond_event(struct notifier_block *self, + unsigned long event, void *ptr) +{ + struct net_device *net_dev = netdev_notifier_info_to_dev(ptr); + struct hns_roce_dev *hr_dev = + container_of(self, struct hns_roce_dev, bond_nb); + struct net_device *upper_dev; + bool changed; + + if (event != NETDEV_CHANGEUPPER && event != NETDEV_CHANGELOWERSTATE) + return NOTIFY_DONE; + + rcu_read_lock(); + upper_dev = netdev_master_upper_dev_get_rcu(net_dev); + rcu_read_unlock(); + if (event == 
NETDEV_CHANGELOWERSTATE && !upper_dev && + hr_dev != hns_roce_get_hrdev_by_netdev(net_dev)) + return NOTIFY_DONE; + + if (upper_dev) { + if (!hns_roce_is_slave(upper_dev, hr_dev->iboe.netdevs[0])) + return NOTIFY_DONE; + + mutex_lock(&roce_bond_mutex); + if (!hr_dev->bond_grp) { + if (hns_roce_is_bond_grp_exist(upper_dev)) { + mutex_unlock(&roce_bond_mutex); + return NOTIFY_DONE; + } + hr_dev->bond_grp = hns_roce_alloc_bond_grp(hr_dev, + upper_dev); + if (!hr_dev->bond_grp) { + ibdev_err(&hr_dev->ib_dev, + "failed to alloc RoCE bond_grp!\n"); + mutex_unlock(&roce_bond_mutex); + return NOTIFY_DONE; + } + } + mutex_unlock(&roce_bond_mutex); + } + + changed = (event == NETDEV_CHANGEUPPER) ? + hns_roce_bond_upper_event(hr_dev, ptr) : + hns_roce_bond_lowerstate_event(hr_dev, ptr); + + if (changed) + hns_roce_queue_bond_work(hr_dev, HZ); + + return NOTIFY_DONE; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.h b/drivers/infiniband/hw/hns/hns_roce_bond.h new file mode 100644 index 000000000000..3b00f6061a9d --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_bond.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef _HNS_ROCE_BOND_H +#define _HNS_ROCE_BOND_H + +#include <linux/netdevice.h> +#include <net/bonding.h> + +#define ROCE_BOND_FUNC_MAX 4 +#define ROCE_BOND_NAME_ID_IDX 9 + +enum { + BOND_MODE_1, + BOND_MODE_2_4, +}; + +enum hns_roce_bond_state { + HNS_ROCE_BOND_NOT_BONDED, + HNS_ROCE_BOND_IS_BONDED, + HNS_ROCE_BOND_SLAVE_INC, + HNS_ROCE_BOND_SLAVE_DEC, + HNS_ROCE_BOND_SLAVE_CHANGESTATE, +}; + +enum hns_roce_bond_cmd_type { + HNS_ROCE_SET_BOND, + HNS_ROCE_CHANGE_BOND, + HNS_ROCE_CLEAR_BOND, +}; + +struct hns_roce_func_info { + struct net_device *net_dev; + struct hnae3_handle *handle; + struct netdev_lag_lower_state_info state; +}; + +struct hns_roce_bond_group { + struct net_device *upper_dev; + struct net_device *main_net_dev; + struct hns_roce_dev *main_hr_dev; + u8 slave_num; + u8 active_slave_num; + u32 slave_map; + u32 
active_slave_map; + u32 slave_map_diff; + u8 bond_id; + struct bonding *bond; + bool bond_ready; + enum hns_roce_bond_state bond_state; + enum netdev_lag_tx_type tx_type; + /* + * A mutex which protect bond_grp info + */ + struct mutex bond_mutex; + struct hns_roce_func_info bond_func_info[ROCE_BOND_FUNC_MAX]; +}; + +int hns_roce_bond_init(struct hns_roce_dev *hr_dev); +int hns_roce_bond_event(struct notifier_block *self, + unsigned long event, void *ptr); +void hns_roce_cleanup_bond(struct hns_roce_dev *hr_dev); +bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev); +struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev); + +#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 51e59084f875..eb4582ce9c5c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -35,6 +35,7 @@
#include <rdma/ib_verbs.h> #include <rdma/hns-abi.h> +#include "hns_roce_bond.h"
#define PCI_REVISION_ID_HIP08 0x21 #define PCI_REVISION_ID_HIP09 0x30 @@ -147,6 +148,7 @@ enum { HNS_ROCE_CAP_FLAG_STASH = BIT(17), HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19), HNS_ROCE_CAP_FLAG_RQ_INLINE = BIT(20), + HNS_ROCE_CAP_FLAG_BOND = BIT(21), };
#define HNS_ROCE_DB_TYPE_COUNT 2 @@ -898,6 +900,9 @@ struct hns_roce_hw { u8 *tc_mode, u8 *priority); const struct ib_device_ops *hns_roce_dev_ops; const struct ib_device_ops *hns_roce_dev_srq_ops; + int (*bond_init)(struct hns_roce_dev *hr_dev); + bool (*bond_is_active)(struct hns_roce_dev *hr_dev); + struct net_device *(*get_bond_netdev)(struct hns_roce_dev *hr_dev); };
struct hns_roce_dev { @@ -961,6 +966,11 @@ struct hns_roce_dev { u32 is_vf; u32 cong_algo_tmpl_id; u64 dwqe_page; + + struct notifier_block bond_nb; + struct delayed_work bond_work; + struct hns_roce_bond_group *bond_grp; + struct netdev_lag_lower_state_info slave_state; };
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index bd45d07619e9..25800d5965bb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -1350,6 +1350,61 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev, return ret; }
+static inline enum hns_roce_opcode_type + get_bond_opcode(enum hns_roce_bond_cmd_type bond_type) +{ + if (bond_type == HNS_ROCE_SET_BOND) + return HNS_ROCE_OPC_SET_BOND_INFO; + else if (bond_type == HNS_ROCE_CHANGE_BOND) + return HNS_ROCE_OPC_CHANGE_ACTIVE_PORT; + else + return HNS_ROCE_OPC_CLEAR_BOND_INFO; +} + +int hns_roce_cmd_bond(struct hns_roce_dev *hr_dev, + enum hns_roce_bond_cmd_type bond_type) +{ + enum hns_roce_opcode_type opcode = get_bond_opcode(bond_type); + struct hns_roce_bond_info *slave_info; + struct hns_roce_cmq_desc desc = { 0 }; + int ret; + + slave_info = (struct hns_roce_bond_info *)desc.data; + hns_roce_cmq_setup_basic_desc(&desc, opcode, false); + + slave_info->bond_id = cpu_to_le32(hr_dev->bond_grp->bond_id); + if (bond_type == HNS_ROCE_CLEAR_BOND) + goto out; + + if (hr_dev->bond_grp->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + slave_info->bond_mode = cpu_to_le32(BOND_MODE_1); + if (hr_dev->bond_grp->active_slave_num != 1) + ibdev_err(&hr_dev->ib_dev, + "active slave cnt(%d) in Mode 1 is invalid.\n", + hr_dev->bond_grp->active_slave_num); + } else { + slave_info->bond_mode = cpu_to_le32(BOND_MODE_2_4); + slave_info->hash_policy = + cpu_to_le32(hr_dev->bond_grp->bond->params.xmit_policy); + } + + slave_info->active_slave_cnt = + cpu_to_le32(hr_dev->bond_grp->active_slave_num); + slave_info->active_slave_mask = + cpu_to_le32(hr_dev->bond_grp->active_slave_map); + slave_info->slave_mask = + cpu_to_le32(hr_dev->bond_grp->slave_map); + +out: + ret = hns_roce_cmq_send(hr_dev, &desc, 1); + if (ret) + ibdev_err(&hr_dev->ib_dev, + "cmq bond type(%d) failed, ret = %d.\n", + bond_type, ret); + + return ret; +} + static int config_hem_ba_to_hw(struct hns_roce_dev *hr_dev, dma_addr_t base_addr, u8 cmd, unsigned long tag) { @@ -6781,6 +6836,9 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .get_dscp = hns_roce_hw_v2_get_dscp, .hns_roce_dev_ops = &hns_roce_v2_dev_ops, .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, + .bond_init = 
hns_roce_bond_init, + .bond_is_active = hns_roce_bond_is_active, + .get_bond_netdev = hns_roce_get_bond_netdev, };
static const struct pci_device_id hns_roce_hw_v2_pci_tbl[] = { @@ -6903,7 +6961,7 @@ static void __hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, ib_dealloc_device(&hr_dev->ib_dev); }
-static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) +int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) { const struct hnae3_ae_ops *ops = handle->ae_algo->ops; const struct pci_device_id *id; @@ -6946,8 +7004,7 @@ static int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) return -EBUSY; }
-static void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, - bool reset) +void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset) { if (handle->rinfo.instance_state != HNS_ROCE_STATE_INITED) return; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 39641b449a42..7da410ecb966 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -252,6 +252,9 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_EXT_CFG = 0x8512, HNS_ROCE_QUERY_RAM_ECC = 0x8513, HNS_SWITCH_PARAMETER_CFG = 0x1033, + HNS_ROCE_OPC_SET_BOND_INFO = 0x8601, + HNS_ROCE_OPC_CLEAR_BOND_INFO = 0x8602, + HNS_ROCE_OPC_CHANGE_ACTIVE_PORT = 0x8603, };
enum { @@ -1464,11 +1467,25 @@ struct hns_roce_sccc_clr_done { __le32 rsv[5]; };
+struct hns_roce_bond_info { + __le32 bond_id; + __le32 bond_mode; + __le32 active_slave_cnt; + __le32 active_slave_mask; + __le32 slave_mask; + __le32 hash_policy; +}; + +int hns_roce_hw_v2_init_instance(struct hnae3_handle *handle); +void hns_roce_hw_v2_uninit_instance(struct hnae3_handle *handle, bool reset); + int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_udata *udata); +int hns_roce_cmd_bond(struct hns_roce_dev *hr_dev, + enum hns_roce_bond_cmd_type bond_type);
static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], void __iomem *dest) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index cdb6def7923e..00138fa10f0b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -37,9 +37,12 @@ #include <rdma/ib_smi.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> + +#include "hnae3.h" #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" +#include "hns_roce_hw_v2.h"
static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u32 port, const u8 *addr) @@ -259,7 +262,9 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
+	/* get_bond_netdev() takes a mutex and may sleep, so the bonding
+	 * netdev must be resolved before taking the iboe spinlock.
+	 */
+	net_dev = hr_dev->hw->get_bond_netdev(hr_dev);
+
 	spin_lock_irqsave(&hr_dev->iboe.lock, flags);
 
-	net_dev = hr_dev->iboe.netdevs[port];
+	if (!net_dev)
+		net_dev = hr_dev->iboe.netdevs[port];
 	if (!net_dev) {
 		spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
 		dev_err(dev, "Find netdev %u failed!\n", port);
@@ -534,6 +539,9 @@ static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_ib_iboe *iboe = &hr_dev->iboe;
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) + hns_roce_cleanup_bond(hr_dev); + hr_dev->active = false; unregister_netdevice_notifier(&iboe->nb); ib_unregister_device(&hr_dev->ib_dev); @@ -706,7 +714,12 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) return ret; } dma_set_max_seg_size(dev, UINT_MAX); - ret = ib_register_device(ib_dev, "hns_%d", dev); + + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) && + (hr_dev->hw->bond_is_active(hr_dev))) + ret = ib_register_device(ib_dev, "hns_bond_%d", dev); + else + ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); return ret; @@ -725,8 +738,15 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) goto error_failed_setup_mtu_mac; }
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_BOND) { + ret = hr_dev->hw->bond_init(hr_dev); + if (ret) + dev_err(dev, "roce bond init failed, ret = %d\n", ret); + } + hr_dev->active = true; - return 0; + + return ret;
error_failed_setup_mtu_mac: ib_unregister_device(ib_dev); -- 2.30.0