This patchset refactors some functions and adds some new features for the flow director.
patch 1~3: refactor large functions
patch 4, 7: add traffic class and user-def field support for ethtool
patch 5: use asynchronous configuration
patch 6: clean up hns3_del_all_fd_entries()
patch 8, 9: add support for queue bonding mode
Jian Shen (9):
  net: hns3: refactor out hclge_add_fd_entry()
  net: hns3: refactor out hclge_fd_get_tuple()
  net: hns3: refactor for function hclge_fd_convert_tuple
  net: hns3: add support for traffic class tuple support for flow director by ethtool
  net: hns3: refactor flow director configuration
  net: hns3: refine for hns3_del_all_fd_entries()
  net: hns3: add support for user-def data of flow director
  net: hns3: add support for queue bonding mode of flow director
  net: hns3: add queue bonding mode support for VF
 drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h    |    8 +
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |    9 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c |    7 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    |   91 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |   14 +-
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |   13 +-
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c |    2 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |   21 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 1570 ++++++++++++++------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |   63 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c |   33 +
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c   |    2 +
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  |   74 +
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h  |    7 +
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c   |   17 +
 15 files changed, 1450 insertions(+), 481 deletions(-)
From: Jian Shen shenjian15@huawei.com
The function hclge_add_fd_entry() is complex and prolix. To make it more readable, extract the handling of fs->ring_cookie into a separate function.
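For context, the cookie that the new helper decodes is the standard ethtool one: the low 32 bits select the ring (queue), the next 8 bits select the VF, and the special value RX_CLS_FLOW_DISC requests a drop. A minimal sketch of how such a cookie could be built from the UAPI masks in linux/ethtool.h; the helper name below is illustrative and not part of the patch:

#include <linux/ethtool.h>

/* Illustrative only: pack a VF id and ring index into the u64 ring_cookie
 * that hclge_fd_parse_ring_cookie() decodes. In this driver, vf == 0 means
 * the rule targets the PF itself.
 */
static inline __u64 build_ring_cookie(__u8 vf, __u32 ring)
{
	return ((__u64)vf << ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF) |
	       (ring & ETHTOOL_RX_FLOW_SPEC_RING);
}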
Signed-off-by: Jian Shen shenjian15@huawei.com
Signed-off-by: Huazhong Tan tanhuazhong@huawei.com
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 67 +++++++++++++---------
 1 file changed, 40 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index a664383..4929220 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -6126,6 +6126,42 @@ static bool hclge_is_cls_flower_active(struct hnae3_handle *handle) return hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE; }
+static int hclge_fd_parse_ring_cookie(struct hclge_dev *hdev, u64 ring_cookie, + u16 *vport_id, u8 *action, u16 *queue_id) +{ + struct hclge_vport *vport = hdev->vport; + + if (ring_cookie == RX_CLS_FLOW_DISC) { + *action = HCLGE_FD_ACTION_DROP_PACKET; + } else { + u32 ring = ethtool_get_flow_spec_ring(ring_cookie); + u8 vf = ethtool_get_flow_spec_ring_vf(ring_cookie); + u16 tqps; + + if (vf > hdev->num_req_vfs) { + dev_err(&hdev->pdev->dev, + "Error: vf id (%u) > max vf num (%u)\n", + vf, hdev->num_req_vfs); + return -EINVAL; + } + + *vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id; + tqps = hdev->vport[vf].nic.kinfo.num_tqps; + + if (ring >= tqps) { + dev_err(&hdev->pdev->dev, + "Error: queue id (%u) > max tqp num (%u)\n", + ring, tqps - 1); + return -EINVAL; + } + + *action = HCLGE_FD_ACTION_SELECT_QUEUE; + *queue_id = ring; + } + + return 0; +} + static int hclge_add_fd_entry(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd) { @@ -6162,33 +6198,10 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, if (ret) return ret;
- if (fs->ring_cookie == RX_CLS_FLOW_DISC) { - action = HCLGE_FD_ACTION_DROP_PACKET; - } else { - u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie); - u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie); - u16 tqps; - - if (vf > hdev->num_req_vfs) { - dev_err(&hdev->pdev->dev, - "Error: vf id (%u) > max vf num (%u)\n", - vf, hdev->num_req_vfs); - return -EINVAL; - } - - dst_vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id; - tqps = vf ? hdev->vport[vf].alloc_tqps : vport->alloc_tqps; - - if (ring >= tqps) { - dev_err(&hdev->pdev->dev, - "Error: queue id (%u) > max tqp num (%u)\n", - ring, tqps - 1); - return -EINVAL; - } - - action = HCLGE_FD_ACTION_SELECT_QUEUE; - q_index = ring; - } + ret = hclge_fd_parse_ring_cookie(hdev, fs->ring_cookie, &dst_vport_id, + &action, &q_index); + if (ret) + return ret;
rule = kzalloc(sizeof(*rule), GFP_KERNEL); if (!rule)
From: Jian Shen shenjian15@huawei.com
The function hclge_fd_get_tuple() is complex and prolix. To make it more readable, extract the tuple parsing of each flow type into its own function.
Signed-off-by: Jian Shen shenjian15@huawei.com
Signed-off-by: Huazhong Tan tanhuazhong@huawei.com
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 220 +++++++++++----------
 1 file changed, 117 insertions(+), 103 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 4929220..a17831f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -5935,144 +5935,158 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev, return 0; }
-static int hclge_fd_get_tuple(struct hclge_dev *hdev, - struct ethtool_rx_flow_spec *fs, - struct hclge_fd_rule *rule) +static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, u8 ip_proto) { - u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + rule->tuples.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src); + rule->tuples_mask.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
- switch (flow_type) { - case SCTP_V4_FLOW: - case TCP_V4_FLOW: - case UDP_V4_FLOW: - rule->tuples.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src); - rule->tuples_mask.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src); + rule->tuples.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst); + rule->tuples_mask.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
- rule->tuples.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst); - rule->tuples_mask.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst); + rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc); + rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc);
- rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc); - rule->tuples_mask.src_port = - be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc); + rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst); + rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst);
- rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst); - rule->tuples_mask.dst_port = - be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst); + rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos;
- rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos; - rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos; + rule->tuples.ether_proto = ETH_P_IP; + rule->tuples_mask.ether_proto = 0xFFFF;
- rule->tuples.ether_proto = ETH_P_IP; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ip_proto = ip_proto; + rule->tuples_mask.ip_proto = 0xFF; +}
- break; - case IP_USER_FLOW: - rule->tuples.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src); - rule->tuples_mask.src_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src); +static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + rule->tuples.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src); + rule->tuples_mask.src_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
- rule->tuples.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst); - rule->tuples_mask.dst_ip[IPV4_INDEX] = - be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst); + rule->tuples.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst); + rule->tuples_mask.dst_ip[IPV4_INDEX] = + be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
- rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos; - rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos; + rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos; + rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos;
- rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto; - rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto; + rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto; + rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto;
- rule->tuples.ether_proto = ETH_P_IP; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ether_proto = ETH_P_IP; + rule->tuples_mask.ether_proto = 0xFFFF; +}
- break; - case SCTP_V6_FLOW: - case TCP_V6_FLOW: - case UDP_V6_FLOW: - be32_to_cpu_array(rule->tuples.src_ip, - fs->h_u.tcp_ip6_spec.ip6src, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, - fs->m_u.tcp_ip6_spec.ip6src, IPV6_SIZE); +static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule, u8 ip_proto) +{ + be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.tcp_ip6_spec.ip6src, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.tcp_ip6_spec.ip6src, + IPV6_SIZE);
- be32_to_cpu_array(rule->tuples.dst_ip, - fs->h_u.tcp_ip6_spec.ip6dst, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.dst_ip, - fs->m_u.tcp_ip6_spec.ip6dst, IPV6_SIZE); + be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.tcp_ip6_spec.ip6dst, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.tcp_ip6_spec.ip6dst, + IPV6_SIZE);
- rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc); - rule->tuples_mask.src_port = - be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc); + rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc); + rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc);
- rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst); - rule->tuples_mask.dst_port = - be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst); + rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst); + rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst);
- rule->tuples.ether_proto = ETH_P_IPV6; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ether_proto = ETH_P_IPV6; + rule->tuples_mask.ether_proto = 0xFFFF;
- break; - case IPV6_USER_FLOW: - be32_to_cpu_array(rule->tuples.src_ip, - fs->h_u.usr_ip6_spec.ip6src, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.src_ip, - fs->m_u.usr_ip6_spec.ip6src, IPV6_SIZE); + rule->tuples.ip_proto = ip_proto; + rule->tuples_mask.ip_proto = 0xFF; +}
- be32_to_cpu_array(rule->tuples.dst_ip, - fs->h_u.usr_ip6_spec.ip6dst, IPV6_SIZE); - be32_to_cpu_array(rule->tuples_mask.dst_ip, - fs->m_u.usr_ip6_spec.ip6dst, IPV6_SIZE); +static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.usr_ip6_spec.ip6src, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.usr_ip6_spec.ip6src, + IPV6_SIZE);
- rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto; - rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto; + be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.usr_ip6_spec.ip6dst, + IPV6_SIZE); + be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.usr_ip6_spec.ip6dst, + IPV6_SIZE);
- rule->tuples.ether_proto = ETH_P_IPV6; - rule->tuples_mask.ether_proto = 0xFFFF; + rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto; + rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
- break; - case ETHER_FLOW: - ether_addr_copy(rule->tuples.src_mac, - fs->h_u.ether_spec.h_source); - ether_addr_copy(rule->tuples_mask.src_mac, - fs->m_u.ether_spec.h_source); + rule->tuples.ether_proto = ETH_P_IPV6; + rule->tuples_mask.ether_proto = 0xFFFF; +}
- ether_addr_copy(rule->tuples.dst_mac, - fs->h_u.ether_spec.h_dest); - ether_addr_copy(rule->tuples_mask.dst_mac, - fs->m_u.ether_spec.h_dest); +static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + ether_addr_copy(rule->tuples.src_mac, fs->h_u.ether_spec.h_source); + ether_addr_copy(rule->tuples_mask.src_mac, fs->m_u.ether_spec.h_source);
- rule->tuples.ether_proto = - be16_to_cpu(fs->h_u.ether_spec.h_proto); - rule->tuples_mask.ether_proto = - be16_to_cpu(fs->m_u.ether_spec.h_proto); + ether_addr_copy(rule->tuples.dst_mac, fs->h_u.ether_spec.h_dest); + ether_addr_copy(rule->tuples_mask.dst_mac, fs->m_u.ether_spec.h_dest);
- break; - default: - return -EOPNOTSUPP; - } + rule->tuples.ether_proto = be16_to_cpu(fs->h_u.ether_spec.h_proto); + rule->tuples_mask.ether_proto = be16_to_cpu(fs->m_u.ether_spec.h_proto); +} + +static int hclge_fd_get_tuple(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
switch (flow_type) { case SCTP_V4_FLOW: - case SCTP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_SCTP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_SCTP); break; case TCP_V4_FLOW: - case TCP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_TCP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_TCP); break; case UDP_V4_FLOW: + hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_UDP); + break; + case IP_USER_FLOW: + hclge_fd_get_ip4_tuple(hdev, fs, rule); + break; + case SCTP_V6_FLOW: + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_SCTP); + break; + case TCP_V6_FLOW: + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_TCP); + break; case UDP_V6_FLOW: - rule->tuples.ip_proto = IPPROTO_UDP; - rule->tuples_mask.ip_proto = 0xFF; + hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_UDP); break; - default: + case IPV6_USER_FLOW: + hclge_fd_get_ip6_tuple(hdev, fs, rule); break; + case ETHER_FLOW: + hclge_fd_get_ether_tuple(hdev, fs, rule); + break; + default: + return -EOPNOTSUPP; }
if (fs->flow_type & FLOW_EXT) {
From: Jian Shen shenjian15@huawei.com
Currently, hclge_fd_convert_tuple() has too many branches, and even more would be needed when new tuples are added. Refactor it by classifying the tuples according to their key length, so only a few KEY_OPT types are needed now, which also makes it flexible to add new tuples.
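A standalone sketch of the table-driven pattern used by this patch: each tuple entry records how to convert it (key_opt) and where its value and mask live inside the rule (via offsetof), so one generic routine replaces a switch case per tuple. The names here are simplified stand-ins, and the masking step is a placeholder for the driver's calc_x()/calc_y() helpers:

#include <stddef.h>
#include <string.h>

struct tuples {
	unsigned char ip_tos;
	unsigned short src_port;
};

struct rule {
	struct tuples tuples;
	struct tuples tuples_mask;
};

enum key_opt { KEY_U8, KEY_LE16 };

struct key_info {
	enum key_opt opt;
	size_t offset;		/* offset of the value inside struct rule */
	size_t moffset;		/* offset of the mask inside struct rule */
};

static const struct key_info key_table[] = {
	{ KEY_U8, offsetof(struct rule, tuples.ip_tos),
		  offsetof(struct rule, tuples_mask.ip_tos) },
	{ KEY_LE16, offsetof(struct rule, tuples.src_port),
		    offsetof(struct rule, tuples_mask.src_port) },
};

static void convert_tuple(const struct rule *rule, int idx,
			  unsigned char *key_x, unsigned char *key_y)
{
	const unsigned char *p = (const unsigned char *)rule;
	const struct key_info *info = &key_table[idx];

	switch (info->opt) {
	case KEY_U8:
		*key_x = p[info->offset] & p[info->moffset];
		*key_y = ~p[info->offset] & p[info->moffset];
		break;
	case KEY_LE16:
		/* the real code combines value and mask, then stores LE16 */
		memcpy(key_x, &p[info->offset], sizeof(unsigned short));
		memcpy(key_y, &p[info->moffset], sizeof(unsigned short));
		break;
	}
}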
Signed-off-by: Jian Shen shenjian15@huawei.com
Signed-off-by: Huazhong Tan tanhuazhong@huawei.com
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 189 +++++++++------------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 12 ++
 2 files changed, 97 insertions(+), 104 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index a17831f..3d601c9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -384,36 +384,56 @@ static const struct key_info meta_data_key_info[] = { };
static const struct key_info tuple_key_info[] = { - { OUTER_DST_MAC, 48}, - { OUTER_SRC_MAC, 48}, - { OUTER_VLAN_TAG_FST, 16}, - { OUTER_VLAN_TAG_SEC, 16}, - { OUTER_ETH_TYPE, 16}, - { OUTER_L2_RSV, 16}, - { OUTER_IP_TOS, 8}, - { OUTER_IP_PROTO, 8}, - { OUTER_SRC_IP, 32}, - { OUTER_DST_IP, 32}, - { OUTER_L3_RSV, 16}, - { OUTER_SRC_PORT, 16}, - { OUTER_DST_PORT, 16}, - { OUTER_L4_RSV, 32}, - { OUTER_TUN_VNI, 24}, - { OUTER_TUN_FLOW_ID, 8}, - { INNER_DST_MAC, 48}, - { INNER_SRC_MAC, 48}, - { INNER_VLAN_TAG_FST, 16}, - { INNER_VLAN_TAG_SEC, 16}, - { INNER_ETH_TYPE, 16}, - { INNER_L2_RSV, 16}, - { INNER_IP_TOS, 8}, - { INNER_IP_PROTO, 8}, - { INNER_SRC_IP, 32}, - { INNER_DST_IP, 32}, - { INNER_L3_RSV, 16}, - { INNER_SRC_PORT, 16}, - { INNER_DST_PORT, 16}, - { INNER_L4_RSV, 32}, + { OUTER_DST_MAC, 48, KEY_OPT_MAC, -1, -1 }, + { OUTER_SRC_MAC, 48, KEY_OPT_MAC, -1, -1 }, + { OUTER_VLAN_TAG_FST, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_ETH_TYPE, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_IP_TOS, 8, KEY_OPT_U8, -1, -1 }, + { OUTER_IP_PROTO, 8, KEY_OPT_U8, -1, -1 }, + { OUTER_SRC_IP, 32, KEY_OPT_IP, -1, -1 }, + { OUTER_DST_IP, 32, KEY_OPT_IP, -1, -1 }, + { OUTER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_SRC_PORT, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_DST_PORT, 16, KEY_OPT_LE16, -1, -1 }, + { OUTER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 }, + { OUTER_TUN_VNI, 24, KEY_OPT_VNI, -1, -1 }, + { OUTER_TUN_FLOW_ID, 8, KEY_OPT_U8, -1, -1 }, + { INNER_DST_MAC, 48, KEY_OPT_MAC, + offsetof(struct hclge_fd_rule, tuples.dst_mac), + offsetof(struct hclge_fd_rule, tuples_mask.dst_mac) }, + { INNER_SRC_MAC, 48, KEY_OPT_MAC, + offsetof(struct hclge_fd_rule, tuples.src_mac), + offsetof(struct hclge_fd_rule, tuples_mask.src_mac) }, + { INNER_VLAN_TAG_FST, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.vlan_tag1), + offsetof(struct hclge_fd_rule, tuples_mask.vlan_tag1) }, + { INNER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 }, + { INNER_ETH_TYPE, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.ether_proto), + offsetof(struct hclge_fd_rule, tuples_mask.ether_proto) }, + { INNER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { INNER_IP_TOS, 8, KEY_OPT_U8, + offsetof(struct hclge_fd_rule, tuples.ip_tos), + offsetof(struct hclge_fd_rule, tuples_mask.ip_tos) }, + { INNER_IP_PROTO, 8, KEY_OPT_U8, + offsetof(struct hclge_fd_rule, tuples.ip_proto), + offsetof(struct hclge_fd_rule, tuples_mask.ip_proto) }, + { INNER_SRC_IP, 32, KEY_OPT_IP, + offsetof(struct hclge_fd_rule, tuples.src_ip), + offsetof(struct hclge_fd_rule, tuples_mask.src_ip) }, + { INNER_DST_IP, 32, KEY_OPT_IP, + offsetof(struct hclge_fd_rule, tuples.dst_ip), + offsetof(struct hclge_fd_rule, tuples_mask.dst_ip) }, + { INNER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { INNER_SRC_PORT, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.src_port), + offsetof(struct hclge_fd_rule, tuples_mask.src_port) }, + { INNER_DST_PORT, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.dst_port), + offsetof(struct hclge_fd_rule, tuples_mask.dst_port) }, + { INNER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 }, };
static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) @@ -5371,96 +5391,57 @@ static int hclge_fd_ad_config(struct hclge_dev *hdev, u8 stage, int loc, static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y, struct hclge_fd_rule *rule) { + int offset, moffset, ip_offset; + enum HCLGE_FD_KEY_OPT key_opt; u16 tmp_x_s, tmp_y_s; u32 tmp_x_l, tmp_y_l; + u8 *p = (u8 *)rule; int i;
- if (rule->unused_tuple & tuple_bit) + if (rule->unused_tuple & BIT(tuple_bit)) return true;
- switch (tuple_bit) { - case BIT(INNER_DST_MAC): - for (i = 0; i < ETH_ALEN; i++) { - calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], - rule->tuples_mask.dst_mac[i]); - calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i], - rule->tuples_mask.dst_mac[i]); - } + key_opt = tuple_key_info[tuple_bit].key_opt; + offset = tuple_key_info[tuple_bit].offset; + moffset = tuple_key_info[tuple_bit].moffset;
- return true; - case BIT(INNER_SRC_MAC): - for (i = 0; i < ETH_ALEN; i++) { - calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples_mask.src_mac[i]); - calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples_mask.src_mac[i]); - } + switch (key_opt) { + case KEY_OPT_U8: + calc_x(*key_x, p[offset], p[moffset]); + calc_y(*key_y, p[offset], p[moffset]);
return true; - case BIT(INNER_VLAN_TAG_FST): - calc_x(tmp_x_s, rule->tuples.vlan_tag1, - rule->tuples_mask.vlan_tag1); - calc_y(tmp_y_s, rule->tuples.vlan_tag1, - rule->tuples_mask.vlan_tag1); + case KEY_OPT_LE16: + calc_x(tmp_x_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset])); + calc_y(tmp_y_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset])); *(__le16 *)key_x = cpu_to_le16(tmp_x_s); *(__le16 *)key_y = cpu_to_le16(tmp_y_s);
return true; - case BIT(INNER_ETH_TYPE): - calc_x(tmp_x_s, rule->tuples.ether_proto, - rule->tuples_mask.ether_proto); - calc_y(tmp_y_s, rule->tuples.ether_proto, - rule->tuples_mask.ether_proto); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); - - return true; - case BIT(INNER_IP_TOS): - calc_x(*key_x, rule->tuples.ip_tos, rule->tuples_mask.ip_tos); - calc_y(*key_y, rule->tuples.ip_tos, rule->tuples_mask.ip_tos); - - return true; - case BIT(INNER_IP_PROTO): - calc_x(*key_x, rule->tuples.ip_proto, - rule->tuples_mask.ip_proto); - calc_y(*key_y, rule->tuples.ip_proto, - rule->tuples_mask.ip_proto); - - return true; - case BIT(INNER_SRC_IP): - calc_x(tmp_x_l, rule->tuples.src_ip[IPV4_INDEX], - rule->tuples_mask.src_ip[IPV4_INDEX]); - calc_y(tmp_y_l, rule->tuples.src_ip[IPV4_INDEX], - rule->tuples_mask.src_ip[IPV4_INDEX]); + case KEY_OPT_LE32: + calc_x(tmp_x_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset])); + calc_y(tmp_y_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset])); *(__le32 *)key_x = cpu_to_le32(tmp_x_l); *(__le32 *)key_y = cpu_to_le32(tmp_y_l);
return true; - case BIT(INNER_DST_IP): - calc_x(tmp_x_l, rule->tuples.dst_ip[IPV4_INDEX], - rule->tuples_mask.dst_ip[IPV4_INDEX]); - calc_y(tmp_y_l, rule->tuples.dst_ip[IPV4_INDEX], - rule->tuples_mask.dst_ip[IPV4_INDEX]); - *(__le32 *)key_x = cpu_to_le32(tmp_x_l); - *(__le32 *)key_y = cpu_to_le32(tmp_y_l); - - return true; - case BIT(INNER_SRC_PORT): - calc_x(tmp_x_s, rule->tuples.src_port, - rule->tuples_mask.src_port); - calc_y(tmp_y_s, rule->tuples.src_port, - rule->tuples_mask.src_port); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); + case KEY_OPT_MAC: + for (i = 0; i < ETH_ALEN; i++) { + calc_x(key_x[ETH_ALEN - 1 - i], p[offset + i], + p[moffset + i]); + calc_y(key_y[ETH_ALEN - 1 - i], p[offset + i], + p[moffset + i]); + }
return true; - case BIT(INNER_DST_PORT): - calc_x(tmp_x_s, rule->tuples.dst_port, - rule->tuples_mask.dst_port); - calc_y(tmp_y_s, rule->tuples.dst_port, - rule->tuples_mask.dst_port); - *(__le16 *)key_x = cpu_to_le16(tmp_x_s); - *(__le16 *)key_y = cpu_to_le16(tmp_y_s); + case KEY_OPT_IP: + ip_offset = IPV4_INDEX * sizeof(u32); + calc_x(tmp_x_l, *(u32 *)(&p[offset + ip_offset]), + *(u32 *)(&p[moffset + ip_offset])); + calc_y(tmp_y_l, *(u32 *)(&p[offset + ip_offset]), + *(u32 *)(&p[moffset + ip_offset])); + *(__le32 *)key_x = cpu_to_le32(tmp_x_l); + *(__le32 *)key_y = cpu_to_le32(tmp_y_l);
return true; default: @@ -5548,12 +5529,12 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
for (i = 0 ; i < MAX_TUPLE; i++) { bool tuple_valid; - u32 check_tuple;
tuple_size = tuple_key_info[i].key_length / 8; - check_tuple = key_cfg->tuple_active & BIT(i); + if (!(key_cfg->tuple_active & BIT(i))) + continue;
- tuple_valid = hclge_fd_convert_tuple(check_tuple, cur_key_x, + tuple_valid = hclge_fd_convert_tuple(i, cur_key_x, cur_key_y, rule); if (tuple_valid) { cur_key_x += tuple_size; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 19d7f28..6fe7455 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -548,9 +548,21 @@ enum HCLGE_FD_META_DATA { MAX_META_DATA, };
+enum HCLGE_FD_KEY_OPT { + KEY_OPT_U8, + KEY_OPT_LE16, + KEY_OPT_LE32, + KEY_OPT_MAC, + KEY_OPT_IP, + KEY_OPT_VNI, +}; + struct key_info { u8 key_type; u8 key_length; /* use bit as unit */ + enum HCLGE_FD_KEY_OPT key_opt; + int offset; + int moffset; };
#define MAX_KEY_LENGTH 400
From: Jian Shen shenjian15@huawei.com
The hardware supports parsing and matching the traffic class field of IPv6 packets for the flow director, using the same tuple as the IPv4 ToS. So remove the driver's restriction on configuring 'tclass'.
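A user-space sketch of exercising the new capability through the classic ethtool ioctl, assuming only the UAPI structures from linux/ethtool.h; the device name, rule location, queue and tclass values are illustrative, not part of the patch:

#include <string.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_rxnfc nfc;
	struct ifreq ifr;
	int fd;

	/* insert a TCP over IPv6 rule that matches only the traffic class */
	memset(&nfc, 0, sizeof(nfc));
	nfc.cmd = ETHTOOL_SRXCLSRLINS;
	nfc.fs.flow_type = TCP_V6_FLOW;
	nfc.fs.h_u.tcp_ip6_spec.tclass = 0x28;	/* traffic class value */
	nfc.fs.m_u.tcp_ip6_spec.tclass = 0xff;	/* full mask on tclass */
	nfc.fs.ring_cookie = 3;			/* steer matches to queue 3 */
	nfc.fs.location = 0;			/* rule index */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&nfc;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0 || ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("ETHTOOL_SRXCLSRLINS");
		return 1;
	}
	return 0;
}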
Signed-off-by: Jian Shen shenjian15@huawei.com
Signed-off-by: Huazhong Tan tanhuazhong@huawei.com
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 27 ++++++++++++++++------
 1 file changed, 20 insertions(+), 7 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 3d601c9..2584444 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -5665,8 +5665,7 @@ static int hclge_fd_check_tcpip6_tuple(struct ethtool_tcpip6_spec *spec, if (!spec || !unused_tuple) return -EINVAL;
- *unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | - BIT(INNER_IP_TOS); + *unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC);
/* check whether src/dst ip address used */ if (ipv6_addr_any((struct in6_addr *)spec->ip6src)) @@ -5681,8 +5680,8 @@ static int hclge_fd_check_tcpip6_tuple(struct ethtool_tcpip6_spec *spec, if (!spec->pdst) *unused_tuple |= BIT(INNER_DST_PORT);
- if (spec->tclass) - return -EOPNOTSUPP; + if (!spec->tclass) + *unused_tuple |= BIT(INNER_IP_TOS);
return 0; } @@ -5694,7 +5693,7 @@ static int hclge_fd_check_ip6_tuple(struct ethtool_usrip6_spec *spec, return -EINVAL;
*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) | - BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT); + BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
/* check whether src/dst ip address used */ if (ipv6_addr_any((struct in6_addr *)spec->ip6src)) @@ -5706,8 +5705,8 @@ static int hclge_fd_check_ip6_tuple(struct ethtool_usrip6_spec *spec, if (!spec->l4_proto) *unused_tuple |= BIT(INNER_IP_PROTO);
- if (spec->tclass) - return -EOPNOTSUPP; + if (!spec->tclass) + *unused_tuple |= BIT(INNER_IP_TOS);
if (spec->l4_4_bytes) return -EOPNOTSUPP; @@ -5993,6 +5992,9 @@ static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev, rule->tuples.ether_proto = ETH_P_IPV6; rule->tuples_mask.ether_proto = 0xFFFF;
+ rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass; + rule->tuples.ip_proto = ip_proto; rule->tuples_mask.ip_proto = 0xFF; } @@ -6014,6 +6016,9 @@ static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev, rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto; rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
+ rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass; + rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass; + rule->tuples.ether_proto = ETH_P_IPV6; rule->tuples_mask.ether_proto = 0xFFFF; } @@ -6423,6 +6428,10 @@ static void hclge_fd_get_tcpip6_info(struct hclge_fd_rule *rule, cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip, IPV6_SIZE);
+ spec->tclass = rule->tuples.ip_tos; + spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ? + 0 : rule->tuples_mask.ip_tos; + spec->psrc = cpu_to_be16(rule->tuples.src_port); spec_mask->psrc = rule->unused_tuple & BIT(INNER_SRC_PORT) ? 0 : cpu_to_be16(rule->tuples_mask.src_port); @@ -6450,6 +6459,10 @@ static void hclge_fd_get_ip6_info(struct hclge_fd_rule *rule, cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip, IPV6_SIZE);
+ spec->tclass = rule->tuples.ip_tos; + spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ? + 0 : rule->tuples_mask.ip_tos; + spec->l4_proto = rule->tuples.ip_proto; spec_mask->l4_proto = rule->unused_tuple & BIT(INNER_IP_PROTO) ? 0 : rule->tuples_mask.ip_proto;
From: Jian Shen shenjian15@huawei.com
Currently, there are three flow director work modes in the HNS3 driver: ethtool (EP), tc flower and aRFS. The flow director rules are configured synchronously while holding a spin lock, so all the related firmware commands must also run under that spin lock.

To eliminate this limitation, configure the flow director rules asynchronously. The rules are still kept in fd_rule_list, with the states below:
TO_ADD: the rule is waiting to be added to hardware
TO_DEL: the rule is waiting to be removed from hardware
ADDING: the rule is being added to hardware
ACTIVE: the rule has already been added to hardware

When a new request to add or delete a flow director rule is received, check whether a rule already exists at that location, update the rule content and state, and schedule the service task to finish the configuration.
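A compact model of the state transitions described above; the enum and helper names are illustrative stand-ins for the HCLGE_FD_NODE_STATE handling added in the diff below, not driver code:

#include <stdbool.h>

/* Illustrative model only: mirrors the TO_ADD/TO_DEL/ADDING/ACTIVE states
 * added to hclge_main.h by this patch.
 */
enum fd_state { FD_TO_ADD, FD_ADDING, FD_ACTIVE, FD_TO_DEL };

/* A new add/delete request arrives for a rule node already in the list. */
static enum fd_state on_request(enum fd_state cur, bool is_add)
{
	if (is_add)
		return FD_TO_ADD;	/* new content always replaces the old */
	if (cur == FD_ACTIVE || cur == FD_TO_DEL)
		return FD_TO_DEL;	/* hardware entry removed by the task */
	/* TO_ADD/ADDING rules were never committed to hardware, so the
	 * driver simply drops the node from the list instead.
	 */
	return cur;
}

/* The periodic service task finished writing this rule to hardware. */
static enum fd_state on_hw_add_done(enum fd_state cur, bool ok)
{
	if (cur == FD_ADDING)
		return ok ? FD_ACTIVE : FD_TO_ADD;	/* failed adds retry later */
	return cur;
}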
Signed-off-by: Jian Shen shenjian15@huawei.com
Signed-off-by: Huazhong Tan tanhuazhong@huawei.com
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 629 ++++++++++++++------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 10 +
 2 files changed, 420 insertions(+), 219 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2584444..e35ff6e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -62,7 +62,7 @@ static void hclge_sync_vlan_filter(struct hclge_dev *hdev); static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev); static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle); static void hclge_rfs_filter_expire(struct hclge_dev *hdev); -static void hclge_clear_arfs_rules(struct hnae3_handle *handle); +static void hclge_clear_arfs_rules(struct hclge_dev *hdev); static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev, unsigned long *addr); static int hclge_set_default_loopback(struct hclge_dev *hdev); @@ -70,6 +70,7 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev); static void hclge_sync_mac_table(struct hclge_dev *hdev); static void hclge_restore_hw_table(struct hclge_dev *hdev); static void hclge_sync_promisc_mode(struct hclge_dev *hdev); +static void hclge_sync_fd_table(struct hclge_dev *hdev);
static struct hnae3_ae_algo ae_algo;
@@ -4261,6 +4262,7 @@ static void hclge_periodic_service_task(struct hclge_dev *hdev) hclge_update_link_status(hdev); hclge_sync_mac_table(hdev); hclge_sync_promisc_mode(hdev); + hclge_sync_fd_table(hdev);
if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { delta = jiffies - hdev->last_serv_processed; @@ -5162,6 +5164,198 @@ static void hclge_request_update_promisc_mode(struct hnae3_handle *handle) set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state); }
+static void hclge_sync_fd_state(struct hclge_dev *hdev) +{ + if (hlist_empty(&hdev->fd_rule_list)) + hdev->fd_active_type = HCLGE_FD_RULE_NONE; +} + +static void hclge_update_fd_rule_node(struct hclge_dev *hdev, + struct hclge_fd_rule *old_rule, + struct hclge_fd_rule *new_rule, + enum HCLGE_FD_NODE_STATE state) +{ + switch (state) { + case HCLGE_FD_TO_ADD: + /* if new request is TO_ADD, we should configure the + * new rule to hardware, no matter what the state of + * old rule is. Even though the old rule is already + * configured in the hardware, the new rule will replace + * it. + */ + new_rule->rule_node.next = old_rule->rule_node.next; + new_rule->rule_node.pprev = old_rule->rule_node.pprev; + memcpy(old_rule, new_rule, sizeof(*old_rule)); + kfree(new_rule); + break; + case HCLGE_FD_TO_DEL: + /* if new request is TO_DEL, and old rule is existent + * 1) the state of old rule is TO_DEL, we need do nothing, + * because we delete rule by location, other rule content + * is unncessary. + * 2) the state of old rule is ACTIVE, we need to change its + * state to TO_DEL, so the rule will be deleted when periodic + * task being scheduled. + * 3) the state of old rule is TO_ADD, it means the rule hasn't + * been added to hardware, so we just delete the rule node from + * fd_rule_list directly. + * 4) the state of old rule is ADDING, it means the rule is + * being configured to hardware. We also delete the rule node + * from fd_rule_list directly, and will handle configuration + * result of old rule in hclge_fd_sync_from_add_list(). + */ + if (old_rule->state == HCLGE_FD_TO_ADD || + old_rule->state == HCLGE_FD_ADDING) { + if (test_bit(old_rule->location, hdev->fd_bmap)) { + clear_bit(old_rule->location, hdev->fd_bmap); + hdev->hclge_fd_rule_num--; + } + hlist_del(&old_rule->rule_node); + kfree(old_rule); + hclge_sync_fd_state(hdev); + return; + } + old_rule->state = HCLGE_FD_TO_DEL; + break; + default: + break; + } +} + +static struct hclge_fd_rule *hclge_find_fd_rule(struct hlist_head *hlist, + u16 location, + struct hclge_fd_rule **parent) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + + hlist_for_each_entry_safe(rule, node, hlist, rule_node) { + if (rule->location == location) + return rule; + else if (rule->location > location) + return NULL; + /* record the parent node, use to keep the nodes in fd_rule_list + * in ascend order. 
+ */ + *parent = rule; + } + + return NULL; +} + +/* insert fd rule node in ascend order according to rule->location */ +static void hclge_fd_insert_rule_node(struct hlist_head *hlist, + struct hclge_fd_rule *rule, + struct hclge_fd_rule *parent) +{ + INIT_HLIST_NODE(&rule->rule_node); + + if (parent) + hlist_add_behind(&rule->rule_node, &parent->rule_node); + else + hlist_add_head(&rule->rule_node, hlist); +} + +static int hclge_update_fd_list(struct hclge_dev *hdev, + enum HCLGE_FD_NODE_STATE state, u16 location, + struct hclge_fd_rule *new_rule) +{ + struct hlist_head *hlist = &hdev->fd_rule_list; + struct hclge_fd_rule *fd_rule, *parent = NULL; + + fd_rule = hclge_find_fd_rule(hlist, location, &parent); + if (fd_rule) { + hclge_update_fd_rule_node(hdev, fd_rule, new_rule, state); + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + hclge_task_schedule(hdev, 0); + return 0; + } + + /* if this rule is never added, unnecessary to delete */ + if (state == HCLGE_FD_TO_DEL) { + dev_err(&hdev->pdev->dev, + "failed to delete fd rule %u , it's inexistent\n", + location); + return -ENOENT; + } + + hclge_fd_insert_rule_node(hlist, new_rule, parent); + if (!test_bit(location, hdev->fd_bmap)) { + set_bit(location, hdev->fd_bmap); + hdev->hclge_fd_rule_num++; + } + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + hclge_task_schedule(hdev, 0); + + return 0; +} + +static void hclge_fd_sync_from_add_list(struct hlist_head *add_list, + struct hlist_head *hlist) +{ + struct hclge_fd_rule *rule, *new_rule, *parent = NULL; + struct hlist_node *node; + + hlist_for_each_entry_safe(rule, node, add_list, rule_node) { + new_rule = hclge_find_fd_rule(hlist, rule->location, &parent); + if (new_rule) { + /* if new_rule state is TO_ADD, it means received a new + * TO_ADD request while adding the rule to hardware, so + * ignore the adding result, keep the new rule state. + * if still ADDING, change the rule state according to + * the adding result. + */ + if (new_rule->state == HCLGE_FD_ADDING) + new_rule->state = rule->state; + + hlist_del(&rule->rule_node); + kfree(rule); + continue; + } + /* if new_rule is inexist, it means received a new TO_DEL + * request while adding fd rule to hardware. + * if adding fail, unnecessary to retry; + * if adding success, sync it to the fd_rule_list, and change + * state to TO_DEL + */ + if (rule->state == HCLGE_FD_TO_ADD) { + hlist_del(&rule->rule_node); + kfree(rule); + } else if (rule->state == HCLGE_FD_ACTIVE) { + rule->state = HCLGE_FD_TO_DEL; + hlist_del(&rule->rule_node); + hclge_fd_insert_rule_node(hlist, rule, parent); + } + } +} + +static void hclge_fd_sync_from_del_list(struct hlist_head *del_list, + struct hlist_head *hlist) +{ + struct hclge_fd_rule *rule, *new_rule, *parent = NULL; + struct hlist_node *node; + + hlist_for_each_entry_safe(rule, node, del_list, rule_node) { + new_rule = hclge_find_fd_rule(hlist, rule->location, &parent); + if (new_rule) { + /* if new_rule exists, its state must be TO_ADD, it + * means received a new TO_ADD request with same + * location while removing the rule from hardware, so + * ignore the removing result, keep the new rule state. + */ + hlist_del(&rule->rule_node); + kfree(rule); + continue; + } + /* if new_rule is inexistent, it means no new request received + * for the location while removing fd rule to hardware. 
+ * sync the fail removing fd rule nodes to the fd_rule_list; + */ + hlist_del(&rule->rule_node); + hclge_fd_insert_rule_node(hlist, rule, parent); + } +} + static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode) { struct hclge_get_fd_mode_cmd *req; @@ -5847,74 +6041,6 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev, return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple); }
-static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location) -{ - struct hclge_fd_rule *rule = NULL; - struct hlist_node *node2; - - spin_lock_bh(&hdev->fd_rule_lock); - hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) { - if (rule->location >= location) - break; - } - - spin_unlock_bh(&hdev->fd_rule_lock); - - return rule && rule->location == location; -} - -/* make sure being called after lock up with fd_rule_lock */ -static int hclge_fd_update_rule_list(struct hclge_dev *hdev, - struct hclge_fd_rule *new_rule, - u16 location, - bool is_add) -{ - struct hclge_fd_rule *rule = NULL, *parent = NULL; - struct hlist_node *node2; - - if (is_add && !new_rule) - return -EINVAL; - - hlist_for_each_entry_safe(rule, node2, - &hdev->fd_rule_list, rule_node) { - if (rule->location >= location) - break; - parent = rule; - } - - if (rule && rule->location == location) { - hlist_del(&rule->rule_node); - kfree(rule); - hdev->hclge_fd_rule_num--; - - if (!is_add) { - if (!hdev->hclge_fd_rule_num) - hdev->fd_active_type = HCLGE_FD_RULE_NONE; - clear_bit(location, hdev->fd_bmap); - - return 0; - } - } else if (!is_add) { - dev_err(&hdev->pdev->dev, - "delete fail, rule %u is inexistent\n", - location); - return -EINVAL; - } - - INIT_HLIST_NODE(&new_rule->rule_node); - - if (parent) - hlist_add_behind(&new_rule->rule_node, &parent->rule_node); - else - hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list); - - set_bit(location, hdev->fd_bmap); - hdev->hclge_fd_rule_num++; - hdev->fd_active_type = new_rule->rule_type; - - return 0; -} - static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev, struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule, u8 ip_proto) @@ -6088,33 +6214,42 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev, return 0; }
-/* make sure being called after lock up with fd_rule_lock */ static int hclge_fd_config_rule(struct hclge_dev *hdev, struct hclge_fd_rule *rule) { int ret;
- if (!rule) { + ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); + if (ret) + return ret; + + return hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); +} + +static int hclge_add_fd_entry_common(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + int ret; + + spin_lock_bh(&hdev->fd_rule_lock); + + if (hdev->fd_active_type != rule->rule_type && + (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE || + hdev->fd_active_type == HCLGE_FD_EP_ACTIVE)) { dev_err(&hdev->pdev->dev, - "The flow director rule is NULL\n"); + "mode conflict(new type %d, active type %d), please delete existent rules first\n", + rule->rule_type, hdev->fd_active_type); + spin_unlock_bh(&hdev->fd_rule_lock); return -EINVAL; }
- /* it will never fail here, so needn't to check return value */ - hclge_fd_update_rule_list(hdev, rule, rule->location, true); + hclge_clear_arfs_rules(hdev);
- ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (ret) - goto clear_rule; - - ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); - if (ret) - goto clear_rule; + ret = hclge_update_fd_list(hdev, HCLGE_FD_TO_ADD, rule->location, rule); + hdev->fd_active_type = rule->rule_type;
- return 0; + spin_unlock_bh(&hdev->fd_rule_lock);
-clear_rule: - hclge_fd_update_rule_list(hdev, rule, rule->location, false); return ret; }
@@ -6186,12 +6321,6 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, return -EOPNOTSUPP; }
- if (hclge_is_cls_flower_active(handle)) { - dev_err(&hdev->pdev->dev, - "please delete all exist cls flower rules first\n"); - return -EINVAL; - } - fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
ret = hclge_fd_check_spec(hdev, fs, &unused); @@ -6221,15 +6350,9 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, rule->action = action; rule->rule_type = HCLGE_FD_EP_ACTIVE;
- /* to avoid rule conflict, when user configure rule by ethtool, - * we need to clear all arfs rules - */ - spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); - - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); + ret = hclge_add_fd_entry_common(hdev, rule); + if (ret) + kfree(rule);
return ret; } @@ -6250,32 +6373,23 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle, if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) return -EINVAL;
- if (hclge_is_cls_flower_active(handle) || !hdev->hclge_fd_rule_num || - !hclge_fd_rule_exist(hdev, fs->location)) { + spin_lock_bh(&hdev->fd_rule_lock); + if (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE || + !hdev->hclge_fd_rule_num) { dev_err(&hdev->pdev->dev, "Delete fail, rule %u is inexistent\n", fs->location); + spin_unlock_bh(&hdev->fd_rule_lock); return -ENOENT; } - - ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, fs->location, - NULL, false); - if (ret) - return ret; - - spin_lock_bh(&hdev->fd_rule_lock); - ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false); - + ret = hclge_update_fd_list(hdev, HCLGE_FD_TO_DEL, fs->location, NULL); spin_unlock_bh(&hdev->fd_rule_lock);
return ret; }
-/* make sure being called after lock up with fd_rule_lock */ -static void hclge_del_all_fd_entries(struct hnae3_handle *handle, - bool clear_list) +static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev, + bool clear_list) { - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; struct hlist_node *node; u16 location; @@ -6289,6 +6403,7 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle, NULL, false);
if (clear_list) { + spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { hlist_del(&rule->rule_node); @@ -6298,16 +6413,25 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle, hdev->hclge_fd_rule_num = 0; bitmap_zero(hdev->fd_bmap, hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]); + spin_unlock_bh(&hdev->fd_rule_lock); } }
+static void hclge_del_all_fd_entries(struct hnae3_handle *handle, + bool clear_list) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + hclge_clear_fd_rules_in_list(hdev, clear_list); +} + static int hclge_restore_fd_entries(struct hnae3_handle *handle) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; struct hlist_node *node; - int ret;
/* Return ok here, because reset error handling will check this * return value. If error is returned here, the reset process will @@ -6322,25 +6446,11 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
spin_lock_bh(&hdev->fd_rule_lock); hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (!ret) - ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule); - - if (ret) { - dev_warn(&hdev->pdev->dev, - "Restore rule %u failed, remove it\n", - rule->location); - clear_bit(rule->location, hdev->fd_bmap); - hlist_del(&rule->rule_node); - kfree(rule); - hdev->hclge_fd_rule_num--; - } + if (rule->state == HCLGE_FD_ACTIVE) + rule->state = HCLGE_FD_TO_ADD; } - - if (hdev->hclge_fd_rule_num) - hdev->fd_active_type = HCLGE_FD_EP_ACTIVE; - spin_unlock_bh(&hdev->fd_rule_lock); + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
return 0; } @@ -6609,6 +6719,9 @@ static int hclge_get_all_rules(struct hnae3_handle *handle, return -EMSGSIZE; }
+ if (rule->state == HCLGE_FD_TO_DEL) + continue; + rule_locs[cnt] = rule->location; cnt++; } @@ -6690,7 +6803,6 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id, struct hclge_fd_rule_tuples new_tuples = {}; struct hclge_dev *hdev = vport->back; struct hclge_fd_rule *rule; - u16 tmp_queue_id; u16 bit_id; int ret;
@@ -6728,34 +6840,25 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id, return -ENOMEM; }
- set_bit(bit_id, hdev->fd_bmap); rule->location = bit_id; rule->arfs.flow_id = flow_id; rule->queue_id = queue_id; hclge_fd_build_arfs_rule(&new_tuples, rule); - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); - - if (ret) + ret = hclge_update_fd_list(hdev, HCLGE_FD_TO_ADD, + rule->location, rule); + if (ret) { + kfree(rule); + spin_unlock_bh(&hdev->fd_rule_lock); return ret; - - return rule->location; + } + hdev->fd_active_type = HCLGE_FD_ARFS_ACTIVE; + } else if (rule->queue_id != queue_id) { + rule->queue_id = queue_id; + rule->state = HCLGE_FD_TO_ADD; + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + hclge_task_schedule(hdev, 0); } - spin_unlock_bh(&hdev->fd_rule_lock); - - if (rule->queue_id == queue_id) - return rule->location; - - tmp_queue_id = rule->queue_id; - rule->queue_id = queue_id; - ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule); - if (ret) { - rule->queue_id = tmp_queue_id; - return ret; - } - return rule->location; }
@@ -6765,7 +6868,6 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev) struct hnae3_handle *handle = &hdev->vport[0].nic; struct hclge_fd_rule *rule; struct hlist_node *node; - HLIST_HEAD(del_list);
spin_lock_bh(&hdev->fd_rule_lock); if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) { @@ -6773,33 +6875,38 @@ static void hclge_rfs_filter_expire(struct hclge_dev *hdev) return; } hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + if (rule->state != HCLGE_FD_ACTIVE) + continue; if (rps_may_expire_flow(handle->netdev, rule->queue_id, rule->arfs.flow_id, rule->location)) { - hlist_del_init(&rule->rule_node); - hlist_add_head(&rule->rule_node, &del_list); - hdev->hclge_fd_rule_num--; - clear_bit(rule->location, hdev->fd_bmap); + rule->state = HCLGE_FD_TO_DEL; + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); } } spin_unlock_bh(&hdev->fd_rule_lock); - - hlist_for_each_entry_safe(rule, node, &del_list, rule_node) { - hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, - rule->location, NULL, false); - kfree(rule); - } #endif }
/* make sure being called after lock up with fd_rule_lock */ -static void hclge_clear_arfs_rules(struct hnae3_handle *handle) +static void hclge_clear_arfs_rules(struct hclge_dev *hdev) { #ifdef CONFIG_RFS_ACCEL - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; + struct hclge_fd_rule *rule; + struct hlist_node *node; + + if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) + return;
- if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE) - hclge_del_all_fd_entries(handle, true); + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + if (rule->state == HCLGE_FD_ACTIVE) { + rule->state = HCLGE_FD_TO_DEL; + } else if (rule->state == HCLGE_FD_TO_ADD || + rule->state == HCLGE_FD_ADDING) { + hlist_del(&rule->rule_node); + kfree(rule); + } + } + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); #endif }
@@ -6982,12 +7089,6 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle, struct hclge_fd_rule *rule; int ret;
- if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) { - dev_err(&hdev->pdev->dev, - "please remove all exist fd rules via ethtool first\n"); - return -EINVAL; - } - ret = hclge_check_cls_flower(hdev, cls_flower, tc); if (ret) { dev_err(&hdev->pdev->dev, @@ -7000,8 +7101,10 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle, return -ENOMEM;
ret = hclge_parse_cls_flower(hdev, cls_flower, rule); - if (ret) - goto err; + if (ret) { + kfree(rule); + return ret; + }
rule->action = HCLGE_FD_ACTION_SELECT_TC; rule->cls_flower.tc = tc; @@ -7010,22 +7113,10 @@ static int hclge_add_cls_flower(struct hnae3_handle *handle, rule->cls_flower.cookie = cls_flower->cookie; rule->rule_type = HCLGE_FD_TC_FLOWER_ACTIVE;
- spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); - - ret = hclge_fd_config_rule(hdev, rule); - - spin_unlock_bh(&hdev->fd_rule_lock); - - if (ret) { - dev_err(&hdev->pdev->dev, - "failed to add cls flower rule, ret = %d\n", ret); - goto err; - } + ret = hclge_add_fd_entry_common(hdev, rule); + if (ret) + kfree(rule);
- return 0; -err: - kfree(rule); return ret; }
@@ -7059,28 +7150,131 @@ static int hclge_del_cls_flower(struct hnae3_handle *handle, return -EINVAL; }
- ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, rule->location, - NULL, false); - if (ret) { - dev_err(&hdev->pdev->dev, - "failed to delete cls flower rule %u, ret = %d\n", - rule->location, ret); - spin_unlock_bh(&hdev->fd_rule_lock); - return ret; + ret = hclge_update_fd_list(hdev, HCLGE_FD_TO_DEL, rule->location, NULL); + + spin_unlock_bh(&hdev->fd_rule_lock); + + return ret; +} + +static void hclge_unsync_fd_list(struct hclge_dev *hdev, + struct hlist_head *hlist) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + int ret; + + hlist_for_each_entry_safe(rule, node, hlist, rule_node) { + ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, + rule->location, NULL, false); + if (ret) { + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + return; + } + hlist_del(&rule->rule_node); + kfree(rule); } +}
- ret = hclge_fd_update_rule_list(hdev, NULL, rule->location, false); - if (ret) { - dev_err(&hdev->pdev->dev, - "failed to delete cls flower rule %u in list, ret = %d\n", - rule->location, ret); - spin_unlock_bh(&hdev->fd_rule_lock); - return ret; +static void hclge_sync_fd_list(struct hclge_dev *hdev, struct hlist_head *hlist) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node; + int ret; + + hlist_for_each_entry_safe(rule, node, hlist, rule_node) { + ret = hclge_fd_config_rule(hdev, rule); + if (ret) { + set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); + return; + } + rule->state = HCLGE_FD_ACTIVE; } +} + +static void hclge_sync_fd_rule_num(struct hclge_dev *hdev) +{ + struct hclge_fd_rule *rule; + struct hlist_node *node;
+ hdev->hclge_fd_rule_num = 0; + bitmap_zero(hdev->fd_bmap, hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]); + hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) { + if (rule->state == HCLGE_FD_TO_DEL) + continue; + set_bit(rule->location, hdev->fd_bmap); + hdev->hclge_fd_rule_num++; + } + hclge_sync_fd_state(hdev); +} + +static void hclge_sync_fd_table(struct hclge_dev *hdev) +{ + struct hlist_head *hlist = &hdev->fd_rule_list; + struct hlist_head tmp_add_list, tmp_del_list; + struct hclge_fd_rule *parent_add = NULL; + struct hclge_fd_rule *parent_del = NULL; + struct hclge_fd_rule *rule, *new_rule; + struct hlist_node *node; + + if (test_and_clear_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state)) { + bool clear_list = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE; + + hclge_clear_fd_rules_in_list(hdev, clear_list); + } + + if (!test_and_clear_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state)) + return; + + INIT_HLIST_HEAD(&tmp_add_list); + INIT_HLIST_HEAD(&tmp_del_list); + + spin_lock_bh(&hdev->fd_rule_lock); + /* move the fd rule node to the tmp_add_list and tmp_del_list, then + * we can add/delete these fd rule outside the spin lock + */ + hlist_for_each_entry_safe(rule, node, hlist, rule_node) { + switch (rule->state) { + case HCLGE_FD_TO_DEL: + hlist_del(&rule->rule_node); + hclge_fd_insert_rule_node(&tmp_del_list, rule, + parent_del); + parent_del = rule; + break; + case HCLGE_FD_TO_ADD: + if (!hdev->fd_en) + break; + new_rule = kmemdup(rule, sizeof(*new_rule), GFP_ATOMIC); + if (!new_rule) + goto stop_traverse; + rule->state = HCLGE_FD_ADDING; + new_rule->state = HCLGE_FD_TO_ADD; + hclge_fd_insert_rule_node(&tmp_add_list, new_rule, + parent_add); + parent_add = new_rule; + break; + default: + break; + } + } + +stop_traverse: spin_unlock_bh(&hdev->fd_rule_lock);
- return 0; + hclge_unsync_fd_list(hdev, &tmp_del_list); + hclge_sync_fd_list(hdev, &tmp_add_list); + + /* if some fd rule were added/deleted fail, move back to the + * fd_rule_list, and retry at next time. + */ + spin_lock_bh(&hdev->fd_rule_lock); + + hclge_fd_sync_from_del_list(&tmp_del_list, hlist); + hclge_fd_sync_from_add_list(&tmp_add_list, hlist); + + hclge_sync_fd_rule_num(hdev); + + spin_unlock_bh(&hdev->fd_rule_lock); }
static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle) @@ -7120,18 +7314,15 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable) { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; - bool clear;
hdev->fd_en = enable; - clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
- if (!enable) { - spin_lock_bh(&hdev->fd_rule_lock); - hclge_del_all_fd_entries(handle, clear); - spin_unlock_bh(&hdev->fd_rule_lock); - } else { + if (!enable) + set_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state); + else hclge_restore_fd_entries(handle); - } + + hclge_task_schedule(hdev, 0); }
static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable) @@ -7602,7 +7793,7 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
set_bit(HCLGE_STATE_DOWN, &hdev->state); spin_lock_bh(&hdev->fd_rule_lock); - hclge_clear_arfs_rules(handle); + hclge_clear_arfs_rules(hdev); spin_unlock_bh(&hdev->fd_rule_lock);
/* If it is not PF reset, the firmware will disable the MAC, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 6fe7455..696bcc1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -223,6 +223,8 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_LINK_UPDATING, HCLGE_STATE_PROMISC_CHANGED, HCLGE_STATE_RST_FAIL, + HCLGE_STATE_FD_TBL_CHANGED, + HCLGE_STATE_FD_CLEAR_ALL, HCLGE_STATE_MAX };
@@ -592,6 +594,13 @@ enum HCLGE_FD_ACTION { HCLGE_FD_ACTION_SELECT_TC, };
+enum HCLGE_FD_NODE_STATE { + HCLGE_FD_TO_ADD, + HCLGE_FD_TO_DEL, + HCLGE_FD_ACTIVE, + HCLGE_FD_ADDING +}; + struct hclge_fd_key_cfg { u8 key_sel; u8 inner_sipv6_word_en; @@ -647,6 +656,7 @@ struct hclge_fd_rule { u16 vf_id; u16 location; enum HCLGE_FD_ACTIVE_RULE_TYPE rule_type; + enum HCLGE_FD_NODE_STATE state; u8 action; };
On Mon, 15 Mar 2021 20:23:47 +0800 Huazhong Tan wrote:
From: Jian Shen shenjian15@huawei.com
Currently, there are three flow director work modes in the HNS3 driver: ethtool (EP), tc flower and aRFS. The flow director rules are configured synchronously while holding a spin lock, so all the related firmware commands must also run under that spin lock.

To eliminate this limitation, configure the flow director rules asynchronously. The rules are still kept in fd_rule_list, with the states below:
TO_ADD: the rule is waiting to be added to hardware
TO_DEL: the rule is waiting to be removed from hardware
ADDING: the rule is being added to hardware
ACTIVE: the rule has already been added to hardware

When a new request to add or delete a flow director rule is received, check whether a rule already exists at that location, update the rule content and state, and schedule the service task to finish the configuration.
Signed-off-by: Jian Shen shenjian15@huawei.com
Signed-off-by: Huazhong Tan tanhuazhong@huawei.com
How is the application supposed to know if the ethtool rule was already installed or installation is still pending?
With the firmware bloat on all devices this sort of async mechanism seems to be popping up in more and more drivers but IMHO we shouldn't weaken the semantics without amending the kernel <> user space API.
On 2021/3/16 4:00, Jakub Kicinski wrote:
On Mon, 15 Mar 2021 20:23:47 +0800 Huazhong Tan wrote:
From: Jian Shen shenjian15@huawei.com
Currently, there are three flow director work modes in the HNS3 driver: ethtool (EP), tc flower and aRFS. The flow director rules are configured synchronously while holding a spin lock, so all the related firmware commands must also run under that spin lock.

To eliminate this limitation, configure the flow director rules asynchronously. The rules are still kept in fd_rule_list, with the states below:
TO_ADD: the rule is waiting to be added to hardware
TO_DEL: the rule is waiting to be removed from hardware
ADDING: the rule is being added to hardware
ACTIVE: the rule has already been added to hardware

When a new request to add or delete a flow director rule is received, check whether a rule already exists at that location, update the rule content and state, and schedule the service task to finish the configuration.
Signed-off-by: Jian Shen shenjian15@huawei.com
Signed-off-by: Huazhong Tan tanhuazhong@huawei.com
How is the application supposed to know if the ethtool rule was already installed or installation is still pending?
Yes, the application has no way to know whether the rule is still pending or already installed.

The primary motivation is to move the aRFS rule configuration out of the IO path. The ethtool path was made asynchronous as well to keep the two consistent. We considered this before, and judged that the time window between the two states is very small.

How about keeping aRFS asynchronous, and ethtool synchronous?
With the firmware bloat on all devices this sort of async mechanism seems to be popping up in more and more drivers but IMHO we shouldn't weaken the semantics without amending the kernel <> user space API.
On Wed, 17 Mar 2021 09:47:45 +0800 Huazhong Tan wrote:
On 2021/3/16 4:00, Jakub Kicinski wrote:
On Mon, 15 Mar 2021 20:23:47 +0800 Huazhong Tan wrote:
From: Jian Shen shenjian15@huawei.com
Currently, there are three flow director work modes in the HNS3 driver: EP (ethtool), tc flower and aRFS. The flow director rules are configured synchronously and require holding a spin lock. With this limitation, all the commands sent to the firmware also need to be issued under the spin lock.
To eliminate the limitation, configure flow director rules asynchronously. The rules are still kept in the fd_rule_list, with the states below:
TO_ADD: the rule is waiting to be added to hardware
TO_DEL: the rule is waiting to be removed from hardware
ADDING: the rule is being added to hardware
ACTIVE: the rule is already added to hardware
When receiving a new request to add or delete a flow director rule, check whether the rule location already exists, update the rule content and state, and schedule the service task to finish the configuration.
Signed-off-by: Jian Shen shenjian15@huawei.com Signed-off-by: Huazhong Tan tanhuazhong@huawei.com
How is the application supposed to know if the ethtool rule was already installed or installation is still pending?
Yes, there is no way for the application to know whether the rule is still pending or already installed.
The original motivation is to move the aRFS rule configuration out of the IO path. To keep things consistent, the ethtool path was handled the same way. We thought about it before, but considered that the time window between the two states is very small.
How about keeping aRFS asynchronous, and the ethtool path synchronous?
That'd be fine by me.
From: Jian Shen shenjian15@huawei.com
Since only the PF driver can configure flow director rules, it's better to call hclge_del_all_fd_entries() directly in the hclge layer, rather than calling hns3_del_all_fd_entries() in the hns3 layer. Then ae_algo->ops.del_all_fd_entries can be removed.
Signed-off-by: Jian Shen shenjian15@huawei.com Signed-off-by: Huazhong Tan tanhuazhong@huawei.com --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 -- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 10 ---------- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 10 +++------- 3 files changed, 3 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 3a6bf1a..01d6bfc 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -612,8 +612,6 @@ struct hnae3_ae_ops { struct ethtool_rxnfc *cmd); int (*del_fd_entry)(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd); - void (*del_all_fd_entries)(struct hnae3_handle *handle, - bool clear_list); int (*get_fd_rule_cnt)(struct hnae3_handle *handle, struct ethtool_rxnfc *cmd); int (*get_fd_rule_info)(struct hnae3_handle *handle, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index bf4302a..44b775e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -4143,14 +4143,6 @@ static void hns3_uninit_phy(struct net_device *netdev) h->ae_algo->ops->mac_disconnect_phy(h); }
-static void hns3_del_all_fd_rules(struct net_device *netdev, bool clear_list) -{ - struct hnae3_handle *h = hns3_get_handle(netdev); - - if (h->ae_algo->ops->del_all_fd_entries) - h->ae_algo->ops->del_all_fd_entries(h, clear_list); -} - static int hns3_client_start(struct hnae3_handle *handle) { if (!handle->ae_algo->ops->client_start) @@ -4337,8 +4329,6 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
hns3_nic_uninit_irq(priv);
- hns3_del_all_fd_rules(netdev, true); - hns3_clear_all_ring(handle, true);
hns3_nic_uninit_vector_data(priv); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index e35ff6e..3f67893 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -6417,13 +6417,9 @@ static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev, } }
-static void hclge_del_all_fd_entries(struct hnae3_handle *handle, - bool clear_list) +static void hclge_del_all_fd_entries(struct hclge_dev *hdev) { - struct hclge_vport *vport = hclge_get_vport(handle); - struct hclge_dev *hdev = vport->back; - - hclge_clear_fd_rules_in_list(hdev, clear_list); + hclge_clear_fd_rules_in_list(hdev, true); }
static int hclge_restore_fd_entries(struct hnae3_handle *handle) @@ -11518,6 +11514,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_misc_affinity_teardown(hdev); hclge_state_uninit(hdev); hclge_uninit_mac_table(hdev); + hclge_del_all_fd_entries(hdev);
if (mac->phydev) mdiobus_unregister(mac->mdio_bus); @@ -12341,7 +12338,6 @@ static const struct hnae3_ae_ops hclge_ops = { .get_link_mode = hclge_get_link_mode, .add_fd_entry = hclge_add_fd_entry, .del_fd_entry = hclge_del_fd_entry, - .del_all_fd_entries = hclge_del_all_fd_entries, .get_fd_rule_cnt = hclge_get_fd_rule_cnt, .get_fd_rule_info = hclge_get_fd_rule_info, .get_fd_all_rules = hclge_get_all_rules,
From: Jian Shen shenjian15@huawei.com
For DEVICE_VERSION_V3, the hardware supports matching specified data at a specified offset of the packet payload. Each layer can have one offset, and the offset can't be masked when configuring a flow director rule by ethtool command. The layer is chosen according to the flow-type: ether for L2, ip4/ip6 for L3, and tcp4/tcp6/udp4/udp6 for L4. For example, tcp4/tcp6/udp4/udp6 rules share the same user-def offset, but each rule can have its own user-def value.
The user-def field of the ethtool -N/U command is 64 bits long. In the HNS3 driver, bits 0~15 are used for the user-def value, and bits 32~47 for the user-def offset.
Signed-off-by: Jian Shen shenjian15@huawei.com Signed-off-by: Huazhong Tan tanhuazhong@huawei.com --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 14 + .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 301 ++++++++++++++++++++- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 36 +++ 3 files changed, 337 insertions(+), 14 deletions(-)
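To make the layout above concrete, here is a small stand-alone example (the numbers are made up): bits 0~15 of the 64-bit user-def value carry the data to match and bits 32~47 carry the payload offset, so a command-line value of roughly user-def 0x000000400000abcd means "match 0xabcd at payload offset 64".

#include <stdint.h>
#include <stdio.h>

/* bits 0~15: data to match; bits 32~47: payload offset (per the commit log) */
#define SKETCH_USER_DEF_DATA(v)		((uint16_t)((v) & 0xffffULL))
#define SKETCH_USER_DEF_OFFSET(v)	((uint16_t)(((v) >> 32) & 0xffffULL))

int main(void)
{
	/* made-up example: match 0xabcd at payload offset 64 */
	uint64_t user_def = ((uint64_t)64 << 32) | 0xabcdULL;

	printf("user-def = 0x%016llx -> offset %u, data 0x%04x\n",
	       (unsigned long long)user_def,
	       SKETCH_USER_DEF_OFFSET(user_def),
	       SKETCH_USER_DEF_DATA(user_def));
	return 0;
}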
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 804f4c8..565c5aa 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -243,6 +243,7 @@ enum hclge_opcode_type { HCLGE_OPC_FD_KEY_CONFIG = 0x1202, HCLGE_OPC_FD_TCAM_OP = 0x1203, HCLGE_OPC_FD_AD_OP = 0x1204, + HCLGE_OPC_FD_USER_DEF_OP = 0x1207,
/* MDIO command */ HCLGE_OPC_MDIO_CONFIG = 0x1900, @@ -1082,6 +1083,19 @@ struct hclge_fd_ad_config_cmd { u8 rsv2[8]; };
+#define HCLGE_FD_USER_DEF_OFT_S 0 +#define HCLGE_FD_USER_DEF_OFT_M GENMASK(14, 0) +#define HCLGE_FD_USER_DEF_EN_B 15 +struct hclge_fd_user_def_cfg_cmd { + __le16 ol2_cfg; + __le16 l2_cfg; + __le16 ol3_cfg; + __le16 l3_cfg; + __le16 ol4_cfg; + __le16 l4_cfg; + u8 rsv[12]; +}; + struct hclge_get_m7_bd_cmd { __le32 bd_num; u8 rsv[20]; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 3f67893..940b926 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -414,7 +414,9 @@ static const struct key_info tuple_key_info[] = { { INNER_ETH_TYPE, 16, KEY_OPT_LE16, offsetof(struct hclge_fd_rule, tuples.ether_proto), offsetof(struct hclge_fd_rule, tuples_mask.ether_proto) }, - { INNER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { INNER_L2_RSV, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.l2_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l2_user_def) }, { INNER_IP_TOS, 8, KEY_OPT_U8, offsetof(struct hclge_fd_rule, tuples.ip_tos), offsetof(struct hclge_fd_rule, tuples_mask.ip_tos) }, @@ -427,14 +429,18 @@ static const struct key_info tuple_key_info[] = { { INNER_DST_IP, 32, KEY_OPT_IP, offsetof(struct hclge_fd_rule, tuples.dst_ip), offsetof(struct hclge_fd_rule, tuples_mask.dst_ip) }, - { INNER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 }, + { INNER_L3_RSV, 16, KEY_OPT_LE16, + offsetof(struct hclge_fd_rule, tuples.l3_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l3_user_def) }, { INNER_SRC_PORT, 16, KEY_OPT_LE16, offsetof(struct hclge_fd_rule, tuples.src_port), offsetof(struct hclge_fd_rule, tuples_mask.src_port) }, { INNER_DST_PORT, 16, KEY_OPT_LE16, offsetof(struct hclge_fd_rule, tuples.dst_port), offsetof(struct hclge_fd_rule, tuples_mask.dst_port) }, - { INNER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 }, + { INNER_L4_RSV, 32, KEY_OPT_LE32, + offsetof(struct hclge_fd_rule, tuples.l4_user_def), + offsetof(struct hclge_fd_rule, tuples_mask.l4_user_def) }, };
static int hclge_mac_update_stats_defective(struct hclge_dev *hdev) @@ -5256,15 +5262,75 @@ static void hclge_fd_insert_rule_node(struct hlist_head *hlist, hlist_add_head(&rule->rule_node, hlist); }
+static int hclge_fd_inc_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + struct hclge_fd_user_def_info *info; + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return 0; + + /* for valid layer is start from 1, so need minus 1 to get the cfg */ + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + info = &rule->ep.user_def; + + if (cfg->ref_cnt && cfg->offset != info->offset) { + dev_err(&hdev->pdev->dev, + "No available offset for layer%d fd rule, each layer only support one user def offset.\n", + info->layer + 1); + return -ENOSPC; + } + + if (!cfg->ref_cnt) { + cfg->offset = info->offset; + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + } + cfg->ref_cnt++; + + return 0; +} + +static void hclge_fd_dec_user_def_refcnt(struct hclge_dev *hdev, + struct hclge_fd_rule *rule) +{ + struct hclge_fd_user_def_cfg *cfg; + + if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE || + rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE) + return; + + /* for valid layer is start from 1, so need minus 1 to get the cfg */ + cfg = &hdev->fd_cfg.user_def_cfg[rule->ep.user_def.layer - 1]; + + if (!cfg->ref_cnt) + return; + + cfg->ref_cnt--; + if (!cfg->ref_cnt) { + cfg->offset = 0; + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + } +} + static int hclge_update_fd_list(struct hclge_dev *hdev, enum HCLGE_FD_NODE_STATE state, u16 location, struct hclge_fd_rule *new_rule) { struct hlist_head *hlist = &hdev->fd_rule_list; struct hclge_fd_rule *fd_rule, *parent = NULL; + int ret;
fd_rule = hclge_find_fd_rule(hlist, location, &parent); if (fd_rule) { + hclge_fd_dec_user_def_refcnt(hdev, fd_rule); + if (state == HCLGE_FD_TO_ADD) { + ret = hclge_fd_inc_user_def_refcnt(hdev, new_rule); + if (ret) + return ret; + } + hclge_update_fd_rule_node(hdev, fd_rule, new_rule, state); set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state); hclge_task_schedule(hdev, 0); @@ -5279,6 +5345,10 @@ static int hclge_update_fd_list(struct hclge_dev *hdev, return -ENOENT; }
+ ret = hclge_fd_inc_user_def_refcnt(hdev, new_rule); + if (ret) + return ret; + hclge_fd_insert_rule_node(hlist, new_rule, parent); if (!test_bit(location, hdev->fd_bmap)) { set_bit(location, hdev->fd_bmap); @@ -5434,6 +5504,53 @@ static int hclge_set_fd_key_config(struct hclge_dev *hdev, return ret; }
+static int hclge_fd_set_user_def_cmd(struct hclge_dev *hdev, + struct hclge_fd_user_def_cfg *cfg) +{ + struct hclge_fd_user_def_cfg_cmd *req; + struct hclge_desc desc; + u16 data = 0; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_USER_DEF_OP, false); + + req = (struct hclge_fd_user_def_cfg_cmd *)desc.data; + + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[0].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[0].offset); + req->ol2_cfg = cpu_to_le16(data); + + data = 0; + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[1].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[1].offset); + req->ol3_cfg = cpu_to_le16(data); + + data = 0; + hnae3_set_bit(data, HCLGE_FD_USER_DEF_EN_B, cfg[2].ref_cnt > 0); + hnae3_set_field(data, HCLGE_FD_USER_DEF_OFT_M, + HCLGE_FD_USER_DEF_OFT_S, cfg[2].offset); + req->ol4_cfg = cpu_to_le16(data); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to set fd user def data, ret= %d\n", ret); + return ret; +} + +static void hclge_fd_disable_user_def(struct hclge_dev *hdev) +{ + struct hclge_fd_user_def_cfg *cfg = hdev->fd_cfg.user_def_cfg; + + spin_lock_bh(&hdev->fd_rule_lock); + memset(cfg, 0, sizeof(hdev->fd_cfg.user_def_cfg)); + spin_unlock_bh(&hdev->fd_rule_lock); + + hclge_fd_set_user_def_cmd(hdev, cfg); +} + static int hclge_init_fd_config(struct hclge_dev *hdev) { #define LOW_2_WORDS 0x03 @@ -5474,9 +5591,12 @@ static int hclge_init_fd_config(struct hclge_dev *hdev) BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
/* If use max 400bit key, we can support tuples for ether type */ - if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) + if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) { key_cfg->tuple_active |= BIT(INNER_DST_MAC) | BIT(INNER_SRC_MAC); + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) + key_cfg->tuple_active |= HCLGE_FD_TUPLE_USER_DEF_TUPLES; + }
/* roce_type is used to filter roce frames * dst_vport is used to specify the rule @@ -5970,9 +6090,98 @@ static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev, return 0; }
+static int hclge_fd_get_user_def_layer(u32 flow_type, u32 *unused_tuple, + struct hclge_fd_user_def_info *info) +{ + switch (flow_type) { + case ETHER_FLOW: + info->layer = HCLGE_FD_USER_DEF_L2; + *unused_tuple &= ~BIT(INNER_L2_RSV); + break; + case IP_USER_FLOW: + case IPV6_USER_FLOW: + info->layer = HCLGE_FD_USER_DEF_L3; + *unused_tuple &= ~BIT(INNER_L3_RSV); + break; + case TCP_V4_FLOW: + case UDP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V6_FLOW: + info->layer = HCLGE_FD_USER_DEF_L4; + *unused_tuple &= ~BIT(INNER_L4_RSV); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static bool hclge_fd_is_user_def_all_masked(struct ethtool_rx_flow_spec *fs) +{ + return be32_to_cpu(fs->m_ext.data[1] | fs->m_ext.data[0]) == 0; +} + +static int hclge_fd_parse_user_def_field(struct hclge_dev *hdev, + struct ethtool_rx_flow_spec *fs, + u32 *unused_tuple, + struct hclge_fd_user_def_info *info) +{ + u32 tuple_active = hdev->fd_cfg.key_cfg[HCLGE_FD_STAGE_1].tuple_active; + u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); + u16 data, offset, data_mask, offset_mask; + int ret; + + info->layer = HCLGE_FD_USER_DEF_NONE; + *unused_tuple |= HCLGE_FD_TUPLE_USER_DEF_TUPLES; + + if (!(fs->flow_type & FLOW_EXT) || hclge_fd_is_user_def_all_masked(fs)) + return 0; + + /* user-def data from ethtool is 64 bit value, the bit0~15 is used + * for data, and bit32~47 is used for offset. + */ + data = be32_to_cpu(fs->h_ext.data[1]) & HCLGE_FD_USER_DEF_DATA; + data_mask = be32_to_cpu(fs->m_ext.data[1]) & HCLGE_FD_USER_DEF_DATA; + offset = be32_to_cpu(fs->h_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET; + offset_mask = be32_to_cpu(fs->m_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET; + + if (!(tuple_active & HCLGE_FD_TUPLE_USER_DEF_TUPLES)) { + dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n"); + return -EOPNOTSUPP; + } + + if (offset > HCLGE_FD_MAX_USER_DEF_OFFSET) { + dev_err(&hdev->pdev->dev, + "user-def offset[%u] should be no more than %u\n", + offset, HCLGE_FD_MAX_USER_DEF_OFFSET); + return -EINVAL; + } + + if (offset_mask != HCLGE_FD_USER_DEF_OFFSET_UNMASK) { + dev_err(&hdev->pdev->dev, "user-def offset can't be masked\n"); + return -EINVAL; + } + + ret = hclge_fd_get_user_def_layer(flow_type, unused_tuple, info); + if (ret) { + dev_err(&hdev->pdev->dev, + "unsupported flow type for user-def bytes, ret = %d\n", + ret); + return ret; + } + + info->data = data; + info->data_mask = data_mask; + info->offset = offset; + + return 0; +} + static int hclge_fd_check_spec(struct hclge_dev *hdev, struct ethtool_rx_flow_spec *fs, - u32 *unused_tuple) + u32 *unused_tuple, + struct hclge_fd_user_def_info *info) { u32 flow_type; int ret; @@ -5985,11 +6194,9 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev, return -EINVAL; }
- if ((fs->flow_type & FLOW_EXT) && - (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) { - dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n"); - return -EOPNOTSUPP; - } + ret = hclge_fd_parse_user_def_field(hdev, fs, unused_tuple, info); + if (ret) + return ret;
flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT); switch (flow_type) { @@ -6163,9 +6370,33 @@ static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev, rule->tuples_mask.ether_proto = be16_to_cpu(fs->m_u.ether_spec.h_proto); }
+static void hclge_fd_get_user_def_tuple(struct hclge_fd_user_def_info *info, + struct hclge_fd_rule *rule) +{ + switch (info->layer) { + case HCLGE_FD_USER_DEF_L2: + rule->tuples.l2_user_def = info->data; + rule->tuples_mask.l2_user_def = info->data_mask; + break; + case HCLGE_FD_USER_DEF_L3: + rule->tuples.l3_user_def = info->data; + rule->tuples_mask.l3_user_def = info->data_mask; + break; + case HCLGE_FD_USER_DEF_L4: + rule->tuples.l4_user_def = (u32)info->data << 16; + rule->tuples_mask.l4_user_def = (u32)info->data_mask << 16; + break; + default: + break; + } + + rule->ep.user_def = *info; +} + static int hclge_fd_get_tuple(struct hclge_dev *hdev, struct ethtool_rx_flow_spec *fs, - struct hclge_fd_rule *rule) + struct hclge_fd_rule *rule, + struct hclge_fd_user_def_info *info) { u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
@@ -6204,6 +6435,7 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev, if (fs->flow_type & FLOW_EXT) { rule->tuples.vlan_tag1 = be16_to_cpu(fs->h_ext.vlan_tci); rule->tuples_mask.vlan_tag1 = be16_to_cpu(fs->m_ext.vlan_tci); + hclge_fd_get_user_def_tuple(info, rule); }
if (fs->flow_type & FLOW_MAC_EXT) { @@ -6302,6 +6534,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + struct hclge_fd_user_def_info info; u16 dst_vport_id = 0, q_index = 0; struct ethtool_rx_flow_spec *fs; struct hclge_fd_rule *rule; @@ -6323,7 +6556,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
- ret = hclge_fd_check_spec(hdev, fs, &unused); + ret = hclge_fd_check_spec(hdev, fs, &unused, &info); if (ret) return ret;
@@ -6336,7 +6569,7 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle, if (!rule) return -ENOMEM;
- ret = hclge_fd_get_tuple(hdev, fs, rule); + ret = hclge_fd_get_tuple(hdev, fs, rule, &info); if (ret) { kfree(rule); return ret; @@ -6420,6 +6653,7 @@ static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev, static void hclge_del_all_fd_entries(struct hclge_dev *hdev) { hclge_clear_fd_rules_in_list(hdev, true); + hclge_fd_disable_user_def(hdev); }
static int hclge_restore_fd_entries(struct hnae3_handle *handle) @@ -6596,6 +6830,24 @@ static void hclge_fd_get_ether_info(struct hclge_fd_rule *rule, 0 : cpu_to_be16(rule->tuples_mask.ether_proto); }
+static void hclge_fd_get_user_def_info(struct ethtool_rx_flow_spec *fs, + struct hclge_fd_rule *rule) +{ + if ((rule->unused_tuple & HCLGE_FD_TUPLE_USER_DEF_TUPLES) == + HCLGE_FD_TUPLE_USER_DEF_TUPLES) { + fs->h_ext.data[0] = 0; + fs->h_ext.data[1] = 0; + fs->m_ext.data[0] = 0; + fs->m_ext.data[1] = 0; + } else { + fs->h_ext.data[0] = cpu_to_be32(rule->ep.user_def.offset); + fs->h_ext.data[1] = cpu_to_be32(rule->ep.user_def.data); + fs->m_ext.data[0] = + cpu_to_be32(HCLGE_FD_USER_DEF_OFFSET_UNMASK); + fs->m_ext.data[1] = cpu_to_be32(rule->ep.user_def.data_mask); + } +} + static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs, struct hclge_fd_rule *rule) { @@ -6604,6 +6856,8 @@ static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs, fs->m_ext.vlan_tci = rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ? 0 : cpu_to_be16(rule->tuples_mask.vlan_tag1); + + hclge_fd_get_user_def_info(fs, rule); }
if (fs->flow_type & FLOW_MAC_EXT) { @@ -7204,6 +7458,23 @@ static void hclge_sync_fd_rule_num(struct hclge_dev *hdev) hclge_sync_fd_state(hdev); }
+static void hclge_sync_fd_user_def_cfg(struct hclge_dev *hdev) +{ + struct hclge_fd_user_def_cfg cfg[HCLGE_FD_USER_DEF_LAYER_NUM]; + int ret; + + if (!test_and_clear_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state)) + return; + + spin_lock_bh(&hdev->fd_rule_lock); + memcpy(cfg, hdev->fd_cfg.user_def_cfg, sizeof(cfg)); + spin_unlock_bh(&hdev->fd_rule_lock); + + ret = hclge_fd_set_user_def_cmd(hdev, cfg); + if (ret) + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); +} + static void hclge_sync_fd_table(struct hclge_dev *hdev) { struct hlist_head *hlist = &hdev->fd_rule_list; @@ -7219,6 +7490,8 @@ static void hclge_sync_fd_table(struct hclge_dev *hdev) hclge_clear_fd_rules_in_list(hdev, clear_list); }
+ hclge_sync_fd_user_def_cfg(hdev); + if (!test_and_clear_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state)) return;
@@ -9825,7 +10098,7 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev) hclge_restore_mac_table_common(vport); hclge_restore_vport_vlan_table(vport); set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state); - + set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); hclge_restore_fd_entries(handle); }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 696bcc1..2d1f7f8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -225,6 +225,7 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_RST_FAIL, HCLGE_STATE_FD_TBL_CHANGED, HCLGE_STATE_FD_CLEAR_ALL, + HCLGE_STATE_FD_USER_DEF_CHANGED, HCLGE_STATE_MAX };
@@ -538,6 +539,9 @@ enum HCLGE_FD_TUPLE { MAX_TUPLE, };
+#define HCLGE_FD_TUPLE_USER_DEF_TUPLES \ + (BIT(INNER_L2_RSV) | BIT(INNER_L3_RSV) | BIT(INNER_L4_RSV)) + enum HCLGE_FD_META_DATA { PACKET_TYPE_ID, IP_FRAGEMENT, @@ -572,6 +576,11 @@ struct key_info { #define MAX_KEY_BYTES (MAX_KEY_DWORDS * 4) #define MAX_META_DATA_LENGTH 32
+#define HCLGE_FD_MAX_USER_DEF_OFFSET 9000 +#define HCLGE_FD_USER_DEF_DATA GENMASK(15, 0) +#define HCLGE_FD_USER_DEF_OFFSET GENMASK(15, 0) +#define HCLGE_FD_USER_DEF_OFFSET_UNMASK GENMASK(15, 0) + /* assigned by firmware, the real filter number for each pf may be less */ #define MAX_FD_FILTER_NUM 4096 #define HCLGE_ARFS_EXPIRE_INTERVAL 5UL @@ -601,6 +610,26 @@ enum HCLGE_FD_NODE_STATE { HCLGE_FD_ADDING };
+enum HCLGE_FD_USER_DEF_LAYER { + HCLGE_FD_USER_DEF_NONE, + HCLGE_FD_USER_DEF_L2, + HCLGE_FD_USER_DEF_L3, + HCLGE_FD_USER_DEF_L4, +}; + +#define HCLGE_FD_USER_DEF_LAYER_NUM 3 +struct hclge_fd_user_def_cfg { + u16 ref_cnt; + u16 offset; +}; + +struct hclge_fd_user_def_info { + enum HCLGE_FD_USER_DEF_LAYER layer; + u16 data; + u16 data_mask; + u16 offset; +}; + struct hclge_fd_key_cfg { u8 key_sel; u8 inner_sipv6_word_en; @@ -617,6 +646,7 @@ struct hclge_fd_cfg { u32 rule_num[MAX_STAGE_NUM]; /* rule entry number */ u16 cnt_num[MAX_STAGE_NUM]; /* rule hit counter number */ struct hclge_fd_key_cfg key_cfg[MAX_STAGE_NUM]; + struct hclge_fd_user_def_cfg user_def_cfg[HCLGE_FD_USER_DEF_LAYER_NUM]; };
#define IPV4_INDEX 3 @@ -633,6 +663,9 @@ struct hclge_fd_rule_tuples { u16 dst_port; u16 vlan_tag1; u16 ether_proto; + u16 l2_user_def; + u16 l3_user_def; + u32 l4_user_def; u8 ip_tos; u8 ip_proto; }; @@ -651,6 +684,9 @@ struct hclge_fd_rule { struct { u16 flow_id; /* only used for arfs */ } arfs; + struct { + struct hclge_fd_user_def_info user_def; + } ep; }; u16 queue_id; u16 vf_id;
From: Jian Shen shenjian15@huawei.com
For device version V3, the hardware supports queue bonding, which can identify the tuple information of a TCP stream and create flow director rules automatically, in order to keep the tx and rx packets of the stream in the same queue pair. The driver sets the FD_ADD field of the TX BD for a TCP SYN packet, and sets the FD_DEL field for a TCP FIN or RST packet. The hardware creates or removes an fd rule according to the TX BD, and it also supports aging out a rule that has not been hit for a long time.
The queue bonding mode is disabled by default, and can be enabled/disabled with the ethtool priv-flags command.
Signed-off-by: Jian Shen shenjian15@huawei.com Signed-off-by: Huazhong Tan tanhuazhong@huawei.com --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 7 ++ drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c | 7 +- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 81 +++++++++++++- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 14 ++- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 13 ++- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 2 + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 7 ++ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 119 ++++++++++++++++++++- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 3 + 9 files changed, 244 insertions(+), 9 deletions(-)
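As a quick illustration of the TX BD encoding added by this patch: the FD_OP field sits in bits 21:20 of the renamed paylen_fdop_ol4cs word, and within that 2-bit field bit 1 requests a rule add (SYN) while bit 0 requests a rule delete (FIN/RST). The constants below are copied from the hns3_enet.h hunk in this patch; the packing helper and the printed values are only for illustration. Once the capability is reported, the mode itself is toggled through the new "qb_enable" private flag, i.e. roughly: ethtool --set-priv-flags <dev> qb_enable on.

#include <stdint.h>
#include <stdio.h>

/* values copied from the hns3_enet.h hunk in this patch */
#define HNS3_TXD_FD_ADD_B	1
#define HNS3_TXD_FD_DEL_B	0
#define HNS3_TXD_FD_OP_S	20

/* illustrative helper: pack fd_op into the paylen_fdop_ol4cs word */
static uint32_t sketch_pack_fd_op(uint32_t paylen, uint8_t fd_op)
{
	return paylen | ((uint32_t)fd_op << HNS3_TXD_FD_OP_S);
}

int main(void)
{
	uint8_t add_op = 1U << HNS3_TXD_FD_ADD_B;	/* TCP SYN: ask hw to add a rule */
	uint8_t del_op = 1U << HNS3_TXD_FD_DEL_B;	/* TCP FIN/RST: ask hw to delete it */
	uint32_t paylen = 1500;				/* arbitrary payload length */

	printf("add: 0x%08x  del: 0x%08x\n",
	       sketch_pack_fd_op(paylen, add_op),
	       sketch_pack_fd_op(paylen, del_op));
	return 0;
}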
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 01d6bfc..799c1e5 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -470,6 +470,10 @@ struct hnae3_ae_dev { * Check if any cls flower rule exist * dbg_read_cmd * Execute debugfs read command. + * request_flush_qb_config + * Request to update queue bonding configuration + * query_fd_qb_state + * Query whether hw queue bonding enabled */ struct hnae3_ae_ops { int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev); @@ -655,6 +659,8 @@ struct hnae3_ae_ops { struct ethtool_link_ksettings *cmd); int (*set_phy_link_ksettings)(struct hnae3_handle *handle, const struct ethtool_link_ksettings *cmd); + void (*request_flush_qb_config)(struct hnae3_handle *handle); + bool (*query_fd_qb_state)(struct hnae3_handle *handle); };
struct hnae3_dcb_ops { @@ -743,6 +749,7 @@ struct hnae3_roce_private_info {
enum hnae3_pflag { HNAE3_PFLAG_LIMIT_PROMISC, + HNAE3_PFLAG_FD_QB_ENABLE, HNAE3_PFLAG_MAX };
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 9d702bd..194896c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -243,8 +243,8 @@ static int hns3_dbg_bd_info(struct hnae3_handle *h, const char *cmd_buf) dev_info(dev, "(TX)vlan_tag: %u\n", le16_to_cpu(tx_desc->tx.outer_vlan_tag)); dev_info(dev, "(TX)tv: %u\n", le16_to_cpu(tx_desc->tx.tv)); - dev_info(dev, "(TX)paylen_ol4cs: %u\n", - le32_to_cpu(tx_desc->tx.paylen_ol4cs)); + dev_info(dev, "(TX)paylen_fdop_ol4cs: %u\n", + le32_to_cpu(tx_desc->tx.paylen_fdop_ol4cs)); dev_info(dev, "(TX)vld_ra_ri: %u\n", le16_to_cpu(tx_desc->tx.bdtp_fe_sc_vld_ra_ri)); dev_info(dev, "(TX)mss_hw_csum: %u\n", mss_hw_csum); @@ -367,6 +367,9 @@ static void hns3_dbg_dev_caps(struct hnae3_handle *h) "yes" : "no"); dev_info(&h->pdev->dev, "support imp-controlled PHY: %s\n", test_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, caps) ? "yes" : "no"); + dev_info(&h->pdev->dev, "support QB: %s\n", + test_bit(HNAE3_DEV_SUPPORT_QB_B, ae_dev->caps) ? + "yes" : "no"); }
static void hns3_dbg_dev_specs(struct hnae3_handle *h) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 44b775e..76dcf82 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -1061,6 +1061,73 @@ static int hns3_handle_vtags(struct hns3_enet_ring *tx_ring, return 0; }
+static bool hns3_query_fd_qb_state(struct hnae3_handle *handle) +{ + const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + + if (!test_bit(HNAE3_PFLAG_FD_QB_ENABLE, &handle->priv_flags)) + return false; + + if (!ops->query_fd_qb_state) + return false; + + return ops->query_fd_qb_state(handle); +} + +/* fd_op is the field of tx bd indicates hw whether to add or delete + * a qb rule or do nothing. + */ +static u8 hns3_fd_qb_handle(struct hns3_enet_ring *ring, struct sk_buff *skb) +{ + struct hnae3_handle *handle = ring->tqp->handle; + union l4_hdr_info l4; + union l3_hdr_info l3; + u8 l4_proto_tmp = 0; + __be16 frag_off; + u8 ip_version; + u8 fd_op = 0; + + if (!hns3_query_fd_qb_state(handle)) + return 0; + + if (skb->encapsulation) { + ip_version = inner_ip_hdr(skb)->version; + l3.hdr = skb_inner_network_header(skb); + l4.hdr = skb_inner_transport_header(skb); + } else { + ip_version = ip_hdr(skb)->version; + l3.hdr = skb_network_header(skb); + l4.hdr = skb_transport_header(skb); + } + + if (ip_version == IP_VERSION_IPV6) { + unsigned char *exthdr; + + exthdr = l3.hdr + sizeof(*l3.v6); + l4_proto_tmp = l3.v6->nexthdr; + if (l4.hdr != exthdr) + ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto_tmp, &frag_off); + } else if (ip_version == IP_VERSION_IPV4) { + l4_proto_tmp = l3.v4->protocol; + } + + if (l4_proto_tmp != IPPROTO_TCP) + return 0; + + ring->fd_qb_tx_sample++; + if (l4.tcp->fin || l4.tcp->rst) { + hnae3_set_bit(fd_op, HNS3_TXD_FD_DEL_B, 1); + ring->fd_qb_tx_sample = 0; + } else if (l4.tcp->syn || + ring->fd_qb_tx_sample >= HNS3_FD_QB_FORCE_CNT_MAX) { + hnae3_set_bit(fd_op, HNS3_TXD_FD_ADD_B, 1); + ring->fd_qb_tx_sample = 0; + } + + return fd_op; +} + /* check if the hardware is capable of checksum offloading */ static bool hns3_check_hw_tx_csum(struct sk_buff *skb) { @@ -1080,12 +1147,13 @@ static bool hns3_check_hw_tx_csum(struct sk_buff *skb) static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, struct sk_buff *skb, struct hns3_desc *desc) { + u32 paylen_fdop_ol4cs = skb->len; u32 ol_type_vlan_len_msec = 0; - u32 paylen_ol4cs = skb->len; u32 type_cs_vlan_tso = 0; u16 mss_hw_csum = 0; u16 inner_vtag = 0; u16 out_vtag = 0; + u8 fd_op; int ret;
ret = hns3_handle_vtags(ring, skb); @@ -1141,7 +1209,7 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, return ret; }
- ret = hns3_set_tso(skb, &paylen_ol4cs, &mss_hw_csum, + ret = hns3_set_tso(skb, &paylen_fdop_ol4cs, &mss_hw_csum, &type_cs_vlan_tso); if (unlikely(ret < 0)) { u64_stats_update_begin(&ring->syncp); @@ -1152,11 +1220,15 @@ static int hns3_fill_skb_desc(struct hns3_enet_ring *ring, }
out_hw_tx_csum: + fd_op = hns3_fd_qb_handle(ring, skb); + hnae3_set_field(paylen_fdop_ol4cs, HNS3_TXD_FD_OP_M, + HNS3_TXD_FD_OP_S, fd_op); + /* Set txbd */ desc->tx.ol_type_vlan_len_msec = cpu_to_le32(ol_type_vlan_len_msec); desc->tx.type_cs_vlan_tso_len = cpu_to_le32(type_cs_vlan_tso); - desc->tx.paylen_ol4cs = cpu_to_le32(paylen_ol4cs); + desc->tx.paylen_fdop_ol4cs = cpu_to_le32(paylen_fdop_ol4cs); desc->tx.mss_hw_csum = cpu_to_le16(mss_hw_csum); desc->tx.vlan_tag = cpu_to_le16(inner_vtag); desc->tx.outer_vlan_tag = cpu_to_le16(out_vtag); @@ -4282,6 +4354,9 @@ static int hns3_client_init(struct hnae3_handle *handle) if (ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) set_bit(HNAE3_PFLAG_LIMIT_PROMISC, &handle->supported_pflags);
+ if (test_bit(HNAE3_DEV_SUPPORT_QB_B, ae_dev->caps)) + set_bit(HNAE3_PFLAG_FD_QB_ENABLE, &handle->supported_pflags); + if (netif_msg_drv(handle)) hns3_info_show(priv);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index d069b04..7dddd5c4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -171,6 +171,11 @@ enum hns3_nic_state { #define HNS3_TXD_DECTTL_S 12 #define HNS3_TXD_DECTTL_M (0xf << HNS3_TXD_DECTTL_S)
+#define HNS3_TXD_FD_ADD_B 1 +#define HNS3_TXD_FD_DEL_B 0 +#define HNS3_TXD_FD_OP_M GENMASK(21, 20) +#define HNS3_TXD_FD_OP_S 20 + #define HNS3_TXD_OL4CS_B 22
#define HNS3_TXD_MSS_S 0 @@ -201,6 +206,8 @@ enum hns3_nic_state {
#define HNS3_RING_EN_B 0
+#define HNS3_FD_QB_FORCE_CNT_MAX 20 + enum hns3_pkt_l2t_type { HNS3_L2_TYPE_UNICAST, HNS3_L2_TYPE_MULTICAST, @@ -265,7 +272,7 @@ struct __packed hns3_desc { }; };
- __le32 paylen_ol4cs; + __le32 paylen_fdop_ol4cs; __le16 bdtp_fe_sc_vld_ra_ri; __le16 mss_hw_csum; } tx; @@ -361,6 +368,9 @@ enum hns3_pkt_ol4type { HNS3_OL4_TYPE_UNKNOWN };
+#define IP_VERSION_IPV4 0x4 +#define IP_VERSION_IPV6 0x6 + struct ring_stats { u64 sw_err_cnt; u64 seg_pkt_cnt; @@ -423,7 +433,7 @@ struct hns3_enet_ring { void *va; /* first buffer address for current packet */
u32 flag; /* ring attribute */ - + u32 fd_qb_tx_sample; int pending_buf; struct sk_buff *skb; struct sk_buff *tail_skb; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index a1d69c5..6b51d2f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -415,8 +415,19 @@ static void hns3_update_limit_promisc_mode(struct net_device *netdev, hns3_request_update_promisc_mode(handle); }
+static void hns3_update_fd_qb_state(struct net_device *netdev, bool enable) +{ + struct hnae3_handle *handle = hns3_get_handle(netdev); + + if (!handle->ae_algo->ops->request_flush_qb_config) + return; + + handle->ae_algo->ops->request_flush_qb_config(handle); +} + static const struct hns3_pflag_desc hns3_priv_flags[HNAE3_PFLAG_MAX] = { - { "limit_promisc", hns3_update_limit_promisc_mode } + { "limit_promisc", hns3_update_limit_promisc_mode }, + { "qb_enable", hns3_update_fd_qb_state }, };
static int hns3_get_sset_count(struct net_device *netdev, int stringset) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 3284a2c..3483f35 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -387,6 +387,8 @@ static void hclge_parse_capability(struct hclge_dev *hdev, set_bit(HNAE3_DEV_SUPPORT_PAUSE_B, ae_dev->caps); if (hnae3_get_bit(caps, HCLGE_CAP_PHY_IMP_B)) set_bit(HNAE3_DEV_SUPPORT_PHY_IMP_B, ae_dev->caps); + if (hnae3_get_bit(caps, HCLGE_CAP_QB_B)) + set_bit(HNAE3_DEV_SUPPORT_QB_B, ae_dev->caps); }
static __le32 hclge_build_api_caps(void) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 565c5aa..1cef746 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -244,6 +244,7 @@ enum hclge_opcode_type { HCLGE_OPC_FD_TCAM_OP = 0x1203, HCLGE_OPC_FD_AD_OP = 0x1204, HCLGE_OPC_FD_USER_DEF_OP = 0x1207, + HCLGE_OPC_FD_QB_CTRL = 0x1210,
/* MDIO command */ HCLGE_OPC_MDIO_CONFIG = 0x1900, @@ -1083,6 +1084,12 @@ struct hclge_fd_ad_config_cmd { u8 rsv2[8]; };
+struct hclge_fd_qb_cfg_cmd { + u8 en; + u8 vf_id; + u8 rsv[22]; +}; + #define HCLGE_FD_USER_DEF_OFT_S 0 #define HCLGE_FD_USER_DEF_OFT_M GENMASK(14, 0) #define HCLGE_FD_USER_DEF_EN_B 15 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 940b926..2e4c93b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4255,6 +4255,95 @@ static void hclge_update_vport_alive(struct hclge_dev *hdev) } }
+static int hclge_set_fd_qb(struct hclge_dev *hdev, u8 vf_id, bool enable) +{ + struct hclge_fd_qb_cfg_cmd *req; + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_QB_CTRL, false); + req = (struct hclge_fd_qb_cfg_cmd *)desc.data; + req->en = enable; + req->vf_id = vf_id; + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) + dev_err(&hdev->pdev->dev, + "failed to %s qb config for vport %u, ret = %d.\n", + enable ? "enable" : "disable", vf_id, ret); + return ret; +} + +static int hclge_sync_pf_qb_mode(struct hclge_dev *hdev) +{ + struct hclge_vport *vport = &hdev->vport[0]; + struct hnae3_handle *handle = &vport->nic; + bool request_enable = true; + int ret; + + if (!test_and_clear_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state)) + return 0; + + spin_lock_bh(&hdev->fd_rule_lock); + if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE || + hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE || + !test_bit(HNAE3_PFLAG_FD_QB_ENABLE, &handle->priv_flags)) + request_enable = false; + + if (request_enable == + test_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state)) { + spin_unlock_bh(&hdev->fd_rule_lock); + return 0; + } + + if (request_enable) + hclge_clear_arfs_rules(hdev); + + ret = hclge_set_fd_qb(hdev, vport->vport_id, request_enable); + if (!ret) { + if (request_enable) { + set_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state); + hdev->fd_active_type = HCLGE_FD_QB_ACTIVE; + } else { + clear_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state); + hdev->fd_active_type = HCLGE_FD_RULE_NONE; + } + } else { + set_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state); + } + spin_unlock_bh(&hdev->fd_rule_lock); + + return ret; +} + +static int hclge_disable_fd_qb_mode(struct hclge_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = hdev->ae_dev; + int ret; + + if (!test_bit(HNAE3_DEV_SUPPORT_QB_B, ae_dev->caps) || + !test_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state)) + return 0; + + ret = hclge_set_fd_qb(hdev, 0, false); + if (ret) + return ret; + + clear_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state); + + return 0; +} + +static void hclge_sync_fd_qb_mode(struct hclge_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = hdev->ae_dev; + + if (!test_bit(HNAE3_DEV_SUPPORT_QB_B, ae_dev->caps)) + return; + + hclge_sync_pf_qb_mode(hdev); +} + static void hclge_periodic_service_task(struct hclge_dev *hdev) { unsigned long delta = round_jiffies_relative(HZ); @@ -4268,6 +4357,7 @@ static void hclge_periodic_service_task(struct hclge_dev *hdev) hclge_update_link_status(hdev); hclge_sync_mac_table(hdev); hclge_sync_promisc_mode(hdev); + hclge_sync_fd_qb_mode(hdev); hclge_sync_fd_table(hdev);
if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) { @@ -5170,10 +5260,29 @@ static void hclge_request_update_promisc_mode(struct hnae3_handle *handle) set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state); }
+static bool hclge_query_fd_qb_state(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + struct hclge_dev *hdev = vport->back; + + return test_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state); +} + +static void hclge_flush_qb_config(struct hnae3_handle *handle) +{ + struct hclge_vport *vport = hclge_get_vport(handle); + + set_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state); +} + static void hclge_sync_fd_state(struct hclge_dev *hdev) { - if (hlist_empty(&hdev->fd_rule_list)) + struct hclge_vport *vport = &hdev->vport[0]; + + if (hlist_empty(&hdev->fd_rule_list)) { hdev->fd_active_type = HCLGE_FD_RULE_NONE; + set_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state); + } }
static void hclge_update_fd_rule_node(struct hclge_dev *hdev, @@ -6463,6 +6572,10 @@ static int hclge_add_fd_entry_common(struct hclge_dev *hdev, { int ret;
+ ret = hclge_disable_fd_qb_mode(hdev); + if (ret) + return ret; + spin_lock_bh(&hdev->fd_rule_lock);
if (hdev->fd_active_type != rule->rule_type && @@ -8095,6 +8208,7 @@ int hclge_vport_start(struct hclge_vport *vport) struct hclge_dev *hdev = vport->back;
set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state); + set_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state); vport->last_active_jiffies = jiffies;
if (test_bit(vport->vport_id, hdev->vport_config_block)) { @@ -10099,6 +10213,7 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev) hclge_restore_vport_vlan_table(vport); set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state); set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state); + clear_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state); hclge_restore_fd_entries(handle); }
@@ -12555,6 +12670,8 @@ static const struct hnae3_ae_ops hclge_ops = { .put_vector = hclge_put_vector, .set_promisc_mode = hclge_set_promisc_mode, .request_update_promisc_mode = hclge_request_update_promisc_mode, + .request_flush_qb_config = hclge_flush_qb_config, + .query_fd_qb_state = hclge_query_fd_qb_state, .set_loopback = hclge_set_loopback, .start = hclge_ae_start, .stop = hclge_ae_stop, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 2d1f7f8..9b3907a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -226,6 +226,7 @@ enum HCLGE_DEV_STATE { HCLGE_STATE_FD_TBL_CHANGED, HCLGE_STATE_FD_CLEAR_ALL, HCLGE_STATE_FD_USER_DEF_CHANGED, + HCLGE_STATE_HW_QB_ENABLE, HCLGE_STATE_MAX };
@@ -590,6 +591,7 @@ enum HCLGE_FD_ACTIVE_RULE_TYPE { HCLGE_FD_ARFS_ACTIVE, HCLGE_FD_EP_ACTIVE, HCLGE_FD_TC_FLOWER_ACTIVE, + HCLGE_FD_QB_ACTIVE, };
enum HCLGE_FD_PACKET_TYPE { @@ -951,6 +953,7 @@ struct hclge_rss_tuple_cfg { enum HCLGE_VPORT_STATE { HCLGE_VPORT_STATE_ALIVE, HCLGE_VPORT_STATE_MAC_TBL_CHANGE, + HCLGE_VPORT_STATE_QB_CHANGE, HCLGE_VPORT_STATE_MAX };
On Mon, 15 Mar 2021 20:23:50 +0800 Huazhong Tan wrote:
From: Jian Shen shenjian15@huawei.com
For device version V3, the hardware supports queue bonding, which can identify the tuple information of a TCP stream and create flow director rules automatically, in order to keep the tx and rx packets of the stream in the same queue pair. The driver sets the FD_ADD field of the TX BD for a TCP SYN packet, and sets the FD_DEL field for a TCP FIN or RST packet. The hardware creates or removes an fd rule according to the TX BD, and it also supports aging out a rule that has not been hit for a long time.
The queue bonding mode is disabled by default, and can be enabled/disabled with the ethtool priv-flags command.
This seems like fairly well defined behavior, IMHO we should have a full device feature for it, rather than a private flag.
Does the device need to be able to parse the frame fully for this mechanism to work? Will it work even if the TCP segment is encapsulated in a custom tunnel?
On 2021/3/16 4:04, Jakub Kicinski wrote:
On Mon, 15 Mar 2021 20:23:50 +0800 Huazhong Tan wrote:
From: Jian Shen shenjian15@huawei.com
For device version V3, the hardware supports queue bonding, which can identify the tuple information of a TCP stream and create flow director rules automatically, in order to keep the tx and rx packets of the stream in the same queue pair. The driver sets the FD_ADD field of the TX BD for a TCP SYN packet, and sets the FD_DEL field for a TCP FIN or RST packet. The hardware creates or removes an fd rule according to the TX BD, and it also supports aging out a rule that has not been hit for a long time.
The queue bonding mode is disabled by default, and can be enabled/disabled with the ethtool priv-flags command.
This seems like fairly well defined behavior, IMHO we should have a full device feature for it, rather than a private flag.
Should we add a NETIF_F_NTUPLE_HW feature for it?
Does the device need to be able to parse the frame fully for this mechanism to work? Will it work even if the TCP segment is encapsulated in a custom tunnel?
no, custom tunnel is not supported.
On Thu, 18 Mar 2021 09:02:54 +0800 Huazhong Tan wrote:
On 2021/3/16 4:04, Jakub Kicinski wrote:
On Mon, 15 Mar 2021 20:23:50 +0800 Huazhong Tan wrote:
From: Jian Shen shenjian15@huawei.com
For device version V3, the hardware supports queue bonding, which can identify the tuple information of a TCP stream and create flow director rules automatically, in order to keep the tx and rx packets of the stream in the same queue pair. The driver sets the FD_ADD field of the TX BD for a TCP SYN packet, and sets the FD_DEL field for a TCP FIN or RST packet. The hardware creates or removes an fd rule according to the TX BD, and it also supports aging out a rule that has not been hit for a long time.
The queue bonding mode is disabled by default, and can be enabled/disabled with the ethtool priv-flags command.
This seems like fairly well defined behavior, IMHO we should have a full device feature for it, rather than a private flag.
Should we add a NETIF_F_NTUPLE_HW feature for it?
It'd be better to keep the configuration close to the existing RFS config, no? Perhaps a new file under
/sys/class/net/$dev/queues/rx-$id/
to enable the feature would be more appropriate?
Otherwise I'd call it something like NETIF_F_RFS_AUTO ?
Alex, any thoughts? IIRC Intel HW had a similar feature?
Does the device need to be able to parse the frame fully for this mechanism to work? Will it work even if the TCP segment is encapsulated in a custom tunnel?
no, custom tunnel is not supported.
Hm, okay, it's just queue mapping, if device gets it wrong not the end of the world (provided security boundaries are preserved).
On Wed, Mar 17, 2021 at 6:28 PM Jakub Kicinski kuba@kernel.org wrote:
On Thu, 18 Mar 2021 09:02:54 +0800 Huazhong Tan wrote:
On 2021/3/16 4:04, Jakub Kicinski wrote:
On Mon, 15 Mar 2021 20:23:50 +0800 Huazhong Tan wrote:
From: Jian Shen shenjian15@huawei.com
For device version V3, the hardware supports queue bonding, which can identify the tuple information of a TCP stream and create flow director rules automatically, in order to keep the tx and rx packets of the stream in the same queue pair. The driver sets the FD_ADD field of the TX BD for a TCP SYN packet, and sets the FD_DEL field for a TCP FIN or RST packet. The hardware creates or removes an fd rule according to the TX BD, and it also supports aging out a rule that has not been hit for a long time.
The queue bonding mode is disabled by default, and can be enabled/disabled with the ethtool priv-flags command.
This seems like fairly well defined behavior, IMHO we should have a full device feature for it, rather than a private flag.
Should we add a NETIF_F_NTUPLE_HW feature for it?
It'd be better to keep the configuration close to the existing RFS config, no? Perhaps a new file under
/sys/class/net/$dev/queues/rx-$id/
to enable the feature would be more appropriate?
Otherwise I'd call it something like NETIF_F_RFS_AUTO ?
Alex, any thoughts? IIRC Intel HW had a similar feature?
Yeah, this is pretty much what Intel used to put out as ATR aka Flow Director. Although with that there was also a component of XPS. Flow Director was the name of the hardware feature and ATR, Application Targeted Routing, was the software feature that had the Tx path adding rules by default.
The i40e driver supports disabling it via the "flow-director-atr" private flag.
As far as tying this into NTUPLE that is definitely a no-go. Generally NTUPLE rules and ATR are mutually exclusive since they compete for resources within the same device.
Does the device need to be able to parse the frame fully for this mechanism to work? Will it work even if the TCP segment is encapsulated in a custom tunnel?
no, custom tunnel is not supported.
Hm, okay, it's just queue mapping, if device gets it wrong not the end of the world (provided security boundaries are preserved).
So yes/no in terms of this not causing serious issues. Where this tends to get ugly is if it is combined with something like XPS, which appears to be enabled for hns3. In that case the flow can jump queues, and when it does, that can lead to the Rx either jumping to follow, causing an out-of-order issue on the Rx side, or being left behind, with being left behind being the safer of the two cases.
Really I think this feature would be better served by implementing Accelerated RFS and adding support for ndo_rx_flow_steer.
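For reference, the callback Alexander refers to has the shape below. This is only a sketch of the callback's shape, not a proposal for hns3 (which already wires aRFS up as one of its three FD modes); the rule-programming helper is made up.

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/in.h>
#include <net/flow_dissector.h>

/* made-up helper: program an FD rule for this flow and return a filter id */
static int sketch_fd_add_arfs_rule(struct net_device *dev,
				   const struct flow_keys *fkeys,
				   u16 rxq_index, u32 flow_id)
{
	return 0;	/* stub: pretend filter id 0 was used */
}

/* called by the RFS core when a flow's consuming CPU (and hence the desired
 * RX queue) changes; returns the filter id on success or a negative error
 */
static int sketch_rx_flow_steer(struct net_device *dev,
				const struct sk_buff *skb,
				u16 rxq_index, u32 flow_id)
{
	struct flow_keys fkeys;

	if (!skb_flow_dissect_flow_keys(skb, &fkeys, 0))
		return -EPROTONOSUPPORT;

	/* steer only TCP, mirroring what the queue bonding patch targets */
	if (fkeys.basic.ip_proto != IPPROTO_TCP)
		return -EPROTONOSUPPORT;

	return sketch_fd_add_arfs_rule(dev, &fkeys, rxq_index, flow_id);
}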
Hi Jakub,
On 2021/3/18 9:28, Jakub Kicinski wrote:
On Thu, 18 Mar 2021 09:02:54 +0800 Huazhong Tan wrote:
On 2021/3/16 4:04, Jakub Kicinski wrote:
On Mon, 15 Mar 2021 20:23:50 +0800 Huazhong Tan wrote:
From: Jian Shen shenjian15@huawei.com
For device version V3, the hardware supports queue bonding, which can identify the tuple information of a TCP stream and create flow director rules automatically, in order to keep the tx and rx packets of the stream in the same queue pair. The driver sets the FD_ADD field of the TX BD for a TCP SYN packet, and sets the FD_DEL field for a TCP FIN or RST packet. The hardware creates or removes an fd rule according to the TX BD, and it also supports aging out a rule that has not been hit for a long time.
The queue bonding mode is disabled by default, and can be enabled/disabled with the ethtool priv-flags command.
This seems like fairly well defined behavior, IMHO we should have a full device feature for it, rather than a private flag.
Should we add a NETIF_F_NTUPLE_HW feature for it?
It'd be better to keep the configuration close to the existing RFS config, no? Perhaps a new file under
/sys/class/net/$dev/queues/rx-$id/
to enable the feature would be more appropriate?
Otherwise I'd call it something like NETIF_F_RFS_AUTO ?
I noticed that the enum NETIF_F_XXX_BIT has already used all 64 bits since NETIF_F_HW_HSR_DUP_BIT was added, while netdev_features_t is a u64.
So there is no usable bit left for a new feature, if I understand correctly.
Is there any solution or plan for it?
Alex, any thoughts? IIRC Intel HW had a similar feature?
Does the device need to be able to parse the frame fully for this mechanism to work? Will it work even if the TCP segment is encapsulated in a custom tunnel?
no, custom tunnel is not supported.
Hm, okay, it's just queue mapping, if device gets it wrong not the end of the world (provided security boundaries are preserved).
On Fri, 18 Jun 2021 09:18:21 +0800 shenjian (K) wrote:
Hi Jakub,
On 2021/3/18 9:28, Jakub Kicinski wrote:
On Thu, 18 Mar 2021 09:02:54 +0800 Huazhong Tan wrote:
On 2021/3/16 4:04, Jakub Kicinski wrote:
On Mon, 15 Mar 2021 20:23:50 +0800 Huazhong Tan wrote:
From: Jian Shen shenjian15@huawei.com
For device version V3, the hardware supports queue bonding, which can identify the tuple information of a TCP stream and create flow director rules automatically, in order to keep the tx and rx packets of the stream in the same queue pair. The driver sets the FD_ADD field of the TX BD for a TCP SYN packet, and sets the FD_DEL field for a TCP FIN or RST packet. The hardware creates or removes an fd rule according to the TX BD, and it also supports aging out a rule that has not been hit for a long time.
The queue bonding mode is disabled by default, and can be enabled/disabled with the ethtool priv-flags command.
This seems like fairly well defined behavior, IMHO we should have a full device feature for it, rather than a private flag.
Should we add a NETIF_F_NTUPLE_HW feature for it?
It'd be better to keep the configuration close to the existing RFS config, no? Perhaps a new file under
/sys/class/net/$dev/queues/rx-$id/
to enable the feature would be more appropriate?
Otherwise I'd call it something like NETIF_F_RFS_AUTO ?
I noticed that the enum NETIF_F_XXX_BIT has already used all 64 bits since NETIF_F_HW_HSR_DUP_BIT was added, while netdev_features_t is a u64.
So there is no usable bit left for a new feature, if I understand correctly.
Is there any solution or plan for it?
I think you'll need to start a new word.
On 2021/6/19 6:01, Jakub Kicinski wrote:
On Fri, 18 Jun 2021 09:18:21 +0800 shenjian (K) wrote:
Hi Jakub,
On 2021/3/18 9:28, Jakub Kicinski wrote:
On Thu, 18 Mar 2021 09:02:54 +0800 Huazhong Tan wrote:
On 2021/3/16 4:04, Jakub Kicinski wrote:
On Mon, 15 Mar 2021 20:23:50 +0800 Huazhong Tan wrote:
From: Jian Shen shenjian15@huawei.com
For device version V3, the hardware supports queue bonding, which can identify the tuple information of a TCP stream and create flow director rules automatically, in order to keep the tx and rx packets of the stream in the same queue pair. The driver sets the FD_ADD field of the TX BD for a TCP SYN packet, and sets the FD_DEL field for a TCP FIN or RST packet. The hardware creates or removes an fd rule according to the TX BD, and it also supports aging out a rule that has not been hit for a long time.
The queue bonding mode is disabled by default, and can be enabled/disabled with the ethtool priv-flags command.
This seems like fairly well defined behavior, IMHO we should have a full device feature for it, rather than a private flag.
Should we add a NETIF_F_NTUPLE_HW feature for it?
It'd be better to keep the configuration close to the existing RFS config, no? Perhaps a new file under
/sys/class/net/$dev/queues/rx-$id/
to enable the feature would be more appropriate?
Otherwise I'd call it something like NETIF_F_RFS_AUTO ?
I noticed that the enum NETIF_F_XXX_BIT has already used all 64 bits since NETIF_F_HW_HSR_DUP_BIT was added, while netdev_features_t is a u64.
So there is no usable bit left for a new feature, if I understand correctly.
Is there any solution or plan for it?
I think you'll need to start a new word.
What about defining a netdev feature bitmap
#define __DECLARE_NETDEV_FEATURE_T(name) \ DECLARE_BITMAP(name, NETDEV_FEATURE_COUNT)
like __ETHTOOL_DECLARE_LINK_MODE_MASK does
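Roughly what that could look like (the macro, struct and helper names here are made up; DECLARE_BITMAP, NETDEV_FEATURE_COUNT and the bitmap helpers are existing kernel API):

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/netdev_features.h>

/* made-up macro, following the __ETHTOOL_DECLARE_LINK_MODE_MASK pattern */
#define __DECLARE_NETDEV_FEATURE_MASK(name) \
	DECLARE_BITMAP(name, NETDEV_FEATURE_COUNT)

/* made-up container; in practice these masks would live in struct net_device */
struct sketch_feature_masks {
	__DECLARE_NETDEV_FEATURE_MASK(features);
	__DECLARE_NETDEV_FEATURE_MASK(hw_features);
};

static bool sketch_feature_test(const struct sketch_feature_masks *m,
				unsigned int feature_bit)
{
	return test_bit(feature_bit, m->features);
}

static void sketch_feature_set(struct sketch_feature_masks *m,
			       unsigned int feature_bit)
{
	__set_bit(feature_bit, m->features);
}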
From: Jian Shen shenjian15@huawei.com
For device version V3, the hardware supports queue bonding mode. A VF can not enable queue bonding mode unless the PF enables it. So the VF needs to query whether the PF supports queue bonding mode when initializing, and then periodically query whether the PF has enabled queue bonding mode. Since the FD rule resources are limited, to avoid one VF occupying too much FD rule space, only trusted VFs are allowed to enable queue bonding mode.
Signed-off-by: Jian Shen shenjian15@huawei.com Signed-off-by: Huazhong Tan tanhuazhong@huawei.com --- drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 8 +++ .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 52 ++++++++++++++- .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 33 ++++++++++ .../ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 2 + .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 74 ++++++++++++++++++++++ .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 7 ++ .../ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c | 17 +++++ 8 files changed, 194 insertions(+), 1 deletion(-)
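The VF side of this handshake (the hclgevf_main.c part of the diffstat above, not shown in full here) boils down to three mailbox exchanges using the new subcodes defined in hclge_mbx.h below. A condensed sketch, with a made-up transport helper standing in for the real mailbox send routine:

#include <linux/types.h>
#include "hclge_mbx.h"

/* made-up transport helper: send a VF->PF mailbox message with one byte of
 * payload and optionally read back one byte of response data
 */
int sketch_vf_send_mbx(u16 code, u16 subcode, u8 data, u8 *resp);

/* at init: ask the PF whether queue bonding is supported at all */
static int sketch_vf_query_qb_caps(bool *supported)
{
	u8 resp = 0;
	int ret;

	ret = sketch_vf_send_mbx(HCLGE_MBX_SET_QB, HCLGE_MBX_QB_CHECK_CAPS,
				 0, &resp);
	if (ret)
		return ret;

	*supported = !!resp;
	return 0;
}

/* when the qb_enable priv-flag changes: ask the PF to enable or disable
 * queue bonding for this VF (the PF only honours this for trusted VFs)
 */
static int sketch_vf_request_qb(bool enable)
{
	return sketch_vf_send_mbx(HCLGE_MBX_SET_QB, HCLGE_MBX_QB_ENABLE,
				  enable ? 1 : 0, NULL);
}

/* periodically: ask the PF to push back the current state; the PF answers
 * with an HCLGE_MBX_PUSH_QB_STATE message
 */
static int sketch_vf_query_qb_state(void)
{
	return sketch_vf_send_mbx(HCLGE_MBX_SET_QB, HCLGE_MBX_QB_GET_STATE,
				  0, NULL);
}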
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 33defa4..797adc9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -46,6 +46,8 @@ enum HCLGE_MBX_OPCODE { HCLGE_MBX_PUSH_PROMISC_INFO, /* (PF -> VF) push vf promisc info */ HCLGE_MBX_VF_UNINIT, /* (VF -> PF) vf is unintializing */ HCLGE_MBX_HANDLE_VF_TBL, /* (VF -> PF) store/clear hw table */ + HCLGE_MBX_SET_QB = 0x28, /* (VF -> PF) set queue bonding */ + HCLGE_MBX_PUSH_QB_STATE, /* (PF -> VF) push qb state */
HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf flr status */ HCLGE_MBX_PUSH_LINK_STATUS, /* (M7 -> PF) get port link status */ @@ -75,6 +77,12 @@ enum hclge_mbx_tbl_cfg_subcode { HCLGE_MBX_VPORT_LIST_CLEAR, };
+enum hclge_mbx_qb_cfg_subcode { + HCLGE_MBX_QB_CHECK_CAPS = 0, /* query whether support qb */ + HCLGE_MBX_QB_ENABLE, /* request pf enable qb */ + HCLGE_MBX_QB_GET_STATE /* query whether qb enabled */ +}; + #define HCLGE_MBX_MAX_MSG_SIZE 14 #define HCLGE_MBX_MAX_RESP_DATA_SIZE 8U #define HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM 4 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 2e4c93b..1b4b086 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -4280,6 +4280,7 @@ static int hclge_sync_pf_qb_mode(struct hclge_dev *hdev) struct hnae3_handle *handle = &vport->nic; bool request_enable = true; int ret; + u16 i;
 
 	if (!test_and_clear_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state))
 		return 0;
@@ -4308,6 +4309,11 @@ static int hclge_sync_pf_qb_mode(struct hclge_dev *hdev)
 			clear_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state);
 			hdev->fd_active_type = HCLGE_FD_RULE_NONE;
 		}
+
+		for (i = 1; i < hdev->num_alloc_vport; i++) {
+			vport = &hdev->vport[i];
+			set_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state);
+		}
 	} else {
 		set_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state);
 	}
@@ -4316,10 +4322,33 @@ static int hclge_sync_pf_qb_mode(struct hclge_dev *hdev)
 	return ret;
 }
 
+static int hclge_sync_vf_qb_mode(struct hclge_vport *vport)
+{
+	struct hclge_dev *hdev = vport->back;
+	bool request_enable = false;
+	int ret;
+
+	if (!test_and_clear_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state))
+		return 0;
+
+	if (vport->vf_info.trusted && vport->vf_info.request_qb_en &&
+	    test_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state))
+		request_enable = true;
+
+	ret = hclge_set_fd_qb(hdev, vport->vport_id, request_enable);
+	if (ret)
+		set_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state);
+	vport->vf_info.qb_en = request_enable ? 1 : 0;
+
+	return ret;
+}
+
 static int hclge_disable_fd_qb_mode(struct hclge_dev *hdev)
 {
 	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+	struct hclge_vport *vport;
 	int ret;
+	u16 i;
 
 	if (!test_bit(HNAE3_DEV_SUPPORT_QB_B, ae_dev->caps) ||
 	    !test_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state))
@@ -4331,17 +4360,35 @@ static int hclge_disable_fd_qb_mode(struct hclge_dev *hdev)
 
 	clear_bit(HCLGE_STATE_HW_QB_ENABLE, &hdev->state);
 
+	for (i = 1; i < hdev->num_alloc_vport; i++) {
+		vport = &hdev->vport[i];
+		set_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state);
+	}
+
 	return 0;
 }
 
 static void hclge_sync_fd_qb_mode(struct hclge_dev *hdev)
 {
 	struct hnae3_ae_dev *ae_dev = hdev->ae_dev;
+	struct hclge_vport *vport;
+	int ret;
+	u16 i;
 
 	if (!test_bit(HNAE3_DEV_SUPPORT_QB_B, ae_dev->caps))
 		return;
 
-	hclge_sync_pf_qb_mode(hdev);
+	ret = hclge_sync_pf_qb_mode(hdev);
+	if (ret)
+		return;
+
+	for (i = 1; i < hdev->num_alloc_vport; i++) {
+		vport = &hdev->vport[i];
+
+		ret = hclge_sync_vf_qb_mode(vport);
+		if (ret)
+			return;
+	}
 }
 
 static void hclge_periodic_service_task(struct hclge_dev *hdev)
@@ -11662,6 +11709,9 @@ static int hclge_set_vf_trust(struct hnae3_handle *handle, int vf, bool enable)
 
 	vport->vf_info.trusted = new_trusted;
 
+	set_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state);
+	hclge_task_schedule(hdev, 0);
+
 	return 0;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 9b3907a..9dfefcf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -975,6 +975,8 @@ struct hclge_vf_info {
 	u32 max_tx_rate;
 	u32 trusted;
 	u16 promisc_enable;
+	u8 request_qb_en;
+	u8 qb_en;
 };
 
 struct hclge_vport {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 51a36e7..5edeca6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -683,6 +683,36 @@ static void hclge_handle_vf_tbl(struct hclge_vport *vport,
 	}
 }
 
+static void hclge_handle_vf_qb(struct hclge_vport *vport,
+			       struct hclge_mbx_vf_to_pf_cmd *mbx_req,
+			       struct hclge_respond_to_vf_msg *resp_msg)
+{
+	struct hclge_dev *hdev = vport->back;
+
+	if (mbx_req->msg.subcode == HCLGE_MBX_QB_CHECK_CAPS) {
+		struct hnae3_handle *handle = &hdev->vport[0].nic;
+
+		resp_msg->data[0] = test_bit(HNAE3_PFLAG_FD_QB_ENABLE,
+					     &handle->supported_pflags);
+		resp_msg->len = sizeof(u8);
+	} else if (mbx_req->msg.subcode == HCLGE_MBX_QB_ENABLE) {
+		vport->vf_info.request_qb_en = mbx_req->msg.data[0];
+		set_bit(HCLGE_VPORT_STATE_QB_CHANGE, &vport->state);
+	} else if (mbx_req->msg.subcode == HCLGE_MBX_QB_GET_STATE) {
+		u16 msg_data = vport->vf_info.qb_en;
+		int ret;
+
+		ret = hclge_send_mbx_msg(vport, (u8 *)&msg_data,
+					 sizeof(msg_data),
+					 HCLGE_MBX_PUSH_QB_STATE,
+					 vport->vport_id);
+		if (ret)
+			dev_err(&hdev->pdev->dev,
+				"failed to inform qb state to vport %u, ret = %d\n",
+				vport->vport_id, ret);
+	}
+}
+
 void hclge_mbx_handler(struct hclge_dev *hdev)
 {
 	struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq;
@@ -830,6 +860,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 		case HCLGE_MBX_HANDLE_VF_TBL:
 			hclge_handle_vf_tbl(vport, req);
 			break;
+		case HCLGE_MBX_SET_QB:
+			hclge_handle_vf_qb(vport, req, &resp_msg);
+			break;
 		default:
 			dev_err(&hdev->pdev->dev,
 				"un-supported mailbox message, code = %u\n",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index 46700c4..dbfd31a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -360,6 +360,8 @@ static void hclgevf_parse_capability(struct hclgevf_dev *hdev,
 		set_bit(HNAE3_DEV_SUPPORT_HW_TX_CSUM_B, ae_dev->caps);
 	if (hnae3_get_bit(caps, HCLGEVF_CAP_UDP_TUNNEL_CSUM_B))
 		set_bit(HNAE3_DEV_SUPPORT_UDP_TUNNEL_CSUM_B, ae_dev->caps);
+	if (hnae3_get_bit(caps, HCLGEVF_CAP_QB_B))
+		set_bit(HNAE3_DEV_SUPPORT_QB_B, ae_dev->caps);
 }
 
 static __le32 hclgevf_build_api_caps(void)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 700e068..33acbd4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -470,6 +470,74 @@ static int hclgevf_knic_setup(struct hclgevf_dev *hdev)
 	return 0;
 }
 
+static void hclgevf_update_fd_qb_state(struct hclgevf_dev *hdev)
+{
+	struct hnae3_handle *handle = &hdev->nic;
+	struct hclge_vf_to_pf_msg send_msg;
+	int ret;
+
+	if (!hdev->qb_cfg.pf_support_qb ||
+	    !test_bit(HNAE3_PFLAG_FD_QB_ENABLE, &handle->priv_flags))
+		return;
+
+	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_QB,
+			       HCLGE_MBX_QB_GET_STATE);
+	ret = hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
+	if (ret)
+		dev_err(&hdev->pdev->dev, "failed to get qb state, ret = %d",
+			ret);
+}
+
+static void hclgevf_get_pf_qb_caps(struct hclgevf_dev *hdev)
+{
+	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+	struct hclge_vf_to_pf_msg send_msg;
+	u8 resp_msg;
+	int ret;
+
+	if (!test_bit(HNAE3_DEV_SUPPORT_QB_B, ae_dev->caps))
+		return;
+
+	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_QB,
+			       HCLGE_MBX_QB_CHECK_CAPS);
+	ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, &resp_msg,
+				   sizeof(resp_msg));
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get qb caps from PF, ret = %d", ret);
+		return;
+	}
+
+	hdev->qb_cfg.pf_support_qb = resp_msg > 0;
+}
+
+static void hclgevf_set_fd_qb(struct hnae3_handle *handle)
+{
+#define HCLGEVF_QB_MBX_STATE_OFFSET	0
+
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	struct hclge_vf_to_pf_msg send_msg;
+	u8 resp_msg;
+	int ret;
+
+	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_QB,
+			       HCLGE_MBX_QB_ENABLE);
+	send_msg.data[HCLGEVF_QB_MBX_STATE_OFFSET] =
+		test_bit(HNAE3_PFLAG_FD_QB_ENABLE, &handle->priv_flags) ? 1 : 0;
+	ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, &resp_msg,
+				   sizeof(resp_msg));
+	if (ret)
+		dev_err(&hdev->pdev->dev, "failed to set qb state, ret = %d",
+			ret);
+}
+
+static bool hclgevf_query_fd_qb_state(struct hnae3_handle *handle)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	return hdev->qb_cfg.hw_qb_en;
+}
+
 static void hclgevf_request_link_info(struct hclgevf_dev *hdev)
 {
 	struct hclge_vf_to_pf_msg send_msg;
@@ -2320,6 +2388,8 @@ static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev)
 
 	hclgevf_sync_promisc_mode(hdev);
 
+	hclgevf_update_fd_qb_state(hdev);
+
 	hdev->last_serv_processed = jiffies;
 
 out:
@@ -3348,6 +3418,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 		goto err_config;
 	}
 
+	hclgevf_get_pf_qb_caps(hdev);
+
 	hdev->last_reset_time = jiffies;
 	dev_info(&hdev->pdev->dev, "finished initializing %s driver\n",
 		 HCLGEVF_DRIVER_NAME);
@@ -3775,6 +3847,8 @@ static const struct hnae3_ae_ops hclgevf_ops = {
 	.set_promisc_mode = hclgevf_set_promisc_mode,
 	.request_update_promisc_mode = hclgevf_request_update_promisc_mode,
 	.get_cmdq_stat = hclgevf_get_cmdq_stat,
+	.request_flush_qb_config = hclgevf_set_fd_qb,
+	.query_fd_qb_state = hclgevf_query_fd_qb_state,
 };
 
 static struct hnae3_ae_algo ae_algovf = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 8c27ecd..0e0c2fa 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -262,6 +262,11 @@ struct hclgevf_mac_table_cfg {
 	struct list_head mc_mac_list;
 };
 
+struct hclgevf_qb_cfg {
+	bool pf_support_qb;
+	bool hw_qb_en;
+};
+
 struct hclgevf_dev {
 	struct pci_dev *pdev;
 	struct hnae3_ae_dev *ae_dev;
@@ -328,6 +333,8 @@ struct hclgevf_dev {
 	u32 flag;
 	unsigned long serv_processed_cnt;
 	unsigned long last_serv_processed;
+
+	struct hclgevf_qb_cfg qb_cfg;
 };
 
 static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 5b2dcd9..1c89372 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -217,6 +217,7 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 		case HCLGE_MBX_LINK_STAT_MODE:
 		case HCLGE_MBX_PUSH_VLAN_INFO:
 		case HCLGE_MBX_PUSH_PROMISC_INFO:
+		case HCLGE_MBX_PUSH_QB_STATE:
 			/* set this mbx event as pending. This is required as we
 			 * might loose interrupt event when mbx task is busy
 			 * handling. This shall be cleared when mbx task just
@@ -268,6 +269,19 @@ static void hclgevf_parse_promisc_info(struct hclgevf_dev *hdev,
 		"Promisc mode is closed by host for being untrusted.\n");
 }
 
+static void hclgevf_parse_qb_info(struct hclgevf_dev *hdev, u16 qb_state)
+{
+#define HCLGEVF_HW_QB_ON	1
+#define HCLGEVF_HW_QB_OFF	0
+
+	if (qb_state > HCLGEVF_HW_QB_ON) {
+		dev_warn(&hdev->pdev->dev, "Invalid state, ignored.\n");
+		return;
+	}
+
+	hdev->qb_cfg.hw_qb_en = qb_state > HCLGEVF_HW_QB_OFF;
+}
+
 void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
 {
 	enum hnae3_reset_type reset_type;
@@ -336,6 +350,9 @@ void hclgevf_mbx_async_handler(struct hclgevf_dev *hdev)
 		case HCLGE_MBX_PUSH_PROMISC_INFO:
 			hclgevf_parse_promisc_info(hdev, msg_q[1]);
 			break;
+		case HCLGE_MBX_PUSH_QB_STATE:
+			hclgevf_parse_qb_info(hdev, msg_q[1]);
+			break;
 		default:
 			dev_err(&hdev->pdev->dev,
 				"fetched unsupported(%u) message from arq\n",