hulk inclusion category: feature Link: https://gitee.com/openeuler/kernel/issues/ICBFCS CVE: NA -------------------------------- Support rps affinity policy setting (0 as no rps, 1 as numa, 2 as cluster). Also use rps cpus map instead of traversal to improve performance. Signed-off-by: Yue Haibing <yuehaibing@huawei.com> --- include/linux/venetcls.h | 6 +- net/venetcls/venetcls.h | 3 +- net/venetcls/venetcls_flow.c | 121 +++++++++++++++++++++------------ net/venetcls/venetcls_main.c | 22 +++--- net/venetcls/venetcls_ntuple.c | 4 +- 5 files changed, 100 insertions(+), 56 deletions(-) diff --git a/include/linux/venetcls.h b/include/linux/venetcls.h index fab7e57fde89..acbffdb91ee8 100644 --- a/include/linux/venetcls.h +++ b/include/linux/venetcls.h @@ -52,9 +52,9 @@ venetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret) cpu = -1; last_qtail = 0; /* mode 1 always use vecls_set_cpu hook for physical NIC or lo. - * mode 0 set this hook to NULL, to avoid unneeded ops in - * venetcls_skblist_set_cpu() for physical NIC flows, and use - * vecls_set_localcpu hook for loopback flows. + * mode 0 set this hook to NULL if rps_policy is 0, to avoid + * unneeded ops in venetcls_skblist_set_cpu() for physical NIC + * flows, and use vecls_set_localcpu hook for loopback flows. 
*/ if (ops->vecls_set_cpu) ops->vecls_set_cpu(skb, &cpu, &last_qtail); diff --git a/net/venetcls/venetcls.h b/net/venetcls/venetcls.h index 05fe2e3592f4..4313939e91d9 100644 --- a/net/venetcls/venetcls.h +++ b/net/venetcls/venetcls.h @@ -137,7 +137,8 @@ struct cfg_param { int cpu; }; -extern int lo_numa_rps; +extern int rps_policy; +extern int lo_rps_policy; extern int mode; extern int match_ip_flag; extern int debug; diff --git a/net/venetcls/venetcls_flow.c b/net/venetcls/venetcls_flow.c index 85d1abe4c708..758067a7c6f1 100644 --- a/net/venetcls/venetcls_flow.c +++ b/net/venetcls/venetcls_flow.c @@ -14,9 +14,9 @@ #include "venetcls.h" -static u16 *rps_cpus; -static int rps_cpus_nums; static u32 vecls_cpu_mask; +static u16 *rps_cpus, *cluster_rps_cpus; +static int rps_cpus_nums, cluster_rps_cpus_nums; static struct vecls_sock_flow_table __rcu *vecls_sock_flow_table; static DEFINE_MUTEX(vecls_sock_flow_mutex); static DEFINE_SPINLOCK(vecls_dev_flow_lock); @@ -70,13 +70,16 @@ static bool _vecls_timeout(struct net_device *dev, u16 rxq_index, static inline bool sk_is_loopback(struct sock *sk) { if (sk->sk_family == AF_INET) { - if (ipv4_is_loopback(sk->sk_daddr) && ipv4_is_loopback(sk->sk_rcv_saddr)) + if (ipv4_is_loopback(sk->sk_daddr) || ipv4_is_loopback(sk->sk_rcv_saddr)) return true; } if (sk->sk_family == AF_INET6) { - if (ipv6_addr_loopback(&sk->sk_v6_daddr) && - ipv6_addr_loopback(&sk->sk_v6_rcv_saddr)) + if (ipv6_addr_loopback(&sk->sk_v6_daddr) || + ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) || + ipv6_addr_v4mapped_loopback(&sk->sk_v6_daddr) || + ipv6_addr_v4mapped_loopback(&sk->sk_v6_rcv_saddr) || + ipv6_addr_equal(&sk->sk_v6_daddr, &sk->sk_v6_rcv_saddr)) return true; } return false; @@ -104,7 +107,7 @@ void _vecls_flow_update(struct sock *sk, struct sk_buff *skb) cpu = raw_smp_processor_id(); rcu_read_lock(); tb = rcu_dereference(vecls_sock_flow_table); - if (lo_numa_rps && skb && sk_is_loopback(sk)) + if (lo_rps_policy && skb && sk_is_loopback(sk)) hash = 
READ_ONCE(skb->sym_hash); else hash = READ_ONCE(sk->sk_rxhash); @@ -216,37 +219,31 @@ static void set_vecls_cpu(struct net_device *dev, struct sk_buff *skb, rflow->cpu = next_cpu; } -static inline int get_cpu_in_numa(int tcpu, u32 hash) +static inline u32 get_rps_cpu(u32 last_recv_cpu, u32 hash, int policy) { - const struct cpumask *mask; - int nr_cpus, cpu, index; - - mask = cpumask_of_node(cpu_to_node(tcpu)); - nr_cpus = cpumask_weight(mask); - if (nr_cpus == 0) - return -1; - - index = reciprocal_scale(hash, nr_cpus); - if (index < 0) - return -1; - - cpu = cpumask_first(mask); - while (--nr_cpus > 0) { - if (index == 0) - break; - cpu = cpumask_next(cpu, mask); - index--; + u32 newcpu, index; + + if (policy == 1) { + newcpu = cpumask_first(cpumask_of_node(cpu_to_node(last_recv_cpu))); + index = rps_cpus[reciprocal_scale(hash, rps_cpus_nums - 1)]; + newcpu += index; + } else if (policy == 2) { + newcpu = cpumask_first(topology_cluster_cpumask(last_recv_cpu)); + index = cluster_rps_cpus[reciprocal_scale(hash, cluster_rps_cpus_nums - 1)]; + newcpu += index; + } else { + newcpu = last_recv_cpu; } - return cpu; + + return newcpu; } static void __vecls_set_cpu(struct sk_buff *skb, struct net_device *ndev, struct vecls_sock_flow_table *tb, struct vecls_dev_flow_table *dtb, int old_rxq_id, int *rcpu, int *last_qtail) { - u32 last_recv_cpu, hash, val, cpu, tcpu; + u32 last_recv_cpu, hash, val, cpu, tcpu, newcpu; struct vecls_dev_flow *rflow; - int newcpu; cpu = raw_smp_processor_id(); skb_reset_network_header(skb); @@ -262,11 +259,7 @@ static void __vecls_set_cpu(struct sk_buff *skb, struct net_device *ndev, if ((val ^ hash) & ~vecls_cpu_mask) return; - newcpu = get_cpu_in_numa(last_recv_cpu, hash); - if (newcpu >= 0) - *rcpu = newcpu; - else - newcpu = last_recv_cpu; + newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy); if (rflow->isvalid && cpu_to_node(rflow->cpu) == cpu_to_node(newcpu)) { rflow->timeout = jiffies; @@ -280,11 +273,10 @@ static void 
__vecls_set_cpu(struct sk_buff *skb, struct net_device *ndev, set_vecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu); } -static inline void loopback_numa_rps(struct sk_buff *skb, int *rcpu) +static inline void do_loopback_rps(struct sk_buff *skb, int *rcpu) { + u32 last_recv_cpu, hash, val, newcpu; struct vecls_sock_flow_table *stb; - u32 last_recv_cpu, hash, val; - int newcpu, index; skb_reset_network_header(skb); hash = __skb_get_hash_symmetric(skb); @@ -306,9 +298,36 @@ static inline void loopback_numa_rps(struct sk_buff *skb, int *rcpu) if ((val ^ hash) & ~vecls_cpu_mask) return; - newcpu = cpumask_first(cpumask_of_node(cpu_to_node(last_recv_cpu))); - index = rps_cpus[reciprocal_scale(hash, rps_cpus_nums - 1)]; - newcpu += index; + newcpu = get_rps_cpu(last_recv_cpu, hash, lo_rps_policy); + *rcpu = newcpu; + vecls_debug("last:%u curcpu:%d newcpu:%d\n", last_recv_cpu, raw_smp_processor_id(), newcpu); +} + +static inline void do_flow_rps(struct sk_buff *skb, int *rcpu) +{ + u32 last_recv_cpu, hash, val, newcpu; + struct vecls_sock_flow_table *stb; + + skb_reset_network_header(skb); + hash = skb_get_hash(skb); + if (!hash) + return; + + rcu_read_lock(); + stb = rcu_dereference(vecls_sock_flow_table); + if (stb) { + val = READ_ONCE(stb->ents[hash & stb->mask]); + last_recv_cpu = val & vecls_cpu_mask; + } else { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + if ((val ^ hash) & ~vecls_cpu_mask) + return; + + newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy); *rcpu = newcpu; vecls_debug("last:%u curcpu:%d newcpu:%d\n", last_recv_cpu, raw_smp_processor_id(), newcpu); } @@ -326,12 +345,17 @@ void _vecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail) if (!ndev) return; - if (lo_numa_rps && (ndev->type == ARPHRD_LOOPBACK && ndev->flags & IFF_LOOPBACK)) - loopback_numa_rps(skb, cpu); + if (lo_rps_policy && (ndev->type == ARPHRD_LOOPBACK && ndev->flags & IFF_LOOPBACK)) + do_loopback_rps(skb, cpu); if (!is_vecls_config_netdev(ndev->name)) return; + if 
(rps_policy && mode == 0) { + do_flow_rps(skb, cpu); + return; + } + rxqueue = ndev->_rx; if (skb_rx_queue_recorded(skb)) { rxq_id = skb_get_rx_queue(skb); @@ -484,6 +508,7 @@ static int vecls_sock_flow_table_release(void) synchronize_rcu(); vfree(tb); kfree(rps_cpus); + kfree(cluster_rps_cpus); return 0; } @@ -538,18 +563,30 @@ static int vecls_sock_flow_table_init(void) struct vecls_sock_flow_table *table; int size = sft_num, i; + cluster_rps_cpus_nums = cpumask_weight(topology_cluster_cpumask(0)); rps_cpus_nums = cpumask_weight(cpumask_of_node(0)); rps_cpus = kmalloc_array(rps_cpus_nums, sizeof(u16), GFP_KERNEL); if (!rps_cpus) return -ENOMEM; for (i = 0; i < rps_cpus_nums; i++) rps_cpus[i] = i; - vecls_debug("rps_cpus_nums:%d\n", rps_cpus_nums); + + cluster_rps_cpus = kmalloc_array(cluster_rps_cpus_nums, sizeof(u16), GFP_KERNEL); + if (!cluster_rps_cpus) { + kfree(rps_cpus); + return -ENOMEM; + } + for (i = 0; i < cluster_rps_cpus_nums; i++) + cluster_rps_cpus[i] = i; + + vecls_debug("rps_cpus_nums:%d cluster_rps_cpus_nums:%d\n", + rps_cpus_nums, cluster_rps_cpus_nums); size = roundup_pow_of_two(size); table = vmalloc(VECLS_SOCK_FLOW_TABLE_SIZE(size)); if (!table) { kfree(rps_cpus); + kfree(cluster_rps_cpus); return -ENOMEM; } diff --git a/net/venetcls/venetcls_main.c b/net/venetcls/venetcls_main.c index e1c74b4b669c..00ec0b0e2498 100644 --- a/net/venetcls/venetcls_main.c +++ b/net/venetcls/venetcls_main.c @@ -18,9 +18,13 @@ int vecls_numa_num; static int vecls_cluster_cpu_num, vecls_cluster_per_numa; static struct vecls_numa_info *vecls_numa_info_table; -int lo_numa_rps; -module_param(lo_numa_rps, int, 0644); -MODULE_PARM_DESC(lo_numa_rps, "enable loopback flow numa affinity"); +int rps_policy = 1; +module_param(rps_policy, int, 0644); +MODULE_PARM_DESC(rps_policy, "phy nic rps policy, default 1"); + +int lo_rps_policy; +module_param(lo_rps_policy, int, 0644); +MODULE_PARM_DESC(lo_rps_policy, "loopback rps policy, default 0"); int debug; module_param(debug, 
int, 0644); @@ -56,7 +60,7 @@ MODULE_PARM_DESC(irqname, "nic irq name string, default comp"); unsigned int dft_num = 0x1000; module_param(dft_num, uint, 0444); -MODULE_PARM_DESC(dft_num, "dev flow table entries, default 0x10000"); +MODULE_PARM_DESC(dft_num, "dev flow table entries, default 0x1000"); unsigned int sft_num = 0x100000; module_param(sft_num, uint, 0444); @@ -521,7 +525,7 @@ static int init_single_vecls_dev(char *if_name, unsigned int length) ret = vecls_filter_enable(dev_name, &old_state); if (ret) { vecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret); - if (lo_numa_rps) + if (lo_rps_policy) goto out; } @@ -1121,7 +1125,7 @@ static __init int vecls_init(void) err = vecls_ntuple_res_init(); if (err) goto clean_rxq; - if (lo_numa_rps) + if (lo_rps_policy) err = vecls_flow_res_init(); } else { err = vecls_flow_res_init(); @@ -1137,7 +1141,7 @@ static __init int vecls_init(void) goto clean_rxq; } #endif - if (lo_numa_rps) + if (lo_rps_policy) static_branch_inc(&vecls_localrps_needed); return 0; @@ -1151,7 +1155,7 @@ static __init int vecls_init(void) static __exit void vecls_exit(void) { - if (lo_numa_rps) + if (lo_rps_policy) static_branch_dec(&vecls_localrps_needed); #ifdef CONFIG_PROC_FS @@ -1159,7 +1163,7 @@ static __exit void vecls_exit(void) #endif if (mode == 0) { vecls_ntuple_res_clean(); - if (lo_numa_rps) + if (lo_rps_policy) vecls_flow_res_clean(); } else { vecls_flow_res_clean(); diff --git a/net/venetcls/venetcls_ntuple.c b/net/venetcls/venetcls_ntuple.c index ac73d548fdee..8fc5d8da06fe 100644 --- a/net/venetcls/venetcls_ntuple.c +++ b/net/venetcls/venetcls_ntuple.c @@ -680,7 +680,7 @@ int venetcls_ntuple_status(struct seq_file *seq, void *v) return 0; } -static const struct vecls_hook_ops vecls_ntuple_ops = { +static struct vecls_hook_ops vecls_ntuple_ops = { .vecls_flow_update = _vecls_flow_update, .vecls_set_localcpu = _vecls_set_cpu, .vecls_set_cpu = NULL, @@ -697,6 +697,8 @@ int vecls_ntuple_res_init(void) } 
init_vecls_sk_rules(); + if (rps_policy) + vecls_ntuple_ops.vecls_set_cpu = _vecls_set_cpu; RCU_INIT_POINTER(vecls_ops, &vecls_ntuple_ops); synchronize_rcu(); return 0; -- 2.34.1