From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: feature
Link: https://gitee.com/openeuler/kernel/issues/ICBFCS
CVE: NA

--------------------------------

Support rps affinity policy setting via the new rps_policy and
lo_rps_policy module parameters (0 as no rps, 1 as numa affinity,
2 as cluster affinity). Also use a precomputed rps cpus map instead
of cpumask traversal to improve performance.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
---
 include/linux/oenetcls.h       |   8 +-
 net/oenetcls/oenetcls.h        |   3 +-
 net/oenetcls/oenetcls_flow.c   | 171 ++++++++++++++++++++-------------
 net/oenetcls/oenetcls_main.c   |  20 ++--
 net/oenetcls/oenetcls_ntuple.c |   4 +-
 5 files changed, 124 insertions(+), 82 deletions(-)

diff --git a/include/linux/oenetcls.h b/include/linux/oenetcls.h
index b618aa6b807f..a1929fb0a193 100644
--- a/include/linux/oenetcls.h
+++ b/include/linux/oenetcls.h
@@ -51,9 +51,9 @@ oenetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
 	ops = rcu_dereference(oecls_ops);
 	if (ops) {
 		/* mode 1 always use oecls_set_cpu hook for physical NIC or lo.
-		 * mode 0 set this hook to NULL, to avoid unneeded ops in
-		 * oenetcls_skblist_set_cpu() for physical NIC flows, and use
-		 * oecls_set_localcpu hook for loopback flows.
+		 * mode 0 set this hook to NULL if rps_policy is 0, to avoid
+		 * unneeded ops in oenetcls_skblist_set_cpu() for physical NIC
+		 * flows, and use oecls_set_localcpu hook for loopback flows.
 		 */
 		if (ops->oecls_set_cpu)
 			ops->oecls_set_cpu(skb, &cpu, &last_qtail);
@@ -74,8 +74,6 @@ oenetcls_skb_set_localcpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
 	struct net_device *dev = skb->dev;
 	bool result = false;
 
-	if (!static_branch_unlikely(&oecls_localrps_needed))
-		return result;
 	if (!dev || !(dev->type == ARPHRD_LOOPBACK && dev->flags & IFF_LOOPBACK))
 		return result;
 
diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h
index 55b0345cdba6..60f24165b3c4 100644
--- a/net/oenetcls/oenetcls.h
+++ b/net/oenetcls/oenetcls.h
@@ -147,7 +147,8 @@ extern int oecls_netdev_num;
 extern int oecls_numa_num;
 extern unsigned int dft_num;
 extern unsigned int sft_num;
-extern int lo_numa_rps;
+extern int rps_policy;
+extern int lo_rps_policy;
 
 #define oecls_debug(fmt, ...) \
 	do { \
diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c
index fd5ed67312f1..934a50e8bbf8 100644
--- a/net/oenetcls/oenetcls_flow.c
+++ b/net/oenetcls/oenetcls_flow.c
@@ -15,9 +15,9 @@
 #include <net/sock.h>
 #include "oenetcls.h"
 
-static u16 *rps_cpus;
-static int rps_cpus_nums;
 static u32 oecls_cpu_mask;
+static u16 *rps_cpus, *cluster_rps_cpus;
+static int rps_cpus_nums, cluster_rps_cpus_nums;
 static struct oecls_sock_flow_table __rcu *oecls_sock_flow_table;
 static DEFINE_MUTEX(oecls_sock_flow_mutex);
 static DEFINE_SPINLOCK(oecls_dev_flow_lock);
@@ -106,7 +106,7 @@ void _oecls_flow_update(struct sock *sk, struct sk_buff *skb)
 	cpu = raw_smp_processor_id();
 	rcu_read_lock();
 	tb = rcu_dereference(oecls_sock_flow_table);
-	if (lo_numa_rps && skb && sk_is_loopback(sk))
+	if (lo_rps_policy && skb && sk_is_loopback(sk))
 		hash = READ_ONCE(skb->sym_hash);
 	else
 		hash = READ_ONCE(sk->sk_rxhash);
@@ -125,13 +125,13 @@ void _oecls_flow_update(struct sock *sk, struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
-static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *skb)
+static int flow_get_queue_idx(struct net_device *dev, int nid, u32 hash)
 {
 	struct oecls_numa_bound_dev_info *bound_dev = NULL;
 	struct oecls_netdev_info *netdev_info;
 	struct oecls_numa_info *numa_info;
 	int rxq_id, rxq_num, i, devid;
-	u32 hash, index;
+	u32 index;
 
 	numa_info = get_oecls_numa_info(nid);
 	if (!numa_info)
@@ -156,7 +156,6 @@ static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *s
 	if (rxq_num == 0)
 		return -1;
 
-	hash = skb_get_hash(skb);
 	index = hash % rxq_num;
 
 	i = 0;
@@ -167,24 +166,24 @@ static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *s
 		return rxq_id;
 	}
 
-	oecls_debug("skb:%p, no found rxq\n", skb);
+	oecls_debug("no found rxq\n");
 	return -1;
 }
 
 static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
-		struct oecls_dev_flow *old_rflow, int old_rxq_id, u16 next_cpu)
+		struct oecls_dev_flow *old_rflow, int old_rxq_id, u16 next_cpu, u32 hash)
 {
 	struct netdev_rx_queue *rxqueue;
 	struct oecls_dev_flow_table *dtb;
 	struct oecls_dev_flow *rflow;
-	u32 flow_id, hash;
 	int rxq_index, rc;
+	u32 flow_id;
 
 	if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
 	    !(dev->features & NETIF_F_NTUPLE))
 		return;
 
-	rxq_index = flow_get_queue_idx(dev, cpu_to_node(next_cpu), skb);
+	rxq_index = flow_get_queue_idx(dev, cpu_to_node(next_cpu), hash);
 	if (rxq_index == skb_get_rx_queue(skb) || rxq_index < 0)
 		return;
 
@@ -193,7 +192,6 @@ static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
 	if (!dtb)
 		return;
 
-	hash = skb_get_hash(skb);
 	flow_id = hash & dtb->mask;
 	rflow = &dtb->flows[flow_id];
 	//Return if someone has configured this.
@@ -227,46 +225,38 @@ static bool oecls_do_hash(void)
 	return get_random_u32() % 100 < rcpu_probability;
 }
 
-static inline int get_cpu_in_mask(int tcpu, u32 hash)
+static inline u32 get_rps_cpu(u32 last_recv_cpu, u32 hash, int policy)
 {
-	const struct cpumask *mask;
-	int nr_cpus, cpu, index;
-
-	mask = cpumask_of_node(cpu_to_node(tcpu));
-
-	nr_cpus = cpumask_weight(mask);
-	if (nr_cpus == 0)
-		return -1;
-
-	index = reciprocal_scale(hash, nr_cpus);
-	if (index < 0)
-		return -1;
-
-	cpu = cpumask_first(mask);
-	while (--nr_cpus > 0) {
-		if (index == 0)
-			break;
-		cpu = cpumask_next(cpu, mask);
-		index--;
+	u32 newcpu, index;
+
+	if (policy == 1) {
+		newcpu = cpumask_first(cpumask_of_node(cpu_to_node(last_recv_cpu)));
+		index = rps_cpus[reciprocal_scale(hash, rps_cpus_nums - 1)];
+		newcpu += index;
+	} else if (policy == 2) {
+		newcpu = cpumask_first(topology_cluster_cpumask(last_recv_cpu));
+		index = cluster_rps_cpus[reciprocal_scale(hash, cluster_rps_cpus_nums - 1)];
+		newcpu += index;
+	} else {
+		newcpu = last_recv_cpu;
 	}
 
-	return cpu;
+	return newcpu;
 }
 
 static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 		struct oecls_sock_flow_table *tb, struct oecls_dev_flow_table *dtb,
 		int old_rxq_id, int *rcpu, int *last_qtail)
 {
-	u32 last_recv_cpu, hash, val, cpu, tcpu;
+	u32 last_recv_cpu, hash, val, cpu, tcpu, newcpu;
 	struct oecls_dev_flow *rflow;
-	int newcpu;
-
-	cpu = raw_smp_processor_id();
 	skb_reset_network_header(skb);
 	hash = skb_get_hash(skb);
 	if (!hash)
 		return;
 
+	cpu = raw_smp_processor_id();
 	val = READ_ONCE(tb->ents[hash & tb->mask]);
 	last_recv_cpu = val & oecls_cpu_mask;
 	rflow = &dtb->flows[hash & dtb->mask];
@@ -282,7 +272,7 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 	}
 	if (last_recv_cpu != cpu)
 		return;
-	newcpu = get_cpu_in_mask(last_recv_cpu, hash);
+	newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy);
 	if (newcpu < 0)
 		newcpu = cpu;
 	if (newcpu == cpu) {
@@ -294,7 +284,7 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 		return;
 	}
 
-	newcpu = get_cpu_in_mask(last_recv_cpu, hash);
+	newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy);
 	if (newcpu >= 0)
 		*rcpu = newcpu;
 	else
@@ -309,14 +299,13 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 		return;
 
 	if (tcpu >= nr_cpu_ids)
-		set_oecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu);
+		set_oecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu, hash);
 }
 
-static inline void loopback_numa_rps(struct sk_buff *skb, int *rcpu)
+static inline void do_loopback_rps(struct sk_buff *skb, int *rcpu)
 {
+	u32 last_recv_cpu, hash, val, newcpu;
 	struct oecls_sock_flow_table *stb;
-	u32 last_recv_cpu, hash, val;
-	int newcpu, index;
 
 	skb_reset_network_header(skb);
 	hash = __skb_get_hash_symmetric(skb);
@@ -338,9 +327,36 @@ static inline void loopback_numa_rps(struct sk_buff *skb, int *rcpu)
 	if ((val ^ hash) & ~oecls_cpu_mask)
 		return;
 
-	newcpu = cpumask_first(cpumask_of_node(cpu_to_node(last_recv_cpu)));
-	index = rps_cpus[reciprocal_scale(hash, rps_cpus_nums - 1)];
-	newcpu += index;
+	newcpu = get_rps_cpu(last_recv_cpu, hash, lo_rps_policy);
 	*rcpu = newcpu;
 	oecls_debug("last:%u curcpu:%d newcpu:%d\n", last_recv_cpu, raw_smp_processor_id(), newcpu);
+}
+
+static inline void do_flow_rps(struct sk_buff *skb, int *rcpu)
+{
+	u32 last_recv_cpu, hash, val, newcpu;
+	struct oecls_sock_flow_table *stb;
+
+	skb_reset_network_header(skb);
+	hash = skb_get_hash(skb);
+	if (!hash)
+		return;
+
+	rcu_read_lock();
+	stb = rcu_dereference(oecls_sock_flow_table);
+	if (stb) {
+		val = READ_ONCE(stb->ents[hash & stb->mask]);
+		last_recv_cpu = val & oecls_cpu_mask;
+	} else {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+
+	if ((val ^ hash) & ~oecls_cpu_mask)
+		return;
+
+	newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy);
+	*rcpu = newcpu;
+	oecls_debug("last:%u curcpu:%d newcpu:%d\n", last_recv_cpu, raw_smp_processor_id(), newcpu);
 }
@@ -358,14 +374,19 @@ void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
 	if (!ndev)
 		return;
 
-	if (lo_numa_rps && (ndev->type == ARPHRD_LOOPBACK && ndev->flags & IFF_LOOPBACK)) {
-		loopback_numa_rps(skb, cpu);
+	if (lo_rps_policy && (ndev->type == ARPHRD_LOOPBACK && ndev->flags & IFF_LOOPBACK)) {
+		do_loopback_rps(skb, cpu);
 		return;
 	}
 
 	if (!is_oecls_config_netdev(ndev->name))
 		return;
 
+	if (rps_policy && mode == 0) {
+		do_flow_rps(skb, cpu);
+		return;
+	}
+
 	rxqueue = ndev->_rx;
 	if (skb_rx_queue_recorded(skb)) {
 		rxq_id = skb_get_rx_queue(skb);
@@ -377,7 +398,6 @@ void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
 		rxqueue += rxq_id;
 	}
 
-	// oecls_debug("skb:%px, dev:%s, rxq_id:%d\n", skb, ndev->name, rxq_id);
 	if (rxq_id < 0)
 		return;
 
@@ -506,30 +526,44 @@ static int oecls_sock_flow_table_release(void)
 	mutex_unlock(&oecls_sock_flow_mutex);
 	synchronize_rcu();
 	vfree(tb);
-	kfree(rps_cpus);
 	return 0;
 }
 
-static int oecls_sock_flow_table_init(void)
+static int oecls_rps_cpus_init(void)
 {
-	struct oecls_sock_flow_table *table;
-	int size = sft_num, i;
+	int i;
 
+	cluster_rps_cpus_nums = cpumask_weight(topology_cluster_cpumask(0));
 	rps_cpus_nums = cpumask_weight(cpumask_of_node(0));
 	rps_cpus = kmalloc_array(rps_cpus_nums, sizeof(u16), GFP_KERNEL);
 	if (!rps_cpus)
 		return -ENOMEM;
 	for (i = 0; i < rps_cpus_nums; i++)
 		rps_cpus[i] = i;
-	oecls_debug("rps_cpus_nums:%d\n", rps_cpus_nums);
 
-	size = roundup_pow_of_two(size);
-	table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size));
-	if (!table) {
+	cluster_rps_cpus = kmalloc_array(cluster_rps_cpus_nums, sizeof(u16), GFP_KERNEL);
+	if (!cluster_rps_cpus) {
 		kfree(rps_cpus);
 		return -ENOMEM;
 	}
+	for (i = 0; i < cluster_rps_cpus_nums; i++)
+		cluster_rps_cpus[i] = i;
+
+	oecls_debug("rps_cpus_nums:%d cluster_rps_cpus_nums:%d\n",
+		    rps_cpus_nums, cluster_rps_cpus_nums);
+	return 0;
+}
+
+static int oecls_sock_flow_table_init(void)
+{
+	struct oecls_sock_flow_table *table;
+	int size = sft_num, i;
+
+	size = roundup_pow_of_two(size);
+	table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size));
+	if (!table)
+		return -ENOMEM;
 
 	oecls_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
 	oecls_debug("nr_cpu_ids:%d, oecls_cpu_mask:0x%x\n", nr_cpu_ids, oecls_cpu_mask);
@@ -557,37 +591,40 @@ int oecls_flow_res_init(void)
 {
 	int err;
 
-	err = oecls_sock_flow_table_init();
+	err = oecls_rps_cpus_init();
 	if (err)
 		return err;
 
+	err = oecls_sock_flow_table_init();
+	if (err)
+		goto free;
+
 	err = oecls_dev_flow_table_init();
-	if (err) {
-		oecls_sock_flow_table_release();
-		return err;
-	}
+	if (err)
+		goto clean;
 
 	if (mode != 0) //for lo rps
 		RCU_INIT_POINTER(oecls_ops, &oecls_flow_ops);
 	synchronize_rcu();
-
-#ifdef CONFIG_RPS
 	static_branch_inc(&oecls_rps_needed);
-	oecls_debug("oecls_rps_needed true\n");
-#endif
 	return 0;
+clean:
+	oecls_sock_flow_table_release();
+free:
+	kfree(cluster_rps_cpus);
+	kfree(rps_cpus);
+	return err;
 }
 
 void oecls_flow_res_clean(void)
 {
-#ifdef CONFIG_RPS
 	static_branch_dec(&oecls_rps_needed);
-	oecls_debug("oecls_rps_needed false\n");
-#endif
 	rcu_assign_pointer(oecls_ops, NULL);
 	synchronize_rcu();
 	oecls_sock_flow_table_release();
 	oecls_dev_flow_table_release();
+	kfree(cluster_rps_cpus);
+	kfree(rps_cpus);
 }
diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c
index f0e69ac6b728..01a460beb053 100644
--- a/net/oenetcls/oenetcls_main.c
+++ b/net/oenetcls/oenetcls_main.c
@@ -60,9 +60,13 @@ unsigned int sft_num = 0x100000;
 module_param(sft_num, uint, 0444);
 MODULE_PARM_DESC(sft_num, "sock flow table entries, default 0x100000");
 
-int lo_numa_rps;
-module_param(lo_numa_rps, int, 0644);
-MODULE_PARM_DESC(lo_numa_rps, "enable loopback flow numa affinity");
+int rps_policy = 1;
+module_param(rps_policy, int, 0644);
+MODULE_PARM_DESC(rps_policy, "phy nic rps policy (0 no rps, 1 numa, 2 cluster), default 1");
+
+int lo_rps_policy;
+module_param(lo_rps_policy, int, 0644);
+MODULE_PARM_DESC(lo_rps_policy, "loopback rps policy (0 no rps, 1 numa, 2 cluster), default 0");
 
 static int rxq_multiplex_limit = 1;
 module_param(rxq_multiplex_limit, int, 0444);
@@ -526,7 +530,7 @@ static int init_single_oecls_dev(char *if_name, unsigned int length)
 	ret = oecls_filter_enable(dev_name, &old_state);
 	if (ret) {
 		oecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret);
-		if (lo_numa_rps)
+		if (lo_rps_policy)
 			goto out;
 	}
 }
@@ -1119,7 +1123,7 @@ static __init int oecls_init(void)
 		err = oecls_ntuple_res_init();
 		if (err)
 			goto clean_rxq;
-		if (lo_numa_rps)
+		if (lo_rps_policy || rps_policy)
 			err = oecls_flow_res_init();
 	} else {
 		err = oecls_flow_res_init();
@@ -1128,7 +1132,7 @@ static __init int oecls_init(void)
 	if (err)
 		goto clean_rxq;
 
-	if (lo_numa_rps)
+	if (lo_rps_policy)
 		static_branch_inc(&oecls_localrps_needed);
 	return 0;
 
@@ -1142,12 +1146,12 @@ static __exit void oecls_exit(void)
 {
-	if (lo_numa_rps)
+	if (lo_rps_policy)
 		static_branch_dec(&oecls_localrps_needed);
 
 	if (mode == 0) {
 		oecls_ntuple_res_clean();
-		if (lo_numa_rps)
+		if (lo_rps_policy || rps_policy)
 			oecls_flow_res_clean();
 	} else {
 		oecls_flow_res_clean();
diff --git a/net/oenetcls/oenetcls_ntuple.c b/net/oenetcls/oenetcls_ntuple.c
index 76eee047783c..9d59fe509e6c 100644
--- a/net/oenetcls/oenetcls_ntuple.c
+++ b/net/oenetcls/oenetcls_ntuple.c
@@ -641,7 +641,7 @@ static void clean_oecls_sk_rules(void)
 	mutex_unlock(&oecls_sk_rules.mutex);
 }
 
-static const struct oecls_hook_ops oecls_ntuple_ops = {
+static struct oecls_hook_ops oecls_ntuple_ops = {
 	.oecls_flow_update = _oecls_flow_update,
 	.oecls_set_localcpu = _oecls_set_cpu,
 	.oecls_set_cpu = NULL,
@@ -658,6 +658,8 @@ int oecls_ntuple_res_init(void)
 	}
 
 	init_oecls_sk_rules();
+	if (rps_policy)
+		oecls_ntuple_ops.oecls_set_cpu = _oecls_set_cpu;
 	RCU_INIT_POINTER(oecls_ops, &oecls_ntuple_ops);
 	synchronize_rcu();
 	return 0;
-- 
2.34.1
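
For reference only (not part of the patch): a minimal userspace sketch of the hash-to-CPU
mapping that the new get_rps_cpu() implements, so the three policy values can be read in
isolation. The node/cluster sizes, the *_first_cpu() helpers and main() are illustrative
assumptions standing in for cpumask_of_node()/topology_cluster_cpumask(); only the
precomputed-index-map plus reciprocal_scale() pattern mirrors the patch.

/*
 * Illustrative sketch (assumed sizes, not kernel code): policy 0 keeps the
 * flow on the CPU that last received it, policy 1 picks a CPU inside that
 * CPU's NUMA node, policy 2 picks a CPU inside its cluster.  The precomputed
 * index maps replace the old per-packet cpumask traversal.
 */
#include <stdint.h>
#include <stdio.h>

#define NODE_CPUS	8	/* assumed CPUs per NUMA node */
#define CLUSTER_CPUS	4	/* assumed CPUs per cluster */

static uint16_t rps_cpus[NODE_CPUS];		/* filled once at init: 0, 1, 2, ... */
static uint16_t cluster_rps_cpus[CLUSTER_CPUS];

/* Same multiply-shift scaling the kernel's reciprocal_scale() performs. */
static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
	return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

/* Stand-ins for cpumask_first(cpumask_of_node(...)) and the cluster variant. */
static uint32_t node_first_cpu(uint32_t cpu)	{ return cpu - cpu % NODE_CPUS; }
static uint32_t cluster_first_cpu(uint32_t cpu)	{ return cpu - cpu % CLUSTER_CPUS; }

static uint32_t get_rps_cpu(uint32_t last_recv_cpu, uint32_t hash, int policy)
{
	if (policy == 1)	/* numa affinity: node base CPU + scaled map index */
		return node_first_cpu(last_recv_cpu) +
		       rps_cpus[reciprocal_scale(hash, NODE_CPUS - 1)];
	if (policy == 2)	/* cluster affinity: cluster base CPU + scaled map index */
		return cluster_first_cpu(last_recv_cpu) +
		       cluster_rps_cpus[reciprocal_scale(hash, CLUSTER_CPUS - 1)];
	return last_recv_cpu;	/* no rps: leave the flow where it was received */
}

int main(void)
{
	uint32_t i;

	for (i = 0; i < NODE_CPUS; i++)
		rps_cpus[i] = i;
	for (i = 0; i < CLUSTER_CPUS; i++)
		cluster_rps_cpus[i] = i;

	/* Example: flow hashed to 0xdeadbeef, last received on CPU 13. */
	for (int policy = 0; policy <= 2; policy++)
		printf("policy %d -> cpu %u\n", policy,
		       get_rps_cpu(13, 0xdeadbeef, policy));
	return 0;
}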