From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICBFCS

--------------------------------

Add NUMA-affinity RPS to balance softirq load across CPUs; this fixes a
performance regression seen under high load.

Fixes: d60758b0ffcd ("net/oenetcls: introduce oenetcls for network optimization")
Signed-off-by: Yue Haibing <yuehaibing@huawei.com>

Conflicts:
	net/oenetcls/oenetcls_ntuple.c

Signed-off-by: Liu Jian <liujian56@huawei.com>
---
A standalone sketch of the node-local CPU selection is appended after the
diff for reference.

 include/linux/oenetcls.h       | 32 ++++++++++++----
 net/core/dev.c                 | 19 ++++++++-
 net/oenetcls/oenetcls_flow.c   | 70 +++++++++++++++++++++++++++-------
 net/oenetcls/oenetcls_main.c   |  7 +++-
 net/oenetcls/oenetcls_ntuple.c |  2 +-
 5 files changed, 106 insertions(+), 24 deletions(-)

diff --git a/include/linux/oenetcls.h b/include/linux/oenetcls.h
index 29c0db40971f..09f89131f32b 100644
--- a/include/linux/oenetcls.h
+++ b/include/linux/oenetcls.h
@@ -5,12 +5,14 @@
 struct oecls_hook_ops {
 	void (*oecls_cfg_rxcls)(struct sock *sk, int is_del);
 	void (*oecls_flow_update)(struct sock *sk);
-	void (*oecls_set_cpu)(struct sk_buff *skb);
+	void (*oecls_set_cpu)(struct sk_buff *skb, int *cpu, int *last_qtail);
 	bool (*oecls_timeout)(struct net_device *dev, u16 rxq_index,
 			      u32 flow_id, u16 filter_id);
 };
 
+typedef int (*enqueue_f)(struct sk_buff *skb, int cpu, unsigned int *qtail);
 extern const struct oecls_hook_ops __rcu *oecls_ops;
+extern struct static_key_false oecls_rps_needed;
 
 static inline void oenetcls_cfg_rxcls(struct sock *sk, int is_del)
 {
@@ -34,27 +36,43 @@ static inline void oenetcls_flow_update(struct sock *sk)
 	rcu_read_unlock();
 }
 
-static inline void oenetcls_skb_set_cpu(struct sk_buff *skb)
+static inline bool
+oenetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
 {
 	const struct oecls_hook_ops *ops;
+	int cpu, last_qtail;
+	bool result = false;
 
 	rcu_read_lock();
 	ops = rcu_dereference(oecls_ops);
-	if (ops && ops->oecls_set_cpu)
-		ops->oecls_set_cpu(skb);
+	if (ops && ops->oecls_set_cpu) {
+		ops->oecls_set_cpu(skb, &cpu, &last_qtail);
+		if (cpu >= 0) {
+			*ret = enq_func(skb, cpu, &last_qtail);
+			result = true;
+		}
+	}
 	rcu_read_unlock();
+	return result;
 }
 
-static inline void oenetcls_skblist_set_cpu(struct list_head *head)
+static inline void
+oenetcls_skblist_set_cpu(struct list_head *head, enqueue_f enq_func)
 {
 	const struct oecls_hook_ops *ops;
 	struct sk_buff *skb, *next;
+	int cpu, last_qtail;
 
 	rcu_read_lock();
 	ops = rcu_dereference(oecls_ops);
 	if (ops && ops->oecls_set_cpu) {
-		list_for_each_entry_safe(skb, next, head, list)
-			ops->oecls_set_cpu(skb);
+		list_for_each_entry_safe(skb, next, head, list) {
+			ops->oecls_set_cpu(skb, &cpu, &last_qtail);
+			if (cpu >= 0) {
+				skb_list_del_init(skb);
+				enq_func(skb, cpu, &last_qtail);
+			}
+		}
 	}
 	rcu_read_unlock();
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index e2d61f786c8a..1fe135925df8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -162,6 +162,8 @@
 #include <linux/oenetcls.h>
 const struct oecls_hook_ops __rcu *oecls_ops __read_mostly;
 EXPORT_SYMBOL_GPL(oecls_ops);
+struct static_key_false oecls_rps_needed __read_mostly;
+EXPORT_SYMBOL(oecls_rps_needed);
 #endif
 
 static DEFINE_SPINLOCK(ptype_lock);
@@ -5883,6 +5885,10 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 
 	rcu_read_lock();
 #ifdef CONFIG_RPS
+#if IS_ENABLED(CONFIG_OENETCLS)
+	if (static_branch_unlikely(&oecls_rps_needed))
+		goto oecls_rps;
+#endif
 	if (static_branch_unlikely(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -5896,7 +5902,11 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 #endif
 
 #if IS_ENABLED(CONFIG_OENETCLS)
-	oenetcls_skb_set_cpu(skb);
+oecls_rps:
+	if (oenetcls_skb_set_cpu(skb, enqueue_to_backlog, &ret)) {
+		rcu_read_unlock();
+		return ret;
+	}
 #endif
 
 	ret = __netif_receive_skb(skb);
@@ -5920,6 +5930,10 @@ void netif_receive_skb_list_internal(struct list_head *head)
 
 	rcu_read_lock();
 #ifdef CONFIG_RPS
+#if IS_ENABLED(CONFIG_OENETCLS)
+	if (static_branch_unlikely(&oecls_rps_needed))
+		goto oecls_rps_list;
+#endif
 	if (static_branch_unlikely(&rps_needed)) {
 		list_for_each_entry_safe(skb, next, head, list) {
 			struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -5935,7 +5949,8 @@ void netif_receive_skb_list_internal(struct list_head *head)
 #endif
 
 #if IS_ENABLED(CONFIG_OENETCLS)
-	oenetcls_skblist_set_cpu(head);
+oecls_rps_list:
+	oenetcls_skblist_set_cpu(head, enqueue_to_backlog);
 #endif
 
 	__netif_receive_skb_list(head);
diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c
index eb7fae400e8e..d4d9a8f15660 100644
--- a/net/oenetcls/oenetcls_flow.c
+++ b/net/oenetcls/oenetcls_flow.c
@@ -134,8 +134,7 @@ static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
 	struct oecls_dev_flow_table *dtb;
 	struct oecls_dev_flow *rflow;
 	u32 flow_id, hash;
-	u16 rxq_index;
-	int rc;
+	int rxq_index, rc;
 
 	if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
 	    !(dev->features & NETIF_F_NTUPLE))
@@ -153,7 +152,8 @@ static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
 	hash = skb_get_hash(skb);
 	flow_id = hash & dtb->mask;
 	rflow = &dtb->flows[flow_id];
-	if (rflow->isvalid && rflow->cpu == next_cpu) {
+	/* Already steered to a CPU on the target node, just refresh the timeout. */
+	if (rflow->isvalid && cpu_to_node(rflow->cpu) == cpu_to_node(next_cpu)) {
 		rflow->timeout = jiffies;
 		return;
 	}
@@ -172,15 +172,41 @@ static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
 	rflow->cpu = next_cpu;
 }
 
+static int get_cpu_in_mask(int tcpu, u32 hash)
+{
+	const struct cpumask *mask;
+	int nr_cpus, cpu, index;
+
+	mask = cpumask_of_node(cpu_to_node(tcpu));
+
+	nr_cpus = cpumask_weight(mask);
+	if (nr_cpus == 0)
+		return -1;
+
+	index = reciprocal_scale(hash, nr_cpus);
+	if (index < 0)
+		return -1;
+
+	cpu = cpumask_first(mask);
+	while (--nr_cpus > 0) {
+		if (index == 0)
+			break;
+		cpu = cpumask_next(cpu, mask);
+		index--;
+	}
+
+	return cpu;
+}
+
 static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 			    struct oecls_sock_flow_table *tb, struct oecls_dev_flow_table *dtb,
-			    int old_rxq_id)
+			    int old_rxq_id, int *rcpu, int *last_qtail)
 {
+	u32 last_recv_cpu, hash, val, cpu, tcpu;
 	struct oecls_dev_flow *rflow;
-	u32 last_recv_cpu, hash, val;
-	u32 tcpu = 0;
-	u32 cpu = raw_smp_processor_id();
+	int newcpu;
 
+	cpu = raw_smp_processor_id();
 	skb_reset_network_header(skb);
 	hash = skb_get_hash(skb);
 	if (!hash)
@@ -194,14 +220,20 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 	if ((val ^ hash) & ~oecls_cpu_mask)
 		return;
 
-	if (cpu_to_node(cpu) == cpu_to_node(last_recv_cpu))
+	newcpu = get_cpu_in_mask(last_recv_cpu, hash);
+	if (newcpu >= 0)
+		*rcpu = newcpu;
+	else
+		newcpu = last_recv_cpu;
+
+	if (cpu_to_node(cpu) == cpu_to_node(newcpu))
 		return;
 
 	if (tcpu >= nr_cpu_ids)
-		set_oecls_cpu(ndev, skb, rflow, old_rxq_id, last_recv_cpu);
+		set_oecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu);
 }
 
-static void _oecls_set_cpu(struct sk_buff *skb)
+static void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
 {
 	struct net_device *ndev = skb->dev;
 	struct oecls_sock_flow_table *stb;
@@ -209,6 +241,8 @@ static void _oecls_set_cpu(struct sk_buff *skb)
 	struct netdev_rx_queue *rxqueue;
 	int rxq_id = -1;
 
+	*cpu = -1;
+	last_qtail = 0; /* unused */
 	if (!ndev)
 		return;
 
@@ -234,7 +268,7 @@ static void _oecls_set_cpu(struct sk_buff *skb)
 	stb = rcu_dereference(oecls_sock_flow_table);
 	dtb = rcu_dereference(rxqueue->oecls_ftb);
 	if (stb && dtb)
-		__oecls_set_cpu(skb, ndev, stb, dtb, rxq_id);
+		__oecls_set_cpu(skb, ndev, stb, dtb, rxq_id, cpu, last_qtail);
 	rcu_read_unlock();
 }
 
@@ -246,13 +280,13 @@ static void oecls_dev_flow_table_free(struct rcu_head *rcu)
 	vfree(table);
 }
 
-static void oecls_dev_flow_table_cleanup(struct net_device *netdev, int qid)
+static void oecls_dev_flow_table_cleanup(struct net_device *netdev, int queues)
 {
 	struct oecls_dev_flow_table *dtb;
 	struct netdev_rx_queue *queue;
 	int i;
 
-	for (i = 0; i < qid; i++) {
+	for (i = 0; i < queues; i++) {
 		queue = netdev->_rx + i;
 		spin_lock(&oecls_dev_flow_lock);
 		dtb = rcu_dereference_protected(queue->oecls_ftb,
@@ -408,11 +442,21 @@ int oecls_flow_res_init(void)
 
 	RCU_INIT_POINTER(oecls_ops, &oecls_flow_ops);
 	synchronize_rcu();
+
+#ifdef CONFIG_RPS
+	static_branch_inc(&oecls_rps_needed);
+	oecls_debug("oecls_rps_needed true\n");
+#endif
+
 	return 0;
 }
 
 void oecls_flow_res_clean(void)
 {
+#ifdef CONFIG_RPS
+	static_branch_dec(&oecls_rps_needed);
+	oecls_debug("oecls_rps_needed false\n");
+#endif
 	rcu_assign_pointer(oecls_ops, NULL);
 	synchronize_rcu();
 
diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c
index aa5c63bf612a..458d92c23b3b 100644
--- a/net/oenetcls/oenetcls_main.c
+++ b/net/oenetcls/oenetcls_main.c
@@ -43,6 +43,10 @@ static int check_cap = 1;
 module_param(check_cap, int, 0444);
 MODULE_PARM_DESC(check_cap, "check_cap, default 1");
 
+static char irqname[64] = "comp";
+module_param_string(irqname, irqname, sizeof(irqname), 0644);
+MODULE_PARM_DESC(irqname, "nic irq name string, default comp");
+
 static bool check_params(void)
 {
 	if (mode != 0 && mode != 1)
@@ -353,7 +357,8 @@ static struct oecls_netdev_info *alloc_oecls_netdev_info(void)
 
 static bool check_irq_name(const char *irq_name, struct oecls_netdev_info *oecls_dev)
 {
-	if (!strstr(irq_name, "TxRx") && !strstr(irq_name, "comp") && !strstr(irq_name, "rx"))
+	if (!strstr(irq_name, "TxRx") && !strstr(irq_name, "comp") && !strstr(irq_name, "rx") &&
+	    strlen(irqname) > 0 && !strstr(irq_name, irqname))
 		return false;
 
 	if (strstr(irq_name, oecls_dev->dev_name))
diff --git a/net/oenetcls/oenetcls_ntuple.c b/net/oenetcls/oenetcls_ntuple.c
index a8d572270f7f..def33d30f642 100644
--- a/net/oenetcls/oenetcls_ntuple.c
+++ b/net/oenetcls/oenetcls_ntuple.c
@@ -60,7 +60,7 @@ static void add_sk_rule(int devid, u32 dip4, u16 dport, void *sk, int action, in
 	hlist_add_head(&entry->node, sk_hlist);
 	return;
 out:
-	oecls_debug("alloc failed rule:%p entry:%p\n", rule, entry);
+	oecls_debug("alloc rule failed\n");
 	kfree(entry);
 	kfree(rule);
 }
-- 
2.34.1
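
[ Reviewer note, not part of the patch: the flow.c hunk adds get_cpu_in_mask()
  to pick a CPU on the NUMA node of the last receiving CPU from the flow hash.
  Below is a minimal standalone userspace sketch of that selection, assuming a
  plain array of CPU ids as a stand-in for cpumask_of_node(); the names
  pick_node_local_cpu() and node_cpus are made up for illustration and are not
  kernel API. ]

#include <stdint.h>
#include <stdio.h>

/* Same arithmetic as the kernel's reciprocal_scale(): maps a 32-bit value
 * into the range [0, ep_ro) without a division. */
static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
	return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

/* Pick one CPU of the target NUMA node from the flow hash.  Packets of the
 * same flow keep landing on the same node-local CPU, while different flows
 * spread across the node's CPUs.  Returns -1 if the node has no CPUs,
 * mirroring get_cpu_in_mask(). */
static int pick_node_local_cpu(const int *node_cpus, int nr_cpus, uint32_t hash)
{
	if (nr_cpus <= 0)
		return -1;
	return node_cpus[reciprocal_scale(hash, (uint32_t)nr_cpus)];
}

int main(void)
{
	/* Hypothetical NUMA node with CPUs 8-15, standing in for the node
	 * cpumask of the CPU that last received for this flow. */
	const int node_cpus[] = { 8, 9, 10, 11, 12, 13, 14, 15 };
	const uint32_t hashes[] = { 0x12345678u, 0x9abcdef0u, 0xdeadbeefu };
	unsigned int i;

	for (i = 0; i < sizeof(hashes) / sizeof(hashes[0]); i++)
		printf("hash 0x%08x -> cpu %d\n", (unsigned int)hashes[i],
		       pick_node_local_cpu(node_cpus, 8, hashes[i]));
	return 0;
}

As I read the hunks, the point of hashing into the node cpumask rather than
always targeting the recorded CPU is that softirq work for many flows is
spread over all CPUs of the application's node instead of piling onto one CPU,
while each individual flow still stays on a single node-local CPU.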