[PATCH OLK-6.6 0/9] Add new oenetcls features
Add new oenetcls features

Yue Haibing (9):
  net/oenetcls: Balancing softirq to improve performance
  net/oenetcls: Add mode 2 for rps numa affinity
  net/oenetcls: Prioritize oenetcls hooks over rps
  net/oenetcls: Fix possible hash collision issue
  net/oenetcls: Add local flow NUMA-aware rps
  net/oenetcls: Supports rxq multiplexing
  net/oenetcls: Support ipv6 for ntuple mode
  net/oenetcls: Make OENETCLS default as module
  net/oenetcls: Add rps policy switch for phy NIC

 include/linux/oenetcls.h       |  66 +++++--
 include/linux/skbuff.h         |   4 +
 include/net/sock.h             |   4 +
 net/core/dev.c                 |  32 ++--
 net/core/sock.c                |   3 +
 net/ipv4/tcp.c                 |   5 +-
 net/oenetcls/Kconfig           |  10 +-
 net/oenetcls/oenetcls.h        |  30 +++-
 net/oenetcls/oenetcls_flow.c   | 305 +++++++++++++++++++++++++++------
 net/oenetcls/oenetcls_main.c   | 193 ++++++++++++++++-----
 net/oenetcls/oenetcls_ntuple.c | 255 ++++++++++++++++-----------
 11 files changed, 689 insertions(+), 218 deletions(-)

-- 
2.34.1
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully!
Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/20223
Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/63P...
From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICBFCS

--------------------------------

Add NUMA-affinity RPS to balance softirqs; this fixes a high-load
performance regression.

Fixes: d60758b0ffcd ("net/oenetcls: introduce oenetcls for network optimization")
Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
---
 include/linux/oenetcls.h       | 32 ++++++++++++----
 net/core/dev.c                 | 19 ++++++++-
 net/oenetcls/oenetcls_flow.c   | 70 +++++++++++++++++++++++++++-------
 net/oenetcls/oenetcls_main.c   |  7 +++-
 net/oenetcls/oenetcls_ntuple.c |  2 +-
 5 files changed, 106 insertions(+), 24 deletions(-)

diff --git a/include/linux/oenetcls.h b/include/linux/oenetcls.h
index 29c0db40971f..09f89131f32b 100644
--- a/include/linux/oenetcls.h
+++ b/include/linux/oenetcls.h
@@ -5,12 +5,14 @@
 struct oecls_hook_ops {
 	void (*oecls_cfg_rxcls)(struct sock *sk, int is_del);
 	void (*oecls_flow_update)(struct sock *sk);
-	void (*oecls_set_cpu)(struct sk_buff *skb);
+	void (*oecls_set_cpu)(struct sk_buff *skb, int *cpu, int *last_qtail);
 	bool (*oecls_timeout)(struct net_device *dev, u16 rxq_index,
 			      u32 flow_id, u16 filter_id);
 };
 
+typedef int (*enqueue_f)(struct sk_buff *skb, int cpu, unsigned int *qtail);
 extern const struct oecls_hook_ops __rcu *oecls_ops;
+extern struct static_key_false oecls_rps_needed;
 
 static inline void oenetcls_cfg_rxcls(struct sock *sk, int is_del)
 {
@@ -34,27 +36,43 @@ static inline void oenetcls_flow_update(struct sock *sk)
 	rcu_read_unlock();
 }
 
-static inline void oenetcls_skb_set_cpu(struct sk_buff *skb)
+static inline bool
+oenetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
 {
 	const struct oecls_hook_ops *ops;
+	int cpu, last_qtail;
+	bool result = false;
 
 	rcu_read_lock();
 	ops = rcu_dereference(oecls_ops);
-	if (ops && ops->oecls_set_cpu)
-		ops->oecls_set_cpu(skb);
+	if (ops && ops->oecls_set_cpu) {
+		ops->oecls_set_cpu(skb, &cpu, &last_qtail);
+		if (cpu >= 0) {
+			*ret = enq_func(skb, cpu, &last_qtail);
+			result = true;
+		}
+	}
 	rcu_read_unlock();
+	return result;
 }
 
-static inline void oenetcls_skblist_set_cpu(struct list_head *head)
+static inline void
+oenetcls_skblist_set_cpu(struct list_head *head, enqueue_f enq_func)
 {
 	const struct oecls_hook_ops *ops;
 	struct sk_buff *skb, *next;
+	int cpu, last_qtail;
 
 	rcu_read_lock();
 	ops = rcu_dereference(oecls_ops);
 	if (ops && ops->oecls_set_cpu) {
-		list_for_each_entry_safe(skb, next, head, list)
-			ops->oecls_set_cpu(skb);
+		list_for_each_entry_safe(skb, next, head, list) {
+			ops->oecls_set_cpu(skb, &cpu, &last_qtail);
+			if (cpu >= 0) {
+				skb_list_del_init(skb);
+				enq_func(skb, cpu, &last_qtail);
+			}
+		}
 	}
 	rcu_read_unlock();
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index e2d61f786c8a..1fe135925df8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -162,6 +162,8 @@
 #include <linux/oenetcls.h>
 const struct oecls_hook_ops __rcu *oecls_ops __read_mostly;
 EXPORT_SYMBOL_GPL(oecls_ops);
+struct static_key_false oecls_rps_needed __read_mostly;
+EXPORT_SYMBOL(oecls_rps_needed);
 #endif
 
 static DEFINE_SPINLOCK(ptype_lock);
@@ -5883,6 +5885,10 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 
 	rcu_read_lock();
 #ifdef CONFIG_RPS
+#if IS_ENABLED(CONFIG_OENETCLS)
+	if (static_branch_unlikely(&oecls_rps_needed))
+		goto oecls_rps;
+#endif
 	if (static_branch_unlikely(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -5896,7 +5902,11 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 #endif
 
 #if IS_ENABLED(CONFIG_OENETCLS)
-	oenetcls_skb_set_cpu(skb);
+oecls_rps:
+	if (oenetcls_skb_set_cpu(skb, enqueue_to_backlog, &ret)) {
+		rcu_read_unlock();
+		return ret;
+	}
 #endif
 
 	ret = __netif_receive_skb(skb);
@@ -5920,6 +5930,10 @@ void netif_receive_skb_list_internal(struct list_head *head)
 
 	rcu_read_lock();
 #ifdef CONFIG_RPS
+#if IS_ENABLED(CONFIG_OENETCLS)
+	if (static_branch_unlikely(&oecls_rps_needed))
+		goto oecls_rps_list;
+#endif
 	if (static_branch_unlikely(&rps_needed)) {
 		list_for_each_entry_safe(skb, next, head, list) {
 			struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -5935,7 +5949,8 @@ void netif_receive_skb_list_internal(struct list_head *head)
 #endif
 
 #if IS_ENABLED(CONFIG_OENETCLS)
-	oenetcls_skblist_set_cpu(head);
+oecls_rps_list:
+	oenetcls_skblist_set_cpu(head, enqueue_to_backlog);
 #endif
 
 	__netif_receive_skb_list(head);
diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c
index eb7fae400e8e..d4d9a8f15660 100644
--- a/net/oenetcls/oenetcls_flow.c
+++ b/net/oenetcls/oenetcls_flow.c
@@ -134,8 +134,7 @@ static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
 	struct oecls_dev_flow_table *dtb;
 	struct oecls_dev_flow *rflow;
 	u32 flow_id, hash;
-	u16 rxq_index;
-	int rc;
+	int rxq_index, rc;
 
 	if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
 	    !(dev->features & NETIF_F_NTUPLE))
@@ -153,7 +152,8 @@ static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
 	hash = skb_get_hash(skb);
 	flow_id = hash & dtb->mask;
 	rflow = &dtb->flows[flow_id];
-	if (rflow->isvalid && rflow->cpu == next_cpu) {
+	//Return if someone has configured this.
+	if (rflow->isvalid && cpu_to_node(rflow->cpu) == cpu_to_node(next_cpu)) {
 		rflow->timeout = jiffies;
 		return;
 	}
@@ -172,15 +172,41 @@ static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
 	rflow->cpu = next_cpu;
 }
 
+static int get_cpu_in_mask(int tcpu, u32 hash)
+{
+	const struct cpumask *mask;
+	int nr_cpus, cpu, index;
+
+	mask = cpumask_of_node(cpu_to_node(tcpu));
+	nr_cpus = cpumask_weight(mask);
+	if (nr_cpus == 0)
+		return -1;
+
+	index = reciprocal_scale(hash, nr_cpus);
+	if (index < 0)
+		return -1;
+
+	cpu = cpumask_first(mask);
+	while (--nr_cpus > 0) {
+		if (index == 0)
+			break;
+		cpu = cpumask_next(cpu, mask);
+		index--;
+	}
+
+	return cpu;
+}
+
 static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 			    struct oecls_sock_flow_table *tb,
 			    struct oecls_dev_flow_table *dtb,
-			    int old_rxq_id)
+			    int old_rxq_id, int *rcpu, int *last_qtail)
 {
+	u32 last_recv_cpu, hash, val, cpu, tcpu;
 	struct oecls_dev_flow *rflow;
-	u32 last_recv_cpu, hash, val;
-	u32 tcpu = 0;
-	u32 cpu = raw_smp_processor_id();
+	int newcpu;
 
+	cpu = raw_smp_processor_id();
 	skb_reset_network_header(skb);
 	hash = skb_get_hash(skb);
 	if (!hash)
@@ -194,14 +220,20 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 	if ((val ^ hash) & ~oecls_cpu_mask)
 		return;
 
-	if (cpu_to_node(cpu) == cpu_to_node(last_recv_cpu))
+	newcpu = get_cpu_in_mask(last_recv_cpu, hash);
+	if (newcpu >= 0)
+		*rcpu = newcpu;
+	else
+		newcpu = last_recv_cpu;
+
+	if (cpu_to_node(cpu) == cpu_to_node(newcpu))
 		return;
 
 	if (tcpu >= nr_cpu_ids)
-		set_oecls_cpu(ndev, skb, rflow, old_rxq_id, last_recv_cpu);
+		set_oecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu);
 }
 
-static void _oecls_set_cpu(struct sk_buff *skb)
+static void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
 {
 	struct net_device *ndev = skb->dev;
 	struct oecls_sock_flow_table *stb;
@@ -209,6 +241,8 @@
 	struct netdev_rx_queue *rxqueue;
 	int rxq_id = -1;
 
+	*cpu = -1;
+	last_qtail = 0;//unused
 	if (!ndev)
 		return;
 
@@ -234,7 +268,7 @@
 	stb = rcu_dereference(oecls_sock_flow_table);
 	dtb = rcu_dereference(rxqueue->oecls_ftb);
 	if (stb && dtb)
-		__oecls_set_cpu(skb, ndev, stb, dtb, rxq_id);
+		__oecls_set_cpu(skb, ndev, stb, dtb, rxq_id, cpu, last_qtail);
 
 	rcu_read_unlock();
 }
@@ -246,13 +280,13 @@
 	vfree(table);
 }
 
-static void oecls_dev_flow_table_cleanup(struct net_device *netdev, int qid)
+static void oecls_dev_flow_table_cleanup(struct net_device *netdev, int queues)
 {
 	struct oecls_dev_flow_table *dtb;
 	struct netdev_rx_queue *queue;
 	int i;
 
-	for (i = 0; i < qid; i++) {
+	for (i = 0; i < queues; i++) {
 		queue = netdev->_rx + i;
 		spin_lock(&oecls_dev_flow_lock);
 		dtb = rcu_dereference_protected(queue->oecls_ftb,
@@ -408,11 +442,21 @@
 
 	RCU_INIT_POINTER(oecls_ops, &oecls_flow_ops);
 	synchronize_rcu();
+
+#ifdef CONFIG_RPS
+	static_branch_inc(&oecls_rps_needed);
+	oecls_debug("oecls_rps_needed true\n");
+#endif
+
 	return 0;
 }
 
 void oecls_flow_res_clean(void)
 {
+#ifdef CONFIG_RPS
+	static_branch_dec(&oecls_rps_needed);
+	oecls_debug("oecls_rps_needed false\n");
+#endif
 	rcu_assign_pointer(oecls_ops, NULL);
 	synchronize_rcu();
diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c
index aa5c63bf612a..458d92c23b3b 100644
--- a/net/oenetcls/oenetcls_main.c
+++ b/net/oenetcls/oenetcls_main.c
@@ -43,6 +43,10 @@ static int check_cap = 1;
 module_param(check_cap, int, 0444);
 MODULE_PARM_DESC(check_cap, "check_cap, default 1");
 
+static char irqname[64] = "comp";
+module_param_string(irqname, irqname, sizeof(irqname), 0644);
+MODULE_PARM_DESC(irqname, "nic irq name string, default comp");
+
 static bool check_params(void)
 {
 	if (mode != 0 && mode != 1)
@@ -353,7 +357,8 @@ static struct oecls_netdev_info *alloc_oecls_netdev_info(void)
 
 static bool check_irq_name(const char *irq_name, struct oecls_netdev_info *oecls_dev)
 {
-	if (!strstr(irq_name, "TxRx") && !strstr(irq_name, "comp") && !strstr(irq_name, "rx"))
+	if (!strstr(irq_name, "TxRx") && !strstr(irq_name, "comp") && !strstr(irq_name, "rx") &&
+	    strlen(irqname) > 0 && !strstr(irq_name, irqname))
 		return false;
 
 	if (strstr(irq_name, oecls_dev->dev_name))
diff --git a/net/oenetcls/oenetcls_ntuple.c b/net/oenetcls/oenetcls_ntuple.c
index a8d572270f7f..def33d30f642 100644
--- a/net/oenetcls/oenetcls_ntuple.c
+++ b/net/oenetcls/oenetcls_ntuple.c
@@ -60,7 +60,7 @@ static void add_sk_rule(int devid, u32 dip4, u16 dport, void *sk, int action, int ruleid, int cpu)
 	hlist_add_head(&entry->node, sk_hlist);
 	return;
 out:
-	oecls_debug("alloc failed rule:%p entry:%p\n", rule, entry);
+	oecls_debug("alloc rule failed\n");
 	kfree(entry);
 	kfree(rule);
 }
-- 
2.34.1
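For reference, the selection done by get_cpu_in_mask() above reduces to scaling the 32-bit flow hash into the set of CPUs on the target NUMA node and walking to the index-th CPU of that mask. A minimal userspace sketch of the same arithmetic (the kernel's reciprocal_scale() is ((u64)hash * n) >> 32; the node_cpus array is a stand-in for cpumask_of_node() with an assumed topology):

#include <stdint.h>
#include <stdio.h>

/* Same formula as the kernel's reciprocal_scale(): maps hash into [0, n). */
static uint32_t reciprocal_scale(uint32_t hash, uint32_t n)
{
	return (uint32_t)(((uint64_t)hash * n) >> 32);
}

int main(void)
{
	/* Assumed stand-in for cpumask_of_node(cpu_to_node(tcpu)). */
	int node_cpus[] = { 8, 9, 10, 11, 12, 13, 14, 15 };
	uint32_t nr_cpus = sizeof(node_cpus) / sizeof(node_cpus[0]);
	uint32_t hash = 0x9e3779b9;	/* example flow hash */

	/* Equivalent of walking cpumask_next() index times in get_cpu_in_mask(). */
	printf("flow steered to CPU %d\n", node_cpus[reciprocal_scale(hash, nr_cpus)]);
	return 0;
}

Because the hash is uniformly distributed, flows spread evenly over the node's CPUs while staying NUMA-local to the last receiving CPU.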
From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/ICBFCS

--------------------------------

Add mode 2: NUMA-affinity RPS to balance softirqs for VMs.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
---
 net/oenetcls/oenetcls.h      |  2 ++
 net/oenetcls/oenetcls_flow.c | 30 +++++++++++++++++++
 net/oenetcls/oenetcls_main.c | 56 +++++++++++++++++++++++++++++-------
 3 files changed, 81 insertions(+), 7 deletions(-)

diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h
index 0be09b153428..255d368ac911 100644
--- a/net/oenetcls/oenetcls.h
+++ b/net/oenetcls/oenetcls.h
@@ -131,6 +131,8 @@ struct cfg_param {
 
 extern int match_ip_flag;
 extern int debug;
+extern int mode;
+extern int rcpu_probability;
 extern int oecls_netdev_num;
 extern int oecls_numa_num;
 
diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c
index d4d9a8f15660..07aca1308ec0 100644
--- a/net/oenetcls/oenetcls_flow.c
+++ b/net/oenetcls/oenetcls_flow.c
@@ -172,6 +172,17 @@ static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
 	rflow->cpu = next_cpu;
 }
 
+static bool oecls_do_hash(void)
+{
+	if (rcpu_probability <= 0)
+		return false;
+
+	if (rcpu_probability >= 100)
+		return true;
+
+	return get_random_u32() % 100 < rcpu_probability;
+}
+
 static int get_cpu_in_mask(int tcpu, u32 hash)
 {
 	const struct cpumask *mask;
@@ -220,6 +231,25 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 	if ((val ^ hash) & ~oecls_cpu_mask)
 		return;
 
+	if (mode == 2) {
+		if (!oecls_do_hash()) {
+			*rcpu = last_recv_cpu;
+			return;
+		}
+		if (last_recv_cpu != cpu)
+			return;
+		newcpu = get_cpu_in_mask(last_recv_cpu, hash);
+		if (newcpu < 0)
+			newcpu = cpu;
+		if (newcpu == cpu) {
+			newcpu = cpumask_first(cpumask_of_node(cpu_to_node(cpu)));
+			newcpu = newcpu + (cpu + 1) % (nr_cpu_ids / oecls_numa_num);
+		}
+		oecls_debug("last_recv_cpu:%d irq_cpu:%d newcpu:%d\n",
+			    last_recv_cpu, cpu, newcpu);
+		*rcpu = newcpu;
+		return;
+	}
+
 	newcpu = get_cpu_in_mask(last_recv_cpu, hash);
 	if (newcpu >= 0)
 		*rcpu = newcpu;
diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c
index 458d92c23b3b..4c542e3439d3 100644
--- a/net/oenetcls/oenetcls_main.c
+++ b/net/oenetcls/oenetcls_main.c
@@ -19,7 +19,7 @@ int debug;
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "debug switch");
 
-static int mode;
+int mode;
 module_param(mode, int, 0444);
 MODULE_PARM_DESC(mode, "mode, default 0");
 
@@ -43,13 +43,17 @@ static int check_cap = 1;
 module_param(check_cap, int, 0444);
 MODULE_PARM_DESC(check_cap, "check_cap, default 1");
 
+int rcpu_probability = -1;
+module_param(rcpu_probability, int, 0444);
+MODULE_PARM_DESC(rcpu_probability, "rcpu select policy probability, default -1");
+
 static char irqname[64] = "comp";
 module_param_string(irqname, irqname, sizeof(irqname), 0644);
 MODULE_PARM_DESC(irqname, "nic irq name string, default comp");
 
 static bool check_params(void)
 {
-	if (mode != 0 && mode != 1)
+	if (mode != 0 && mode != 1 && mode != 2)
 		return false;
 
 	if (strlen(ifname) == 0)
@@ -358,7 +362,7 @@ static struct oecls_netdev_info *alloc_oecls_netdev_info(void)
 static bool check_irq_name(const char *irq_name, struct oecls_netdev_info *oecls_dev)
 {
 	if (!strstr(irq_name, "TxRx") && !strstr(irq_name, "comp") && !strstr(irq_name, "rx") &&
-	    strlen(irqname) > 0 && !strstr(irq_name, irqname))
+	    !strstr(irq_name, "virtio0-input") && strlen(irqname) > 0 && !strstr(irq_name, irqname))
 		return false;
 
 	if (strstr(irq_name, oecls_dev->dev_name))
@@ -501,10 +505,12 @@ static int init_single_oecls_dev(char *if_name, unsigned int length)
 		goto out;
 	}
 
-	ret = oecls_filter_enable(dev_name, &old_state);
-	if (ret) {
-		oecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret);
-		goto out;
+	if (mode != 2) {
+		ret = oecls_filter_enable(dev_name, &old_state);
+		if (ret) {
+			oecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret);
+			goto out;
+		}
 	}
 
 	oecls_dev = alloc_oecls_netdev_info();
@@ -1000,6 +1006,39 @@ static void set_netdev_xps_queue(bool enable)
 	}
 }
 
+static void fixup_rcpu_load(void)
+{
+	char *start = appname, *end;
+	char *task_name = "redis-proxy";
+
+	if (!strlen(appname))
+		return;
+
+	// support appname: app1#app2#appN
+	while (*start != '\0') {
+		end = strchr(start, '#');
+		if (end == start) {
+			start++;
+			continue;
+		}
+
+		if (!end) {
+			if (!strncmp(task_name, start, strlen(start))) {
+				rcpu_probability = 100;
+				return;
+			}
+			break;
+		}
+
+		if (!strncmp(task_name, start, end - start)) {
+			rcpu_probability = 100;
+			return;
+		}
+		start = end + 1;
+	}
+	rcpu_probability = 65;
+}
+
 static __init int oecls_init(void)
 {
 	struct oecls_numa_info *numa_info;
@@ -1031,6 +1070,9 @@ static __init int oecls_init(void)
 	set_netdev_xps_queue(true);
 #endif
 
+	if (mode == 2 && rcpu_probability < 0)
+		fixup_rcpu_load();
+
 	if (mode == 0)
 		err = oecls_ntuple_res_init();
 	else
-- 
2.34.1
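The appname list walked by fixup_rcpu_load() above is a '#'-separated string, and the per-packet gate then fires with probability rcpu_probability via get_random_u32() % 100. A small userspace sketch of the same matching loop (match_appname() is a hypothetical helper name; note that, as in the patch, middle fields compare only end - start bytes, so a list entry matches as a prefix of the task name):

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Returns true if task matches one of the '#'-separated names in list. */
static bool match_appname(const char *list, const char *task)
{
	const char *start = list, *end;

	while (*start != '\0') {
		end = strchr(start, '#');
		if (end == start) {		/* skip empty "##" fields */
			start++;
			continue;
		}
		if (!end)			/* last field in the list */
			return !strncmp(task, start, strlen(start));
		if (!strncmp(task, start, end - start))
			return true;
		start = end + 1;
	}
	return false;
}

int main(void)
{
	printf("%d\n", match_appname("app1#redis-proxy#app3", "redis-proxy"));
	return 0;
}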
From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: cleanup
bugzilla: https://gitee.com/openeuler/kernel/issues/ICBFCS

--------------------------------

Prioritize the oenetcls hooks over RPS and clean up the now-unnecessary
oecls_rps goto statements.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
---
 net/core/dev.c | 29 +++++++++--------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 1fe135925df8..06d59a919a4b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5884,11 +5884,15 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 		return NET_RX_SUCCESS;
 
 	rcu_read_lock();
-#ifdef CONFIG_RPS
 #if IS_ENABLED(CONFIG_OENETCLS)
-	if (static_branch_unlikely(&oecls_rps_needed))
-		goto oecls_rps;
+	if (static_branch_unlikely(&oecls_rps_needed)) {
+		if (oenetcls_skb_set_cpu(skb, enqueue_to_backlog, &ret)) {
+			rcu_read_unlock();
+			return ret;
+		}
+	}
 #endif
+#ifdef CONFIG_RPS
 	if (static_branch_unlikely(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -5900,15 +5904,6 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 		}
 	}
 #endif
-
-#if IS_ENABLED(CONFIG_OENETCLS)
-oecls_rps:
-	if (oenetcls_skb_set_cpu(skb, enqueue_to_backlog, &ret)) {
-		rcu_read_unlock();
-		return ret;
-	}
-#endif
-
 	ret = __netif_receive_skb(skb);
 	rcu_read_unlock();
 	return ret;
@@ -5929,11 +5924,11 @@ void netif_receive_skb_list_internal(struct list_head *head)
 	list_splice_init(&sublist, head);
 
 	rcu_read_lock();
-#ifdef CONFIG_RPS
 #if IS_ENABLED(CONFIG_OENETCLS)
 	if (static_branch_unlikely(&oecls_rps_needed))
-		goto oecls_rps_list;
+		oenetcls_skblist_set_cpu(head, enqueue_to_backlog);
 #endif
+#ifdef CONFIG_RPS
 	if (static_branch_unlikely(&rps_needed)) {
 		list_for_each_entry_safe(skb, next, head, list) {
 			struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -5947,12 +5942,6 @@ void netif_receive_skb_list_internal(struct list_head *head)
 		}
 	}
 #endif
-
-#if IS_ENABLED(CONFIG_OENETCLS)
-oecls_rps_list:
-	oenetcls_skblist_set_cpu(head, enqueue_to_backlog);
-#endif
-
 	__netif_receive_skb_list(head);
 	rcu_read_unlock();
 }
-- 
2.34.1
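After this cleanup the per-skb path reads straight through: the oenetcls branch runs first and consumes the skb on success, then RPS gets its turn, and finally __netif_receive_skb() handles the packet locally. A condensed model of that control flow, with the static branches reduced to plain booleans purely for illustration:

#include <stdbool.h>
#include <stdio.h>

static bool oecls_rps_needed = true;	/* stand-in for static_branch_unlikely() */
static bool rps_needed = true;

/* Stand-ins for the real handlers; return true when they took the packet. */
static bool oenetcls_steer(int skb) { printf("skb %d enqueued by oenetcls\n", skb); return true; }
static bool rps_steer(int skb) { (void)skb; return false; }

static void receive(int skb)
{
	if (oecls_rps_needed && oenetcls_steer(skb))
		return;			/* enqueued to a remote CPU's backlog */
	if (rps_needed && rps_steer(skb))
		return;
	printf("skb %d handled locally\n", skb);
}

int main(void) { receive(1); return 0; }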
From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: bugfix
Link: https://gitee.com/openeuler/kernel/issues/ICBFCS
CVE: NA

--------------------------------

Make the hash table sizes configurable to mitigate possible hash
collisions.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
---
 net/oenetcls/oenetcls.h      |  4 ++--
 net/oenetcls/oenetcls_flow.c | 11 +++++++----
 net/oenetcls/oenetcls_main.c |  8 ++++++++
 3 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h
index 255d368ac911..6d8e8e5e5b15 100644
--- a/net/oenetcls/oenetcls.h
+++ b/net/oenetcls/oenetcls.h
@@ -102,8 +102,6 @@ struct oecls_sock_flow_table {
 	u32 ents[] ____cacheline_aligned_in_smp;
 };
 
-#define OECLS_DEV_FLOW_TABLE_NUM 0x1000
-#define OECLS_SOCK_FLOW_TABLE_NUM 0x100000
 #define OECLS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct oecls_dev_flow_table) + \
 	((_num) * sizeof(struct oecls_dev_flow)))
 #define OECLS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct oecls_sock_flow_table, ents[_num]))
@@ -135,6 +133,8 @@ extern int mode;
 extern int rcpu_probability;
 extern int oecls_netdev_num;
 extern int oecls_numa_num;
+extern unsigned int dft_num;
+extern unsigned int sft_num;
 
 #define oecls_debug(fmt, ...) \
 	do { \
diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c
index 07aca1308ec0..0953b4bd91ae 100644
--- a/net/oenetcls/oenetcls_flow.c
+++ b/net/oenetcls/oenetcls_flow.c
@@ -256,6 +256,11 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 	else
 		newcpu = last_recv_cpu;
 
+	if (rflow->isvalid && cpu_to_node(rflow->cpu) == cpu_to_node(newcpu)) {
+		rflow->timeout = jiffies;
+		return;
+	}
+
 	if (cpu_to_node(cpu) == cpu_to_node(newcpu))
 		return;
 
@@ -347,9 +352,8 @@ static int oecls_dev_flow_table_release(void)
 static int _oecls_dev_flow_table_init(struct net_device *netdev)
 {
 	struct oecls_dev_flow_table *table;
-	int size = OECLS_DEV_FLOW_TABLE_NUM;
+	int size = dft_num, i, j, ret = 0;
 	struct netdev_rx_queue *queue;
-	int i, j, ret = 0;
 
 	size = roundup_pow_of_two(size);
 	oecls_debug("dev:%s, num_rx_queues:%d, mask:0x%x\n", netdev->name, netdev->num_rx_queues,
@@ -427,8 +431,7 @@ static int oecls_sock_flow_table_release(void)
 static int oecls_sock_flow_table_init(void)
 {
 	struct oecls_sock_flow_table *table;
-	int size = OECLS_SOCK_FLOW_TABLE_NUM;
-	int i;
+	int size = sft_num, i;
 
 	size = roundup_pow_of_two(size);
 	table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size));
diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c
index 4c542e3439d3..f9574b344331 100644
--- a/net/oenetcls/oenetcls_main.c
+++ b/net/oenetcls/oenetcls_main.c
@@ -51,6 +51,14 @@ static char irqname[64] = "comp";
 module_param_string(irqname, irqname, sizeof(irqname), 0644);
 MODULE_PARM_DESC(irqname, "nic irq name string, default comp");
 
+unsigned int dft_num = 0x1000;
+module_param(dft_num, uint, 0444);
+MODULE_PARM_DESC(dft_num, "dev flow table entries, default 0x1000");
+
+unsigned int sft_num = 0x100000;
+module_param(sft_num, uint, 0444);
+MODULE_PARM_DESC(sft_num, "sock flow table entries, default 0x100000");
+
 static bool check_params(void)
 {
 	if (mode != 0 && mode != 1 && mode != 2)
-- 
2.34.1
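Both flow tables index with hash & mask, which is why dft_num/sft_num are rounded up to a power of two first: the mask is then size - 1 and every hash maps to a valid slot, and a larger sft_num directly lowers the chance of two flows colliding in one ents[] slot. A minimal sketch of that sizing, with roundup_pow_of_two() open-coded for userspace:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's roundup_pow_of_two(). */
static uint32_t roundup_pow_of_two(uint32_t n)
{
	uint32_t p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

int main(void)
{
	uint32_t sft_num = 0x100000;	/* module parameter default */
	uint32_t size = roundup_pow_of_two(sft_num);
	uint32_t mask = size - 1;
	uint32_t hash = 0xdeadbeef;	/* example flow hash */

	/* ents[hash & mask] is always a valid slot. */
	printf("size=0x%x mask=0x%x slot=0x%x\n", size, mask, hash & mask);
	return 0;
}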
From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: feature
Link: https://gitee.com/openeuler/kernel/issues/ICBFCS
CVE: NA

--------------------------------

Use NUMA-aware flow tables for local (loopback) flows to achieve better
cache effectiveness and NUMA affinity. Also cache the check_appname()
result in the sock to avoid redundant checks.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
---
 include/linux/oenetcls.h       |  40 ++++++++++--
 include/linux/skbuff.h         |   4 ++
 include/net/sock.h             |   4 ++
 net/core/dev.c                 |   8 +++
 net/core/sock.c                |   3 +
 net/ipv4/tcp.c                 |   5 +-
 net/oenetcls/oenetcls.h        |   7 +++
 net/oenetcls/oenetcls_flow.c   | 112 +++++++++++++++++++++++++++++----
 net/oenetcls/oenetcls_main.c   |  30 +++++++--
 net/oenetcls/oenetcls_ntuple.c |   3 +-
 10 files changed, 191 insertions(+), 25 deletions(-)

diff --git a/include/linux/oenetcls.h b/include/linux/oenetcls.h
index 09f89131f32b..b618aa6b807f 100644
--- a/include/linux/oenetcls.h
+++ b/include/linux/oenetcls.h
@@ -2,10 +2,13 @@
 #ifndef _LINUX_OENETCLS_H
 #define _LINUX_OENETCLS_H
 
+#include <linux/if_arp.h>
+
 struct oecls_hook_ops {
 	void (*oecls_cfg_rxcls)(struct sock *sk, int is_del);
-	void (*oecls_flow_update)(struct sock *sk);
+	void (*oecls_flow_update)(struct sock *sk, struct sk_buff *skb);
 	void (*oecls_set_cpu)(struct sk_buff *skb, int *cpu, int *last_qtail);
+	void (*oecls_set_localcpu)(struct sk_buff *skb, int *cpu, int *last_qtail);
 	bool (*oecls_timeout)(struct net_device *dev, u16 rxq_index,
 			      u32 flow_id, u16 filter_id);
 };
@@ -13,6 +16,7 @@ struct oecls_hook_ops {
 typedef int (*enqueue_f)(struct sk_buff *skb, int cpu, unsigned int *qtail);
 extern const struct oecls_hook_ops __rcu *oecls_ops;
 extern struct static_key_false oecls_rps_needed;
+extern struct static_key_false oecls_localrps_needed;
 
 static inline void oenetcls_cfg_rxcls(struct sock *sk, int is_del)
 {
@@ -25,14 +29,14 @@ static inline void oenetcls_cfg_rxcls(struct sock *sk, int is_del)
 	rcu_read_unlock();
 }
 
-static inline void oenetcls_flow_update(struct sock *sk)
+static inline void oenetcls_flow_update(struct sock *sk, struct sk_buff *skb)
 {
 	const struct oecls_hook_ops *ops;
 
 	rcu_read_lock();
 	ops = rcu_dereference(oecls_ops);
 	if (ops && ops->oecls_flow_update)
-		ops->oecls_flow_update(sk);
+		ops->oecls_flow_update(sk, skb);
 	rcu_read_unlock();
 }
 
@@ -45,8 +49,16 @@ oenetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
 
 	rcu_read_lock();
 	ops = rcu_dereference(oecls_ops);
-	if (ops && ops->oecls_set_cpu) {
-		ops->oecls_set_cpu(skb, &cpu, &last_qtail);
+	if (ops) {
+		/* mode 1 always use oecls_set_cpu hook for physical NIC or lo.
+		 * mode 0 set this hook to NULL, to avoid unneeded ops in
+		 * oenetcls_skblist_set_cpu() for physical NIC flows, and use
+		 * oecls_set_localcpu hook for loopback flows.
+		 */
+		if (ops->oecls_set_cpu)
+			ops->oecls_set_cpu(skb, &cpu, &last_qtail);
+		else if (ops->oecls_set_localcpu)
+			ops->oecls_set_localcpu(skb, &cpu, &last_qtail);
 		if (cpu >= 0) {
 			*ret = enq_func(skb, cpu, &last_qtail);
 			result = true;
@@ -56,6 +68,24 @@ oenetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
 	return result;
 }
 
+static inline bool
+oenetcls_skb_set_localcpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
+{
+	struct net_device *dev = skb->dev;
+	bool result = false;
+
+	if (!static_branch_unlikely(&oecls_localrps_needed))
+		return result;
+	if (!dev || !(dev->type == ARPHRD_LOOPBACK && dev->flags & IFF_LOOPBACK))
+		return result;
+
+	preempt_disable();
+	if (oenetcls_skb_set_cpu(skb, enq_func, ret))
+		result = true;
+	preempt_enable();
+	return result;
+}
+
 static inline void
 oenetcls_skblist_set_cpu(struct list_head *head, enqueue_f enq_func)
 {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1597a5f9b5b8..0f985ba19006 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1056,7 +1056,11 @@ struct sk_buff {
 #else
 	KABI_RESERVE(1)
 #endif
+#if IS_ENABLED(CONFIG_OENETCLS)
+	KABI_USE(2, __u32 sym_hash)
+#else
 	KABI_RESERVE(2)
+#endif
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 26456cb2bf8f..c44b2025bc54 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -554,7 +554,11 @@ struct sock {
 		u64 sk_gid_padding;
 	};
 #endif
+#if IS_ENABLED(CONFIG_OENETCLS)
+	KABI_USE(1, u8 oecls_cmd_matched)
+#else
 	KABI_RESERVE(1)
+#endif
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
diff --git a/net/core/dev.c b/net/core/dev.c
index 06d59a919a4b..f388233f4f75 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -164,6 +164,8 @@ const struct oecls_hook_ops __rcu *oecls_ops __read_mostly;
 EXPORT_SYMBOL_GPL(oecls_ops);
 struct static_key_false oecls_rps_needed __read_mostly;
 EXPORT_SYMBOL(oecls_rps_needed);
+struct static_key_false oecls_localrps_needed __read_mostly;
+EXPORT_SYMBOL(oecls_localrps_needed);
 #endif
 
 static DEFINE_SPINLOCK(ptype_lock);
@@ -5196,6 +5198,12 @@ static int netif_rx_internal(struct sk_buff *skb)
 
 	trace_netif_rx(skb);
 
+#if IS_ENABLED(CONFIG_OENETCLS)
+	if (static_branch_unlikely(&oecls_localrps_needed)) {
+		if (oenetcls_skb_set_localcpu(skb, enqueue_to_backlog, &ret))
+			return ret;
+	}
+#endif
 #ifdef CONFIG_RPS
 	if (static_branch_unlikely(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
diff --git a/net/core/sock.c b/net/core/sock.c
index d63f5ee49054..45f7f9aaca46 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2172,6 +2172,9 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sock_update_classid(&sk->sk_cgrp_data);
 		sock_update_netprioidx(&sk->sk_cgrp_data);
 		sk_tx_queue_clear(sk);
+#if IS_ENABLED(CONFIG_OENETCLS)
+		sk->oecls_cmd_matched = 0;
+#endif
 	}
 
 	return sk;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7c66c46c125f..2c98ef85072b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2543,6 +2543,9 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
 		if (used + offset < skb->len)
 			continue;
 
+#if IS_ENABLED(CONFIG_OENETCLS)
+		oenetcls_flow_update(sk, skb);
+#endif
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 			goto found_fin_ok;
 		if (!(flags & MSG_PEEK))
@@ -2587,7 +2590,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
 		return inet_recv_error(sk, msg, len, addr_len);
 
 #if IS_ENABLED(CONFIG_OENETCLS)
-	oenetcls_flow_update(sk);
+	oenetcls_flow_update(sk, NULL);
 #endif
 	if (sk_can_busy_loop(sk) &&
 	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h
index 6d8e8e5e5b15..755d0ab299ee 100644
--- a/net/oenetcls/oenetcls.h
+++ b/net/oenetcls/oenetcls.h
@@ -13,6 +13,10 @@
 #define OECLS_NO_FILTER 0xffff
 #define OECLS_NO_CPU 0xffff
 
+#define OECLS_CMD_UNKNOWN 0
+#define OECLS_CMD_MATCHED 1
+#define OECLS_CMD_NO_MATCH 2
+
 struct oecls_netdev_queue_info {
 	int irq;
 	int affinity_cpu;
@@ -135,6 +139,7 @@ extern int oecls_netdev_num;
 extern int oecls_numa_num;
 extern unsigned int dft_num;
 extern unsigned int sft_num;
+extern int lo_numa_rps;
 
 #define oecls_debug(fmt, ...) \
 	do { \
@@ -183,5 +188,7 @@ int oecls_ntuple_res_init(void);
 void oecls_ntuple_res_clean(void);
 int oecls_flow_res_init(void);
 void oecls_flow_res_clean(void);
+void _oecls_flow_update(struct sock *sk, struct sk_buff *skb);
+void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail);
 
 #endif /* _NET_OENETCLS_H */
diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c
index 0953b4bd91ae..bb52a5b78c47 100644
--- a/net/oenetcls/oenetcls_flow.c
+++ b/net/oenetcls/oenetcls_flow.c
@@ -1,15 +1,22 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/inetdevice.h>
-#include <linux/netdevice.h>
-#include <linux/rtnetlink.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/inet.h>
 #include <linux/irq.h>
 #include <linux/irqdesc.h>
-#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/oenetcls.h>
+#include <linux/rtnetlink.h>
+#include <net/inet_sock.h>
+#include <net/ipv6.h>
 #include <net/netdev_rx_queue.h>
 #include <net/sock.h>
-#include <linux/oenetcls.h>
 #include "oenetcls.h"
 
+static u16 *rps_cpus;
+static int rps_cpus_nums;
 static u32 oecls_cpu_mask;
 static struct oecls_sock_flow_table __rcu *oecls_sock_flow_table;
 static DEFINE_MUTEX(oecls_sock_flow_mutex);
@@ -59,22 +66,50 @@ static bool _oecls_timeout(struct net_device *dev, u16 rxq_index,
 	return expire;
 }
 
-static void _oecls_flow_update(struct sock *sk)
+static inline bool sk_is_loopback(struct sock *sk)
+{
+	if (sk->sk_family == AF_INET) {
+		if (ipv4_is_loopback(sk->sk_daddr) || ipv4_is_loopback(sk->sk_rcv_saddr))
+			return true;
+	}
+
+	if (sk->sk_family == AF_INET6) {
+		if (ipv6_addr_loopback(&sk->sk_v6_daddr) ||
+		    ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) ||
+		    ipv6_addr_v4mapped_loopback(&sk->sk_v6_daddr) ||
+		    ipv6_addr_v4mapped_loopback(&sk->sk_v6_rcv_saddr) ||
+		    ipv6_addr_equal(&sk->sk_v6_daddr, &sk->sk_v6_rcv_saddr))
+			return true;
+	}
+	return false;
+}
+
+void _oecls_flow_update(struct sock *sk, struct sk_buff *skb)
 {
 	struct oecls_sock_flow_table *tb;
 	unsigned int hash, index;
-	u32 val;
-	u32 cpu = raw_smp_processor_id();
+	u32 val, cpu;
 
 	if (sk->sk_state != TCP_ESTABLISHED)
 		return;
 
-	if (check_appname(current->comm))
+	if (unlikely(sk->oecls_cmd_matched == OECLS_CMD_UNKNOWN)) {
+		if (check_appname(current->comm)) {
+			sk->oecls_cmd_matched = OECLS_CMD_NO_MATCH;
+			return;
+		}
+		sk->oecls_cmd_matched = OECLS_CMD_MATCHED;
+	}
+	if (sk->oecls_cmd_matched != OECLS_CMD_MATCHED)
 		return;
 
+	cpu = raw_smp_processor_id();
 	rcu_read_lock();
 	tb = rcu_dereference(oecls_sock_flow_table);
-	hash = READ_ONCE(sk->sk_rxhash);
+	if (lo_numa_rps && skb && sk_is_loopback(sk))
+		hash = READ_ONCE(skb->sym_hash);
+	else
+		hash = READ_ONCE(sk->sk_rxhash);
 	if (tb && hash) {
 		index = hash & tb->mask;
 		val = hash & ~oecls_cpu_mask;
@@ -183,7 +218,7 @@ static bool oecls_do_hash(void)
 	return get_random_u32() % 100 < rcpu_probability;
 }
 
-static int get_cpu_in_mask(int tcpu, u32 hash)
+static inline int get_cpu_in_mask(int tcpu, u32 hash)
 {
 	const struct cpumask *mask;
 	int nr_cpus, cpu, index;
@@ -268,7 +303,40 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 		set_oecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu);
 }
 
-static void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
+static inline void loopback_numa_rps(struct sk_buff *skb, int *rcpu)
+{
+	struct oecls_sock_flow_table *stb;
+	u32 last_recv_cpu, hash, val;
+	int newcpu, index;
+
+	skb_reset_network_header(skb);
+	hash = __skb_get_hash_symmetric(skb);
+	if (!hash)
+		return;
+
+	WRITE_ONCE(skb->sym_hash, hash);
+	rcu_read_lock();
+	stb = rcu_dereference(oecls_sock_flow_table);
+	if (stb) {
+		val = READ_ONCE(stb->ents[hash & stb->mask]);
+		last_recv_cpu = val & oecls_cpu_mask;
+	} else {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+
+	if ((val ^ hash) & ~oecls_cpu_mask)
+		return;
+
+	newcpu = cpumask_first(cpumask_of_node(cpu_to_node(last_recv_cpu)));
+	index = rps_cpus[reciprocal_scale(hash, rps_cpus_nums - 1)];
+	newcpu += index;
+	*rcpu = newcpu;
+	oecls_debug("last:%u curcpu:%d newcpu:%d\n", last_recv_cpu,
+		    raw_smp_processor_id(), newcpu);
+}
+
+void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
 {
 	struct net_device *ndev = skb->dev;
 	struct oecls_sock_flow_table *stb;
@@ -281,6 +349,11 @@ void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
 	if (!ndev)
 		return;
 
+	if (lo_numa_rps && (ndev->type == ARPHRD_LOOPBACK && ndev->flags & IFF_LOOPBACK)) {
+		loopback_numa_rps(skb, cpu);
+		return;
+	}
+
 	if (!is_oecls_config_netdev(ndev->name))
 		return;
 
@@ -424,6 +497,7 @@ static int oecls_sock_flow_table_release(void)
 	mutex_unlock(&oecls_sock_flow_mutex);
 	synchronize_rcu();
 	vfree(tb);
+	kfree(rps_cpus);
 	return 0;
 }
 
@@ -433,10 +507,20 @@ static int oecls_sock_flow_table_init(void)
 	struct oecls_sock_flow_table *table;
 	int size = sft_num, i;
 
+	rps_cpus_nums = cpumask_weight(cpumask_of_node(0));
+	rps_cpus = kmalloc_array(rps_cpus_nums, sizeof(u16), GFP_KERNEL);
+	if (!rps_cpus)
+		return -ENOMEM;
+	for (i = 0; i < rps_cpus_nums; i++)
+		rps_cpus[i] = i;
+	oecls_debug("rps_cpus_nums:%d\n", rps_cpus_nums);
+
 	size = roundup_pow_of_two(size);
 	table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size));
-	if (!table)
+	if (!table) {
+		kfree(rps_cpus);
 		return -ENOMEM;
+	}
 
 	oecls_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
 	oecls_debug("nr_cpu_ids:%d, oecls_cpu_mask:0x%x\n", nr_cpu_ids, oecls_cpu_mask);
@@ -455,6 +539,7 @@ static int oecls_sock_flow_table_init(void)
 static const struct oecls_hook_ops oecls_flow_ops = {
 	.oecls_flow_update = _oecls_flow_update,
 	.oecls_set_cpu = _oecls_set_cpu,
+	.oecls_set_localcpu = NULL,
 	.oecls_timeout = _oecls_timeout,
 	.oecls_cfg_rxcls = NULL,
 };
@@ -473,7 +558,8 @@ int oecls_flow_res_init(void)
 			return err;
 	}
 
-	RCU_INIT_POINTER(oecls_ops, &oecls_flow_ops);
+	if (mode != 0) //for lo rps
+		RCU_INIT_POINTER(oecls_ops, &oecls_flow_ops);
 	synchronize_rcu();
 
 #ifdef CONFIG_RPS
diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c
index f9574b344331..e6cffacca161 100644
--- a/net/oenetcls/oenetcls_main.c
+++ b/net/oenetcls/oenetcls_main.c
@@ -6,6 +6,7 @@
 #include <linux/irq.h>
 #include <linux/irqdesc.h>
 #include <linux/rtnetlink.h>
+#include <linux/oenetcls.h>
 #include "oenetcls.h"
 
 int oecls_netdev_num;
@@ -59,6 +60,10 @@ unsigned int sft_num = 0x100000;
 module_param(sft_num, uint, 0444);
 MODULE_PARM_DESC(sft_num, "sock flow table entries, default 0x100000");
 
+int lo_numa_rps;
+module_param(lo_numa_rps, int, 0644);
+MODULE_PARM_DESC(lo_numa_rps, "enable loopback flow numa affinity");
+
 static bool check_params(void)
 {
 	if (mode != 0 && mode != 1 && mode != 2)
@@ -517,7 +522,8 @@ static int init_single_oecls_dev(char *if_name, unsigned int length)
 		ret = oecls_filter_enable(dev_name, &old_state);
 		if (ret) {
 			oecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret);
-			goto out;
+			if (lo_numa_rps)
+				goto out;
 		}
 	}
 
@@ -1081,14 +1087,22 @@ static __init int oecls_init(void)
 	if (mode == 2 && rcpu_probability < 0)
 		fixup_rcpu_load();
 
-	if (mode == 0)
+	if (mode == 0) {
 		err = oecls_ntuple_res_init();
-	else
+		if (err)
+			goto clean_rxq;
+		if (lo_numa_rps)
+			err = oecls_flow_res_init();
+	} else {
 		err = oecls_flow_res_init();
+	}
 	if (err)
 		goto clean_rxq;
 
+	if (lo_numa_rps)
+		static_branch_inc(&oecls_localrps_needed);
+
 	return 0;
 
 clean_rxq:
@@ -1100,10 +1114,16 @@ static __init int oecls_init(void)
 
 static __exit void oecls_exit(void)
 {
-	if (mode == 0)
+	if (lo_numa_rps)
+		static_branch_dec(&oecls_localrps_needed);
+
+	if (mode == 0) {
 		oecls_ntuple_res_clean();
-	else
+		if (lo_numa_rps)
+			oecls_flow_res_clean();
+	} else {
 		oecls_flow_res_clean();
+	}
 
 #ifdef CONFIG_XPS
 	set_netdev_xps_queue(false);
diff --git a/net/oenetcls/oenetcls_ntuple.c b/net/oenetcls/oenetcls_ntuple.c
index def33d30f642..c0b97ea7649e 100644
--- a/net/oenetcls/oenetcls_ntuple.c
+++ b/net/oenetcls/oenetcls_ntuple.c
@@ -582,7 +582,8 @@ static void clean_oecls_sk_rules(void)
 }
 
 static const struct oecls_hook_ops oecls_ntuple_ops = {
-	.oecls_flow_update = NULL,
+	.oecls_flow_update = _oecls_flow_update,
+	.oecls_set_localcpu = _oecls_set_cpu,
 	.oecls_set_cpu = NULL,
 	.oecls_timeout = NULL,
 	.oecls_cfg_rxcls = ethtool_cfg_rxcls,
-- 
2.34.1
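Loopback steering needs the same hash for both directions of a connection (127.0.0.1:p1 <-> 127.0.0.1:p2), which is what __skb_get_hash_symmetric() provides and why the result is cached in skb->sym_hash. A toy illustration of the symmetry property, using a trivial mixing function rather than the kernel's flow dissector (sym_hash() here is purely illustrative):

#include <stdint.h>
#include <stdio.h>

/* Toy symmetric flow hash: ordering the endpoints makes h(a,b) == h(b,a). */
static uint32_t sym_hash(uint32_t saddr, uint16_t sport,
			 uint32_t daddr, uint16_t dport)
{
	uint64_t a = ((uint64_t)saddr << 16) | sport;
	uint64_t b = ((uint64_t)daddr << 16) | dport;
	uint64_t lo = a < b ? a : b, hi = a < b ? b : a;

	return (uint32_t)((lo * 0x9e3779b97f4a7c15ULL) ^ (hi >> 13));
}

int main(void)
{
	uint32_t lo_ip = 0x7f000001;	/* 127.0.0.1 */

	/* Same value for the request and the reply direction. */
	printf("%u\n", sym_hash(lo_ip, 40000, lo_ip, 6379));
	printf("%u\n", sym_hash(lo_ip, 6379, lo_ip, 40000));
	return 0;
}

With a direction-sensitive hash the two halves of a local connection would land in different sock flow table slots and could be steered to different nodes; the symmetric hash keeps both on one NUMA node.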
From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: feature
Link: https://gitee.com/openeuler/kernel/issues/ICBFCS
CVE: NA

--------------------------------

Allow multiple sockets to multiplex the same rxq to achieve NUMA
affinity.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
---
 net/oenetcls/oenetcls.h      |  4 +-
 net/oenetcls/oenetcls_flow.c | 27 +++++++----
 net/oenetcls/oenetcls_main.c | 89 ++++++++++++++++++++++++------------
 3 files changed, 80 insertions(+), 40 deletions(-)

diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h
index 755d0ab299ee..4a7e7d37669d 100644
--- a/net/oenetcls/oenetcls.h
+++ b/net/oenetcls/oenetcls.h
@@ -17,6 +17,8 @@
 #define OECLS_CMD_MATCHED 1
 #define OECLS_CMD_NO_MATCH 2
 
+#define RXQ_MAX_USECNT 0xFF
+
 struct oecls_netdev_queue_info {
 	int irq;
 	int affinity_cpu;
@@ -42,7 +44,7 @@ struct oecls_numa_clusterinfo {
 };
 
 struct oecls_numa_bound_dev_info {
-	DECLARE_BITMAP(bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV);
+	unsigned char bitmap_rxq[OECLS_MAX_RXQ_NUM_PER_DEV];
 	struct oecls_numa_clusterinfo *cluster_info;
 };
 
diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c
index bb52a5b78c47..fd5ed67312f1 100644
--- a/net/oenetcls/oenetcls_flow.c
+++ b/net/oenetcls/oenetcls_flow.c
@@ -127,27 +127,32 @@ void _oecls_flow_update(struct sock *sk, struct sk_buff *skb)
 
 static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *skb)
 {
+	struct oecls_numa_bound_dev_info *bound_dev = NULL;
 	struct oecls_netdev_info *netdev_info;
-	int netdev_loop;
-	u32 hash, index;
 	struct oecls_numa_info *numa_info;
-	struct oecls_numa_bound_dev_info *bound_dev = NULL;
-	int rxq_id, rxq_num, i;
+	int rxq_id, rxq_num, i, devid;
+	u32 hash, index;
 
 	numa_info = get_oecls_numa_info(nid);
 	if (!numa_info)
 		return -1;
 
-	for_each_oecls_netdev(netdev_loop, netdev_info) {
+	for_each_oecls_netdev(devid, netdev_info) {
 		if (strcmp(netdev_info->dev_name, dev->name) == 0) {
-			bound_dev = &numa_info->bound_dev[netdev_loop];
+			bound_dev = &numa_info->bound_dev[devid];
 			break;
 		}
 	}
 	if (!bound_dev)
 		return -1;
-	rxq_num = bitmap_weight(bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV);
+
+	rxq_num = 0;
+	for (i = 0; i < OECLS_MAX_RXQ_NUM_PER_DEV; i++) {
+		if (bound_dev->bitmap_rxq[i] == RXQ_MAX_USECNT)
+			continue;
+		rxq_num++;
+	}
 	if (rxq_num == 0)
 		return -1;
 
@@ -155,10 +160,14 @@ static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *skb)
 	index = hash % rxq_num;
 	i = 0;
 
-	for_each_set_bit(rxq_id, bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV)
-		if (index == i++)
+	for (rxq_id = 0; rxq_id < OECLS_MAX_RXQ_NUM_PER_DEV; rxq_id++) {
+		if (bound_dev->bitmap_rxq[rxq_id] == RXQ_MAX_USECNT)
+			continue;
+		if (i++ == index)
 			return rxq_id;
+	}
 
+	oecls_debug("skb:%p, no found rxq\n", skb);
 	return -1;
 }
 
diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c
index e6cffacca161..5205b09bbff2 100644
--- a/net/oenetcls/oenetcls_main.c
+++ b/net/oenetcls/oenetcls_main.c
@@ -64,6 +64,10 @@ int lo_numa_rps;
 module_param(lo_numa_rps, int, 0644);
 MODULE_PARM_DESC(lo_numa_rps, "enable loopback flow numa affinity");
 
+static int rxq_multiplex_limit = 1;
+module_param(rxq_multiplex_limit, int, 0444);
+MODULE_PARM_DESC(rxq_multiplex_limit, "rxq multiplex limit num, default 1");
+
 static bool check_params(void)
 {
 	if (mode != 0 && mode != 1 && mode != 2)
@@ -640,12 +644,12 @@ static int init_numa_rxq_bitmap(int nid, struct oecls_numa_info *numa_info)
 	int bound_rxq_num, cluster_id, cluster_idx, cur_idx;
 	struct oecls_numa_bound_dev_info *bound_dev;
 	struct oecls_netdev_info *oecls_dev;
-	int rxq_id, devid, cpu, ret = 0;
+	int i, j, rxq_id, devid, cpu, ret = 0;
 
 	for_each_oecls_netdev(devid, oecls_dev) {
 		bound_rxq_num = 0;
 		bound_dev = &numa_info->bound_dev[devid];
-		bitmap_zero(bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV);
+		memset(bound_dev->bitmap_rxq, RXQ_MAX_USECNT, sizeof(bound_dev->bitmap_rxq));
 		bound_dev->cluster_info = kcalloc(oecls_cluster_per_numa,
 						  sizeof(*bound_dev->cluster_info), GFP_ATOMIC);
 		if (!bound_dev->cluster_info) {
@@ -653,25 +657,31 @@ static int init_numa_rxq_bitmap(int nid, struct oecls_numa_info *numa_info)
 			goto out;
 		}
 
+		for (i = 0; i < oecls_cluster_per_numa; i++) {
+			for (j = 0; j < OECLS_MAX_RXQ_NUM_PER_DEV; j++) {
+				bound_dev->cluster_info[i].rxqs[j].rxq_id = -1;
+				bound_dev->cluster_info[i].rxqs[j].status = RXQ_MAX_USECNT;
+			}
+		}
+
 		for (rxq_id = 0; rxq_id < oecls_dev->rxq_num; rxq_id++) {
 			cpu = oecls_dev->rxq[rxq_id].affinity_cpu;
 			if (cpu_to_node(cpu) == nid) {
-				set_bit(rxq_id, bound_dev->bitmap_rxq);
+				bound_dev->bitmap_rxq[rxq_id] = 0;
 				cluster_id = cpu / oecls_cluster_cpu_num;
 				cluster_idx = cluster_id % oecls_cluster_per_numa;
 				bound_dev->cluster_info[cluster_idx].cluster_id = cluster_id;
 				cur_idx = bound_dev->cluster_info[cluster_idx].cur_freeidx++;
 				bound_dev->cluster_info[cluster_idx].rxqs[cur_idx].rxq_id = rxq_id;
-				bound_dev->cluster_info[cluster_idx].rxqs[cur_idx].status = 1;
+				bound_dev->cluster_info[cluster_idx].rxqs[cur_idx].status = 0;
 				bound_rxq_num++;
 				oecls_debug("cpu:%d cluster_id:%d cluster_idx:%d rxq_id:%d cur_idx:%d\n",
 					    cpu, cluster_id, cluster_idx, rxq_id, cur_idx);
 			}
 		}
 
-		oecls_debug("nid:%d, dev_id:%d, dev:%s, rxq_num:%d, bit_num:%d, bitmap_rxq:%*pbl\n",
-			    nid, devid, oecls_dev->dev_name, oecls_dev->rxq_num,
-			    bound_rxq_num, OECLS_MAX_RXQ_NUM_PER_DEV, bound_dev->bitmap_rxq);
+		oecls_debug("nid:%d, dev_id:%d, dev:%s, rxq_num:%d, bound_rxq_num:%d\n",
+			    nid, devid, oecls_dev->dev_name, oecls_dev->rxq_num, bound_rxq_num);
 	}
 	return ret;
 
@@ -680,26 +690,36 @@ static int init_numa_rxq_bitmap(int nid, struct oecls_numa_info *numa_info)
 	return ret;
 }
 
-static int get_cluster_rxq(int cpu, struct oecls_numa_bound_dev_info *bound_dev)
+static int get_cluster_rxq(struct oecls_numa_bound_dev_info *bound_dev, int cpu)
 {
 	int cluster_id = cpu / oecls_cluster_cpu_num;
+	int min_used_count = RXQ_MAX_USECNT;
 	int i, j, rxq_id;
 
 	for (i = 0; i < oecls_cluster_per_numa; i++) {
 		if (cluster_id != bound_dev->cluster_info[i].cluster_id)
 			continue;
 		for (j = 0; j < OECLS_MAX_RXQ_NUM_PER_DEV; j++) {
-			if (bound_dev->cluster_info[i].rxqs[j].status == 1) {
-				bound_dev->cluster_info[i].rxqs[j].status = 2;
-				rxq_id = bound_dev->cluster_info[i].rxqs[j].rxq_id;
-				oecls_debug("cluster:%d cpu:%d alloc rxq_id:%d\n",
-					    cluster_id, cpu, rxq_id);
-				return rxq_id;
+			if (bound_dev->cluster_info[i].rxqs[j].rxq_id == -1)
+				continue;
+			if (bound_dev->cluster_info[i].rxqs[j].status < min_used_count) {
+				min_used_count = bound_dev->cluster_info[i].rxqs[j].status;
+				break;
 			}
 		}
+		if (min_used_count >= RXQ_MAX_USECNT || min_used_count >= rxq_multiplex_limit) {
+			rxq_id = -1;
+			oecls_debug("cluster:%d no free rxq for cpu:%d\n", cluster_id, cpu);
+		} else {
+			rxq_id = bound_dev->cluster_info[i].rxqs[j].rxq_id;
+			bound_dev->cluster_info[i].rxqs[j].status++;
+			oecls_debug("cluster:%d cpu:%d alloc rxq_id:%d use:%d\n", cluster_id, cpu,
+				    rxq_id, bound_dev->cluster_info[i].rxqs[j].status);
+		}
 	}
-	oecls_debug("cluster:%d no free rxq for cpu:%d\n", cluster_id, cpu);
-	return -1;
+	oecls_debug("allcluster:%d rxq:%d for cpu:%d\n", cluster_id, rxq_id, cpu);
+
+	return rxq_id;
 }
 
 static int put_cluster_rxq(struct oecls_numa_bound_dev_info *bound_dev, int rxq_id)
@@ -708,10 +728,11 @@ static int put_cluster_rxq(struct oecls_numa_bound_dev_info *bound_dev, int rxq_id)
 
 	for (i = 0; i < oecls_cluster_per_numa; i++) {
 		for (j = 0; j < OECLS_MAX_RXQ_NUM_PER_DEV; j++) {
-			if (bound_dev->cluster_info[i].rxqs[j].status == 2 &&
+			if (bound_dev->cluster_info[i].rxqs[j].status > 0 &&
 			    bound_dev->cluster_info[i].rxqs[j].rxq_id == rxq_id) {
-				bound_dev->cluster_info[i].rxqs[j].status = 1;
-				oecls_debug("free rxq_id:%d\n", rxq_id);
+				bound_dev->cluster_info[i].rxqs[j].status--;
+				oecls_debug("free rxq_id:%d use:%d\n", rxq_id,
+					    bound_dev->cluster_info[i].rxqs[j].status);
 				return 0;
 			}
 		}
@@ -723,9 +744,9 @@ static int put_cluster_rxq(struct oecls_numa_bound_dev_info *bound_dev, int rxq_id)
 int alloc_rxq_id(int cpu, int devid)
 {
 	struct oecls_numa_bound_dev_info *bound_dev;
+	int i, rxq_id, min_used_count = RXQ_MAX_USECNT;
 	struct oecls_numa_info *numa_info;
 	int nid = cpu_to_node(cpu);
-	int rxq_id;
 
 	numa_info = get_oecls_numa_info(nid);
 	if (!numa_info) {
@@ -740,22 +761,29 @@ int alloc_rxq_id(int cpu, int devid)
 	bound_dev = &numa_info->bound_dev[devid];
 
 	if (strategy == 1) {
-		rxq_id = get_cluster_rxq(cpu, bound_dev);
+		rxq_id = get_cluster_rxq(bound_dev, cpu);
 		if (rxq_id < 0 || rxq_id >= OECLS_MAX_RXQ_NUM_PER_DEV)
-			pr_info("failed to get rxq_id:%d in cluster, try numa\n", rxq_id);
+			oecls_debug("failed to get rxq_id:%d in cluster, try numa\n", rxq_id);
 		else
 			goto found;
 	}
 
-	rxq_id = find_first_bit(bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV);
-	if (rxq_id >= OECLS_MAX_RXQ_NUM_PER_DEV) {
-		oecls_error("error rxq_id:%d\n", rxq_id);
+	for (i = 0; i < OECLS_MAX_RXQ_NUM_PER_DEV; i++) {
+		if (bound_dev->bitmap_rxq[i] < min_used_count) {
+			min_used_count = bound_dev->bitmap_rxq[i];
+			rxq_id = i;
+		}
+	}
+
+	if (min_used_count >= RXQ_MAX_USECNT || min_used_count >= rxq_multiplex_limit) {
+		oecls_error("alloc rxq fail! nid:%d, devid:%d\n", nid, devid);
 		return -EINVAL;
 	}
 
found:
-	clear_bit(rxq_id, bound_dev->bitmap_rxq);
-	oecls_debug("alloc cpu:%d, nid:%d, devid:%d, rxq_id:%d\n", cpu, nid, devid, rxq_id);
+	bound_dev->bitmap_rxq[rxq_id]++;
+	oecls_debug("alloc nid:%d, dev_id:%d, rxq_id:%d use:%d\n", nid, devid,
+		    rxq_id, bound_dev->bitmap_rxq[rxq_id]);
 	return rxq_id;
 }
 
@@ -785,13 +813,14 @@ void free_rxq_id(int cpu, int devid, int rxq_id)
 	if (strategy == 1)
 		put_cluster_rxq(bound_dev, rxq_id);
 
-	if (test_bit(rxq_id, bound_dev->bitmap_rxq)) {
+	if (bound_dev->bitmap_rxq[rxq_id] <= 0) {
 		oecls_error("error nid:%d, devid:%d, rxq_id:%d\n", nid, devid, rxq_id);
 		return;
 	}
 
-	set_bit(rxq_id, bound_dev->bitmap_rxq);
-	oecls_debug("free nid:%d, dev_id:%d, rxq_id:%d\n", nid, devid, rxq_id);
+	bound_dev->bitmap_rxq[rxq_id]--;
+	oecls_debug("free nid:%d, dev_id:%d, rxq_id:%d use:%d\n", nid, devid,
+		    rxq_id, bound_dev->bitmap_rxq[rxq_id]);
 }
 
 static int init_oecls_numa_info(void)
-- 
2.34.1
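This patch replaces the free/used bitmap with a per-rxq use count, so several sockets can share a queue up to rxq_multiplex_limit; the allocation policy reduces to "pick the least-used queue whose count is below the limit". A compact sketch of that policy under the same conventions (RXQ_MAX_USECNT marks queues not bound to this NUMA node; pick_rxq() is a hypothetical helper mirroring alloc_rxq_id()):

#include <stdio.h>

#define RXQ_MAX_USECNT 0xFF

/* Returns the least-used eligible rxq, or -1 if all are at the limit. */
static int pick_rxq(unsigned char *use, int nq, int limit)
{
	int i, best = -1, best_cnt = RXQ_MAX_USECNT;

	for (i = 0; i < nq; i++) {
		if (use[i] >= limit || use[i] >= RXQ_MAX_USECNT)
			continue;	/* at the limit, or not bound to this node */
		if (use[i] < best_cnt) {
			best_cnt = use[i];
			best = i;
		}
	}
	if (best >= 0)
		use[best]++;		/* mirrors the increment in alloc_rxq_id() */
	return best;
}

int main(void)
{
	unsigned char use[4] = { 1, 0, RXQ_MAX_USECNT, 1 };

	printf("rxq=%d\n", pick_rxq(use, 4, 2));	/* picks queue 1 */
	return 0;
}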
From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: feature
Link: https://gitee.com/openeuler/kernel/issues/ICBFCS
CVE: NA

--------------------------------

Support TCP_V6_FLOW ntuple rules for mode 0.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
---
 net/oenetcls/oenetcls.h        |  12 +-
 net/oenetcls/oenetcls_main.c   |   3 +-
 net/oenetcls/oenetcls_ntuple.c | 248 ++++++++++++++++++++-------------
 3 files changed, 164 insertions(+), 99 deletions(-)

diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h
index 4a7e7d37669d..55b0345cdba6 100644
--- a/net/oenetcls/oenetcls.h
+++ b/net/oenetcls/oenetcls.h
@@ -3,6 +3,7 @@
 #define _NET_OENETCLS_H
 #include <linux/if.h>
 #include <linux/mutex.h>
+#include <linux/skbuff.h>
 #include <linux/cpufeature.h>
 
 #define OECLS_MAX_NETDEV_NUM 8
@@ -55,7 +56,9 @@ struct oecls_numa_info {
 
 struct cmd_context {
 	char netdev[IFNAMSIZ];
+	bool is_ipv6;
 	u32 dip4;
+	u32 dip6[4];
 	u16 dport;
 	u16 action;
 	u32 ruleid;
@@ -76,11 +79,13 @@ struct oecls_sk_rule {
 	struct hlist_node node;
 	int devid;
 	void *sk;
-	int dip4;
-	int dport;
+	bool is_ipv6;
+	u32 dip4;
+	u32 dip6[4];
+	u16 dport;
 	int action;
 	int ruleid;
-	int cpu;
+	int nid;
 };
 
 struct oecls_sk_entry {
@@ -130,6 +135,7 @@ struct cfg_param {
 	struct cmd_context ctx;
 	struct sock *sk;
 	bool is_del;
+	int nid;
 	int cpu;
 };
 
diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c
index 5205b09bbff2..f0e69ac6b728 100644
--- a/net/oenetcls/oenetcls_main.c
+++ b/net/oenetcls/oenetcls_main.c
@@ -787,11 +787,10 @@ int alloc_rxq_id(int cpu, int devid)
 	return rxq_id;
 }
 
-void free_rxq_id(int cpu, int devid, int rxq_id)
+void free_rxq_id(int nid, int devid, int rxq_id)
 {
 	struct oecls_numa_bound_dev_info *bound_dev;
 	struct oecls_numa_info *numa_info;
-	int nid = cpu_to_node(cpu);
 
 	numa_info = get_oecls_numa_info(nid);
 	if (!numa_info) {
diff --git a/net/oenetcls/oenetcls_ntuple.c b/net/oenetcls/oenetcls_ntuple.c
index c0b97ea7649e..76eee047783c 100644
--- a/net/oenetcls/oenetcls_ntuple.c
+++ b/net/oenetcls/oenetcls_ntuple.c
@@ -7,8 +7,9 @@
 #include <linux/irqdesc.h>
 #include <linux/inet.h>
 #include <linux/jhash.h>
-#include <net/sock.h>
 #include <linux/oenetcls.h>
+#include <net/addrconf.h>
+#include <net/sock.h>
 #include "oenetcls.h"
 
 struct oecls_sk_rule_list oecls_sk_rules, oecls_sk_list;
@@ -24,9 +25,24 @@ static void init_oecls_sk_rules(void)
 	mutex_init(&oecls_sk_rules.mutex);
 }
 
-static inline struct hlist_head *get_rule_hashlist(u32 dip4, u16 dport)
+static inline u32 get_hash(struct cmd_context ctx)
+{
+	u32 hash;
+
+	if (ctx.is_ipv6)
+		hash = jhash_2words(jhash(ctx.dip6, 16, 0), ctx.dport, 0);
+	else
+		hash = jhash_2words(ctx.dip4, ctx.dport, 0);
+
+	return hash;
+}
+
+static inline struct hlist_head *get_rule_hashlist(struct cmd_context ctx)
 {
-	return oecls_sk_rules.hash + (jhash_2words(dip4, dport, 0) & OECLS_SK_RULE_HASHMASK);
+	u32 hash;
+
+	hash = get_hash(ctx);
+	return oecls_sk_rules.hash + (hash & OECLS_SK_RULE_HASHMASK);
 }
 
 static inline struct hlist_head *get_sk_hashlist(void *sk)
@@ -34,35 +50,39 @@ static inline struct hlist_head *get_sk_hashlist(void *sk)
 	return oecls_sk_list.hash + (jhash(sk, sizeof(sk), 0) & OECLS_SK_RULE_HASHMASK);
 }
 
-static void add_sk_rule(int devid, u32 dip4, u16 dport, void *sk, int action, int ruleid, int cpu)
+static void add_sk_rule(int devid, struct cmd_context ctx, void *sk, int nid)
 {
-	struct hlist_head *hlist = get_rule_hashlist(dip4, dport);
+	struct hlist_head *hlist = get_rule_hashlist(ctx);
 	struct hlist_head *sk_hlist = get_sk_hashlist(sk);
 	struct oecls_sk_rule *rule;
 	struct oecls_sk_entry *entry;
 
 	rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
+	if (!rule) {
+		oecls_error("alloc rule failed\n");
+		return;
+	}
 	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
-	if (!rule || !entry)
-		goto out;
+	if (!entry) {
+		oecls_error("alloc entry failed\n");
+		kfree(rule);
+		return;
+	}
 
 	rule->sk = sk;
-	rule->dip4 = dip4;
-	rule->dport = dport;
+	rule->is_ipv6 = ctx.is_ipv6;
+	rule->dip4 = ctx.dip4;
+	memcpy(rule->dip6, ctx.dip6, sizeof(rule->dip6));
+	rule->dport = ctx.dport;
 	rule->devid = devid;
-	rule->action = action;
-	rule->ruleid = ruleid;
-	rule->cpu = cpu;
+	rule->action = ctx.action;
+	rule->ruleid = ctx.ret_loc;
+	rule->nid = nid;
 	hlist_add_head(&rule->node, hlist);
 
 	entry->sk = sk;
-	entry->sk_rule_hash = jhash_2words(dip4, dport, 0);
+	entry->sk_rule_hash = get_hash(ctx);
 	hlist_add_head(&entry->node, sk_hlist);
 }
 
 static struct oecls_sk_entry *get_sk_entry(void *sk)
@@ -92,13 +112,17 @@ static void del_sk_rule(struct oecls_sk_rule *rule)
 	kfree(rule);
 }
 
-static struct oecls_sk_rule *get_sk_rule(int devid, u32 dip4, u16 dport)
+static struct oecls_sk_rule *get_sk_rule(int devid, struct cmd_context ctx)
 {
-	struct hlist_head *hlist = get_rule_hashlist(dip4, dport);
+	struct hlist_head *hlist = get_rule_hashlist(ctx);
 	struct oecls_sk_rule *rule = NULL;
 
 	hlist_for_each_entry(rule, hlist, node) {
-		if (rule->devid == devid && rule->dip4 == dip4 && rule->dport == dport)
+		if (rule->devid != devid || rule->dport != ctx.dport)
+			continue;
+		if (!rule->is_ipv6 && rule->dip4 == ctx.dip4)
+			break;
+		if (rule->is_ipv6 && !memcmp(rule->dip6, ctx.dip6, sizeof(rule->dip6)))
 			break;
 	}
 	return rule;
@@ -122,9 +146,9 @@ static struct oecls_sk_rule *get_rule_from_sk(int devid, void *sk)
 	return rule;
 }
 
-static inline bool reuseport_check(int devid, u32 dip4, u16 dport)
+static inline bool reuseport_check(int devid, struct cmd_context ctx)
 {
-	return !!get_sk_rule(devid, dip4, dport);
+	return !!get_sk_rule(devid, ctx);
 }
 
 static u32 get_first_ip4_addr(struct net *net)
@@ -146,7 +170,7 @@ static u32 get_first_ip4_addr(struct net *net)
 		in_dev_for_each_ifa_rcu(ifa, in_dev) {
 			if (!strcmp(dev->name, ifa->ifa_label)) {
 				dip4 = ifa->ifa_local;
-				oecls_debug("dev: %s, dip4:%pI4\n", dev->name, &dip4);
+				oecls_debug("dev:%s dip4:%pI4\n", dev->name, &dip4);
 				goto out;
 			}
 		}
@@ -157,19 +181,63 @@ static u32 get_first_ip4_addr(struct net *net)
 	return dip4;
 }
 
+static void get_first_ip6_addr(struct net *net, u32 *dip6)
+{
+	struct inet6_dev *idev;
+	struct net_device *dev;
+	struct inet6_ifaddr *ifp;
+
+	rtnl_lock();
+	rcu_read_lock();
+	for_each_netdev(net, dev) {
+		if (dev->flags & IFF_LOOPBACK || !(dev->flags & IFF_UP))
+			continue;
+		idev = __in6_dev_get(dev);
+		if (!idev)
+			continue;
+		list_for_each_entry_rcu(ifp, &idev->addr_list, if_list) {
+			if (ifp->scope == RT_SCOPE_HOST)
+				continue;
+			if (ifp->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
+				continue;
+			memcpy(dip6, &ifp->addr, sizeof(ifp->addr));
+			oecls_debug("dev:%s dip:%pI6\n", dev->name, dip6);
+			goto out;
+		}
+	}
+out:
+	rcu_read_unlock();
+	rtnl_unlock();
+}
+
-static void get_sk_rule_addr(struct sock *sk, u32 *dip4, u16 *dport)
+static void get_sk_rule_addr(struct sock *sk, struct cfg_param *ctx_p)
 {
+	bool is_ipv6 = !!(sk->sk_family == AF_INET6);
+	u16 *dport = &ctx_p->ctx.dport;
+	u32 *dip4 = &ctx_p->ctx.dip4;
+	u32 *dip6 = &ctx_p->ctx.dip6[0];
+
 	*dport = htons(sk->sk_num);
+	ctx_p->ctx.is_ipv6 = is_ipv6;
 
 	if (!match_ip_flag) {
 		*dip4 = 0;
+		memset(dip6, 0, sizeof(sk->sk_v6_rcv_saddr));
 		return;
 	}
 
-	if (sk->sk_rcv_saddr)
-		*dip4 = sk->sk_rcv_saddr;
-	else
-		*dip4 = get_first_ip4_addr(sock_net(sk));
+	if (is_ipv6) {
+		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+			memcpy(dip6, &sk->sk_v6_rcv_saddr, sizeof(sk->sk_v6_rcv_saddr));
+		else
+			get_first_ip6_addr(sock_net(sk), dip6);
+
+	} else {
+		if (sk->sk_rcv_saddr)
+			*dip4 = sk->sk_rcv_saddr;
+		else
+			*dip4 = get_first_ip4_addr(sock_net(sk));
+	}
 }
 
 static int rxclass_rule_del(struct cmd_context *ctx, __u32 loc)
@@ -349,51 +417,21 @@ static int rxclass_rule_ins(struct cmd_context *ctx,
 	return 0;
 }
 
-static void flow_spec_to_ntuple(struct ethtool_rx_flow_spec *fsp,
-				struct ethtool_rx_ntuple_flow_spec *ntuple)
-{
-	int i;
-
-	memset(ntuple, ~0, sizeof(*ntuple));
-	ntuple->flow_type = fsp->flow_type;
-	ntuple->action = fsp->ring_cookie;
-	memcpy_r(&ntuple->h_u, &fsp->h_u, sizeof(fsp->h_u));
-	memcpy_r(&ntuple->m_u, &fsp->m_u, sizeof(fsp->m_u));
-	for (i = 0; i < sizeof(ntuple->m_u); i++)
-		ntuple->m_u.hdata[i] ^= 0xFF;
-	ntuple->flow_type &= ~FLOW_EXT;
-}
-
-static int do_srxntuple(struct cmd_context *ctx, struct ethtool_rx_flow_spec *fsp)
-{
-	struct ethtool_rx_ntuple ntuplecmd;
-	struct ethtool_value eval;
-	int ret = 0;
-
-	flow_spec_to_ntuple(fsp, &ntuplecmd.fs);
-
-	eval.cmd = ETHTOOL_GFLAGS;
-	ret = send_ethtool_ioctl(ctx, &eval);
-	if (ret || !(eval.data & ETH_FLAG_NTUPLE))
-		return -1;
-
-	ntuplecmd.cmd = ETHTOOL_SRXNTUPLE;
-	ret = send_ethtool_ioctl(ctx, &ntuplecmd);
-	if (ret)
-		oecls_debug("Cannot add new rule via N-tuple, ret:%d\n", ret);
-
-	return ret;
-}
-
 static int cfg_ethtool_rule(struct cmd_context *ctx, bool is_del)
 {
 	struct ethtool_rx_flow_spec *fsp, rx_rule_fs;
 	u32 rss_context = 0;
-	int ret;
+	bool is_ipv6 = ctx->is_ipv6;
+	int ret, i;
 
-	oecls_debug("is_del:%d netdev:%s, dip4:%pI4, dport:%d, action:%d, ruleid:%u, del_ruleid:%u\n",
-		    is_del, ctx->netdev, &ctx->dip4, ntohs(ctx->dport), ctx->action, ctx->ruleid,
-		    ctx->del_ruleid);
+	if (ctx->is_ipv6)
+		oecls_debug("del:%d dev:%s dip:%pI6 dport:%d action:%d ruleid:%u del_ruleid:%u\n",
+			    is_del, ctx->netdev, &ctx->dip6, ntohs(ctx->dport), ctx->action,
+			    ctx->ruleid, ctx->del_ruleid);
+	else
+		oecls_debug("del:%d dev:%s dip:%pI4 dport:%d action:%d ruleid:%u del_ruleid:%u\n",
+			    is_del, ctx->netdev, &ctx->dip4, ntohs(ctx->dport), ctx->action,
+			    ctx->ruleid, ctx->del_ruleid);
 
 	if (is_del)
 		return rxclass_rule_del(ctx, ctx->del_ruleid);
@@ -402,21 +440,28 @@ static int cfg_ethtool_rule(struct cmd_context *ctx, bool is_del)
 
 	fsp = &rx_rule_fs;
 	memset(fsp, 0, sizeof(*fsp));
-	fsp->flow_type = TCP_V4_FLOW;
+	if (is_ipv6) {
+		fsp->flow_type = TCP_V6_FLOW;
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, ctx->dip6, sizeof(ctx->dip6));
+		fsp->h_u.tcp_ip6_spec.pdst = ctx->dport;
+		fsp->m_u.tcp_ip6_spec.pdst = (u16)~0ULL;
+		if (ctx->dip6[0] | ctx->dip6[1] | ctx->dip6[2] | ctx->dip6[3]) {
+			for (i = 0; i < 4; i++)
+				fsp->m_u.tcp_ip6_spec.ip6dst[i] = (u32)~0ULL;
+		}
+	} else {
+		fsp->flow_type = TCP_V4_FLOW;
+		fsp->h_u.tcp_ip4_spec.ip4dst = ctx->dip4;
+		fsp->h_u.tcp_ip4_spec.pdst = ctx->dport;
+		fsp->m_u.tcp_ip4_spec.pdst = (u16)~0ULL;
+		if (ctx->dip4)
+			fsp->m_u.tcp_ip4_spec.ip4dst = (u32)~0ULL;
+	}
 	fsp->location = RX_CLS_LOC_ANY;
-	fsp->h_u.tcp_ip4_spec.ip4dst = ctx->dip4;
-	fsp->h_u.tcp_ip4_spec.pdst = ctx->dport;
-	if (ctx->dip4)
-		fsp->m_u.tcp_ip4_spec.ip4dst = (u32)~0ULL;
-	fsp->m_u.tcp_ip4_spec.pdst = (u16)~0ULL;
 	if (ctx->ruleid)
 		fsp->location = ctx->ruleid;
 	fsp->ring_cookie = ctx->action;
 
-	ret = do_srxntuple(ctx, &rx_rule_fs);
-	if (!ret)
-		return 0;
-
 	ret = rxclass_rule_ins(ctx, &rx_rule_fs, rss_context);
 	if (!ret)
 		ctx->ret_loc = rx_rule_fs.location;
@@ -428,16 +473,19 @@ static void cfg_work(struct work_struct *work)
 	struct cfg_param *ctx_p = container_of(work, struct cfg_param, work);
 	struct oecls_netdev_info *oecls_dev;
 	struct oecls_sk_rule *rule;
-	int devid, rxq_id;
-	int err;
+	int devid, rxq_id, err;
 
 	mutex_lock(&oecls_sk_rules.mutex);
 	for_each_oecls_netdev(devid, oecls_dev) {
 		strncpy(ctx_p->ctx.netdev, oecls_dev->dev_name, IFNAMSIZ);
 		if (!ctx_p->is_del) {
-			if (reuseport_check(devid, ctx_p->ctx.dip4, ctx_p->ctx.dport)) {
-				oecls_error("dip4:%pI4, dport:%d reuse!\n", &ctx_p->ctx.dip4,
-					    ctx_p->ctx.dport);
+			if (reuseport_check(devid, ctx_p->ctx)) {
+				if (ctx_p->ctx.is_ipv6)
+					oecls_debug("dip:%pI6, dport:%d reuse!\n",
+						    &ctx_p->ctx.dip6, ntohs(ctx_p->ctx.dport));
+				else
+					oecls_debug("dip:%pI4, dport:%d reuse!\n",
+						    &ctx_p->ctx.dip4, ntohs(ctx_p->ctx.dport));
 				continue;
 			}
 
@@ -449,13 +497,13 @@ static void cfg_work(struct work_struct *work)
 			// Config Ntuple rule to dev
 			ctx_p->ctx.action = (u16)rxq_id;
 			err = cfg_ethtool_rule(&ctx_p->ctx, ctx_p->is_del);
-			// Add sk rule only on success
 			if (err) {
-				free_rxq_id(ctx_p->cpu, devid, rxq_id);
+				oecls_debug("Add sk:%p, dev_id:%d, rxq:%d, err:%d\n",
+					    ctx_p->sk, devid, rxq_id, err);
+				free_rxq_id(ctx_p->nid, devid, rxq_id);
 				continue;
 			}
-			add_sk_rule(devid, ctx_p->ctx.dip4, ctx_p->ctx.dport, ctx_p->sk,
-				    ctx_p->ctx.action, ctx_p->ctx.ret_loc, ctx_p->cpu);
+			add_sk_rule(devid, ctx_p->ctx, ctx_p->sk, ctx_p->nid);
 		} else {
 			rule = get_rule_from_sk(devid, ctx_p->sk);
 			if (!rule) {
@@ -469,7 +517,7 @@ static void cfg_work(struct work_struct *work)
 			ctx_p->ctx.del_ruleid = rule->ruleid;
 			err = cfg_ethtool_rule(&ctx_p->ctx, ctx_p->is_del);
 			// Free the bound queue
-			free_rxq_id(rule->cpu, devid, rule->action);
+			free_rxq_id(rule->nid, devid, rule->action);
 			// Delete sk rule
 			del_sk_rule(rule);
 		}
@@ -503,7 +551,7 @@ static void del_ntuple_rule(struct sock *sk)
 	ctx_p = kzalloc(sizeof(*ctx_p), GFP_ATOMIC);
 	if (!ctx_p)
 		return;
-	get_sk_rule_addr(sk, &ctx_p->ctx.dip4, &ctx_p->ctx.dport);
+	get_sk_rule_addr(sk, ctx_p);
 
 	ctx_p->is_del = true;
 	ctx_p->sk = sk;
@@ -515,6 +563,7 @@ static void del_ntuple_rule(struct sock *sk)
 static void add_ntuple_rule(struct sock *sk)
 {
 	struct cfg_param *ctx_p;
+	int cpu;
 
 	if (check_appname(current->comm))
 		return;
@@ -522,11 +571,13 @@ static void add_ntuple_rule(struct sock *sk)
 	ctx_p = kzalloc(sizeof(*ctx_p), GFP_ATOMIC);
 	if (!ctx_p)
 		return;
-	get_sk_rule_addr(sk, &ctx_p->ctx.dip4, &ctx_p->ctx.dport);
+	get_sk_rule_addr(sk, ctx_p);
 
+	cpu = raw_smp_processor_id();
 	ctx_p->is_del = false;
 	ctx_p->sk = sk;
-	ctx_p->cpu = raw_smp_processor_id();
+	ctx_p->cpu = cpu;
+	ctx_p->nid = cpu_to_node(cpu);
 	INIT_WORK(&ctx_p->work, cfg_work);
 	queue_work(do_cfg_workqueue, &ctx_p->work);
 	atomic_inc(&oecls_worker_count);
@@ -534,14 +585,23 @@ static void add_ntuple_rule(struct sock *sk)
 
 static void ethtool_cfg_rxcls(struct sock *sk, int is_del)
 {
+	bool is_ipv6;
+
 	if (sk->sk_state != TCP_LISTEN)
 		return;
 
 	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
 		return;
 
-	oecls_debug("[cpu:%d] app:%s, sk:%p, is_del:%d, ip:%pI4, port:%d\n", raw_smp_processor_id(),
-		    current->comm, sk, is_del, &sk->sk_rcv_saddr, (u16)sk->sk_num);
+	is_ipv6 = !!(sk->sk_family == AF_INET6);
+	if (is_ipv6)
+		oecls_debug("[cpu:%d] app:%s, sk:%p, is_del:%d, IPv6:%pI6, port:%d\n",
+			    raw_smp_processor_id(), current->comm, sk, is_del,
+			    &sk->sk_v6_rcv_saddr, (u16)sk->sk_num);
+	else
+		oecls_debug("[cpu:%d]
app:%s, sk:%p, is_del:%d, IPv4:%pI4, port:%d\n", + raw_smp_processor_id(), current->comm, sk, is_del, + &sk->sk_rcv_saddr, (u16)sk->sk_num); if (is_del) del_ntuple_rule(sk); -- 2.34.1
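For reference, the IPv6 branch of cfg_ethtool_rule() above boils down to the following self-contained sketch. It is illustrative only: the helper name and signature are invented here, while the field names come from the upstream struct ethtool_rx_flow_spec UAPI, and the masking mirrors the patch (all-ones port mask always, all-ones address mask only for a non-wildcard address).

#include <linux/ethtool.h>
#include <linux/string.h>

/* Illustrative helper: build a TCP_V6_FLOW rule matching dip6/dport.
 * dip6 and dport are assumed to be in network byte order, matching
 * the socket fields they are taken from.
 */
static void fill_tcp6_rule(struct ethtool_rx_flow_spec *fsp,
			   const __be32 dip6[4], __be16 dport, u16 rxq)
{
	int i;

	memset(fsp, 0, sizeof(*fsp));
	fsp->flow_type = TCP_V6_FLOW;
	memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, dip6,
	       sizeof(fsp->h_u.tcp_ip6_spec.ip6dst));
	fsp->h_u.tcp_ip6_spec.pdst = dport;
	fsp->m_u.tcp_ip6_spec.pdst = (u16)~0ULL;	/* match the port exactly */
	/* a wildcard address (::) keeps the address mask zeroed */
	if (dip6[0] | dip6[1] | dip6[2] | dip6[3]) {
		for (i = 0; i < 4; i++)
			fsp->m_u.tcp_ip6_spec.ip6dst[i] = (u32)~0ULL;
	}
	fsp->location = RX_CLS_LOC_ANY;	/* let the driver pick a slot */
	fsp->ring_cookie = rxq;		/* steer matches to this rx queue */
}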
From: Yue Haibing <yuehaibing@huawei.com> hulk inclusion category: feature Link: https://gitee.com/openeuler/kernel/issues/ICBFCS CVE: NA -------------------------------- Make OENETCLS depend on MODULES and allow it to be built only as a module. Signed-off-by: Yue Haibing <yuehaibing@huawei.com> --- net/oenetcls/Kconfig | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/oenetcls/Kconfig b/net/oenetcls/Kconfig index 68d5c6904319..30bf73a8014b 100644 --- a/net/oenetcls/Kconfig +++ b/net/oenetcls/Kconfig @@ -1,9 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only config OENETCLS tristate "Network classification" + depends on MODULES && m default n help - Allows to configure ntuple rule, and bind interrupt to netdev - automatically. - Use OENETCLS && OENETCLS_HOOKS to enable oenetcls feature. - Use parameter mode to decide running mode. + This introduces a network optimization method which can + configure flow steering rules and bind interrupts to netdev + queues automatically. + + This feature can only be built as a loadable module. -- 2.34.1
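A module-only option still composes with the built-in call sites because the core never links against the module directly: it only dereferences the oecls_ops RCU pointer, and IS_ENABLED(CONFIG_OENETCLS) is true for =m, so the guarded fast paths stay compiled in. The registration pattern, reduced to a sketch (oecls_ops and struct oecls_hook_ops come from this series; the init/exit bodies here are simplified, not the module's real ones):

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/oenetcls.h>

/* Simplified: publish the hook table on load, retract it on unload. */
static const struct oecls_hook_ops demo_ops = {
	/* .oecls_cfg_rxcls, .oecls_flow_update, ... as in the series */
};

static int __init demo_init(void)
{
	RCU_INIT_POINTER(oecls_ops, &demo_ops);
	return 0;
}

static void __exit demo_exit(void)
{
	rcu_assign_pointer(oecls_ops, NULL);
	synchronize_rcu();	/* no CPU may still be running our callbacks */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");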
From: Yue Haibing <yuehaibing@huawei.com> hulk inclusion category: feature Link: https://gitee.com/openeuler/kernel/issues/ICBFCS CVE: NA -------------------------------- Support rps affinity policy setting (0 as no rps, 1 as numa, 2 as cluster). Also use an rps cpus map instead of traversal to improve performance. Signed-off-by: Yue Haibing <yuehaibing@huawei.com> --- include/linux/oenetcls.h | 8 +- net/oenetcls/oenetcls.h | 3 +- net/oenetcls/oenetcls_flow.c | 171 ++++++++++++++++++++------- net/oenetcls/oenetcls_main.c | 20 ++-- net/oenetcls/oenetcls_ntuple.c | 4 +- 5 files changed, 124 insertions(+), 82 deletions(-) diff --git a/include/linux/oenetcls.h b/include/linux/oenetcls.h index b618aa6b807f..a1929fb0a193 100644 --- a/include/linux/oenetcls.h +++ b/include/linux/oenetcls.h @@ -51,9 +51,9 @@ oenetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret) ops = rcu_dereference(oecls_ops); if (ops) { /* mode 1 always use oecls_set_cpu hook for physical NIC or lo. - * mode 0 set this hook to NULL, to avoid unneeded ops in - * oenetcls_skblist_set_cpu() for physical NIC flows, and use - * oecls_set_localcpu hook for loopback flows. + * mode 0 set this hook to NULL if rps_policy is 0, to avoid + * unneeded ops in oenetcls_skblist_set_cpu() for physical NIC + * flows, and use oecls_set_localcpu hook for loopback flows. */ if (ops->oecls_set_cpu) ops->oecls_set_cpu(skb, &cpu, &last_qtail); @@ -74,8 +74,6 @@ oenetcls_skb_set_localcpu(struct sk_buff *skb, enqueue_f enq_func, int *ret) struct net_device *dev = skb->dev; bool result = false; - if (!static_branch_unlikely(&oecls_localrps_needed)) - return result; if (!dev || !(dev->type == ARPHRD_LOOPBACK && dev->flags & IFF_LOOPBACK)) return result; diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h index 55b0345cdba6..60f24165b3c4 100644 --- a/net/oenetcls/oenetcls.h +++ b/net/oenetcls/oenetcls.h @@ -147,7 +147,8 @@ extern int oecls_netdev_num; extern int oecls_numa_num; extern unsigned int dft_num; extern unsigned int sft_num; -extern int lo_numa_rps; +extern int rps_policy; +extern int lo_rps_policy; #define oecls_debug(fmt, ...)
\ do { \ diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c index fd5ed67312f1..934a50e8bbf8 100644 --- a/net/oenetcls/oenetcls_flow.c +++ b/net/oenetcls/oenetcls_flow.c @@ -15,9 +15,9 @@ #include <net/sock.h> #include "oenetcls.h" -static u16 *rps_cpus; -static int rps_cpus_nums; static u32 oecls_cpu_mask; +static u16 *rps_cpus, *cluster_rps_cpus; +static int rps_cpus_nums, cluster_rps_cpus_nums; static struct oecls_sock_flow_table __rcu *oecls_sock_flow_table; static DEFINE_MUTEX(oecls_sock_flow_mutex); static DEFINE_SPINLOCK(oecls_dev_flow_lock); @@ -106,7 +106,7 @@ void _oecls_flow_update(struct sock *sk, struct sk_buff *skb) cpu = raw_smp_processor_id(); rcu_read_lock(); tb = rcu_dereference(oecls_sock_flow_table); - if (lo_numa_rps && skb && sk_is_loopback(sk)) + if (lo_rps_policy && skb && sk_is_loopback(sk)) hash = READ_ONCE(skb->sym_hash); else hash = READ_ONCE(sk->sk_rxhash); @@ -125,13 +125,13 @@ void _oecls_flow_update(struct sock *sk, struct sk_buff *skb) rcu_read_unlock(); } -static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *skb) +static int flow_get_queue_idx(struct net_device *dev, int nid, u32 hash) { struct oecls_numa_bound_dev_info *bound_dev = NULL; struct oecls_netdev_info *netdev_info; struct oecls_numa_info *numa_info; int rxq_id, rxq_num, i, devid; - u32 hash, index; + u32 index; numa_info = get_oecls_numa_info(nid); if (!numa_info) @@ -156,7 +156,6 @@ static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *s if (rxq_num == 0) return -1; - hash = skb_get_hash(skb); index = hash % rxq_num; i = 0; @@ -167,24 +166,24 @@ static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *s return rxq_id; } - oecls_debug("skb:%p, no found rxq\n", skb); + oecls_debug("no found rxq\n"); return -1; } static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb, - struct oecls_dev_flow *old_rflow, int old_rxq_id, u16 next_cpu) + struct oecls_dev_flow *old_rflow, int old_rxq_id, u16 next_cpu, u32 hash) { struct netdev_rx_queue *rxqueue; struct oecls_dev_flow_table *dtb; struct oecls_dev_flow *rflow; - u32 flow_id, hash; int rxq_index, rc; + u32 flow_id; if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || !(dev->features & NETIF_F_NTUPLE)) return; - rxq_index = flow_get_queue_idx(dev, cpu_to_node(next_cpu), skb); + rxq_index = flow_get_queue_idx(dev, cpu_to_node(next_cpu), hash); if (rxq_index == skb_get_rx_queue(skb) || rxq_index < 0) return; @@ -193,7 +192,6 @@ static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb, if (!dtb) return; - hash = skb_get_hash(skb); flow_id = hash & dtb->mask; rflow = &dtb->flows[flow_id]; //Return if someone has configured this. 
@@ -227,46 +225,38 @@ static bool oecls_do_hash(void) return get_random_u32() % 100 < rcpu_probability; } -static inline int get_cpu_in_mask(int tcpu, u32 hash) +static inline u32 get_rps_cpu(u32 last_recv_cpu, u32 hash, int policy) { - const struct cpumask *mask; - int nr_cpus, cpu, index; - - mask = cpumask_of_node(cpu_to_node(tcpu)); - - nr_cpus = cpumask_weight(mask); - if (nr_cpus == 0) - return -1; - - index = reciprocal_scale(hash, nr_cpus); - if (index < 0) - return -1; - - cpu = cpumask_first(mask); - while (--nr_cpus > 0) { - if (index == 0) - break; - cpu = cpumask_next(cpu, mask); - index--; + u32 newcpu, index; + + if (policy == 1) { + newcpu = cpumask_first(cpumask_of_node(cpu_to_node(last_recv_cpu))); + index = rps_cpus[reciprocal_scale(hash, rps_cpus_nums - 1)]; + newcpu += index; + } else if (policy == 2) { + newcpu = cpumask_first(topology_cluster_cpumask(last_recv_cpu)); + index = cluster_rps_cpus[reciprocal_scale(hash, cluster_rps_cpus_nums - 1)]; + newcpu += index; + } else { + newcpu = last_recv_cpu; } - return cpu; + return newcpu; } static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev, struct oecls_sock_flow_table *tb, struct oecls_dev_flow_table *dtb, int old_rxq_id, int *rcpu, int *last_qtail) { - u32 last_recv_cpu, hash, val, cpu, tcpu; + u32 last_recv_cpu, hash, val, cpu, tcpu, newcpu; struct oecls_dev_flow *rflow; - int newcpu; - cpu = raw_smp_processor_id(); skb_reset_network_header(skb); hash = skb_get_hash(skb); if (!hash) return; + cpu = raw_smp_processor_id(); val = READ_ONCE(tb->ents[hash & tb->mask]); last_recv_cpu = val & oecls_cpu_mask; rflow = &dtb->flows[hash & dtb->mask]; @@ -282,7 +272,7 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev, } if (last_recv_cpu != cpu) return; - newcpu = get_cpu_in_mask(last_recv_cpu, hash); + newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy); if (newcpu < 0) newcpu = cpu; if (newcpu == cpu) { @@ -294,7 +284,7 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev, return; } - newcpu = get_cpu_in_mask(last_recv_cpu, hash); + newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy); if (newcpu >= 0) *rcpu = newcpu; else @@ -309,14 +299,13 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev, return; if (tcpu >= nr_cpu_ids) - set_oecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu); + set_oecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu, hash); } -static inline void loopback_numa_rps(struct sk_buff *skb, int *rcpu) +static inline void do_loopback_rps(struct sk_buff *skb, int *rcpu) { + u32 last_recv_cpu, hash, val, newcpu; struct oecls_sock_flow_table *stb; - u32 last_recv_cpu, hash, val; - int newcpu, index; skb_reset_network_header(skb); hash = __skb_get_hash_symmetric(skb); @@ -338,9 +327,36 @@ static inline void loopback_numa_rps(struct sk_buff *skb, int *rcpu) if ((val ^ hash) & ~oecls_cpu_mask) return; - newcpu = cpumask_first(cpumask_of_node(cpu_to_node(last_recv_cpu))); - index = rps_cpus[reciprocal_scale(hash, rps_cpus_nums - 1)]; - newcpu += index; + newcpu = get_rps_cpu(last_recv_cpu, hash, lo_rps_policy); + *rcpu = newcpu; + oecls_debug("last:%u curcpu:%d newcpu:%d\n", last_recv_cpu, raw_smp_processor_id(), newcpu); +} + +static inline void do_flow_rps(struct sk_buff *skb, int *rcpu) +{ + u32 last_recv_cpu, hash, val, newcpu; + struct oecls_sock_flow_table *stb; + + skb_reset_network_header(skb); + hash = skb_get_hash(skb); + if (!hash) + return; + + rcu_read_lock(); + stb = rcu_dereference(oecls_sock_flow_table); + if (stb) { 
+ val = READ_ONCE(stb->ents[hash & stb->mask]); + last_recv_cpu = val & oecls_cpu_mask; + } else { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); + + if ((val ^ hash) & ~oecls_cpu_mask) + return; + + newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy); *rcpu = newcpu; oecls_debug("last:%u curcpu:%d newcpu:%d\n", last_recv_cpu, raw_smp_processor_id(), newcpu); } @@ -358,14 +374,19 @@ void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail) if (!ndev) return; - if (lo_numa_rps && (ndev->type == ARPHRD_LOOPBACK && ndev->flags & IFF_LOOPBACK)) { - loopback_numa_rps(skb, cpu); + if (lo_rps_policy && (ndev->type == ARPHRD_LOOPBACK && ndev->flags & IFF_LOOPBACK)) { + do_loopback_rps(skb, cpu); return; } if (!is_oecls_config_netdev(ndev->name)) return; + if (rps_policy && mode == 0) { + do_flow_rps(skb, cpu); + return; + } + rxqueue = ndev->_rx; if (skb_rx_queue_recorded(skb)) { rxq_id = skb_get_rx_queue(skb); @@ -377,7 +398,6 @@ void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail) rxqueue += rxq_id; } - // oecls_debug("skb:%px, dev:%s, rxq_id:%d\n", skb, ndev->name, rxq_id); if (rxq_id < 0) return; @@ -506,30 +526,44 @@ static int oecls_sock_flow_table_release(void) mutex_unlock(&oecls_sock_flow_mutex); synchronize_rcu(); vfree(tb); - kfree(rps_cpus); return 0; } -static int oecls_sock_flow_table_init(void) +static int oecls_rps_cpus_init(void) { - struct oecls_sock_flow_table *table; - int size = sft_num, i; + int i; + cluster_rps_cpus_nums = cpumask_weight(topology_cluster_cpumask(0)); rps_cpus_nums = cpumask_weight(cpumask_of_node(0)); rps_cpus = kmalloc_array(rps_cpus_nums, sizeof(u16), GFP_KERNEL); if (!rps_cpus) return -ENOMEM; for (i = 0; i < rps_cpus_nums; i++) rps_cpus[i] = i; - oecls_debug("rps_cpus_nums:%d\n", rps_cpus_nums); - size = roundup_pow_of_two(size); - table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size)); - if (!table) { + cluster_rps_cpus = kmalloc_array(cluster_rps_cpus_nums, sizeof(u16), GFP_KERNEL); + if (!cluster_rps_cpus) { kfree(rps_cpus); return -ENOMEM; } + for (i = 0; i < cluster_rps_cpus_nums; i++) + cluster_rps_cpus[i] = i; + + oecls_debug("rps_cpus_nums:%d cluster_rps_cpus_nums:%d\n", + rps_cpus_nums, cluster_rps_cpus_nums); + return 0; +} + +static int oecls_sock_flow_table_init(void) +{ + struct oecls_sock_flow_table *table; + int size = sft_num, i; + + size = roundup_pow_of_two(size); + table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size)); + if (!table) + return -ENOMEM; oecls_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; oecls_debug("nr_cpu_ids:%d, oecls_cpu_mask:0x%x\n", nr_cpu_ids, oecls_cpu_mask); @@ -557,37 +591,40 @@ int oecls_flow_res_init(void) { int err; - err = oecls_sock_flow_table_init(); + err = oecls_rps_cpus_init(); if (err) return err; + err = oecls_sock_flow_table_init(); + if (err) + goto free; + err = oecls_dev_flow_table_init(); - if (err) { - oecls_sock_flow_table_release(); - return err; - } + if (err) + goto clean; if (mode != 0) //for lo rps RCU_INIT_POINTER(oecls_ops, &oecls_flow_ops); synchronize_rcu(); - -#ifdef CONFIG_RPS static_branch_inc(&oecls_rps_needed); - oecls_debug("oecls_rps_needed true\n"); -#endif return 0; +clean: + oecls_sock_flow_table_release(); +free: + kfree(cluster_rps_cpus); + kfree(rps_cpus); + return err; } void oecls_flow_res_clean(void) { -#ifdef CONFIG_RPS static_branch_dec(&oecls_rps_needed); - oecls_debug("oecls_rps_needed false\n"); -#endif rcu_assign_pointer(oecls_ops, NULL); synchronize_rcu(); oecls_sock_flow_table_release(); oecls_dev_flow_table_release(); + 
kfree(cluster_rps_cpus); + kfree(rps_cpus); } diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c index f0e69ac6b728..01a460beb053 100644 --- a/net/oenetcls/oenetcls_main.c +++ b/net/oenetcls/oenetcls_main.c @@ -60,9 +60,13 @@ unsigned int sft_num = 0x100000; module_param(sft_num, uint, 0444); MODULE_PARM_DESC(sft_num, "sock flow table entries, default 0x100000"); -int lo_numa_rps; -module_param(lo_numa_rps, int, 0644); -MODULE_PARM_DESC(lo_numa_rps, "enable loopback flow numa affinity"); +int rps_policy = 1; +module_param(rps_policy, int, 0644); +MODULE_PARM_DESC(rps_policy, "phy nic rps policy, default 1"); + +int lo_rps_policy; +module_param(lo_rps_policy, int, 0644); +MODULE_PARM_DESC(lo_rps_policy, "loopback rps policy, default 0"); static int rxq_multiplex_limit = 1; module_param(rxq_multiplex_limit, int, 0444); @@ -526,7 +530,7 @@ static int init_single_oecls_dev(char *if_name, unsigned int length) ret = oecls_filter_enable(dev_name, &old_state); if (ret) { oecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret); - if (lo_numa_rps) + if (lo_rps_policy) goto out; } } @@ -1119,7 +1123,7 @@ static __init int oecls_init(void) err = oecls_ntuple_res_init(); if (err) goto clean_rxq; - if (lo_numa_rps) + if (lo_rps_policy || rps_policy) err = oecls_flow_res_init(); } else { err = oecls_flow_res_init(); @@ -1128,7 +1132,7 @@ static __init int oecls_init(void) if (err) goto clean_rxq; - if (lo_numa_rps) + if (lo_rps_policy) static_branch_inc(&oecls_localrps_needed); return 0; @@ -1142,12 +1146,12 @@ static __init int oecls_init(void) static __exit void oecls_exit(void) { - if (lo_numa_rps) + if (lo_rps_policy) static_branch_dec(&oecls_localrps_needed); if (mode == 0) { oecls_ntuple_res_clean(); - if (lo_numa_rps) + if (lo_rps_policy || rps_policy) oecls_flow_res_clean(); } else { oecls_flow_res_clean(); diff --git a/net/oenetcls/oenetcls_ntuple.c b/net/oenetcls/oenetcls_ntuple.c index 76eee047783c..9d59fe509e6c 100644 --- a/net/oenetcls/oenetcls_ntuple.c +++ b/net/oenetcls/oenetcls_ntuple.c @@ -641,7 +641,7 @@ static void clean_oecls_sk_rules(void) mutex_unlock(&oecls_sk_rules.mutex); } -static const struct oecls_hook_ops oecls_ntuple_ops = { +static struct oecls_hook_ops oecls_ntuple_ops = { .oecls_flow_update = _oecls_flow_update, .oecls_set_localcpu = _oecls_set_cpu, .oecls_set_cpu = NULL, @@ -658,6 +658,8 @@ int oecls_ntuple_res_init(void) } init_oecls_sk_rules(); + if (rps_policy) + oecls_ntuple_ops.oecls_set_cpu = _oecls_set_cpu; RCU_INIT_POINTER(oecls_ops, &oecls_ntuple_ops); synchronize_rcu(); return 0; -- 2.34.1
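Condensed, the policy selection in get_rps_cpu() works as follows: the flow hash is scaled into a small identity table (rps_cpus for a NUMA node, cluster_rps_cpus for a CPU cluster) and the result is used as an offset from the first CPU of the last receiver's node or cluster. The standalone restatement below assumes, as the series does, that CPU IDs within a node or cluster are numbered contiguously from its first CPU; note that reciprocal_scale(hash, n - 1) maps the hash into [0, n - 1), mirroring the patch.

#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/topology.h>

/* Restatement of get_rps_cpu(); node_tbl/clus_tbl are identity maps
 * (tbl[i] == i) sized to one NUMA node / one cluster, built at init.
 */
static u32 pick_rps_cpu(u32 last_cpu, u32 hash, int policy,
			const u16 *node_tbl, u32 node_n,
			const u16 *clus_tbl, u32 clus_n)
{
	if (policy == 1)	/* numa: offset from first CPU of the node */
		return cpumask_first(cpumask_of_node(cpu_to_node(last_cpu))) +
		       node_tbl[reciprocal_scale(hash, node_n - 1)];
	if (policy == 2)	/* cluster: offset from first CPU of the cluster */
		return cpumask_first(topology_cluster_cpumask(last_cpu)) +
		       clus_tbl[reciprocal_scale(hash, clus_n - 1)];
	return last_cpu;	/* policy 0: no rps, stay on the receiving CPU */
}

Because the offset is keyed by the flow hash, a given flow always lands on the same CPU (so packets are not reordered) while distinct flows fan out across the node or cluster, which is the softirq balancing this series aims at.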