From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: feature
Link: https://gitee.com/openeuler/kernel/issues/ICBFCS
CVE: NA

--------------------------------

Support rps affinity policy setting via the new rps_policy and
lo_rps_policy module parameters (0 as no rps, 1 as numa affinity,
2 as cluster affinity). Also use a precomputed rps cpus map instead
of cpumask traversal to improve performance.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
---
 include/linux/oenetcls.h       |   8 +-
 net/oenetcls/oenetcls.h        |   3 +-
 net/oenetcls/oenetcls_flow.c   | 171 ++++++++++++++++++++-------------
 net/oenetcls/oenetcls_main.c   |  20 ++--
 net/oenetcls/oenetcls_ntuple.c |   4 +-
 5 files changed, 124 insertions(+), 82 deletions(-)

diff --git a/include/linux/oenetcls.h b/include/linux/oenetcls.h
index b618aa6b807f..a1929fb0a193 100644
--- a/include/linux/oenetcls.h
+++ b/include/linux/oenetcls.h
@@ -51,9 +51,9 @@ oenetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
 	ops = rcu_dereference(oecls_ops);
 	if (ops) {
 		/* mode 1 always use oecls_set_cpu hook for physical NIC or lo.
-		 * mode 0 set this hook to NULL, to avoid unneeded ops in
-		 * oenetcls_skblist_set_cpu() for physical NIC flows, and use
-		 * oecls_set_localcpu hook for loopback flows.
+		 * mode 0 set this hook to NULL if rps_policy is 0, to avoid
+		 * unneeded ops in oenetcls_skblist_set_cpu() for physical NIC
+		 * flows, and use oecls_set_localcpu hook for loopback flows.
 		 */
 		if (ops->oecls_set_cpu)
 			ops->oecls_set_cpu(skb, &cpu, &last_qtail);
@@ -74,8 +74,6 @@ oenetcls_skb_set_localcpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
 	struct net_device *dev = skb->dev;
 	bool result = false;
 
-	if (!static_branch_unlikely(&oecls_localrps_needed))
-		return result;
 	if (!dev || !(dev->type == ARPHRD_LOOPBACK && dev->flags & IFF_LOOPBACK))
 		return result;
 
diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h
index 55b0345cdba6..60f24165b3c4 100644
--- a/net/oenetcls/oenetcls.h
+++ b/net/oenetcls/oenetcls.h
@@ -147,7 +147,8 @@ extern int oecls_netdev_num;
 extern int oecls_numa_num;
 extern unsigned int dft_num;
 extern unsigned int sft_num;
-extern int lo_numa_rps;
+extern int rps_policy;
+extern int lo_rps_policy;
 
 #define oecls_debug(fmt, ...) \
 	do { \
diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c
index fd5ed67312f1..934a50e8bbf8 100644
--- a/net/oenetcls/oenetcls_flow.c
+++ b/net/oenetcls/oenetcls_flow.c
@@ -15,9 +15,9 @@
 #include <net/sock.h>
 #include "oenetcls.h"
 
-static u16 *rps_cpus;
-static int rps_cpus_nums;
 static u32 oecls_cpu_mask;
+static u16 *rps_cpus, *cluster_rps_cpus;
+static int rps_cpus_nums, cluster_rps_cpus_nums;
 static struct oecls_sock_flow_table __rcu *oecls_sock_flow_table;
 static DEFINE_MUTEX(oecls_sock_flow_mutex);
 static DEFINE_SPINLOCK(oecls_dev_flow_lock);
@@ -106,7 +106,7 @@ void _oecls_flow_update(struct sock *sk, struct sk_buff *skb)
 	cpu = raw_smp_processor_id();
 	rcu_read_lock();
 	tb = rcu_dereference(oecls_sock_flow_table);
-	if (lo_numa_rps && skb && sk_is_loopback(sk))
+	if (lo_rps_policy && skb && sk_is_loopback(sk))
 		hash = READ_ONCE(skb->sym_hash);
 	else
 		hash = READ_ONCE(sk->sk_rxhash);
@@ -125,13 +125,13 @@ void _oecls_flow_update(struct sock *sk, struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
-static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *skb)
+static int flow_get_queue_idx(struct net_device *dev, int nid, u32 hash)
 {
 	struct oecls_numa_bound_dev_info *bound_dev = NULL;
 	struct oecls_netdev_info *netdev_info;
 	struct oecls_numa_info *numa_info;
 	int rxq_id, rxq_num, i, devid;
-	u32 hash, index;
+	u32 index;
 
 	numa_info = get_oecls_numa_info(nid);
 	if (!numa_info)
@@ -156,7 +156,6 @@ static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *s
 	if (rxq_num == 0)
 		return -1;
 
-	hash = skb_get_hash(skb);
 	index = hash % rxq_num;
 
 	i = 0;
@@ -167,24 +166,24 @@ static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *s
 		return rxq_id;
 	}
 
-	oecls_debug("skb:%p, no found rxq\n", skb);
+	oecls_debug("no found rxq\n");
 	return -1;
 }
 
 static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
-		struct oecls_dev_flow *old_rflow, int old_rxq_id, u16 next_cpu)
+		struct oecls_dev_flow *old_rflow, int old_rxq_id, u16 next_cpu, u32 hash)
 {
 	struct netdev_rx_queue *rxqueue;
 	struct oecls_dev_flow_table *dtb;
 	struct oecls_dev_flow *rflow;
-	u32 flow_id, hash;
 	int rxq_index, rc;
+	u32 flow_id;
 
 	if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
 	    !(dev->features & NETIF_F_NTUPLE))
 		return;
 
-	rxq_index = flow_get_queue_idx(dev, cpu_to_node(next_cpu), skb);
+	rxq_index = flow_get_queue_idx(dev, cpu_to_node(next_cpu), hash);
 	if (rxq_index == skb_get_rx_queue(skb) || rxq_index < 0)
 		return;
 
@@ -193,7 +192,6 @@ static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb,
 	if (!dtb)
 		return;
 
-	hash = skb_get_hash(skb);
 	flow_id = hash & dtb->mask;
 	rflow = &dtb->flows[flow_id];
 	//Return if someone has configured this.
@@ -227,46 +225,38 @@ static bool oecls_do_hash(void)
 	return get_random_u32() % 100 < rcpu_probability;
 }
 
-static inline int get_cpu_in_mask(int tcpu, u32 hash)
+static inline u32 get_rps_cpu(u32 last_recv_cpu, u32 hash, int policy)
 {
-	const struct cpumask *mask;
-	int nr_cpus, cpu, index;
-
-	mask = cpumask_of_node(cpu_to_node(tcpu));
-
-	nr_cpus = cpumask_weight(mask);
-	if (nr_cpus == 0)
-		return -1;
-
-	index = reciprocal_scale(hash, nr_cpus);
-	if (index < 0)
-		return -1;
-
-	cpu = cpumask_first(mask);
-	while (--nr_cpus > 0) {
-		if (index == 0)
-			break;
-		cpu = cpumask_next(cpu, mask);
-		index--;
+	u32 newcpu, index;
+
+	if (policy == 1) {
+		newcpu = cpumask_first(cpumask_of_node(cpu_to_node(last_recv_cpu)));
+		index = rps_cpus[reciprocal_scale(hash, rps_cpus_nums - 1)];
+		newcpu += index;
+	} else if (policy == 2) {
+		newcpu = cpumask_first(topology_cluster_cpumask(last_recv_cpu));
+		index = cluster_rps_cpus[reciprocal_scale(hash, cluster_rps_cpus_nums - 1)];
+		newcpu += index;
+	} else {
+		newcpu = last_recv_cpu;
 	}
 
-	return cpu;
+	return newcpu;
 }
 
 static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 		struct oecls_sock_flow_table *tb, struct oecls_dev_flow_table *dtb,
 		int old_rxq_id, int *rcpu, int *last_qtail)
 {
-	u32 last_recv_cpu, hash, val, cpu, tcpu;
+	u32 last_recv_cpu, hash, val, cpu, tcpu, newcpu;
 	struct oecls_dev_flow *rflow;
-	int newcpu;
-
-	cpu = raw_smp_processor_id();
 	skb_reset_network_header(skb);
 	hash = skb_get_hash(skb);
 	if (!hash)
 		return;
 
+	cpu = raw_smp_processor_id();
 	val = READ_ONCE(tb->ents[hash & tb->mask]);
 	last_recv_cpu = val & oecls_cpu_mask;
 	rflow = &dtb->flows[hash & dtb->mask];
@@ -282,7 +272,7 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 	}
 	if (last_recv_cpu != cpu)
 		return;
-	newcpu = get_cpu_in_mask(last_recv_cpu, hash);
+	newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy);
 	if (newcpu < 0)
 		newcpu = cpu;
 	if (newcpu == cpu) {
@@ -294,7 +284,7 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 		return;
 	}
 
-	newcpu = get_cpu_in_mask(last_recv_cpu, hash);
+	newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy);
 	if (newcpu >= 0)
 		*rcpu = newcpu;
 	else
@@ -309,14 +299,13 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 		return;
 
 	if (tcpu >= nr_cpu_ids)
-		set_oecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu);
+		set_oecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu, hash);
 }
 
-static inline void loopback_numa_rps(struct sk_buff *skb, int *rcpu)
+static inline void do_loopback_rps(struct sk_buff *skb, int *rcpu)
 {
+	u32 last_recv_cpu, hash, val, newcpu;
 	struct oecls_sock_flow_table *stb;
-	u32 last_recv_cpu, hash, val;
-	int newcpu, index;
 
 	skb_reset_network_header(skb);
 	hash = __skb_get_hash_symmetric(skb);
@@ -338,9 +327,36 @@ static inline void loopback_numa_rps(struct sk_buff *skb, int *rcpu)
 	if ((val ^ hash) & ~oecls_cpu_mask)
 		return;
 
-	newcpu = cpumask_first(cpumask_of_node(cpu_to_node(last_recv_cpu)));
-	index = rps_cpus[reciprocal_scale(hash, rps_cpus_nums - 1)];
-	newcpu += index;
+	newcpu = get_rps_cpu(last_recv_cpu, hash, lo_rps_policy);
 	*rcpu = newcpu;
 	oecls_debug("last:%u curcpu:%d newcpu:%d\n", last_recv_cpu, raw_smp_processor_id(), newcpu);
+}
+
+static inline void do_flow_rps(struct sk_buff *skb, int *rcpu)
+{
+	u32 last_recv_cpu, hash, val, newcpu;
+	struct oecls_sock_flow_table *stb;
+
+	skb_reset_network_header(skb);
+	hash = skb_get_hash(skb);
+	if (!hash)
+		return;
+
+	rcu_read_lock();
+	stb = rcu_dereference(oecls_sock_flow_table);
+	if (stb) {
+		val = READ_ONCE(stb->ents[hash & stb->mask]);
+		last_recv_cpu = val & oecls_cpu_mask;
+	} else {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+
+	if ((val ^ hash) & ~oecls_cpu_mask)
+		return;
+
+	newcpu = get_rps_cpu(last_recv_cpu, hash, rps_policy);
+	*rcpu = newcpu;
+	oecls_debug("last:%u curcpu:%d newcpu:%d\n", last_recv_cpu, raw_smp_processor_id(), newcpu);
 }
@@ -358,14 +374,19 @@ void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
 	if (!ndev)
 		return;
 
-	if (lo_numa_rps && (ndev->type == ARPHRD_LOOPBACK && ndev->flags & IFF_LOOPBACK)) {
-		loopback_numa_rps(skb, cpu);
+	if (lo_rps_policy && (ndev->type == ARPHRD_LOOPBACK && ndev->flags & IFF_LOOPBACK)) {
+		do_loopback_rps(skb, cpu);
 		return;
 	}
 
 	if (!is_oecls_config_netdev(ndev->name))
 		return;
 
+	if (rps_policy && mode == 0) {
+		do_flow_rps(skb, cpu);
+		return;
+	}
+
 	rxqueue = ndev->_rx;
 	if (skb_rx_queue_recorded(skb)) {
 		rxq_id = skb_get_rx_queue(skb);
@@ -377,7 +398,6 @@ void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
 		rxqueue += rxq_id;
 	}
 
-	// oecls_debug("skb:%px, dev:%s, rxq_id:%d\n", skb, ndev->name, rxq_id);
 	if (rxq_id < 0)
 		return;
 
@@ -506,30 +526,44 @@ static int oecls_sock_flow_table_release(void)
 	mutex_unlock(&oecls_sock_flow_mutex);
 	synchronize_rcu();
 	vfree(tb);
-	kfree(rps_cpus);
 	return 0;
 }
 
-static int oecls_sock_flow_table_init(void)
+static int oecls_rps_cpus_init(void)
 {
-	struct oecls_sock_flow_table *table;
-	int size = sft_num, i;
+	int i;
 
+	cluster_rps_cpus_nums = cpumask_weight(topology_cluster_cpumask(0));
 	rps_cpus_nums = cpumask_weight(cpumask_of_node(0));
 	rps_cpus = kmalloc_array(rps_cpus_nums, sizeof(u16), GFP_KERNEL);
 	if (!rps_cpus)
 		return -ENOMEM;
 	for (i = 0; i < rps_cpus_nums; i++)
 		rps_cpus[i] = i;
-	oecls_debug("rps_cpus_nums:%d\n", rps_cpus_nums);
 
-	size = roundup_pow_of_two(size);
-	table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size));
-	if (!table) {
+	cluster_rps_cpus = kmalloc_array(cluster_rps_cpus_nums, sizeof(u16), GFP_KERNEL);
+	if (!cluster_rps_cpus) {
 		kfree(rps_cpus);
 		return -ENOMEM;
 	}
+	for (i = 0; i < cluster_rps_cpus_nums; i++)
+		cluster_rps_cpus[i] = i;
+
+	oecls_debug("rps_cpus_nums:%d cluster_rps_cpus_nums:%d\n",
+		    rps_cpus_nums, cluster_rps_cpus_nums);
+	return 0;
+}
+
+static int oecls_sock_flow_table_init(void)
+{
+	struct oecls_sock_flow_table *table;
+	int size = sft_num, i;
+
+	size = roundup_pow_of_two(size);
+	table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size));
+	if (!table)
+		return -ENOMEM;
 
 	oecls_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
 	oecls_debug("nr_cpu_ids:%d, oecls_cpu_mask:0x%x\n", nr_cpu_ids, oecls_cpu_mask);
@@ -557,37 +591,40 @@ int oecls_flow_res_init(void)
 {
 	int err;
 
-	err = oecls_sock_flow_table_init();
+	err = oecls_rps_cpus_init();
 	if (err)
 		return err;
 
+	err = oecls_sock_flow_table_init();
+	if (err)
+		goto free;
+
 	err = oecls_dev_flow_table_init();
-	if (err) {
-		oecls_sock_flow_table_release();
-		return err;
-	}
+	if (err)
+		goto clean;
 
 	if (mode != 0) //for lo rps
 		RCU_INIT_POINTER(oecls_ops, &oecls_flow_ops);
 	synchronize_rcu();
-
-#ifdef CONFIG_RPS
 	static_branch_inc(&oecls_rps_needed);
-	oecls_debug("oecls_rps_needed true\n");
-#endif
 	return 0;
+clean:
+	oecls_sock_flow_table_release();
+free:
+	kfree(cluster_rps_cpus);
+	kfree(rps_cpus);
+	return err;
 }
 
 void oecls_flow_res_clean(void)
 {
-#ifdef CONFIG_RPS
 	static_branch_dec(&oecls_rps_needed);
-	oecls_debug("oecls_rps_needed false\n");
-#endif
 	rcu_assign_pointer(oecls_ops, NULL);
 	synchronize_rcu();
 	oecls_sock_flow_table_release();
 	oecls_dev_flow_table_release();
+	kfree(cluster_rps_cpus);
+	kfree(rps_cpus);
 }
diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c
index f0e69ac6b728..01a460beb053 100644
--- a/net/oenetcls/oenetcls_main.c
+++ b/net/oenetcls/oenetcls_main.c
@@ -60,9 +60,13 @@ unsigned int sft_num = 0x100000;
 module_param(sft_num, uint, 0444);
 MODULE_PARM_DESC(sft_num, "sock flow table entries, default 0x100000");
 
-int lo_numa_rps;
-module_param(lo_numa_rps, int, 0644);
-MODULE_PARM_DESC(lo_numa_rps, "enable loopback flow numa affinity");
+int rps_policy = 1;
+module_param(rps_policy, int, 0644);
+MODULE_PARM_DESC(rps_policy, "phy nic rps policy (0 no rps, 1 numa, 2 cluster), default 1");
+
+int lo_rps_policy;
+module_param(lo_rps_policy, int, 0644);
+MODULE_PARM_DESC(lo_rps_policy, "loopback rps policy (0 no rps, 1 numa, 2 cluster), default 0");
 
 static int rxq_multiplex_limit = 1;
 module_param(rxq_multiplex_limit, int, 0444);
@@ -526,7 +530,7 @@ static int init_single_oecls_dev(char *if_name, unsigned int length)
 	ret = oecls_filter_enable(dev_name, &old_state);
 	if (ret) {
 		oecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret);
-		if (lo_numa_rps)
+		if (lo_rps_policy)
 			goto out;
 	}
 }
@@ -1119,7 +1123,7 @@ static __init int oecls_init(void)
 		err = oecls_ntuple_res_init();
 		if (err)
 			goto clean_rxq;
-		if (lo_numa_rps)
+		if (lo_rps_policy || rps_policy)
 			err = oecls_flow_res_init();
 	} else {
 		err = oecls_flow_res_init();
@@ -1128,7 +1132,7 @@ static __init int oecls_init(void)
 	if (err)
 		goto clean_rxq;
 
-	if (lo_numa_rps)
+	if (lo_rps_policy)
 		static_branch_inc(&oecls_localrps_needed);
 	return 0;
 
@@ -1142,12 +1146,12 @@ static __exit void oecls_exit(void)
 {
-	if (lo_numa_rps)
+	if (lo_rps_policy)
 		static_branch_dec(&oecls_localrps_needed);
 
 	if (mode == 0) {
 		oecls_ntuple_res_clean();
-		if (lo_numa_rps)
+		if (lo_rps_policy || rps_policy)
 			oecls_flow_res_clean();
 	} else {
 		oecls_flow_res_clean();
diff --git a/net/oenetcls/oenetcls_ntuple.c b/net/oenetcls/oenetcls_ntuple.c
index 76eee047783c..9d59fe509e6c 100644
--- a/net/oenetcls/oenetcls_ntuple.c
+++ b/net/oenetcls/oenetcls_ntuple.c
@@ -641,7 +641,7 @@ static void clean_oecls_sk_rules(void)
 	mutex_unlock(&oecls_sk_rules.mutex);
 }
 
-static const struct oecls_hook_ops oecls_ntuple_ops = {
+static struct oecls_hook_ops oecls_ntuple_ops = {
 	.oecls_flow_update = _oecls_flow_update,
 	.oecls_set_localcpu = _oecls_set_cpu,
 	.oecls_set_cpu = NULL,
@@ -658,6 +658,8 @@ int oecls_ntuple_res_init(void)
 	}
 
 	init_oecls_sk_rules();
+	if (rps_policy)
+		oecls_ntuple_ops.oecls_set_cpu = _oecls_set_cpu;
 	RCU_INIT_POINTER(oecls_ops, &oecls_ntuple_ops);
 	synchronize_rcu();
 	return 0;
-- 
2.34.1
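
For reference only (not part of the patch): a minimal userspace sketch of the hash-to-CPU
mapping that the new get_rps_cpu() implements, so the three policy values can be read in
isolation. The node/cluster sizes, the *_first_cpu() helpers and main() are illustrative
assumptions standing in for cpumask_of_node()/topology_cluster_cpumask(); only the
precomputed-index-map plus reciprocal_scale() pattern mirrors the patch.

/*
 * Illustrative sketch (assumed sizes, not kernel code): policy 0 keeps the
 * flow on the CPU that last received it, policy 1 picks a CPU inside that
 * CPU's NUMA node, policy 2 picks a CPU inside its cluster.  The precomputed
 * index maps replace the old per-packet cpumask traversal.
 */
#include <stdint.h>
#include <stdio.h>

#define NODE_CPUS	8	/* assumed CPUs per NUMA node */
#define CLUSTER_CPUS	4	/* assumed CPUs per cluster */

static uint16_t rps_cpus[NODE_CPUS];		/* filled once at init: 0, 1, 2, ... */
static uint16_t cluster_rps_cpus[CLUSTER_CPUS];

/* Same multiply-shift scaling the kernel's reciprocal_scale() performs. */
static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
	return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

/* Stand-ins for cpumask_first(cpumask_of_node(...)) and the cluster variant. */
static uint32_t node_first_cpu(uint32_t cpu)	{ return cpu - cpu % NODE_CPUS; }
static uint32_t cluster_first_cpu(uint32_t cpu)	{ return cpu - cpu % CLUSTER_CPUS; }

static uint32_t get_rps_cpu(uint32_t last_recv_cpu, uint32_t hash, int policy)
{
	if (policy == 1)	/* numa affinity: node base CPU + scaled map index */
		return node_first_cpu(last_recv_cpu) +
		       rps_cpus[reciprocal_scale(hash, NODE_CPUS - 1)];
	if (policy == 2)	/* cluster affinity: cluster base CPU + scaled map index */
		return cluster_first_cpu(last_recv_cpu) +
		       cluster_rps_cpus[reciprocal_scale(hash, CLUSTER_CPUS - 1)];
	return last_recv_cpu;	/* no rps: leave the flow where it was received */
}

int main(void)
{
	uint32_t i;

	for (i = 0; i < NODE_CPUS; i++)
		rps_cpus[i] = i;
	for (i = 0; i < CLUSTER_CPUS; i++)
		cluster_rps_cpus[i] = i;

	/* Example: flow hashed to 0xdeadbeef, last received on CPU 13. */
	for (int policy = 0; policy <= 2; policy++)
		printf("policy %d -> cpu %u\n", policy,
		       get_rps_cpu(13, 0xdeadbeef, policy));
	return 0;
}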