[PATCH OLK-6.6 5/9] net/oenetcls: Add local flow NUMA-aware rps

22 Jan 2026

From: Yue Haibing <yuehaibing@huawei.com>

hulk inclusion
category: feature
Link: https://gitee.com/openeuler/kernel/issues/ICBFCS
CVE: NA

--------------------------------

Use NUMA-aware flow tables for local (loopback) flows to achieve better
cache effectiveness and NUMA affinity. Also cache the check_appname()
result in the sock to avoid repeating the check on every flow update.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
---
 include/linux/oenetcls.h       |  40 ++++++++++--
 include/linux/skbuff.h         |   4 ++
 include/net/sock.h             |   4 ++
 net/core/dev.c                 |   8 +++
 net/core/sock.c                |   3 +
 net/ipv4/tcp.c                 |   5 +-
 net/oenetcls/oenetcls.h        |   7 +++
 net/oenetcls/oenetcls_flow.c   | 112 +++++++++++++++++++++++++++++----
 net/oenetcls/oenetcls_main.c   |  30 +++++++--
 net/oenetcls/oenetcls_ntuple.c |   3 +-
 10 files changed, 191 insertions(+), 25 deletions(-)

diff --git a/include/linux/oenetcls.h b/include/linux/oenetcls.h
index 09f89131f32b..b618aa6b807f 100644
--- a/include/linux/oenetcls.h
+++ b/include/linux/oenetcls.h
@@ -2,10 +2,13 @@
 #ifndef _LINUX_OENETCLS_H
 #define _LINUX_OENETCLS_H
 
+#include <linux/if_arp.h>
+
 struct oecls_hook_ops {
 	void (*oecls_cfg_rxcls)(struct sock *sk, int is_del);
-	void (*oecls_flow_update)(struct sock *sk);
+	void (*oecls_flow_update)(struct sock *sk, struct sk_buff *skb);
 	void (*oecls_set_cpu)(struct sk_buff *skb, int *cpu, int *last_qtail);
+	void (*oecls_set_localcpu)(struct sk_buff *skb, int *cpu, int *last_qtail);
 	bool (*oecls_timeout)(struct net_device *dev, u16 rxq_index,
 							u32 flow_id, u16 filter_id);
 };
@@ -13,6 +16,7 @@ struct oecls_hook_ops {
 typedef int (*enqueue_f)(struct sk_buff *skb, int cpu, unsigned int *qtail);
 extern const struct oecls_hook_ops __rcu *oecls_ops;
 extern struct static_key_false oecls_rps_needed;
+extern struct static_key_false oecls_localrps_needed;
 
 static inline void oenetcls_cfg_rxcls(struct sock *sk, int is_del)
 {
@@ -25,14 +29,14 @@ static inline void oenetcls_cfg_rxcls(struct sock *sk, int is_del)
 	rcu_read_unlock();
 }
 
-static inline void oenetcls_flow_update(struct sock *sk)
+static inline void oenetcls_flow_update(struct sock *sk, struct sk_buff *skb)
 {
 	const struct oecls_hook_ops *ops;
 
 	rcu_read_lock();
 	ops = rcu_dereference(oecls_ops);
 	if (ops && ops->oecls_flow_update)
-		ops->oecls_flow_update(sk);
+		ops->oecls_flow_update(sk, skb);
 	rcu_read_unlock();
 }
 
@@ -45,8 +49,16 @@ oenetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
 
 	rcu_read_lock();
 	ops = rcu_dereference(oecls_ops);
-	if (ops && ops->oecls_set_cpu) {
-		ops->oecls_set_cpu(skb, &cpu, &last_qtail);
+	if (ops) {
+		/* mode 1 always use oecls_set_cpu hook for physical NIC or lo.
+		 * mode 0 set this hook to NULL, to avoid unneeded ops in
+		 * oenetcls_skblist_set_cpu() for physical NIC flows, and use
+		 * oecls_set_localcpu hook for loopback flows.
+		 */
+		if (ops->oecls_set_cpu)
+			ops->oecls_set_cpu(skb, &cpu, &last_qtail);
+		else if (ops->oecls_set_localcpu)
+			ops->oecls_set_localcpu(skb, &cpu, &last_qtail);
 		if (cpu >= 0) {
 			*ret = enq_func(skb, cpu, &last_qtail);
 			result = true;
@@ -56,6 +68,24 @@ oenetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
 	return result;
 }
 
+/* Steer a loopback-device skb via oenetcls_skb_set_cpu(); true if it was enqueued. */
+static inline bool
+oenetcls_skb_set_localcpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
+{
+	struct net_device *dev = skb->dev;
+	bool steered;
+
+	if (!static_branch_unlikely(&oecls_localrps_needed))
+		return false;
+	if (!dev || dev->type != ARPHRD_LOOPBACK || !(dev->flags & IFF_LOOPBACK))
+		return false;
+
+	preempt_disable();
+	steered = oenetcls_skb_set_cpu(skb, enq_func, ret);
+	preempt_enable();
+	return steered;
+}
+
 static inline void
 oenetcls_skblist_set_cpu(struct list_head *head, enqueue_f enq_func)
 {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 1597a5f9b5b8..0f985ba19006 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1056,7 +1056,11 @@ struct sk_buff {
 #else
 	KABI_RESERVE(1)
 #endif
+#if IS_ENABLED(CONFIG_OENETCLS)
+	KABI_USE(2, __u32 sym_hash)
+#else
 	KABI_RESERVE(2)
+#endif
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 26456cb2bf8f..c44b2025bc54 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -554,7 +554,11 @@ struct sock {
 		u64	sk_gid_padding;
 	};
 #endif
+#if IS_ENABLED(CONFIG_OENETCLS)
+	KABI_USE(1, u8 oecls_cmd_matched)
+#else
 	KABI_RESERVE(1)
+#endif
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
diff --git a/net/core/dev.c b/net/core/dev.c
index 06d59a919a4b..f388233f4f75 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -164,6 +164,8 @@ const struct oecls_hook_ops __rcu *oecls_ops __read_mostly;
 EXPORT_SYMBOL_GPL(oecls_ops);
 struct static_key_false oecls_rps_needed __read_mostly;
 EXPORT_SYMBOL(oecls_rps_needed);
+struct static_key_false oecls_localrps_needed __read_mostly;
+EXPORT_SYMBOL(oecls_localrps_needed);
 #endif
 
 static DEFINE_SPINLOCK(ptype_lock);
@@ -5196,6 +5198,12 @@ static int netif_rx_internal(struct sk_buff *skb)
 
 	trace_netif_rx(skb);
 
+#if IS_ENABLED(CONFIG_OENETCLS)
+	if (static_branch_unlikely(&oecls_localrps_needed)) {
+		if (oenetcls_skb_set_localcpu(skb, enqueue_to_backlog, &ret))
+			return ret;
+	}
+#endif
 #ifdef CONFIG_RPS
 	if (static_branch_unlikely(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
diff --git a/net/core/sock.c b/net/core/sock.c
index d63f5ee49054..45f7f9aaca46 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2172,6 +2172,9 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		sock_update_classid(&sk->sk_cgrp_data);
 		sock_update_netprioidx(&sk->sk_cgrp_data);
 		sk_tx_queue_clear(sk);
+#if IS_ENABLED(CONFIG_OENETCLS)
+		sk->oecls_cmd_matched = 0;
+#endif
 	}
 
 	return sk;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7c66c46c125f..2c98ef85072b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2543,6 +2543,9 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len,
 		if (used + offset < skb->len)
 			continue;
 
+#if IS_ENABLED(CONFIG_OENETCLS)
+		oenetcls_flow_update(sk, skb);
+#endif
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
 			goto found_fin_ok;
 		if (!(flags & MSG_PEEK))
@@ -2587,7 +2590,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
 		return inet_recv_error(sk, msg, len, addr_len);
 
 #if IS_ENABLED(CONFIG_OENETCLS)
-	oenetcls_flow_update(sk);
+	oenetcls_flow_update(sk, NULL);
 #endif
 	if (sk_can_busy_loop(sk) &&
 	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h
index 6d8e8e5e5b15..755d0ab299ee 100644
--- a/net/oenetcls/oenetcls.h
+++ b/net/oenetcls/oenetcls.h
@@ -13,6 +13,10 @@
 #define OECLS_NO_FILTER 0xffff
 #define OECLS_NO_CPU 0xffff
 
+#define OECLS_CMD_UNKNOWN      0
+#define OECLS_CMD_MATCHED      1
+#define OECLS_CMD_NO_MATCH     2
+
 struct oecls_netdev_queue_info {
 	int irq;
 	int affinity_cpu;
@@ -135,6 +139,7 @@ extern int oecls_netdev_num;
 extern int oecls_numa_num;
 extern unsigned int dft_num;
 extern unsigned int sft_num;
+extern int lo_numa_rps;
 
 #define oecls_debug(fmt, ...)					\
 	do {							\
@@ -183,5 +188,7 @@ int oecls_ntuple_res_init(void);
 void oecls_ntuple_res_clean(void);
 int oecls_flow_res_init(void);
 void oecls_flow_res_clean(void);
+void _oecls_flow_update(struct sock *sk, struct sk_buff *skb);
+void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail);
 
 #endif	/* _NET_OENETCLS_H */
diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c
index 0953b4bd91ae..bb52a5b78c47 100644
--- a/net/oenetcls/oenetcls_flow.c
+++ b/net/oenetcls/oenetcls_flow.c
@@ -1,15 +1,22 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include <linux/inetdevice.h>
-#include <linux/netdevice.h>
-#include <linux/rtnetlink.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/inet.h>
 #include <linux/irq.h>
 #include <linux/irqdesc.h>
-#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/oenetcls.h>
+#include <linux/rtnetlink.h>
+#include <net/inet_sock.h>
+#include <net/ipv6.h>
 #include <net/netdev_rx_queue.h>
 #include <net/sock.h>
-#include <linux/oenetcls.h>
 #include "oenetcls.h"
 
+static u16 *rps_cpus;
+static int rps_cpus_nums;
 static u32 oecls_cpu_mask;
 static struct oecls_sock_flow_table __rcu *oecls_sock_flow_table;
 static DEFINE_MUTEX(oecls_sock_flow_mutex);
@@ -59,22 +66,50 @@ static bool _oecls_timeout(struct net_device *dev, u16 rxq_index,
 	return expire;
 }
 
-static void _oecls_flow_update(struct sock *sk)
+/* True if an endpoint of @sk is loopback (IPv6: also if both addresses are equal). */
+static inline bool sk_is_loopback(struct sock *sk)
+{
+	switch (sk->sk_family) {
+	case AF_INET:
+		return ipv4_is_loopback(sk->sk_daddr) ||
+		       ipv4_is_loopback(sk->sk_rcv_saddr);
+	case AF_INET6:
+		return ipv6_addr_loopback(&sk->sk_v6_daddr) ||
+		       ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) ||
+		       ipv6_addr_v4mapped_loopback(&sk->sk_v6_daddr) ||
+		       ipv6_addr_v4mapped_loopback(&sk->sk_v6_rcv_saddr) ||
+		       ipv6_addr_equal(&sk->sk_v6_daddr, &sk->sk_v6_rcv_saddr);
+	default:
+		return false;
+	}
+}
+
+void _oecls_flow_update(struct sock *sk, struct sk_buff *skb)
 {
 	struct oecls_sock_flow_table *tb;
 	unsigned int hash, index;
-	u32 val;
-	u32 cpu = raw_smp_processor_id();
+	u32 val, cpu;
 
 	if (sk->sk_state != TCP_ESTABLISHED)
 		return;
 
-	if (check_appname(current->comm))
+	if (unlikely(sk->oecls_cmd_matched == OECLS_CMD_UNKNOWN)) {
+		if (check_appname(current->comm)) {
+			sk->oecls_cmd_matched = OECLS_CMD_NO_MATCH;
+			return;
+		}
+		sk->oecls_cmd_matched = OECLS_CMD_MATCHED;
+	}
+	if (sk->oecls_cmd_matched != OECLS_CMD_MATCHED)
 		return;
 
+	cpu = raw_smp_processor_id();
 	rcu_read_lock();
 	tb = rcu_dereference(oecls_sock_flow_table);
-	hash = READ_ONCE(sk->sk_rxhash);
+	if (lo_numa_rps && skb && sk_is_loopback(sk))
+		hash = READ_ONCE(skb->sym_hash);
+	else
+		hash = READ_ONCE(sk->sk_rxhash);
 	if (tb && hash) {
 		index = hash & tb->mask;
 		val = hash & ~oecls_cpu_mask;
@@ -183,7 +218,7 @@ static bool oecls_do_hash(void)
 	return get_random_u32() % 100 < rcpu_probability;
 }
 
-static int get_cpu_in_mask(int tcpu, u32 hash)
+static inline int get_cpu_in_mask(int tcpu, u32 hash)
 {
 	const struct cpumask *mask;
 	int nr_cpus, cpu, index;
@@ -268,7 +303,40 @@ static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
 		set_oecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu);
 }
 
-static void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
+/* Steer a loopback skb to the NUMA node of its flow's recorded CPU; *rcpu kept on failure. */
+static inline void loopback_numa_rps(struct sk_buff *skb, int *rcpu)
+{
+	struct oecls_sock_flow_table *stb;
+	u32 last_recv_cpu, hash, val;
+	int newcpu;
+
+	skb_reset_network_header(skb);
+	hash = __skb_get_hash_symmetric(skb);
+	if (!hash)
+		return;
+
+	WRITE_ONCE(skb->sym_hash, hash);
+	rcu_read_lock();
+	stb = rcu_dereference(oecls_sock_flow_table);
+	val = stb ? READ_ONCE(stb->ents[hash & stb->mask]) : 0;
+	rcu_read_unlock();
+	last_recv_cpu = val & oecls_cpu_mask;
+
+	/* Bail out on: no table, entry of another flow, bad CPU id, empty rps_cpus. */
+	if (!stb || ((val ^ hash) & ~oecls_cpu_mask) ||
+	    last_recv_cpu >= nr_cpu_ids || rps_cpus_nums <= 0)
+		return;
+
+	/* reciprocal_scale() returns [0, ep_ro): pass the full count, not count - 1. */
+	newcpu = cpumask_first(cpumask_of_node(cpu_to_node(last_recv_cpu))) +
+		 rps_cpus[reciprocal_scale(hash, rps_cpus_nums)];
+	if (newcpu >= nr_cpu_ids || !cpu_online(newcpu))
+		return;
+	*rcpu = newcpu;
+	oecls_debug("last:%u curcpu:%d newcpu:%d\n", last_recv_cpu, raw_smp_processor_id(), newcpu);
+}
+
+void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
 {
 	struct net_device *ndev = skb->dev;
 	struct oecls_sock_flow_table *stb;
@@ -281,6 +349,11 @@ static void _oecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
 	if (!ndev)
 		return;
 
+	if (lo_numa_rps && (ndev->type == ARPHRD_LOOPBACK && ndev->flags & IFF_LOOPBACK)) {
+		loopback_numa_rps(skb, cpu);
+		return;
+	}
+
 	if (!is_oecls_config_netdev(ndev->name))
 		return;
 
@@ -424,6 +497,7 @@ static int oecls_sock_flow_table_release(void)
 	mutex_unlock(&oecls_sock_flow_mutex);
 	synchronize_rcu();
 	vfree(tb);
+	kfree(rps_cpus);
 
 	return 0;
 }
@@ -433,10 +507,20 @@ static int oecls_sock_flow_table_init(void)
 	struct oecls_sock_flow_table *table;
 	int size = sft_num, i;
 
+	rps_cpus_nums = cpumask_weight(cpumask_of_node(0));
+	rps_cpus = kmalloc_array(rps_cpus_nums, sizeof(u16), GFP_KERNEL);
+	if (!rps_cpus)
+		return -ENOMEM;
+	for (i = 0; i < rps_cpus_nums; i++)
+		rps_cpus[i] = i;
+	oecls_debug("rps_cpus_nums:%d\n", rps_cpus_nums);
+
 	size = roundup_pow_of_two(size);
 	table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size));
-	if (!table)
+	if (!table) {
+		kfree(rps_cpus);
 		return -ENOMEM;
+	}
 
 	oecls_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;
 	oecls_debug("nr_cpu_ids:%d, oecls_cpu_mask:0x%x\n", nr_cpu_ids, oecls_cpu_mask);
@@ -455,6 +539,7 @@ static int oecls_sock_flow_table_init(void)
 static const struct oecls_hook_ops oecls_flow_ops = {
 	.oecls_flow_update = _oecls_flow_update,
 	.oecls_set_cpu = _oecls_set_cpu,
+	.oecls_set_localcpu = NULL,
 	.oecls_timeout = _oecls_timeout,
 	.oecls_cfg_rxcls = NULL,
 };
@@ -473,7 +558,8 @@ int oecls_flow_res_init(void)
 		return err;
 	}
 
-	RCU_INIT_POINTER(oecls_ops, &oecls_flow_ops);
+	if (mode != 0) //for lo rps
+		RCU_INIT_POINTER(oecls_ops, &oecls_flow_ops);
 	synchronize_rcu();
 
 #ifdef CONFIG_RPS
diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c
index f9574b344331..e6cffacca161 100644
--- a/net/oenetcls/oenetcls_main.c
+++ b/net/oenetcls/oenetcls_main.c
@@ -6,6 +6,7 @@
 #include <linux/irq.h>
 #include <linux/irqdesc.h>
 #include <linux/rtnetlink.h>
+#include <linux/oenetcls.h>
 #include "oenetcls.h"
 
 int oecls_netdev_num;
@@ -59,6 +60,10 @@ unsigned int sft_num = 0x100000;
 module_param(sft_num, uint, 0444);
 MODULE_PARM_DESC(sft_num, "sock flow table entries, default 0x100000");
 
+int lo_numa_rps;
+module_param(lo_numa_rps, int, 0644);
+MODULE_PARM_DESC(lo_numa_rps, "enable loopback flow numa affinity");
+
 static bool check_params(void)
 {
 	if (mode != 0 && mode != 1 && mode != 2)
@@ -517,7 +522,8 @@ static int init_single_oecls_dev(char *if_name, unsigned int length)
 		ret = oecls_filter_enable(dev_name, &old_state);
 		if (ret) {
 			oecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret);
-			goto out;
+			if (lo_numa_rps)
+				goto out;
 		}
 	}
 
@@ -1081,14 +1087,22 @@ static __init int oecls_init(void)
 	if (mode == 2 && rcpu_probability < 0)
 		fixup_rcpu_load();
 
-	if (mode == 0)
+	if (mode == 0) {
 		err = oecls_ntuple_res_init();
-	else
+		if (err)
+			goto clean_rxq;
+		if (lo_numa_rps)
+			err = oecls_flow_res_init();
+	} else {
 		err = oecls_flow_res_init();
+	}
 
 	if (err)
 		goto clean_rxq;
 
+	if (lo_numa_rps)
+		static_branch_inc(&oecls_localrps_needed);
+
 	return 0;
 
 clean_rxq:
@@ -1100,10 +1114,16 @@ static __init int oecls_init(void)
 
 static __exit void oecls_exit(void)
 {
-	if (mode == 0)
+	if (lo_numa_rps)
+		static_branch_dec(&oecls_localrps_needed);
+
+	if (mode == 0) {
 		oecls_ntuple_res_clean();
-	else
+		if (lo_numa_rps)
+			oecls_flow_res_clean();
+	} else {
 		oecls_flow_res_clean();
+	}
 
 #ifdef CONFIG_XPS
 	set_netdev_xps_queue(false);
diff --git a/net/oenetcls/oenetcls_ntuple.c b/net/oenetcls/oenetcls_ntuple.c
index def33d30f642..c0b97ea7649e 100644
--- a/net/oenetcls/oenetcls_ntuple.c
+++ b/net/oenetcls/oenetcls_ntuple.c
@@ -582,7 +582,8 @@ static void clean_oecls_sk_rules(void)
 }
 
 static const struct oecls_hook_ops oecls_ntuple_ops = {
-	.oecls_flow_update = NULL,
+	.oecls_flow_update = _oecls_flow_update,
+	.oecls_set_localcpu = _oecls_set_cpu,
 	.oecls_set_cpu = NULL,
 	.oecls_timeout = NULL,
 	.oecls_cfg_rxcls = ethtool_cfg_rxcls,
-- 
2.34.1

    

[PATCH OLK-6.6 5/9] net/oenetcls: Add local flow NUMA-aware rps

Liu Jian