[PATCH OLK-5.10] net/oenetcls: introduce oenetcls for network optimization

hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICBFCS CVE: NA -------------------------------- This introduces a kind of network optimization method named oenetcls. It can configure the ntuple rule, and bind interrupt to the netdev queue automatically. Signed-off-by: Yue Haibing <yuehaibing@huawei.com> Signed-off-by: Wang Liang <wangliang74@huawei.com> Signed-off-by: Liu Jian <liujian56@huawei.com> Signed-off-by: yuelg <yuelg@chinaunicom.cn> --- arch/arm64/configs/openeuler_defconfig | 2 + arch/x86/configs/openeuler_defconfig | 1 + drivers/hooks/Kconfig | 10 + drivers/hooks/vendor_hooks.c | 8 + include/linux/netdevice.h | 2 +- include/trace/hooks/oenetcls.h | 44 ++ net/Kconfig | 1 + net/Makefile | 1 + net/core/dev.c | 21 + net/ipv4/af_inet.c | 5 + net/ipv4/tcp.c | 10 + net/oenetcls/Kconfig | 10 + net/oenetcls/Makefile | 8 + net/oenetcls/asmdefs.h | 61 ++ net/oenetcls/memcpy-sve.S | 157 ++++ net/oenetcls/oenetcls.h | 176 +++++ net/oenetcls/oenetcls_flow.c | 402 ++++++++++ net/oenetcls/oenetcls_l0.c | 193 +++++ net/oenetcls/oenetcls_main.c | 972 +++++++++++++++++++++++++ net/oenetcls/oenetcls_ntuple.c | 510 +++++++++++++ 20 files changed, 2593 insertions(+), 1 deletion(-) create mode 100644 include/trace/hooks/oenetcls.h create mode 100644 net/oenetcls/Kconfig create mode 100644 net/oenetcls/Makefile create mode 100644 net/oenetcls/asmdefs.h create mode 100644 net/oenetcls/memcpy-sve.S create mode 100644 net/oenetcls/oenetcls.h create mode 100644 net/oenetcls/oenetcls_flow.c create mode 100644 net/oenetcls/oenetcls_l0.c create mode 100644 net/oenetcls/oenetcls_main.c create mode 100644 net/oenetcls/oenetcls_ntuple.c diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 4a11c9f061cc..c7c652a295e1 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -6260,6 +6260,8 @@ CONFIG_USB4=m # CONFIG_VENDOR_HOOKS=y CONFIG_VENDOR_BOND_HOOKS=y +CONFIG_OENETCLS_HOOKS=y +CONFIG_OENETCLS=m # end of Vendor Hooks CONFIG_LIBNVDIMM=m diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 4fed721fb07a..c9e9e1a76881 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -7323,6 +7323,7 @@ CONFIG_USB4=m # CONFIG_VENDOR_HOOKS=y CONFIG_VENDOR_BOND_HOOKS=y +# CONFIG_OENETCLS_HOOKS is not set # end of Vendor Hooks CONFIG_LIBNVDIMM=m diff --git a/drivers/hooks/Kconfig b/drivers/hooks/Kconfig index 6a00168e67ad..90b0f6ea4040 100644 --- a/drivers/hooks/Kconfig +++ b/drivers/hooks/Kconfig @@ -20,4 +20,14 @@ config VENDOR_BOND_HOOKS Allow vendor modules to attach bonding driver hooks defined via DECLARE_HOOK or DECLARE_RESTRICTED_HOOK. +config OENETCLS_HOOKS + bool "Oenetcls driver Hooks" + depends on VENDOR_HOOKS + default n + help + Enable oenetcls vendor hooks + Allow vendor modules to attach oenetcls hooks defined via + DECLARE_HOOK or DECLARE_RESTRICTED_HOOK. + Use OENETCLS && OENETCLS_HOOKS to enable oenetcls feature. 
+ endmenu diff --git a/drivers/hooks/vendor_hooks.c b/drivers/hooks/vendor_hooks.c index 85bda58159f6..d9b85b57a742 100644 --- a/drivers/hooks/vendor_hooks.c +++ b/drivers/hooks/vendor_hooks.c @@ -9,6 +9,7 @@ #define CREATE_TRACE_POINTS #include <trace/hooks/vendor_hooks.h> #include <trace/hooks/bonding.h> +#include <trace/hooks/oenetcls.h> /* * Export tracepoints that act as a bare tracehook (ie: have no trace event @@ -18,3 +19,10 @@ #ifdef CONFIG_VENDOR_BOND_HOOKS EXPORT_TRACEPOINT_SYMBOL_GPL(vendor_bond_check_dev_link); #endif + +#ifdef CONFIG_OENETCLS_HOOKS +EXPORT_TRACEPOINT_SYMBOL_GPL(oecls_flow_update); +EXPORT_TRACEPOINT_SYMBOL_GPL(oecls_set_cpu); +EXPORT_TRACEPOINT_SYMBOL_GPL(oecls_timeout); +EXPORT_TRACEPOINT_SYMBOL_GPL(ethtool_cfg_rxcls); +#endif diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index df61a63a5550..4b7b6fa1dcb1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -808,7 +808,7 @@ struct netdev_rx_queue { struct xsk_buff_pool *pool; #endif - KABI_RESERVE(1) + KABI_USE(1, void *__rcu oecls_ftb) KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) diff --git a/include/trace/hooks/oenetcls.h b/include/trace/hooks/oenetcls.h new file mode 100644 index 000000000000..c38545d7a6a2 --- /dev/null +++ b/include/trace/hooks/oenetcls.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * oenetcls driver Hooks + * + * Copyright (c) 2025, Huawei Tech. Co., Ltd. + */ + +#ifdef CONFIG_OENETCLS_HOOKS + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM oenetcls + +#define TRACE_INCLUDE_PATH trace/hooks +#if !defined(_TRACE_OENETCLS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_OENETCLS_H +#include <linux/tracepoint.h> +#include <trace/hooks/vendor_hooks.h> + +struct sock; +struct sk_buff; +struct net_device; + +DECLARE_HOOK(oecls_flow_update, +TP_PROTO(struct sock *sk), +TP_ARGS(sk)); + +DECLARE_HOOK(oecls_set_cpu, +TP_PROTO(struct sk_buff *skb), +TP_ARGS(skb)); + +DECLARE_HOOK(oecls_timeout, +TP_PROTO(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id, bool *ret), +TP_ARGS(dev, rxq_index, flow_id, filter_id, ret)); + +DECLARE_HOOK(ethtool_cfg_rxcls, +TP_PROTO(struct sock *sk, int is_del), +TP_ARGS(sk, is_del)); + +#endif +/* This part must be outside protection */ +#include <trace/define_trace.h> + +#endif + diff --git a/net/Kconfig b/net/Kconfig index 51a934426f9f..071f8ee3b89f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -67,6 +67,7 @@ source "net/xfrm/Kconfig" source "net/iucv/Kconfig" source "net/smc/Kconfig" source "net/xdp/Kconfig" +source "net/oenetcls/Kconfig" config INET bool "TCP/IP networking" diff --git a/net/Makefile b/net/Makefile index 45c03aa92ace..dc637db866e3 100644 --- a/net/Makefile +++ b/net/Makefile @@ -87,3 +87,4 @@ obj-$(CONFIG_QRTR) += qrtr/ obj-$(CONFIG_NET_NCSI) += ncsi/ obj-$(CONFIG_XDP_SOCKETS) += xdp/ obj-$(CONFIG_MPTCP) += mptcp/ +obj-$(CONFIG_OENETCLS) += oenetcls/ diff --git a/net/core/dev.c b/net/core/dev.c index c49f3d306b5c..eda1975ef55b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -149,6 +149,7 @@ #include <net/net_rship.h> #include "net-sysfs.h" +#include <trace/hooks/oenetcls.h> #define MAX_GRO_SKBS 8 @@ -4484,6 +4485,11 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, bool expire = true; unsigned int cpu; +#if IS_ENABLED(CONFIG_OENETCLS_HOOKS) + trace_oecls_timeout(dev, rxq_index, flow_id, filter_id, &expire); + if (expire) + return true; +#endif rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id <= 
flow_table->mask) { @@ -5819,6 +5825,11 @@ static int netif_receive_skb_internal(struct sk_buff *skb) } } #endif + +#if IS_ENABLED(CONFIG_OENETCLS_HOOKS) + trace_oecls_set_cpu(skb); +#endif + ret = __netif_receive_skb(skb); rcu_read_unlock(); return ret; @@ -5853,6 +5864,12 @@ static void netif_receive_skb_list_internal(struct list_head *head) } } #endif + +#if IS_ENABLED(CONFIG_OENETCLS_HOOKS) + list_for_each_entry_safe(skb, next, head, list) + trace_oecls_set_cpu(skb); +#endif + __netif_receive_skb_list(head); rcu_read_unlock(); } @@ -9984,6 +10001,10 @@ int __netdev_update_features(struct net_device *dev) return err < 0 ? 0 : 1; } +#if IS_ENABLED(CONFIG_OENETCLS) +EXPORT_SYMBOL(__netdev_update_features); +#endif + /** * netdev_update_features - recalculate device features * @dev: the device to check diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c6ec22f314c4..b09b1d6575a4 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -119,6 +119,7 @@ #include <net/compat.h> #include <trace/events/sock.h> +#include <trace/hooks/oenetcls.h> /* The inetsw table contains everything that inet_create needs to * build a new socket. @@ -232,6 +233,10 @@ int inet_listen(struct socket *sock, int backlog) if (err) goto out; tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL); + +#if IS_ENABLED(CONFIG_OENETCLS_HOOKS) + trace_ethtool_cfg_rxcls(sk, 0); +#endif } err = 0; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2a5147a4d1af..f014fad55bbf 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -280,6 +280,7 @@ #include <asm/ioctls.h> #include <net/busy_poll.h> #include <net/net_rship.h> +#include <trace/hooks/oenetcls.h> DEFINE_PER_CPU(unsigned int, tcp_orphan_count); EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); @@ -2176,6 +2177,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); +#if IS_ENABLED(CONFIG_OENETCLS_HOOKS) + trace_oecls_flow_update(sk); +#endif + if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && (sk->sk_state == TCP_ESTABLISHED)) sk_busy_loop(sk, nonblock); @@ -2750,6 +2755,11 @@ void __tcp_close(struct sock *sk, long timeout) void tcp_close(struct sock *sk, long timeout) { lock_sock(sk); + +#if IS_ENABLED(CONFIG_OENETCLS_HOOKS) + trace_ethtool_cfg_rxcls(sk, 1); +#endif + __tcp_close(sk, timeout); release_sock(sk); if (!sk->sk_net_refcnt) diff --git a/net/oenetcls/Kconfig b/net/oenetcls/Kconfig new file mode 100644 index 000000000000..128b798d6b7f --- /dev/null +++ b/net/oenetcls/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +config OENETCLS + tristate "Network classification" + depends on HISI_L0 && OENETCLS_HOOKS + default n + help + Allows to configure ntuple rule, and bind interrupt to netdev + automatically. + Use OENETCLS && OENETCLS_HOOKS to enable oenetcls feature. + Use parameter mode to decide running mode. 
diff --git a/net/oenetcls/Makefile b/net/oenetcls/Makefile new file mode 100644 index 000000000000..4d395d36a5fc --- /dev/null +++ b/net/oenetcls/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_OENETCLS) = oenetcls.o +oenetcls-y := oenetcls_main.o oenetcls_ntuple.o oenetcls_flow.o oenetcls_l0.o +ifeq ($(CONFIG_ARM64_SVE),y) +oenetcls-y += memcpy-sve.o +endif + diff --git a/net/oenetcls/asmdefs.h b/net/oenetcls/asmdefs.h new file mode 100644 index 000000000000..8138a94c18af --- /dev/null +++ b/net/oenetcls/asmdefs.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _ASMDEFS_H +#define _ASMDEFS_H + +/* Branch Target Identitication support. */ +#define BTI_C hint 34 +#define BTI_J hint 36 +/* Return address signing support (pac-ret). */ +#define PACIASP hint 25; .cfi_window_save +#define AUTIASP hint 29; .cfi_window_save + +/* GNU_PROPERTY_AARCH64_* macros from elf.h. */ +#define FEATURE_1_AND 0xc0000000 +#define FEATURE_1_BTI 1 +#define FEATURE_1_PAC 2 + +/* Add a NT_GNU_PROPERTY_TYPE_0 note. */ +#define GNU_PROPERTY(type, value) \ + .section .note.gnu.property, "a"; \ + .p2align 3; \ + .word 4; \ + .word 16; \ + .word 5; \ + .asciz "GNU"; \ + .word type; \ + .word 4; \ + .word value; \ + .word 0; \ + .text + +#ifndef WANT_GNU_PROPERTY +#define WANT_GNU_PROPERTY 1 +#endif + +#if WANT_GNU_PROPERTY +/* Add property note with supported features to all asm files. */ +GNU_PROPERTY(FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC) +#endif + +#define ENTRY_ALIGN(name, alignment) \ + .global name; \ + .type name, %function; \ + .align alignment; \ +name: \ + .cfi_startproc; \ + BTI_C; + +#define ENTRY(name) ENTRY_ALIGN(name, 6) + +#define ENTRY_ALIAS(name) \ + .global name; \ + .type name, %function; \ + name: + +#define END(name) \ + .cfi_endproc; \ + .size name, .-name; + +#define L(l) .L ## l + +#endif diff --git a/net/oenetcls/memcpy-sve.S b/net/oenetcls/memcpy-sve.S new file mode 100644 index 000000000000..106e4c30294c --- /dev/null +++ b/net/oenetcls/memcpy-sve.S @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#include "asmdefs.h" + +.arch armv8-a+sve + +#define dstin x0 +#define src x1 +#define count x2 +#define dst x3 +#define srcend x4 +#define dstend x5 +#define tmp1 x6 +#define vlen x6 + +#define A_q q0 +#define B_q q1 +#define C_q q2 +#define D_q q3 +#define E_q q4 +#define F_q q5 +#define G_q q6 +#define H_q q7 + +/* This implementation handles overlaps and supports both memcpy and memmove + from a single entry point. It uses unaligned accesses and branchless + sequences to keep the code small, simple and improve performance. + SVE vectors are used to speedup small copies. + + Copies are split into 3 main cases: small copies of up to 32 bytes, medium + copies of up to 128 bytes, and large copies. The overhead of the overlap + check is negligible since it is only required for large copies. + + Large copies use a software pipelined loop processing 64 bytes per iteration. + The source pointer is 16-byte aligned to minimize unaligned accesses. + The loop tail is handled by always copying 64 bytes from the end. +*/ + +ENTRY_ALIAS (__memmove_aarch64_sve) +ENTRY (__memcpy_aarch64_sve) + cmp count, 128 + b.hi L(copy_long) + cntb vlen + cmp count, vlen, lsl 1 + b.hi L(copy32_128) + + whilelo p0.b, xzr, count + whilelo p1.b, vlen, count + ld1b z0.b, p0/z, [src, 0, mul vl] + ld1b z1.b, p1/z, [src, 1, mul vl] + st1b z0.b, p0, [dstin, 0, mul vl] + st1b z1.b, p1, [dstin, 1, mul vl] + ret + + /* Medium copies: 33..128 bytes. 
*/ +L(copy32_128): + add srcend, src, count + add dstend, dstin, count + ldp A_q, B_q, [src] + ldp C_q, D_q, [srcend, -32] + cmp count, 64 + b.hi L(copy128) + stp A_q, B_q, [dstin] + stp C_q, D_q, [dstend, -32] + ret + + /* Copy 65..128 bytes. */ +L(copy128): + ldp E_q, F_q, [src, 32] + cmp count, 96 + b.ls L(copy96) + ldp G_q, H_q, [srcend, -64] + stp G_q, H_q, [dstend, -64] +L(copy96): + stp A_q, B_q, [dstin] + stp E_q, F_q, [dstin, 32] + stp C_q, D_q, [dstend, -32] + ret + + /* Copy more than 128 bytes. */ +L(copy_long): + add srcend, src, count + add dstend, dstin, count + + /* Use backwards copy if there is an overlap. */ + sub tmp1, dstin, src + cmp tmp1, count + b.lo L(copy_long_backwards) + + /* Copy 16 bytes and then align src to 16-byte alignment. */ + ldr D_q, [src] + and tmp1, src, 15 + bic src, src, 15 + sub dst, dstin, tmp1 + add count, count, tmp1 /* Count is now 16 too large. */ + ldp A_q, B_q, [src, 16] + str D_q, [dstin] + ldp C_q, D_q, [src, 48] + subs count, count, 128 + 16 /* Test and readjust count. */ + b.ls L(copy64_from_end) +L(loop64): + stp A_q, B_q, [dst, 16] + ldp A_q, B_q, [src, 80] + stp C_q, D_q, [dst, 48] + ldp C_q, D_q, [src, 112] + add src, src, 64 + add dst, dst, 64 + subs count, count, 64 + b.hi L(loop64) + + /* Write the last iteration and copy 64 bytes from the end. */ +L(copy64_from_end): + ldp E_q, F_q, [srcend, -64] + stp A_q, B_q, [dst, 16] + ldp A_q, B_q, [srcend, -32] + stp C_q, D_q, [dst, 48] + stp E_q, F_q, [dstend, -64] + stp A_q, B_q, [dstend, -32] + ret + + /* Large backwards copy for overlapping copies. + Copy 16 bytes and then align srcend to 16-byte alignment. */ +L(copy_long_backwards): + cbz tmp1, L(return) + ldr D_q, [srcend, -16] + and tmp1, srcend, 15 + bic srcend, srcend, 15 + sub count, count, tmp1 + ldp A_q, B_q, [srcend, -32] + str D_q, [dstend, -16] + ldp C_q, D_q, [srcend, -64] + sub dstend, dstend, tmp1 + subs count, count, 128 + b.ls L(copy64_from_start) + +L(loop64_backwards): + str B_q, [dstend, -16] + str A_q, [dstend, -32] + ldp A_q, B_q, [srcend, -96] + str D_q, [dstend, -48] + str C_q, [dstend, -64]! + ldp C_q, D_q, [srcend, -128] + sub srcend, srcend, 64 + subs count, count, 64 + b.hi L(loop64_backwards) + + /* Write the last iteration and copy 64 bytes from the start. 
*/ +L(copy64_from_start): + ldp E_q, F_q, [src, 32] + stp A_q, B_q, [dstend, -32] + ldp A_q, B_q, [src] + stp C_q, D_q, [dstend, -64] + stp E_q, F_q, [dstin, 32] + stp A_q, B_q, [dstin] +L(return): + ret + +END (__memcpy_aarch64_sve) diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h new file mode 100644 index 000000000000..72aeedcaf0a0 --- /dev/null +++ b/net/oenetcls/oenetcls.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _NET_OENETCLS_H +#define _NET_OENETCLS_H +#include <linux/if.h> +#include <linux/mutex.h> +#include <linux/cpufeature.h> + +#define OECLS_MAX_NETDEV_NUM 8 +#define OECLS_MAX_RXQ_NUM_PER_DEV 256 +#define OECLS_MAX_NUMA_NUM 16 +#define OECLS_MAX_CPU_NUM 1024 + +#define OECLS_TIMEOUT (5 * HZ) +#define OECLS_NO_FILTER 0xffff +#define OECLS_NO_CPU 0xffff + +struct oecls_netdev_queue_info { + int irq; + int affinity_cpu; +}; + +struct oecls_netdev_info { + char dev_name[IFNAMSIZ]; + struct net_device *netdev; + int rxq_num; + struct oecls_netdev_queue_info rxq[OECLS_MAX_RXQ_NUM_PER_DEV]; + int old_filter_state; +}; + +struct oecls_numa_bound_dev_info { + DECLARE_BITMAP(bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV); +}; + +struct oecls_numa_info { + DECLARE_BITMAP(avail_cpus, OECLS_MAX_CPU_NUM); + struct oecls_numa_bound_dev_info bound_dev[OECLS_MAX_NETDEV_NUM]; +}; + +struct cmd_context { + char netdev[IFNAMSIZ]; + u32 dip4; + u16 dport; + u16 action; + u32 ruleid; + u32 del_ruleid; + int ret_loc; +}; + +#define OECLS_SK_RULE_HASHSIZE 256 +#define OECLS_SK_RULE_HASHMASK (OECLS_SK_RULE_HASHSIZE - 1) + +struct oecls_sk_rule_list { + struct hlist_head hash[OECLS_SK_RULE_HASHSIZE]; + /* Mutex to synchronize access to ntuple rule locking */ + struct mutex mutex; +}; + +struct oecls_sk_rule { + struct hlist_node node; + int devid; + void *sk; + int dip4; + int dport; + int action; + int ruleid; + int nid; +}; + +struct oecls_dev_flow { + unsigned short cpu; + unsigned short filter; + unsigned int last_qtail; + int isvalid; + unsigned long timeout; +}; + +struct oecls_dev_flow_table { + unsigned int mask; + struct rcu_head rcu; + struct oecls_dev_flow flows[]; +}; + +struct oecls_sock_flow_table { + u32 mask; + u32 ents[] ____cacheline_aligned_in_smp; +}; + +#define OECLS_DEV_FLOW_TABLE_NUM 0x1000 +#define OECLS_SOCK_FLOW_TABLE_NUM 0x100000 +#define OECLS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct oecls_dev_flow_table) + \ + ((_num) * sizeof(struct oecls_dev_flow))) +#define OECLS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct oecls_sock_flow_table, ents[_num])) + +#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ + ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) +#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \ + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \ + NETIF_F_RXHASH) + +struct rmgr_ctrl { + int driver_select; + unsigned long *slot; + __u32 n_rules; + __u32 size; +}; + +extern int match_ip_flag; +extern int debug; +extern int oecls_netdev_num; +extern int oecls_numa_num; + +#define oecls_debug(fmt, ...) \ + do { \ + if (debug) \ + trace_printk(fmt, ## __VA_ARGS__); \ + } while (0) + +#define oecls_error(fmt, ...) 
\ + do { \ + pr_err("oenetcls [%s:%d]: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); \ + trace_printk(fmt, ## __VA_ARGS__); \ + } while (0) + +struct oecls_netdev_info *get_oecls_netdev_info(unsigned int index); + +#define for_each_oecls_netdev(devid, oecls_dev) \ + for (devid = 0, oecls_dev = get_oecls_netdev_info(devid); \ + (devid < oecls_netdev_num) && oecls_dev; \ + devid++, oecls_dev = get_oecls_netdev_info(devid)) + +struct oecls_numa_info *get_oecls_numa_info(unsigned int nid); + +#define for_each_oecls_numa(nid, numa_info) \ + for (nid = 0, numa_info = get_oecls_numa_info(nid); \ + (nid < oecls_numa_num) && numa_info; \ + nid++, numa_info = get_oecls_numa_info(nid)) + +#ifdef CONFIG_ARM64_SVE +void *__memcpy_aarch64_sve(void *, const void *, size_t); +#define memcpy_r(dst, src, len) \ + do { \ + if (system_supports_sve()) \ + __memcpy_aarch64_sve(dst, src, len); \ + else \ + memcpy(dst, src, len); \ + } while (0) +#else +#define memcpy_r(dst, src, len) memcpy(dst, src, len) +#endif + +int check_appname(char *task_name); +int send_ethtool_ioctl(struct cmd_context *ctx, void *cmd); +int alloc_rxq_id(int nid, int devid); +void free_rxq_id(int nid, int devid, int rxq_id); +void oecls_ntuple_res_init(void); +void oecls_ntuple_res_clean(void); +void oecls_flow_res_init(void); +void oecls_flow_res_clean(void); + +#define L0_MAX_PAGE_SIZE (8192) +#define L0_MAX_PAGE_NUM (4096) + +struct l0_vma_data { + struct page *page; + unsigned long size; + int nid; +}; + +void clean_oecls_l0_cache(void); +void init_oecls_l0_cache(void); +void *alloc_from_l0(int size); +void free_to_l0(void *addr); +int l3t_shared_lock(int nid, unsigned long pfn, unsigned long size); +int l3t_shared_unlock(int nid, unsigned long pfn, unsigned long size); + +#endif /* _NET_OENETCLS_H */ diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c new file mode 100644 index 000000000000..c35ce5aca416 --- /dev/null +++ b/net/oenetcls/oenetcls_flow.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/inetdevice.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> +#include <linux/inet.h> +#include <net/sock.h> +#include <trace/hooks/oenetcls.h> +#include "oenetcls.h" + +static u32 oecls_cpu_mask; +static struct oecls_sock_flow_table __rcu *oecls_sock_flow_table; +static DEFINE_MUTEX(oecls_sock_flow_mutex); +static DEFINE_SPINLOCK(oecls_dev_flow_lock); + +bool is_oecls_config_netdev(const char *name) +{ + struct oecls_netdev_info *netdev_info; + int netdev_loop; + + for_each_oecls_netdev(netdev_loop, netdev_info) + if (strcmp(netdev_info->dev_name, name) == 0) + return true; + + return false; +} + +static void oecls_timeout(void *data, struct net_device *dev, u16 rxq_index, + u32 flow_id, u16 filter_id, bool *ret) +{ + struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; + struct oecls_dev_flow_table *flow_table; + struct oecls_dev_flow *rflow; + bool expire = true; + unsigned int cpu; + + rcu_read_lock(); + flow_table = rcu_dereference(rxqueue->oecls_ftb); + if (flow_table && flow_id <= flow_table->mask) { + rflow = &flow_table->flows[flow_id]; + cpu = READ_ONCE(rflow->cpu); + oecls_debug("dev:%s, rxq:%d, flow_id:%u, filter_id:%d/%d, cpu:%d", dev->name, + rxq_index, flow_id, filter_id, rflow->filter, cpu); + + if (rflow->filter == filter_id && cpu < nr_cpu_ids) { + if (time_before(jiffies, rflow->timeout + OECLS_TIMEOUT)) { + expire = false; + } else { + rflow->isvalid = 0; + WRITE_ONCE(rflow->cpu, OECLS_NO_CPU); + } + } + 
} + rcu_read_unlock(); + oecls_debug("%s, dev:%s, rxq:%d, flow_id:%u, filter_id:%d, expire:%d\n", __func__, + dev->name, rxq_index, flow_id, filter_id, expire); + *ret = expire; +} + +static void oecls_flow_update(void *data, struct sock *sk) +{ + struct oecls_sock_flow_table *tb; + unsigned int hash, index; + u32 val; + u32 cpu = raw_smp_processor_id(); + + if (sk->sk_state != TCP_ESTABLISHED) + return; + + if (check_appname(current->comm)) + return; + + rcu_read_lock(); + tb = rcu_dereference(oecls_sock_flow_table); + hash = READ_ONCE(sk->sk_rxhash); + if (tb && hash) { + index = hash & tb->mask; + val = hash & ~oecls_cpu_mask; + val |= cpu; + + if (READ_ONCE(tb->ents[index]) != val) { + WRITE_ONCE(tb->ents[index], val); + + oecls_debug("[%s] sk:%p, hash:0x%x, index:0x%x, val:0x%x, cpu:%d\n", + current->comm, sk, hash, index, val, cpu); + } + } + rcu_read_unlock(); +} + +static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *skb) +{ + struct oecls_netdev_info *netdev_info; + int netdev_loop; + u32 hash, index; + struct oecls_numa_info *numa_info; + struct oecls_numa_bound_dev_info *bound_dev = NULL; + int rxq_id, rxq_num, i; + + numa_info = get_oecls_numa_info(nid); + if (!numa_info) + return -1; + + for_each_oecls_netdev(netdev_loop, netdev_info) { + if (strcmp(netdev_info->dev_name, dev->name) == 0) { + bound_dev = &numa_info->bound_dev[netdev_loop]; + break; + } + } + + if (!bound_dev) + return -1; + rxq_num = bitmap_weight(bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV); + if (rxq_num == 0) + return -1; + + hash = skb_get_hash(skb); + index = hash % rxq_num; + + i = 0; + for_each_set_bit(rxq_id, bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV) + if (index == i++) + return rxq_id; + + return -1; +} + +static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb, + struct oecls_dev_flow *old_rflow, int old_rxq_id, u16 next_cpu) +{ + struct netdev_rx_queue *rxqueue; + struct oecls_dev_flow_table *dtb; + struct oecls_dev_flow *rflow; + u32 flow_id, hash; + u16 rxq_index; + int rc; + + if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || + !(dev->features & NETIF_F_NTUPLE)) + return; + + rxq_index = flow_get_queue_idx(dev, cpu_to_node(next_cpu), skb); + if (rxq_index == skb_get_rx_queue(skb) || rxq_index < 0) + return; + + rxqueue = dev->_rx + rxq_index; + dtb = rcu_dereference(rxqueue->oecls_ftb); + if (!dtb) + return; + + hash = skb_get_hash(skb); + flow_id = hash & dtb->mask; + rflow = &dtb->flows[flow_id]; + if (rflow->isvalid && rflow->cpu == next_cpu) { + rflow->timeout = jiffies; + return; + } + + rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); + oecls_debug("skb:%p, rxq:%d, hash:0x%x, flow_id:%u, old_rxq_id:%d, next_cpu:%d, rc:%d\n", + skb, rxq_index, hash, flow_id, old_rxq_id, next_cpu, rc); + if (rc < 0) + return; + + rflow->filter = rc; + rflow->isvalid = 1; + rflow->timeout = jiffies; + if (old_rflow->filter == rflow->filter) + old_rflow->filter = OECLS_NO_FILTER; + rflow->cpu = next_cpu; +} + +static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev, + struct oecls_sock_flow_table *tb, struct oecls_dev_flow_table *dtb, + int old_rxq_id) +{ + struct oecls_dev_flow *rflow; + u32 last_recv_cpu, hash, val; + u32 tcpu = 0; + u32 cpu = raw_smp_processor_id(); + + skb_reset_network_header(skb); + hash = skb_get_hash(skb); + if (!hash) + return; + + val = READ_ONCE(tb->ents[hash & tb->mask]); + last_recv_cpu = val & oecls_cpu_mask; + rflow = &dtb->flows[hash & dtb->mask]; + tcpu = rflow->cpu; + + if ((val ^ 
hash) & ~oecls_cpu_mask) + return; + + if (cpu_to_node(cpu) == cpu_to_node(last_recv_cpu)) + return; + + if (tcpu >= nr_cpu_ids) + set_oecls_cpu(ndev, skb, rflow, old_rxq_id, last_recv_cpu); +} + +static void oecls_set_cpu(void *data, struct sk_buff *skb) +{ + struct net_device *ndev = skb->dev; + struct oecls_sock_flow_table *stb; + struct oecls_dev_flow_table *dtb; + struct netdev_rx_queue *rxqueue; + int rxq_id = -1; + + if (!ndev) + return; + + if (!is_oecls_config_netdev(ndev->name)) + return; + + rxqueue = ndev->_rx; + if (skb_rx_queue_recorded(skb)) { + rxq_id = skb_get_rx_queue(skb); + if (rxq_id >= ndev->real_num_rx_queues) { + oecls_debug("ndev:%s, rxq:%d, real_num:%d\n", ndev->name, + rxq_id, ndev->real_num_rx_queues); + return; + } + rxqueue += rxq_id; + } + + // oecls_debug("skb:%px, dev:%s, rxq_id:%d\n", skb, ndev->name, rxq_id); + if (rxq_id < 0) + return; + + rcu_read_lock(); + stb = rcu_dereference(oecls_sock_flow_table); + dtb = rcu_dereference(rxqueue->oecls_ftb); + if (stb && dtb) + __oecls_set_cpu(skb, ndev, stb, dtb, rxq_id); + + rcu_read_unlock(); +} + +static void oecls_dev_flow_table_free(struct rcu_head *rcu) +{ + struct oecls_dev_flow_table *table = container_of(rcu, + struct oecls_dev_flow_table, rcu); + vfree(table); +} + +static void oecls_dev_flow_table_cleanup(struct net_device *netdev, int qid) +{ + struct oecls_dev_flow_table *dtb; + struct netdev_rx_queue *queue; + int i; + + spin_lock(&oecls_dev_flow_lock); + for (i = 0; i < qid; i++) { + queue = netdev->_rx + i; + dtb = rcu_dereference_protected(queue->oecls_ftb, + lockdep_is_held(&oecls_dev_flow_lock)); + rcu_assign_pointer(queue->oecls_ftb, NULL); + } + spin_unlock(&oecls_dev_flow_lock); + call_rcu(&dtb->rcu, oecls_dev_flow_table_free); +} + +static int oecls_dev_flow_table_release(void) +{ + struct oecls_netdev_info *netdev_info; + int netdev_loop; + struct net_device *netdev; + + for_each_oecls_netdev(netdev_loop, netdev_info) { + netdev = netdev_info->netdev; + if (!netdev) + continue; + oecls_dev_flow_table_cleanup(netdev, netdev->num_rx_queues); + } + + return 0; +} + +static int _oecls_dev_flow_table_init(struct net_device *netdev) +{ + struct oecls_dev_flow_table *table; + int size = OECLS_DEV_FLOW_TABLE_NUM; + struct netdev_rx_queue *queue; + int i, j, ret = 0; + + size = roundup_pow_of_two(size); + oecls_debug("dev:%s, num_rx_queues:%d, mask:0x%x\n", netdev->name, netdev->num_rx_queues, + size - 1); + + for (i = 0; i < netdev->num_rx_queues; i++) { + table = vmalloc(OECLS_DEV_FLOW_TABLE_SIZE(size)); + if (!table) { + ret = -ENOMEM; + goto fail; + } + + table->mask = size - 1; + for (j = 0; j < size; j++) { + table->flows[j].cpu = OECLS_NO_CPU; + table->flows[j].isvalid = 0; + } + + queue = netdev->_rx + i; + + spin_lock(&oecls_dev_flow_lock); + rcu_assign_pointer(queue->oecls_ftb, table); + spin_unlock(&oecls_dev_flow_lock); + } + return ret; +fail: + oecls_dev_flow_table_cleanup(netdev, i); + return ret; +} + +static int oecls_dev_flow_table_init(void) +{ + struct oecls_netdev_info *netdev_info; + int netdev_loop; + struct net_device *ndev; + int i, err; + + for_each_oecls_netdev(netdev_loop, netdev_info) { + ndev = netdev_info->netdev; + if (!ndev) + continue; + err = _oecls_dev_flow_table_init(ndev); + if (err) + goto out; + } + + return 0; +out: + for (i = 0; i < netdev_loop; i++) { + netdev_info = get_oecls_netdev_info(i); + ndev = netdev_info->netdev; + if (!ndev) + continue; + oecls_dev_flow_table_cleanup(ndev, ndev->num_rx_queues); + } + return err; +} + +static int 
oecls_sock_flow_table_release(void) +{ + struct oecls_sock_flow_table *tb; + + mutex_lock(&oecls_sock_flow_mutex); + tb = rcu_dereference_protected(oecls_sock_flow_table, + lockdep_is_held(&oecls_sock_flow_mutex)); + if (tb) + rcu_assign_pointer(oecls_sock_flow_table, NULL); + mutex_unlock(&oecls_sock_flow_mutex); + synchronize_rcu(); + vfree(tb); + + unregister_trace_oecls_flow_update(&oecls_flow_update, NULL); + unregister_trace_oecls_set_cpu(&oecls_set_cpu, NULL); + unregister_trace_oecls_timeout(&oecls_timeout, NULL); + return 0; +} + +static int oecls_sock_flow_table_init(void) +{ + struct oecls_sock_flow_table *table; + int size = OECLS_SOCK_FLOW_TABLE_NUM; + int i; + + size = roundup_pow_of_two(size); + table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size)); + if (!table) + return -ENOMEM; + + oecls_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; + oecls_debug("nr_cpu_ids:%d, oecls_cpu_mask:0x%x\n", nr_cpu_ids, oecls_cpu_mask); + + table->mask = size - 1; + for (i = 0; i < size; i++) + table->ents[i] = OECLS_NO_CPU; + + mutex_lock(&oecls_sock_flow_mutex); + rcu_assign_pointer(oecls_sock_flow_table, table); + mutex_unlock(&oecls_sock_flow_mutex); + + register_trace_oecls_flow_update(oecls_flow_update, NULL); + register_trace_oecls_set_cpu(&oecls_set_cpu, NULL); + register_trace_oecls_timeout(&oecls_timeout, NULL); + return 0; +} + +void oecls_flow_res_init(void) +{ + oecls_sock_flow_table_init(); + oecls_dev_flow_table_init(); +} + +void oecls_flow_res_clean(void) +{ + oecls_sock_flow_table_release(); + oecls_dev_flow_table_release(); +} diff --git a/net/oenetcls/oenetcls_l0.c b/net/oenetcls/oenetcls_l0.c new file mode 100644 index 000000000000..b820a1cf96bd --- /dev/null +++ b/net/oenetcls/oenetcls_l0.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/fs.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/miscdevice.h> +#include <linux/kallsyms.h> +#include <linux/mm.h> +#include <linux/gfp.h> +#include <linux/mempolicy.h> +#include <linux/pfn_t.h> +#include <linux/delay.h> +#include "oenetcls.h" + +static struct vm_area_struct *oecls_l0_cache __ro_after_init; +static void *oecls_l0_cache_head; +static atomic_t alloc_num_l0 = ATOMIC_INIT(0); +static int cur_offset_l0; +static u8 slot_l0[L0_MAX_PAGE_NUM] = { 0 }; + +void *alloc_from_l0(int size) +{ + int i; + void *addr = NULL; + + if (!oecls_l0_cache_head) + return kzalloc(size, GFP_ATOMIC); + + if (size > L0_MAX_PAGE_SIZE) { + oecls_error("size(%d) >= max_size(%d)!\n", size, L0_MAX_PAGE_SIZE); + return NULL; + } + + if (atomic_read(&alloc_num_l0) >= L0_MAX_PAGE_NUM) { + oecls_error("alloc_num_l0:%d exceed max num!\n", atomic_read(&alloc_num_l0)); + return NULL; + } + + for (i = 0; i < L0_MAX_PAGE_NUM; i++) { + cur_offset_l0 = (cur_offset_l0 + 1) % L0_MAX_PAGE_NUM; + if (slot_l0[cur_offset_l0] == 0) { + addr = oecls_l0_cache_head + cur_offset_l0 * L0_MAX_PAGE_SIZE; + atomic_inc(&alloc_num_l0); + slot_l0[cur_offset_l0] = 1; + break; + } + } + + if (!addr) + oecls_error("app:%s, pid:%d alloc fail!\n", current->comm, current->pid); + return addr; +} + +void free_to_l0(void *addr) +{ + int offset = 0; + + if (!addr) + return; + + if (!oecls_l0_cache_head) { + kfree(addr); + return; + } + + if (atomic_read(&alloc_num_l0) <= 0) { + oecls_error("alloc_num_l0:%d <= 0!\n", atomic_read(&alloc_num_l0)); + return; + } + + offset = (addr - oecls_l0_cache_head) / L0_MAX_PAGE_SIZE; + if (offset >= L0_MAX_PAGE_NUM) { + oecls_error("app:%s, pid:%d, offset:%d\n", current->comm, 
current->pid, offset); + return; + } + + slot_l0[offset] = 0; + atomic_dec(&alloc_num_l0); +} + +static int get_node_node(struct vm_area_struct *vma) +{ + struct mempolicy *pol; + nodemask_t *nmask; + int nid; + + nid = get_vma_policy_node(vma, vma->vm_start, GFP_KERNEL, &pol, &nmask); + if (pol->mode == MPOL_BIND || pol->mode == MPOL_PREFERRED_MANY) + nid = first_node(*nmask); + + return nid; +} + +static int oecls_l0_cache_mmap(struct file *filp, struct vm_area_struct *vma) +{ + unsigned long cont_size = PAGE_ALIGN(vma->vm_end - vma->vm_start); + struct page *page = NULL; + struct l0_vma_data *data; + int page_cnt, nid; + int ret = 0; + + if ((vma->vm_start % PMD_SIZE) || (vma->vm_end % PMD_SIZE)) { + oecls_error("error vma_start: %#lx, vma_end: %#lx\n", vma->vm_start, vma->vm_end); + return -EINVAL; + } + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + oecls_error("kzalloc l0_vma_data fail!\n"); + return -ENOMEM; + } + + page_cnt = cont_size >> PAGE_SHIFT; + nid = get_node_node(vma); +#ifdef CONFIG_CONTIG_ALLOC + page = alloc_contig_pages(page_cnt, GFP_KERNEL, nid, NULL); +#endif + if (!page) { + ret = -ENOMEM; + oecls_error("alloc_contig_pages fail! page_cnt:%d, nid:%d\n", page_cnt, nid); + goto free_data; + } + +#if IS_ENABLED(CONFIG_HISI_L3T) + ret = l3t_shared_lock(nid, page_to_pfn(page), cont_size); +#endif + if (ret) { + ret = -EOPNOTSUPP; + oecls_error("l3t_shared_lock fail! ret: %d\n", ret); + goto free_page; + } + + data->page = page; + data->size = cont_size; + data->nid = nid; + vma->vm_private_data = data; + return 0; +free_page: + free_contig_range(page_to_pfn(page), page_cnt); +free_data: + kfree(data); + return ret; +} + +void init_oecls_l0_cache(void) +{ + int ret = 0; + struct l0_vma_data *sbkpages; + + oecls_l0_cache = kzalloc(sizeof(*oecls_l0_cache), GFP_KERNEL); + if (!oecls_l0_cache) { + oecls_error("kzalloc oecls_l0_cache fail!\n"); + return; + } + oecls_l0_cache->vm_start = 0; + oecls_l0_cache->vm_end = L0_MAX_PAGE_NUM * L0_MAX_PAGE_SIZE; + + ret = oecls_l0_cache_mmap(NULL, oecls_l0_cache); + if (ret != 0) { + kfree(oecls_l0_cache); + oecls_l0_cache = NULL; + return; + } + sbkpages = (struct l0_vma_data *)(oecls_l0_cache->vm_private_data); + oecls_l0_cache_head = page_to_virt(sbkpages->page); + + atomic_set(&alloc_num_l0, 0); + oecls_debug("l0_cache:%p, l0_cache_head:%p\n", oecls_l0_cache, oecls_l0_cache_head); +} + +void clean_oecls_l0_cache(void) +{ + struct l0_vma_data *sbkpages; + + if (!oecls_l0_cache) + return; + + oecls_debug("alloc_num_l0:%d\n", atomic_read(&alloc_num_l0)); + while (atomic_read(&alloc_num_l0) != 0) + mdelay(1); + + sbkpages = (struct l0_vma_data *)(oecls_l0_cache->vm_private_data); + if (sbkpages) { +#if IS_ENABLED(CONFIG_HISI_L3T) + l3t_shared_unlock(sbkpages->nid, page_to_pfn(sbkpages->page), sbkpages->size); +#endif + free_contig_range(page_to_pfn(sbkpages->page), sbkpages->size >> PAGE_SHIFT); + kfree(sbkpages); + oecls_l0_cache_head = NULL; + } + + kfree(oecls_l0_cache); +} diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c new file mode 100644 index 000000000000..8cf5d02bb5b9 --- /dev/null +++ b/net/oenetcls/oenetcls_main.c @@ -0,0 +1,972 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/netdev_features.h> +#include <linux/ethtool.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> +#include <linux/rtnetlink.h> +#include "oenetcls.h" + +int oecls_netdev_num; +static struct oecls_netdev_info 
oecls_netdev_info_table[OECLS_MAX_NETDEV_NUM]; + +int oecls_numa_num; +static struct oecls_numa_info oecls_numa_info_table[OECLS_MAX_NUMA_NUM]; + +int debug; +module_param(debug, int, 0644); +MODULE_PARM_DESC(debug, "debug switch"); + +static int mode; +module_param(mode, int, 0444); +MODULE_PARM_DESC(mode, "mode, default 0"); + +static char ifname[64] = { 0 }; +module_param_string(ifname, ifname, sizeof(ifname), 0444); +MODULE_PARM_DESC(ifname, "ifname"); + +static char appname[64] = "redis-server"; +module_param_string(appname, appname, sizeof(appname), 0644); +MODULE_PARM_DESC(appname, "appname, default redis-server"); + +int match_ip_flag = 1; +module_param(match_ip_flag, int, 0644); +MODULE_PARM_DESC(match_ip_flag, "match ip flag"); + +static int strategy; +module_param(strategy, int, 0444); +MODULE_PARM_DESC(strategy, "strategy, default 0"); + +static bool check_params(void) +{ + if (mode != 0 && mode != 1) + return false; + + if (strlen(ifname) == 0) + return false; + + return true; +} + +int check_appname(char *task_name) +{ + char *start = appname; + char *end; + + if (!strlen(appname)) + return 0; + + // support appname: app1#app2#appN + while (*start != '\0') { + end = strchr(start, '#'); + if (end == start) { + start++; + continue; + } + + if (!end) { + if (!strncmp(task_name, start, strlen(start))) + return 0; + break; + } + + if (!strncmp(task_name, start, end - start)) + return 0; + start = end + 1; + } + return -EOPNOTSUPP; +} + +static u32 __ethtool_get_flags(struct net_device *dev) +{ + u32 flags = 0; + + if (dev->features & NETIF_F_LRO) + flags |= ETH_FLAG_LRO; + if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) + flags |= ETH_FLAG_RXVLAN; + if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) + flags |= ETH_FLAG_TXVLAN; + if (dev->features & NETIF_F_NTUPLE) + flags |= ETH_FLAG_NTUPLE; + if (dev->features & NETIF_F_RXHASH) + flags |= ETH_FLAG_RXHASH; + + return flags; +} + +static int __ethtool_set_flags(struct net_device *dev, u32 data) +{ + netdev_features_t features = 0, changed; + + if (data & ~ETH_ALL_FLAGS) + return -EINVAL; + + if (data & ETH_FLAG_LRO) + features |= NETIF_F_LRO; + if (data & ETH_FLAG_RXVLAN) + features |= NETIF_F_HW_VLAN_CTAG_RX; + if (data & ETH_FLAG_TXVLAN) + features |= NETIF_F_HW_VLAN_CTAG_TX; + if (data & ETH_FLAG_NTUPLE) + features |= NETIF_F_NTUPLE; + if (data & ETH_FLAG_RXHASH) + features |= NETIF_F_RXHASH; + + /* allow changing only bits set in hw_features */ + changed = (features ^ dev->features) & ETH_ALL_FEATURES; + if (changed & ~dev->hw_features) + return (changed & dev->hw_features) ? 
-EINVAL : -EOPNOTSUPP; + + dev->wanted_features = + (dev->wanted_features & ~changed) | (features & changed); + + __netdev_update_features(dev); + + return 0; +} + +static void ethtool_rxnfc_copy_to_user(void *useraddr, + const struct ethtool_rxnfc *rxnfc, + size_t size, const u32 *rule_buf) +{ + memcpy_r(useraddr, rxnfc, size); + useraddr += offsetof(struct ethtool_rxnfc, rule_locs); + + if (rule_buf) + memcpy_r(useraddr, rule_buf, rxnfc->rule_cnt * sizeof(u32)); +} + +static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, + u32 cmd, void *useraddr) +{ + struct ethtool_rxnfc info; + size_t info_size = sizeof(info); + int rc; + + if (!dev->ethtool_ops->set_rxnfc) + return -EOPNOTSUPP; + + if (cmd == ETHTOOL_SRXFH) + info_size = (offsetof(struct ethtool_rxnfc, data) + + sizeof(info.data)); + + memcpy_r(&info, useraddr, info_size); + rc = dev->ethtool_ops->set_rxnfc(dev, &info); + if (rc) + return rc; + + if (cmd == ETHTOOL_SRXCLSRLINS) + ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, NULL); + + return 0; +} + +static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, + u32 cmd, void *useraddr) +{ + struct ethtool_rxnfc info; + size_t info_size = sizeof(info); + const struct ethtool_ops *ops = dev->ethtool_ops; + int ret; + void *rule_buf = NULL; + + if (!ops->get_rxnfc) + return -EOPNOTSUPP; + + if (cmd == ETHTOOL_GRXFH) + info_size = (offsetof(struct ethtool_rxnfc, data) + + sizeof(info.data)); + + memcpy_r(&info, useraddr, info_size); + + /* If FLOW_RSS was requested then user-space must be using the + * new definition, as FLOW_RSS is newer. + */ + if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) { + info_size = sizeof(info); + memcpy_r(&info, useraddr, info_size); + /* Since malicious users may modify the original data, + * we need to check whether FLOW_RSS is still requested. 
+ */ + if (!(info.flow_type & FLOW_RSS)) + return -EINVAL; + } + + if (info.cmd != cmd) + return -EINVAL; + + if (info.cmd == ETHTOOL_GRXCLSRLALL) { + if (info.rule_cnt > 0) { + if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) + rule_buf = kcalloc(info.rule_cnt, sizeof(u32), + GFP_KERNEL); + if (!rule_buf) + return -ENOMEM; + } + } + + ret = ops->get_rxnfc(dev, &info, rule_buf); + if (ret < 0) + goto err_out; + + ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, rule_buf); +err_out: + kfree(rule_buf); + + return ret; +} + +static noinline_for_stack int ethtool_get_channels(struct net_device *dev, + void *useraddr) +{ + struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; + + if (!dev->ethtool_ops->get_channels) + return -EOPNOTSUPP; + + dev->ethtool_ops->get_channels(dev, &channels); + + memcpy_r(useraddr, &channels, sizeof(channels)); + return 0; +} + +static int ethtool_get_value(struct net_device *dev, char *useraddr, + u32 cmd, u32 (*actor)(struct net_device *)) +{ + struct ethtool_value edata = { .cmd = cmd }; + + if (!actor) + return -EOPNOTSUPP; + + edata.data = actor(dev); + + memcpy_r(useraddr, &edata, sizeof(edata)); + return 0; +} + +static int ethtool_set_value(struct net_device *dev, char *useraddr, + int (*actor)(struct net_device *, u32)) +{ + struct ethtool_value edata; + + if (!actor) + return -EOPNOTSUPP; + + memcpy_r(&edata, useraddr, sizeof(edata)); + + return actor(dev, edata.data); +} + +static int dev_ethtool_kern(struct net *net, struct ifreq *ifr) +{ + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); + void *useraddr = ifr->ifr_data; + u32 ethcmd, sub_cmd; + int rc; + netdev_features_t old_features; + + if (!dev || !netif_device_present(dev)) + return -ENODEV; + + memcpy_r(ðcmd, useraddr, sizeof(ethcmd)); + + if (ethcmd == ETHTOOL_PERQUEUE) + memcpy_r(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd)); + else + sub_cmd = ethcmd; + + /* Allow some commands to be done by anyone */ + switch (sub_cmd) { + case ETHTOOL_GFLAGS: + case ETHTOOL_GRXFH: + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: + case ETHTOOL_GCHANNELS: + break; + default: + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + } + + if (dev->ethtool_ops->begin) { + rc = dev->ethtool_ops->begin(dev); + if (rc < 0) + return rc; + } + old_features = dev->features; + + switch (ethcmd) { + case ETHTOOL_GFLAGS: + rc = ethtool_get_value(dev, useraddr, ethcmd, + __ethtool_get_flags); + break; + case ETHTOOL_SFLAGS: + rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); + break; + case ETHTOOL_GRXFH: + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: + rc = ethtool_get_rxnfc(dev, ethcmd, useraddr); + break; + case ETHTOOL_SRXFH: + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); + break; + case ETHTOOL_GCHANNELS: + rc = ethtool_get_channels(dev, useraddr); + break; + default: + rc = -EOPNOTSUPP; + } + + if (dev->ethtool_ops->complete) + dev->ethtool_ops->complete(dev); + + if (old_features != dev->features) + netdev_features_change(dev); + + return rc; +} + +int send_ethtool_ioctl(struct cmd_context *ctx, void *cmd) +{ + struct ifreq ifr = {0}; + int ret; + + strncpy(ifr.ifr_name, ctx->netdev, sizeof(ctx->netdev)); + ifr.ifr_data = cmd; + + rtnl_lock(); + ret = dev_ethtool_kern(&init_net, &ifr); + rtnl_unlock(); + + return ret; +} + +struct oecls_netdev_info 
*get_oecls_netdev_info(unsigned int index) +{ + if (index >= OECLS_MAX_NETDEV_NUM) + return NULL; + return &oecls_netdev_info_table[index]; +} + +static struct oecls_netdev_info *alloc_oecls_netdev_info(void) +{ + if (oecls_netdev_num >= OECLS_MAX_NETDEV_NUM) + return NULL; + + return &oecls_netdev_info_table[oecls_netdev_num++]; +} + +static bool check_irq_name(const char *irq_name, struct oecls_netdev_info *oecls_dev) +{ + if (!strstr(irq_name, "TxRx") && !strstr(irq_name, "comp") && !strstr(irq_name, "rx")) + return false; + + if (strstr(irq_name, oecls_dev->dev_name)) + return true; + + if (oecls_dev->netdev->dev.parent && + strstr(irq_name, dev_name(oecls_dev->netdev->dev.parent))) + return true; + + return false; +} + +static void get_netdev_queue_info(struct oecls_netdev_info *oecls_dev) +{ + struct oecls_netdev_queue_info *rxq_info; + struct irq_desc *desc; + int irq; + + for_each_irq_desc(irq, desc) { + if (!desc->action) + continue; + if (!desc->action->name) + continue; + if (!check_irq_name(desc->action->name, oecls_dev)) + continue; + + oecls_debug("irq=%d, [%s], rxq_id=%d\n", irq, desc->action->name, + oecls_dev->rxq_num); + + if (oecls_dev->rxq_num >= OECLS_MAX_RXQ_NUM_PER_DEV) + break; + rxq_info = &oecls_dev->rxq[oecls_dev->rxq_num++]; + rxq_info->irq = irq; + } +} + +static int oecls_filter_enable(const char *dev_name, bool *old_state) +{ + struct ethtool_value eval = {0}; + struct cmd_context ctx = {0}; + int ret; + + strncpy(ctx.netdev, dev_name, IFNAMSIZ); + + eval.cmd = ETHTOOL_GFLAGS; + ret = send_ethtool_ioctl(&ctx, &eval); + if (ret != 0) { + oecls_error("get %s flags fail, ret:%d\n", dev_name, ret); + return ret; + } + if (eval.data & ETH_FLAG_NTUPLE) { + *old_state = true; + oecls_debug("%s ntuple is already on\n", dev_name); + return 0; + } + + // Set ntuple feature + eval.cmd = ETHTOOL_SFLAGS; + eval.data |= ETH_FLAG_NTUPLE; + ret = send_ethtool_ioctl(&ctx, &eval); + if (ret != 0) { + oecls_error("set %s flags fail, ret:%d\n", dev_name, ret); + return ret; + } + + // Get ntuple feature + eval.cmd = ETHTOOL_GFLAGS; + eval.data = 0; + ret = send_ethtool_ioctl(&ctx, &eval); + if (ret != 0) { + oecls_error("get %s flags fail, ret:%d\n", dev_name, ret); + return ret; + } + if (!(eval.data & ETH_FLAG_NTUPLE)) { + oecls_error("enable ntuple feature fail!\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static void oecls_filter_restore(const char *dev_name, bool old_state) +{ + struct ethtool_value eval = {0}; + struct cmd_context ctx = {0}; + bool cur_filter_state; + int ret; + + strncpy(ctx.netdev, dev_name, IFNAMSIZ); + + eval.cmd = ETHTOOL_GFLAGS; + ret = send_ethtool_ioctl(&ctx, &eval); + if (ret != 0) { + oecls_error("get %s flags fail, ret:%d\n", dev_name, ret); + return; + } + + cur_filter_state = (eval.data & ETH_FLAG_NTUPLE) ? true : false; + if (cur_filter_state == old_state) + return; + + // Set ntuple feature + eval.cmd = ETHTOOL_SFLAGS; + if (old_state) + eval.data |= ETH_FLAG_NTUPLE; + else + eval.data &= ~ETH_FLAG_NTUPLE; + ret = send_ethtool_ioctl(&ctx, &eval); + if (ret != 0) { + oecls_error("set %s flags fail, ret:%d\n", dev_name, ret); + return; + } +} + +static int init_single_oecls_dev(char *if_name, unsigned int length) +{ + struct oecls_netdev_info *oecls_dev; + char dev_name[IFNAMSIZ] = { 0 }; + struct net_device *netdev; + int cpy_len = length < IFNAMSIZ ? 
length : IFNAMSIZ; + bool old_state = false; + int ret; + + strncpy(dev_name, if_name, cpy_len); + netdev = dev_get_by_name(&init_net, dev_name); + if (!netdev) { + oecls_error("dev [%s] is not exist!\n", dev_name); + return -ENODEV; + } + + if (!(netdev->flags & IFF_UP)) { + ret = -ENETDOWN; + oecls_error("dev:%s not up! flags=%d.\n", dev_name, netdev->flags); + goto out; + } + + if (netdev->flags & IFF_LOOPBACK) { + ret = -EOPNOTSUPP; + oecls_error("Do not support loopback.\n"); + goto out; + } + + ret = oecls_filter_enable(dev_name, &old_state); + if (ret) { + oecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret); + goto out; + } + + oecls_dev = alloc_oecls_netdev_info(); + if (!oecls_dev) { + ret = -ENOMEM; + oecls_filter_restore(dev_name, old_state); + oecls_error("alloc oecls_dev fail! oecls_netdev_num:%d\n", oecls_netdev_num); + goto out; + } + + memcpy_r(oecls_dev->dev_name, dev_name, IFNAMSIZ); + oecls_dev->old_filter_state = old_state; + oecls_dev->netdev = netdev; + get_netdev_queue_info(oecls_dev); + return 0; + +out: + dev_put(netdev); + return ret; +} + +static void clean_oecls_netdev_info(void) +{ + struct oecls_netdev_info *oecls_dev; + struct net_device *netdev; + int devid; + + for_each_oecls_netdev(devid, oecls_dev) { + oecls_filter_restore(oecls_dev->dev_name, oecls_dev->old_filter_state); + netdev = oecls_dev->netdev; + if (netdev) { + oecls_dev->netdev = NULL; + dev_put(netdev); + } + } + + oecls_netdev_num = 0; +} + +static int init_oecls_netdev_info(char *netdev_str) +{ + char *start = netdev_str; + char *end; + int err = -ENODEV; + + while (*start != '\0') { + // skip start # + end = strchr(start, '#'); + if (end == start) { + start++; + continue; + } + + // find the last ifname + if (!end) { + err = init_single_oecls_dev(start, strlen(start)); + break; + } + + err = init_single_oecls_dev(start, end - start); + if (err) + break; + start = end + 1; + } + + return err; +} + +struct oecls_numa_info *get_oecls_numa_info(unsigned int nid) +{ + if (nid >= OECLS_MAX_NUMA_NUM) + return NULL; + return &oecls_numa_info_table[nid]; +} + +static void clean_oecls_numa_info(void) +{ + oecls_numa_num = 0; +} + +static void init_numa_avail_cpus(int nid, struct oecls_numa_info *numa_info) +{ + int cpu; + + oecls_debug("numa node %d: %*pb, %*pbl\n", nid, cpumask_pr_args(cpumask_of_node(nid)), + cpumask_pr_args(cpumask_of_node(nid))); + + bitmap_zero(numa_info->avail_cpus, OECLS_MAX_CPU_NUM); + for_each_cpu(cpu, cpumask_of_node(nid)) { + if (cpu >= OECLS_MAX_CPU_NUM) + return; + set_bit(cpu, numa_info->avail_cpus); + } +} + +static void init_numa_rxq_bitmap(int nid, struct oecls_numa_info *numa_info) +{ + struct oecls_numa_bound_dev_info *bound_dev; + struct oecls_netdev_info *oecls_dev; + int bound_rxq_num; + int rxq_id; + int devid; + int cpu; + + for_each_oecls_netdev(devid, oecls_dev) { + bound_rxq_num = 0; + bound_dev = &numa_info->bound_dev[devid]; + bitmap_zero(bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV); + + for (rxq_id = 0; rxq_id < oecls_dev->rxq_num; rxq_id++) { + cpu = oecls_dev->rxq[rxq_id].affinity_cpu; + if (cpu_to_node(cpu) == nid) { + set_bit(rxq_id, bound_dev->bitmap_rxq); + bound_rxq_num++; + } + } + + oecls_debug("nid:%d, dev_id:%d, dev:%s, rxq_num:%d, bit_num:%d, bitmap_rxq:%*pbl\n", + nid, devid, oecls_dev->dev_name, oecls_dev->rxq_num, + bound_rxq_num, OECLS_MAX_RXQ_NUM_PER_DEV, bound_dev->bitmap_rxq); + } +} + +int alloc_rxq_id(int nid, int devid) +{ + struct oecls_numa_bound_dev_info *bound_dev; + struct oecls_numa_info *numa_info; + int 
rxq_id; + + numa_info = get_oecls_numa_info(nid); + if (!numa_info) { + oecls_error("error nid:%d\n", nid); + return -EINVAL; + } + + if (devid >= OECLS_MAX_NETDEV_NUM) { + oecls_error("error bound_dev index:%d\n", devid); + return -EINVAL; + } + bound_dev = &numa_info->bound_dev[devid]; + + rxq_id = find_first_bit(bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV); + if (rxq_id >= OECLS_MAX_RXQ_NUM_PER_DEV) { + oecls_error("error rxq_id:%d\n", rxq_id); + return -EINVAL; + } + + clear_bit(rxq_id, bound_dev->bitmap_rxq); + oecls_debug("alloc nid:%d, dev_id:%d, rxq_id:%d\n", nid, devid, rxq_id); + return rxq_id; +} + +void free_rxq_id(int nid, int devid, int rxq_id) +{ + struct oecls_numa_bound_dev_info *bound_dev; + struct oecls_numa_info *numa_info; + + numa_info = get_oecls_numa_info(nid); + if (!numa_info) { + oecls_error("error nid:%d\n", nid); + return; + } + + if (devid >= OECLS_MAX_NETDEV_NUM) { + oecls_error("error bound_dev index:%d\n", devid); + return; + } + bound_dev = &numa_info->bound_dev[devid]; + + if (rxq_id >= OECLS_MAX_RXQ_NUM_PER_DEV) { + oecls_error("error rxq_id:%d\n", rxq_id); + return; + } + + if (test_bit(rxq_id, bound_dev->bitmap_rxq)) { + oecls_error("error nid:%d, devid:%d, rxq_id:%d\n", nid, devid, rxq_id); + return; + } + + set_bit(rxq_id, bound_dev->bitmap_rxq); + oecls_debug("free nid:%d, dev_id:%d, rxq_id:%d\n", nid, devid, rxq_id); +} + +static void init_oecls_numa_info(void) +{ + struct oecls_numa_info *numa_info; + unsigned int numa_num; + int nid; + + numa_num = num_online_nodes(); + if (numa_num > OECLS_MAX_NUMA_NUM) { + oecls_error("online numa num:%d is too much!\n", numa_num); + numa_num = OECLS_MAX_NUMA_NUM; + } + oecls_numa_num = numa_num; + oecls_debug("set oecls_numa_num=%d\n", numa_num); + + for_each_oecls_numa(nid, numa_info) + init_numa_avail_cpus(nid, numa_info); +} + +static int alloc_available_cpu(int nid, struct oecls_numa_info *numa_info) +{ + int cpu; + + cpu = find_first_bit(numa_info->avail_cpus, OECLS_MAX_CPU_NUM); + if (cpu >= OECLS_MAX_CPU_NUM) { + oecls_error("no available cpus: nid=%d, cpu=%d\n", nid, cpu); + return -1; + } + + clear_bit(cpu, numa_info->avail_cpus); + return cpu; +} + +static void add_netdev_irq_affinity_cpu(struct oecls_netdev_info *oecls_dev, int rxq_id, int cpu) +{ + struct oecls_netdev_queue_info *rxq_info; + + if (rxq_id >= OECLS_MAX_RXQ_NUM_PER_DEV) + return; + + rxq_info = &oecls_dev->rxq[rxq_id]; + rxq_info->affinity_cpu = cpu; +} + +static void config_affinity_strategy_default(struct oecls_netdev_info *oecls_dev) +{ + struct oecls_numa_info *numa_info; + int rxq_num = oecls_dev->rxq_num; + int rxq_per_numa = rxq_num / oecls_numa_num; + int remain = rxq_num - rxq_per_numa * oecls_numa_num; + int numa_rxq_id; + int rxq_id; + int nid; + int cpu; + + oecls_debug("dev=%s, rxq_num=%d, rxq_per_numa=%d, remain=%d\n", oecls_dev->dev_name, + rxq_num, rxq_per_numa, remain); + + // average config rxq to every numa + for_each_oecls_numa(nid, numa_info) { + for (numa_rxq_id = 0; numa_rxq_id < rxq_per_numa; numa_rxq_id++) { + cpu = alloc_available_cpu(nid, numa_info); + if (cpu < 0) + break; + + rxq_id = rxq_per_numa * nid + numa_rxq_id; + add_netdev_irq_affinity_cpu(oecls_dev, rxq_id, cpu); + } + } + + if (!remain) + return; + + // config remain rxq to every numa + numa_rxq_id = 0; + for_each_oecls_numa(nid, numa_info) { + if (numa_rxq_id >= remain) + break; + cpu = alloc_available_cpu(nid, numa_info); + if (cpu < 0) + break; + + rxq_id = rxq_per_numa * oecls_numa_num + numa_rxq_id; + numa_rxq_id++; + 
add_netdev_irq_affinity_cpu(oecls_dev, rxq_id, cpu); + } +} + +static void config_affinity_strategy_cluster(struct oecls_netdev_info *oecls_dev) +{ + int cluster_cpu_num = 8; + int cluster_num = num_online_cpus() / cluster_cpu_num; + int cluster_cpu_id = 0; + int rxq_id = 0; + int cluster; + int cpu; + + // average config rxq to every cluster + while (rxq_id < oecls_dev->rxq_num) { + for (cluster = 0; cluster < cluster_num; cluster++) { + cpu = cluster * cluster_cpu_num + cluster_cpu_id; + if (rxq_id >= oecls_dev->rxq_num) + break; + add_netdev_irq_affinity_cpu(oecls_dev, rxq_id++, cpu); + } + cluster_cpu_id++; + } +} + +static void config_affinity_strategy_16cores(struct oecls_netdev_info *oecls_dev) +{ + struct oecls_numa_info *numa_info; + int numa_start_cpu; + int numa_cpu_id; + int rxq_id = 0; + int nid; + int cpu; + + // only use 16 cores of one numa + for_each_oecls_numa(nid, numa_info) { + numa_start_cpu = find_first_bit(numa_info->avail_cpus, OECLS_MAX_CPU_NUM); + for (numa_cpu_id = 0; numa_cpu_id < 16; numa_cpu_id++) { + cpu = numa_start_cpu + numa_cpu_id; + + if (rxq_id >= oecls_dev->rxq_num) + break; + add_netdev_irq_affinity_cpu(oecls_dev, rxq_id++, cpu); + } + } +} + +static void config_affinity_strategy(void) +{ + struct oecls_netdev_info *oecls_dev; + int devid; + + for_each_oecls_netdev(devid, oecls_dev) { + switch (strategy) { + case 0: + config_affinity_strategy_default(oecls_dev); + break; + case 1: + config_affinity_strategy_cluster(oecls_dev); + break; + case 2: + config_affinity_strategy_16cores(oecls_dev); + break; + default: + config_affinity_strategy_default(oecls_dev); + break; + } + } +} + +static inline void irq_set_affinity_wrapper(int rxq, int irq, int cpu) +{ + int err = 0; + + err = irq_set_affinity(irq, get_cpu_mask(cpu)); + oecls_debug("rxq=%d, irq=%d, cpu=%d, err=%d\n", rxq, irq, cpu, err); +} + +static void enable_affinity_strategy(void) +{ + struct oecls_netdev_queue_info *rxq_info; + struct oecls_netdev_info *oecls_dev; + int rxq_id; + int devid; + + for_each_oecls_netdev(devid, oecls_dev) { + for (rxq_id = 0; rxq_id < oecls_dev->rxq_num; rxq_id++) { + rxq_info = &oecls_dev->rxq[rxq_id]; + irq_set_affinity_wrapper(rxq_id, rxq_info->irq, rxq_info->affinity_cpu); + } + } +} + +static inline void netif_set_xps_queue_wrapper(struct net_device *netdev, int rxq_id, + const struct cpumask *cpu_mask) +{ + int err = 0; + + err = netif_set_xps_queue(netdev, cpu_mask, rxq_id); + oecls_debug("name=%s, rxq_id=%d, mask=%*pbl, err=%d\n", netdev->name, rxq_id, + cpumask_pr_args(cpu_mask), err); +} + +static void set_netdev_xps_queue(bool enable) +{ + const struct cpumask clear_mask = { 0 }; + struct oecls_netdev_info *oecls_dev; + const struct cpumask *cpu_mask; + int rxq_id; + int devid; + int cpu; + int nid; + + for_each_oecls_netdev(devid, oecls_dev) { + for (rxq_id = 0; rxq_id < oecls_dev->rxq_num; rxq_id++) { + cpu = oecls_dev->rxq[rxq_id].affinity_cpu; + nid = cpu_to_node(cpu); + if (enable) + cpu_mask = cpumask_of_node(nid); + else + cpu_mask = &clear_mask; + + netif_set_xps_queue_wrapper(oecls_dev->netdev, rxq_id, cpu_mask); + } + } +} + +static __init int oecls_init(void) +{ + struct oecls_numa_info *numa_info; + int nid; + int err; + + oecls_debug("[init] mode=%d, ifname=[%s]\n", mode, ifname); + if (!check_params()) + return -EINVAL; + + init_oecls_l0_cache(); + init_oecls_numa_info(); + err = init_oecls_netdev_info(ifname); + if (err) + goto out; + + // Set irq affinity + config_affinity_strategy(); + enable_affinity_strategy(); + + // Calculate rxq 
bounded to one numa + for_each_oecls_numa(nid, numa_info) + init_numa_rxq_bitmap(nid, numa_info); + +#ifdef CONFIG_XPS + set_netdev_xps_queue(true); +#endif + + if (mode == 0) + oecls_ntuple_res_init(); + else + oecls_flow_res_init(); + + return 0; +out: + clean_oecls_netdev_info(); + clean_oecls_numa_info(); + clean_oecls_l0_cache(); + return err; +} + +static __exit void oecls_exit(void) +{ + oecls_debug("[exit] mode=%d\n", mode); + if (mode == 0) + oecls_ntuple_res_clean(); + else + oecls_flow_res_clean(); + +#ifdef CONFIG_XPS + set_netdev_xps_queue(false); +#endif + + clean_oecls_netdev_info(); + clean_oecls_numa_info(); + clean_oecls_l0_cache(); +} + +module_init(oecls_init); +module_exit(oecls_exit); + +MODULE_DESCRIPTION("oenetcls"); +MODULE_LICENSE("GPL v2"); diff --git a/net/oenetcls/oenetcls_ntuple.c b/net/oenetcls/oenetcls_ntuple.c new file mode 100644 index 000000000000..38d1f5df6ff1 --- /dev/null +++ b/net/oenetcls/oenetcls_ntuple.c @@ -0,0 +1,510 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/inetdevice.h> +#include <linux/netdevice.h> +#include <linux/rtnetlink.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> +#include <linux/inet.h> +#include <linux/jhash.h> +#include <net/sock.h> +#include <trace/hooks/oenetcls.h> +#include "oenetcls.h" + +struct oecls_sk_rule_list oecls_sk_rules; + +static void init_oecls_sk_rules(void) +{ + unsigned int i; + + for (i = 0; i < OECLS_SK_RULE_HASHSIZE; i++) + INIT_HLIST_HEAD(oecls_sk_rules.hash + i); + mutex_init(&oecls_sk_rules.mutex); +} + +static struct hlist_head *oecls_sk_rule_hash(u32 dip4, u16 dport) +{ + return oecls_sk_rules.hash + (jhash_2words(dip4, dport, 0) & OECLS_SK_RULE_HASHMASK); +} + +static void add_sk_rule(int devid, u32 dip4, u16 dport, void *sk, int action, + int ruleid, int nid) +{ + struct hlist_head *hlist = oecls_sk_rule_hash(dip4, dport); + struct oecls_sk_rule *rule; + + rule = alloc_from_l0(sizeof(struct oecls_sk_rule)); + if (!rule) + return; + oecls_debug("alloc rule=%p\n", rule); + + rule->sk = sk; + rule->dip4 = dip4; + rule->dport = dport; + rule->devid = devid; + rule->action = action; + rule->ruleid = ruleid; + rule->nid = nid; + hlist_add_head(&rule->node, hlist); +} + +static void del_sk_rule(struct oecls_sk_rule *rule) +{ + hlist_del_init(&rule->node); + oecls_debug("del rule=%p\n", rule); + free_to_l0(rule); +} + +static struct oecls_sk_rule *get_sk_rule(int devid, u32 dip4, u16 dport) +{ + struct hlist_head *hlist = oecls_sk_rule_hash(dip4, dport); + struct oecls_sk_rule *rule = NULL; + + hlist_for_each_entry(rule, hlist, node) { + if (rule->devid == devid && rule->dip4 == dip4 && rule->dport == dport) + break; + } + return rule; +} + +static bool reuseport_check(int devid, u32 dip4, u16 dport) +{ + return !!get_sk_rule(devid, dip4, dport); +} + +static u32 get_first_ip4_addr(struct net *net) +{ + struct in_device *in_dev; + struct net_device *dev; + struct in_ifaddr *ifa; + u32 dip4 = 0; + + rtnl_lock(); + rcu_read_lock(); + for_each_netdev(net, dev) { + if (dev->flags & IFF_LOOPBACK || !(dev->flags & IFF_UP)) + continue; + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) + continue; + + in_dev_for_each_ifa_rcu(ifa, in_dev) { + if (!strcmp(dev->name, ifa->ifa_label)) { + dip4 = ifa->ifa_local; + oecls_debug("dev: %s, dip4: 0x%x\n", dev->name, dip4); + goto out; + } + } + } +out: + rcu_read_unlock(); + rtnl_unlock(); + return dip4; +} + +static void get_sk_rule_addr(struct sock *sk, u32 *dip4, u16 *dport) +{ + *dport = htons(sk->sk_num); + + if (!match_ip_flag) { + *dip4 = 0; + 
return; + } + + if (sk->sk_rcv_saddr) + *dip4 = sk->sk_rcv_saddr; + else + *dip4 = get_first_ip4_addr(sock_net(sk)); +} + +static int rxclass_rule_del(struct cmd_context *ctx, __u32 loc) +{ + struct ethtool_rxnfc nfccmd; + int err; + + nfccmd.cmd = ETHTOOL_SRXCLSRLDEL; + nfccmd.fs.location = loc; + err = send_ethtool_ioctl(ctx, &nfccmd); + if (err < 0) + oecls_debug("rmgr: Cannot delete RX class rule, loc:%u\n", loc); + return err; +} + +static int rmgr_ins(struct rmgr_ctrl *rmgr, __u32 loc) +{ + if (loc >= rmgr->size) { + oecls_error("rmgr: Location out of range\n"); + return -1; + } + + set_bit(loc, rmgr->slot); + return 0; +} + +static int rmgr_find_empty_slot(struct rmgr_ctrl *rmgr, struct ethtool_rx_flow_spec *fsp) +{ + __u32 loc, slot_num; + + if (rmgr->driver_select) + return 0; + + loc = rmgr->size - 1; + slot_num = loc / BITS_PER_LONG; + if (!~(rmgr->slot[slot_num] | (~1UL << rmgr->size % BITS_PER_LONG))) { + loc -= 1 + (loc % BITS_PER_LONG); + slot_num--; + } + + while (loc < rmgr->size && !~(rmgr->slot[slot_num])) { + loc -= BITS_PER_LONG; + slot_num--; + } + + while (loc < rmgr->size && test_bit(loc, rmgr->slot)) + loc--; + + if (loc < rmgr->size) { + fsp->location = loc; + return rmgr_ins(rmgr, loc); + } + + return -1; +} + +static int rxclass_get_dev_info(struct cmd_context *ctx, __u32 *count, int *driver_select) +{ + struct ethtool_rxnfc nfccmd; + int err; + + nfccmd.cmd = ETHTOOL_GRXCLSRLCNT; + nfccmd.data = 0; + err = send_ethtool_ioctl(ctx, &nfccmd); + *count = nfccmd.rule_cnt; + if (driver_select) + *driver_select = !!(nfccmd.data & RX_CLS_LOC_SPECIAL); + if (err < 0) + oecls_debug("rxclass: Cannot get RX class rule count\n"); + + return err; +} + +static int rmgr_init(struct cmd_context *ctx, struct rmgr_ctrl *rmgr) +{ + struct ethtool_rxnfc *nfccmd; + __u32 *rule_locs; + int i, err = 0; + + memset(rmgr, 0, sizeof(*rmgr)); + err = rxclass_get_dev_info(ctx, &rmgr->n_rules, &rmgr->driver_select); + if (err < 0) + return err; + + if (rmgr->driver_select) + return err; + + nfccmd = kzalloc(sizeof(*nfccmd) + (rmgr->n_rules * sizeof(__u32)), GFP_ATOMIC); + if (!nfccmd) { + oecls_error("rmgr: Cannot allocate memory for RX class rule locations\n"); + err = -ENOMEM; + goto out; + } + + nfccmd->cmd = ETHTOOL_GRXCLSRLALL; + nfccmd->rule_cnt = rmgr->n_rules; + err = send_ethtool_ioctl(ctx, nfccmd); + if (err < 0) { + oecls_debug("rmgr: Cannot get RX class rules\n"); + goto out; + } + + rmgr->size = nfccmd->data; + if (rmgr->size == 0 || rmgr->size < rmgr->n_rules) { + oecls_error("rmgr: Invalid RX class rules table size\n"); + err = -EINVAL; + goto out; + } + + rmgr->slot = kzalloc(BITS_TO_LONGS(rmgr->size) * sizeof(long), GFP_ATOMIC); + if (!rmgr->slot) { + oecls_error("rmgr: Cannot allocate memory for RX class rules\n"); + err = -ENOMEM; + goto out; + } + + rule_locs = nfccmd->rule_locs; + for (i = 0; i < rmgr->n_rules; i++) { + err = rmgr_ins(rmgr, rule_locs[i]); + if (err < 0) + break; + } + +out: + kfree(nfccmd); + return err; +} + +static void rmgr_cleanup(struct rmgr_ctrl *rmgr) +{ + kfree(rmgr->slot); + rmgr->slot = NULL; + rmgr->size = 0; +} + +static int rmgr_set_location(struct cmd_context *ctx, + struct ethtool_rx_flow_spec *fsp) +{ + struct rmgr_ctrl rmgr; + int ret; + + ret = rmgr_init(ctx, &rmgr); + if (ret < 0) + goto out; + + ret = rmgr_find_empty_slot(&rmgr, fsp); +out: + rmgr_cleanup(&rmgr); + return ret; +} + +static int rxclass_rule_ins(struct cmd_context *ctx, + struct ethtool_rx_flow_spec *fsp, u32 rss_context) +{ + struct ethtool_rxnfc nfccmd; + u32 loc = 
fsp->location; + int ret; + + if (loc & RX_CLS_LOC_SPECIAL) { + ret = rmgr_set_location(ctx, fsp); + if (ret < 0) + return ret; + } + + nfccmd.cmd = ETHTOOL_SRXCLSRLINS; + nfccmd.rss_context = rss_context; + nfccmd.fs = *fsp; + ret = send_ethtool_ioctl(ctx, &nfccmd); + if (ret < 0) { + oecls_debug("Can not insert the clasification rule\n"); + return ret; + } + + if (loc & RX_CLS_LOC_SPECIAL) + oecls_debug("Added rule with ID %d\n", nfccmd.fs.location); + + return 0; +} + +static void flow_spec_to_ntuple(struct ethtool_rx_flow_spec *fsp, + struct ethtool_rx_ntuple_flow_spec *ntuple) +{ + int i; + + memset(ntuple, ~0, sizeof(*ntuple)); + ntuple->flow_type = fsp->flow_type; + ntuple->action = fsp->ring_cookie; + memcpy_r(&ntuple->h_u, &fsp->h_u, sizeof(fsp->h_u)); + memcpy_r(&ntuple->m_u, &fsp->m_u, sizeof(fsp->m_u)); + for (i = 0; i < sizeof(ntuple->m_u); i++) + ntuple->m_u.hdata[i] ^= 0xFF; + ntuple->flow_type &= ~FLOW_EXT; +} + +static int do_srxntuple(struct cmd_context *ctx, struct ethtool_rx_flow_spec *fsp) +{ + struct ethtool_rx_ntuple ntuplecmd; + struct ethtool_value eval; + int ret = 0; + + flow_spec_to_ntuple(fsp, &ntuplecmd.fs); + + eval.cmd = ETHTOOL_GFLAGS; + ret = send_ethtool_ioctl(ctx, &eval); + if (ret || !(eval.data & ETH_FLAG_NTUPLE)) + return -1; + + ntuplecmd.cmd = ETHTOOL_SRXNTUPLE; + ret = send_ethtool_ioctl(ctx, &ntuplecmd); + if (ret) + oecls_debug("Cannot add new rule via N-tuple, ret:%d\n", ret); + + return ret; +} + +static int cfg_ethtool_rule(struct cmd_context *ctx, bool is_del) +{ + struct ethtool_rx_flow_spec *fsp, rx_rule_fs; + u32 rss_context = 0; + int ret; + + oecls_debug("is_del:%d netdev:%s, dip4:%pI4, dport:%d, action:%d, ruleid:%u, del_ruleid:%u\n", + is_del, ctx->netdev, &ctx->dip4, ntohs(ctx->dport), ctx->action, ctx->ruleid, + ctx->del_ruleid); + + if (is_del) + return rxclass_rule_del(ctx, ctx->del_ruleid); + + ctx->ret_loc = -1; + + fsp = &rx_rule_fs; + memset(fsp, 0, sizeof(*fsp)); + fsp->flow_type = TCP_V4_FLOW; + fsp->location = RX_CLS_LOC_ANY; + fsp->h_u.tcp_ip4_spec.ip4dst = ctx->dip4; + fsp->h_u.tcp_ip4_spec.pdst = ctx->dport; + if (ctx->dip4) + fsp->m_u.tcp_ip4_spec.ip4dst = (u32)~0ULL; + fsp->m_u.tcp_ip4_spec.pdst = (u16)~0ULL; + if (ctx->ruleid) + fsp->location = ctx->ruleid; + fsp->ring_cookie = ctx->action; + + ret = do_srxntuple(ctx, &rx_rule_fs); + if (!ret) + return 0; + + ret = rxclass_rule_ins(ctx, &rx_rule_fs, rss_context); + if (!ret) + ctx->ret_loc = rx_rule_fs.location; + return ret; +} + +static void del_ntuple_rule(struct sock *sk) +{ + struct oecls_netdev_info *oecls_dev; + struct cmd_context ctx = { 0 }; + struct oecls_sk_rule *rule; + int devid; + u16 dport; + u32 dip4; + int err; + + get_sk_rule_addr(sk, &dip4, &dport); + + mutex_lock(&oecls_sk_rules.mutex); + for_each_oecls_netdev(devid, oecls_dev) { + strncpy(ctx.netdev, oecls_dev->dev_name, IFNAMSIZ); + rule = get_sk_rule(devid, dip4, dport); + if (!rule) { + oecls_debug("rule not found! 
sk:%p, devid:%d, dip4:0x%x, dport:%d\n", sk, + devid, dip4, dport); + continue; + } + + // Config Ntuple rule to dev + ctx.del_ruleid = rule->ruleid; + err = cfg_ethtool_rule(&ctx, true); + if (err) { + oecls_error("del sk:%p, nid:%d, devid:%d, action:%d, ruleid:%d, err:%d\n", + sk, rule->nid, devid, rule->action, rule->ruleid, err); + } + + // Free the bound queue + free_rxq_id(rule->nid, devid, rule->action); + + // Delete sk rule + del_sk_rule(rule); + } + mutex_unlock(&oecls_sk_rules.mutex); +} + +static void add_ntuple_rule(struct sock *sk) +{ + struct oecls_netdev_info *oecls_dev; + struct cmd_context ctx = { 0 }; + int cpu = smp_processor_id(); + int nid = cpu_to_node(cpu); + int rxq_id; + int devid; + int err; + + if (check_appname(current->comm)) + return; + get_sk_rule_addr(sk, &ctx.dip4, &ctx.dport); + + mutex_lock(&oecls_sk_rules.mutex); + for_each_oecls_netdev(devid, oecls_dev) { + strncpy(ctx.netdev, oecls_dev->dev_name, IFNAMSIZ); + if (reuseport_check(devid, ctx.dip4, ctx.dport)) { + oecls_error("dip4:0x%x, dport:%d reuse!\n", ctx.dip4, ctx.dport); + continue; + } + + // Calculate the bound queue + rxq_id = alloc_rxq_id(nid, devid); + if (rxq_id < 0) + continue; + + // Config Ntuple rule to dev + ctx.action = (u16)rxq_id; + err = cfg_ethtool_rule(&ctx, false); + if (err) { + oecls_error("add sk:%p, nid:%d, devid:%d, action:%d, ruleid:%d, err:%d\n", + sk, nid, devid, ctx.action, ctx.ret_loc, err); + continue; + } + + // Add sk rule + add_sk_rule(devid, ctx.dip4, ctx.dport, sk, ctx.action, ctx.ret_loc, nid); + } + mutex_unlock(&oecls_sk_rules.mutex); +} + +static void ethtool_cfg_rxcls(void *data, struct sock *sk, int is_del) +{ + if (sk->sk_state != TCP_LISTEN) + return; + + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) + return; + + oecls_debug("[cpu:%d] app:%s, sk:%p, is_del:%d, ip:0x%x, port:0x%x\n", smp_processor_id(), + current->comm, sk, is_del, sk->sk_rcv_saddr, sk->sk_num); + + if (is_del) + del_ntuple_rule(sk); + else + add_ntuple_rule(sk); +} + +static void clean_oecls_sk_rules(void) +{ + struct oecls_netdev_info *oecls_dev; + struct cmd_context ctx = { 0 }; + struct oecls_sk_rule *rule; + struct hlist_head *hlist; + struct hlist_node *n; + unsigned int i; + int err; + + mutex_lock(&oecls_sk_rules.mutex); + for (i = 0; i < OECLS_SK_RULE_HASHSIZE; i++) { + hlist = &oecls_sk_rules.hash[i]; + + hlist_for_each_entry_safe(rule, n, hlist, node) { + oecls_dev = get_oecls_netdev_info(rule->devid); + if (!oecls_dev) + continue; + strncpy(ctx.netdev, oecls_dev->dev_name, IFNAMSIZ); + ctx.del_ruleid = rule->ruleid; + err = cfg_ethtool_rule(&ctx, true); + oecls_debug("sk:%p, dev_id:%d, action:%d, ruleid:%d, err:%d\n", rule->sk, + rule->devid, rule->action, rule->ruleid, err); + + hlist_del(&rule->node); + oecls_debug("clean rule=%p\n", rule); + free_to_l0(rule); + } + } + mutex_unlock(&oecls_sk_rules.mutex); +} + +void oecls_ntuple_res_init(void) +{ + init_oecls_sk_rules(); + register_trace_ethtool_cfg_rxcls(ðtool_cfg_rxcls, NULL); +} + +void oecls_ntuple_res_clean(void) +{ + unregister_trace_ethtool_cfg_rxcls(ðtool_cfg_rxcls, NULL); + clean_oecls_sk_rules(); +} -- 2.34.1
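
A note for anyone reproducing the rule setup from userspace: below is a minimal sketch (not part of the patch) of the ETHTOOL_SRXCLSRLINS ioctl that cfg_ethtool_rule() drives in-kernel through send_ethtool_ioctl(). The interface name "eth0", destination port 80 and RX queue 3 are placeholder values for illustration only; the struct ethtool_rxnfc / ethtool_rx_flow_spec layout is the standard uapi from linux/ethtool.h.

/*
 * Userspace sketch: insert a TCP/IPv4 ntuple rule that steers traffic to
 * destination port 80 onto RX queue 3, letting the driver pick the slot.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <arpa/inet.h>

int main(void)
{
	struct ethtool_rxnfc nfc = { 0 };
	struct ifreq ifr = { 0 };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	nfc.cmd = ETHTOOL_SRXCLSRLINS;
	nfc.fs.flow_type = TCP_V4_FLOW;
	nfc.fs.location = RX_CLS_LOC_ANY;          /* let the driver choose a free location */
	nfc.fs.h_u.tcp_ip4_spec.pdst = htons(80);  /* match destination port 80 */
	nfc.fs.m_u.tcp_ip4_spec.pdst = 0xffff;     /* port is the only match key */
	nfc.fs.ring_cookie = 3;                    /* steer matching flows to RX queue 3 */

	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)&nfc;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		perror("ETHTOOL_SRXCLSRLINS");
	else
		printf("rule installed at location %u\n", nfc.fs.location);

	close(fd);
	return 0;
}

The kernel-side path in the patch builds the same ethtool_rx_flow_spec (optionally matching ip4dst when match_ip_flag is set) and records the returned location as the rule id to delete later with ETHTOOL_SRXCLSRLDEL.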

Feedback: The patch(es) you sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/16553 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/MHA...