[PATCH OLK-6.6 01/15] net/venetcls: introduce venetcls for network optimization

23 Jun 2026

hulk inclusion
category: feature
Link: https://gitee.com/openeuler/kernel/issues/ICBFCS
CVE: NA

--------------------------------

Introduce venetcls, a network optimization mechanism. It configures
ntuple rules and binds interrupts to the netdev queues
automatically.

Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
Signed-off-by: Wang Liang <wangliang74@huawei.com>
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: yuelg <yuelg@chinaunicom.cn>
---
 MAINTAINERS                    |    5 +
 include/linux/netdevice.h      |    3 +
 include/linux/venetcls.h       |  101 +++
 kernel/irq/irqdesc.c           |    2 +-
 net/Kconfig                    |    1 +
 net/Makefile                   |    1 +
 net/core/dev.c                 |   23 +
 net/ipv4/af_inet.c             |    6 +
 net/ipv4/tcp.c                 |    9 +
 net/venetcls/Kconfig           |   11 +
 net/venetcls/Makefile          |    7 +
 net/venetcls/asmdefs.S         |   61 ++
 net/venetcls/memcpy-sve.S      |  157 +++++
 net/venetcls/venetcls.h        |  183 +++++
 net/venetcls/venetcls_flow.c   |  514 ++++++++++++++
 net/venetcls/venetcls_main.c   | 1154 ++++++++++++++++++++++++++++++++
 net/venetcls/venetcls_ntuple.c |  713 ++++++++++++++++++++
 17 files changed, 2950 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/venetcls.h
 create mode 100644 net/venetcls/Kconfig
 create mode 100644 net/venetcls/Makefile
 create mode 100644 net/venetcls/asmdefs.S
 create mode 100644 net/venetcls/memcpy-sve.S
 create mode 100644 net/venetcls/venetcls.h
 create mode 100644 net/venetcls/venetcls_flow.c
 create mode 100644 net/venetcls/venetcls_main.c
 create mode 100644 net/venetcls/venetcls_ntuple.c

diff --git a/MAINTAINERS b/MAINTAINERS
index ab1ff9b4195e..861b3418b947 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -20568,6 +20568,11 @@ F:	net/xdp/
 F:	samples/bpf/xdpsock*
 F:	tools/lib/bpf/xsk*
 
+VENETCLS
+M:	Yue Haibing <yuehaibing@huawei.com>
+F:	include/linux/venetcls.h
+F:	net/venetcls/
+
 XEN BLOCK SUBSYSTEM
 M:	Roger Pau Monné <roger.pau@citrix.com>
 L:	xen-devel@lists.xenproject.org (moderated for non-subscribers)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc1f14f3c236..e5f876cecf15 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -766,6 +766,9 @@ struct netdev_rx_queue {
 	struct xsk_buff_pool            *pool;
 #endif
 	struct file __rcu		*dmabuf_pages;
+#if IS_ENABLED(CONFIG_VENETCLS)
+	void __rcu			*vecls_ftb;
+#endif
 } ____cacheline_aligned_in_smp;
 
 struct page *
diff --git a/include/linux/venetcls.h b/include/linux/venetcls.h
new file mode 100644
index 000000000000..9cfcdd4e5766
--- /dev/null
+++ b/include/linux/venetcls.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _LINUX_VENETCLS_H
+#define _LINUX_VENETCLS_H
+
+struct vecls_hook_ops {	/* hooks the core stack invokes through the RCU-protected vecls_ops */
+	void (*vecls_cfg_rxcls)(struct sock *sk, int is_del);	/* is_del: 0 on listen, 1 on close */
+	void (*vecls_flow_update)(struct sock *sk);	/* called from tcp_recvmsg() */
+	void (*vecls_set_cpu)(struct sk_buff *skb, int *cpu, int *last_qtail);	/* *cpu >= 0 steers skb */
+	bool (*vecls_timeout)(struct net_device *dev, u16 rxq_index,
+				u32 flow_id, u16 filter_id);	/* result is reported as "expire" */
+};
+
+typedef int (*enqueue_f)(struct sk_buff *skb, int cpu, unsigned int *qtail);
+extern const struct vecls_hook_ops __rcu *vecls_ops;
+
+static inline void venetcls_cfg_rxcls(struct sock *sk, int is_del)
+{
+	const struct vecls_hook_ops *ops;
+	/* Forward (sk, is_del) to the registered vecls_cfg_rxcls hook; no-op if none is set. */
+	rcu_read_lock();
+	ops = rcu_dereference(vecls_ops);	/* NULL when no ops table is installed */
+	if (ops && ops->vecls_cfg_rxcls)	/* the hook itself is optional */
+		ops->vecls_cfg_rxcls(sk, is_del);
+	rcu_read_unlock();
+}
+
+static inline void venetcls_flow_update(struct sock *sk)
+{
+	const struct vecls_hook_ops *ops;
+	/* Forward sk to the registered vecls_flow_update hook; no-op if none is set. */
+	rcu_read_lock();
+	ops = rcu_dereference(vecls_ops);	/* NULL when no ops table is installed */
+	if (ops && ops->vecls_flow_update)	/* the hook itself is optional */
+		ops->vecls_flow_update(sk);
+	rcu_read_unlock();
+}
+
+static inline bool
+venetcls_skb_set_cpu(struct sk_buff *skb, enqueue_f enq_func, int *ret)
+{
+	const struct vecls_hook_ops *ops;
+	int cpu, last_qtail;
+	bool result = false;
+	/* Returns true, with *ret set, only when the hook picked a CPU and skb went to enq_func. */
+	rcu_read_lock();
+	ops = rcu_dereference(vecls_ops);
+	if (ops && ops->vecls_set_cpu) {
+		cpu = -1;	/* stays negative unless the hook picks a CPU */
+		last_qtail = 0;
+		ops->vecls_set_cpu(skb, &cpu, &last_qtail);
+		if (cpu >= 0) {
+			*ret = enq_func(skb, cpu, &last_qtail);	/* skb now belongs to enq_func */
+			result = true;
+		}
+	}
+	rcu_read_unlock();
+	return result;	/* false: caller continues with its normal receive path */
+}
+
+static inline void
+venetcls_skblist_set_cpu(struct list_head *head, enqueue_f enq_func)
+{
+	const struct vecls_hook_ops *ops;
+	struct sk_buff *skb, *next;
+	int cpu, last_qtail;
+	/* List variant: skbs the hook steers are unlinked and enqueued, the rest stay on head. */
+	rcu_read_lock();
+	ops = rcu_dereference(vecls_ops);
+	if (ops && ops->vecls_set_cpu) {
+		list_for_each_entry_safe(skb, next, head, list) {	/* _safe: entries are unlinked */
+			cpu = -1;	/* reset per skb; negative means "not steered" */
+			last_qtail = 0;
+			ops->vecls_set_cpu(skb, &cpu, &last_qtail);
+			if (cpu >= 0) {
+				skb_list_del_init(skb);
+				enq_func(skb, cpu, &last_qtail);	/* return value is not examined */
+			}
+		}
+	}
+	rcu_read_unlock();
+}
+
+static inline bool venetcls_may_expire_flow(struct net_device *dev,
+					    u16 rxq_index, u32 flow_id,
+					    u16 filter_id, bool *expire)
+{
+	const struct vecls_hook_ops *ops;
+	bool ret = false;	/* false: no hook answered, caller falls back to its own check */
+	/* Ask the vecls_timeout hook whether the filter may expire; the answer goes to *expire. */
+	*expire = true;	/* value left in place when no hook is registered */
+	rcu_read_lock();
+	ops = rcu_dereference(vecls_ops);
+	if (ops && ops->vecls_timeout) {
+		*expire = ops->vecls_timeout(dev, rxq_index, flow_id, filter_id);
+		ret = true;	/* hook answered; rps_may_expire_flow() returns *expire directly */
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+#endif  /* _LINUX_VENETCLS_H */
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 8202d4a996a5..eb8641e22575 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -366,7 +366,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
 {
 	return radix_tree_lookup(&irq_desc_tree, irq);
 }
-#ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE
+#if defined(CONFIG_KVM_BOOK3S_64_HV_MODULE) || IS_ENABLED(CONFIG_VENETCLS)
 EXPORT_SYMBOL_GPL(irq_to_desc);
 #endif
 
diff --git a/net/Kconfig b/net/Kconfig
index dc8451e75e4c..2b68c0f8625e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -72,6 +72,7 @@ source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
 source "net/smc/Kconfig"
 source "net/xdp/Kconfig"
+source "net/venetcls/Kconfig"
 
 config INET
 	bool "TCP/IP networking"
diff --git a/net/Makefile b/net/Makefile
index 6a62e5b27378..a2cb1281e2a9 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -78,3 +78,4 @@ obj-$(CONFIG_NET_NCSI)		+= ncsi/
 obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
 obj-$(CONFIG_MPTCP)		+= mptcp/
 obj-$(CONFIG_MCTP)		+= mctp/
+obj-$(CONFIG_VENETCLS)		+= venetcls/
diff --git a/net/core/dev.c b/net/core/dev.c
index f628494a1c0f..47b916ca8d46 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -160,6 +160,12 @@
 /* This should be increased if a protocol with a bigger head is added. */
 #define GRO_MAX_HEAD (MAX_HEADER + 128)
 
+#if IS_ENABLED(CONFIG_VENETCLS)
+#include <linux/venetcls.h>
+const struct vecls_hook_ops __rcu *vecls_ops __read_mostly;
+EXPORT_SYMBOL_GPL(vecls_ops);
+#endif
+
 static DEFINE_SPINLOCK(ptype_lock);
 static DEFINE_SPINLOCK(offload_lock);
 struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
@@ -4770,6 +4776,10 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
 	bool expire = true;
 	unsigned int cpu;
 
+#if IS_ENABLED(CONFIG_VENETCLS)
+	if (venetcls_may_expire_flow(dev, rxq_index, flow_id, filter_id, &expire))
+		return expire;
+#endif
 	rcu_read_lock();
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
 	if (flow_table && flow_id <= flow_table->mask) {
@@ -5881,6 +5891,12 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
 			return ret;
 		}
 	}
+#endif
+#if IS_ENABLED(CONFIG_VENETCLS)
+	if (venetcls_skb_set_cpu(skb, enqueue_to_backlog, &ret)) {
+		rcu_read_unlock();
+		return ret;
+	}
 #endif
 	ret = __netif_receive_skb(skb);
 	rcu_read_unlock();
@@ -5915,6 +5931,9 @@ static void netif_receive_skb_list_internal(struct list_head *head)
 			}
 		}
 	}
+#endif
+#if IS_ENABLED(CONFIG_VENETCLS)
+	venetcls_skblist_set_cpu(head, enqueue_to_backlog);
 #endif
 	__netif_receive_skb_list(head);
 	rcu_read_unlock();
@@ -10272,6 +10291,10 @@ int __netdev_update_features(struct net_device *dev)
 	return err < 0 ? 0 : 1;
 }
 
+#if IS_ENABLED(CONFIG_VENETCLS)
+EXPORT_SYMBOL(__netdev_update_features);
+#endif
+
 static int netdev_do_alloc_pcpu_stats(struct net_device *dev)
 {
 	void __percpu *v;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5dc1955e38c4..06b917182a5a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -120,6 +120,9 @@
 #include <net/compat.h>
 
 #include <trace/events/sock.h>
+#if IS_ENABLED(CONFIG_VENETCLS)
+#include <linux/venetcls.h>
+#endif
 
 /* The inetsw table contains everything that inet_create needs to
  * build a new socket.
@@ -229,6 +232,9 @@ int inet_listen(struct socket *sock, int backlog)
 		if (err)
 			goto out;
 		tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL);
+#if IS_ENABLED(CONFIG_VENETCLS)
+		venetcls_cfg_rxcls(sk, 0);
+#endif
 	}
 	err = 0;
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e8b7f0c5dded..cc84873cee0d 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -281,6 +281,9 @@
 #include <asm/ioctls.h>
 #include <net/busy_poll.h>
 #include <linux/dma-buf.h>
+#if IS_ENABLED(CONFIG_VENETCLS)
+#include <linux/venetcls.h>
+#endif
 
 /* Track pending CMSGs. */
 enum {
@@ -2940,6 +2943,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
 
+#if IS_ENABLED(CONFIG_VENETCLS)
+	venetcls_flow_update(sk);
+#endif
 	if (sk_can_busy_loop(sk) &&
 	    skb_queue_empty_lockless(&sk->sk_receive_queue) &&
 	    sk->sk_state == TCP_ESTABLISHED)
@@ -3300,6 +3306,9 @@ void __tcp_close(struct sock *sk, long timeout)
 void tcp_close(struct sock *sk, long timeout)
 {
 	lock_sock(sk);
+#if IS_ENABLED(CONFIG_VENETCLS)
+	venetcls_cfg_rxcls(sk, 1);
+#endif
 	__tcp_close(sk, timeout);
 	release_sock(sk);
 	sock_put(sk);
diff --git a/net/venetcls/Kconfig b/net/venetcls/Kconfig
new file mode 100644
index 000000000000..7f2ea5c4a6b0
--- /dev/null
+++ b/net/venetcls/Kconfig
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config VENETCLS
+	tristate "Network classification"
+	depends on m && RFS_ACCEL
+	default n
+	help
+	  This introduces a network optimization method that configures
+	  flow steering rules and binds interrupts to the netdev queues
+	  automatically.
+
+	  This module can only be built as a loadable module.
diff --git a/net/venetcls/Makefile b/net/venetcls/Makefile
new file mode 100644
index 000000000000..639a81d7d6b2
--- /dev/null
+++ b/net/venetcls/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_VENETCLS) += venetcls.o
+venetcls-y                      := venetcls_main.o venetcls_ntuple.o venetcls_flow.o
+ifeq ($(CONFIG_ARM64_SVE),y)
+venetcls-y                      += memcpy-sve.o
+endif
diff --git a/net/venetcls/asmdefs.S b/net/venetcls/asmdefs.S
new file mode 100644
index 000000000000..8138a94c18af
--- /dev/null
+++ b/net/venetcls/asmdefs.S
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASMDEFS_H
+#define _ASMDEFS_H
+
+/* Branch Target Identification support.  */
+#define BTI_C		hint	34
+#define BTI_J		hint	36
+/* Return address signing support (pac-ret).  */
+#define PACIASP		hint	25; .cfi_window_save
+#define AUTIASP		hint	29; .cfi_window_save
+
+/* GNU_PROPERTY_AARCH64_* macros from elf.h.  */
+#define FEATURE_1_AND 0xc0000000
+#define FEATURE_1_BTI 1
+#define FEATURE_1_PAC 2
+
+/* Add a NT_GNU_PROPERTY_TYPE_0 note.  */
+#define GNU_PROPERTY(type, value)	\
+	.section .note.gnu.property, "a";	\
+	.p2align 3;				\
+	.word 4;				\
+	.word 16;				\
+	.word 5;				\
+	.asciz "GNU";				\
+	.word type;				\
+	.word 4;				\
+	.word value;				\
+	.word 0;				\
+	.text
+
+#ifndef WANT_GNU_PROPERTY
+#define WANT_GNU_PROPERTY 1
+#endif
+
+#if WANT_GNU_PROPERTY
+/* Add property note with supported features to all asm files.  */
+GNU_PROPERTY(FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC)
+#endif
+
+#define ENTRY_ALIGN(name, alignment)	\
+	.global name;		\
+	.type name, %function;	\
+	.align alignment;	\
+name:				\
+	.cfi_startproc;		\
+	BTI_C;
+
+#define ENTRY(name)	ENTRY_ALIGN(name, 6)
+
+#define ENTRY_ALIAS(name)	\
+	.global name;		\
+	.type name, %function;	\
+  name:
+
+#define END(name)	\
+	.cfi_endproc;	\
+	.size name, .-name;
+
+#define L(l) .L ## l
+
+#endif
diff --git a/net/venetcls/memcpy-sve.S b/net/venetcls/memcpy-sve.S
new file mode 100644
index 000000000000..0452ff8b3afb
--- /dev/null
+++ b/net/venetcls/memcpy-sve.S
@@ -0,0 +1,157 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include "asmdefs.S"
+
+.arch armv8-a+sve
+
+#define dstin	x0
+#define src	x1
+#define count	x2
+#define dst	x3
+#define srcend	x4
+#define dstend	x5
+#define tmp1	x6
+#define vlen	x6
+
+#define A_q	q0
+#define B_q	q1
+#define C_q	q2
+#define D_q	q3
+#define E_q	q4
+#define F_q	q5
+#define G_q	q6
+#define H_q	q7
+
+/* This implementation handles overlaps and supports both memcpy and memmove
+   from a single entry point.  It uses unaligned accesses and branchless
+   sequences to keep the code small, simple and improve performance.
+   SVE vectors are used to speedup small copies.
+
+   Copies are split into 3 main cases: small copies of up to two SVE vectors
+   (32 bytes at the minimum vector length), medium copies of up to 128 bytes,
+   and large copies.  The overlap check costs little: only large copies need it.
+
+   Large copies use a software pipelined loop processing 64 bytes per iteration.
+   The source pointer is 16-byte aligned to minimize unaligned accesses.
+   The loop tail is handled by always copying 64 bytes from the end.
+*/
+
+ENTRY_ALIAS (__memmove_aarch64_sve)
+ENTRY (__memcpy_aarch64_sve)
+	cmp	count, 128
+	b.hi	L(copy_long)
+	cntb	vlen
+	cmp	count, vlen, lsl 1
+	b.hi	L(copy32_128)
+
+	whilelo p0.b, xzr, count
+	whilelo p1.b, vlen, count
+	ld1b	z0.b, p0/z, [src, 0, mul vl]
+	ld1b	z1.b, p1/z, [src, 1, mul vl]
+	st1b	z0.b, p0, [dstin, 0, mul vl]
+	st1b	z1.b, p1, [dstin, 1, mul vl]
+	ret
+
+	/* Medium copies: 33..128 bytes.  */
+L(copy32_128):
+	add	srcend, src, count
+	add	dstend, dstin, count
+	ldp	A_q, B_q, [src]
+	ldp	C_q, D_q, [srcend, -32]
+	cmp	count, 64
+	b.hi	L(copy128)
+	stp	A_q, B_q, [dstin]
+	stp	C_q, D_q, [dstend, -32]
+	ret
+
+	/* Copy 65..128 bytes.  */
+L(copy128):
+	ldp	E_q, F_q, [src, 32]
+	cmp	count, 96
+	b.ls	L(copy96)
+	ldp	G_q, H_q, [srcend, -64]
+	stp	G_q, H_q, [dstend, -64]
+L(copy96):
+	stp	A_q, B_q, [dstin]
+	stp	E_q, F_q, [dstin, 32]
+	stp	C_q, D_q, [dstend, -32]
+	ret
+
+	/* Copy more than 128 bytes.  */
+L(copy_long):
+	add	srcend, src, count
+	add	dstend, dstin, count
+
+	/* Use backwards copy if there is an overlap.  */
+	sub	tmp1, dstin, src
+	cmp	tmp1, count
+	b.lo	L(copy_long_backwards)
+
+	/* Copy 16 bytes and then align src to 16-byte alignment.  */
+	ldr	D_q, [src]
+	and	tmp1, src, 15
+	bic	src, src, 15
+	sub	dst, dstin, tmp1
+	add	count, count, tmp1	/* Count is now 16 too large.  */
+	ldp	A_q, B_q, [src, 16]
+	str	D_q, [dstin]
+	ldp	C_q, D_q, [src, 48]
+	subs	count, count, 128 + 16	/* Test and readjust count.  */
+	b.ls	L(copy64_from_end)
+L(loop64):
+	stp	A_q, B_q, [dst, 16]
+	ldp	A_q, B_q, [src, 80]
+	stp	C_q, D_q, [dst, 48]
+	ldp	C_q, D_q, [src, 112]
+	add	src, src, 64
+	add	dst, dst, 64
+	subs	count, count, 64
+	b.hi	L(loop64)
+
+	/* Write the last iteration and copy 64 bytes from the end.  */
+L(copy64_from_end):
+	ldp	E_q, F_q, [srcend, -64]
+	stp	A_q, B_q, [dst, 16]
+	ldp	A_q, B_q, [srcend, -32]
+	stp	C_q, D_q, [dst, 48]
+	stp	E_q, F_q, [dstend, -64]
+	stp	A_q, B_q, [dstend, -32]
+	ret
+
+	/* Large backwards copy for overlapping copies.
+	   Copy 16 bytes and then align srcend to 16-byte alignment.  */
+L(copy_long_backwards):
+	cbz	tmp1, L(return)
+	ldr	D_q, [srcend, -16]
+	and	tmp1, srcend, 15
+	bic	srcend, srcend, 15
+	sub	count, count, tmp1
+	ldp	A_q, B_q, [srcend, -32]
+	str	D_q, [dstend, -16]
+	ldp	C_q, D_q, [srcend, -64]
+	sub	dstend, dstend, tmp1
+	subs	count, count, 128
+	b.ls	L(copy64_from_start)
+
+L(loop64_backwards):
+	str	B_q, [dstend, -16]
+	str	A_q, [dstend, -32]
+	ldp	A_q, B_q, [srcend, -96]
+	str	D_q, [dstend, -48]
+	str	C_q, [dstend, -64]!
+	ldp	C_q, D_q, [srcend, -128]
+	sub	srcend, srcend, 64
+	subs	count, count, 64
+	b.hi	L(loop64_backwards)
+
+	/* Write the last iteration and copy 64 bytes from the start.  */
+L(copy64_from_start):
+	ldp	E_q, F_q, [src, 32]
+	stp	A_q, B_q, [dstend, -32]
+	ldp	A_q, B_q, [src]
+	stp	C_q, D_q, [dstend, -64]
+	stp	E_q, F_q, [dstin, 32]
+	stp	A_q, B_q, [dstin]
+L(return):
+	ret
+
+END (__memcpy_aarch64_sve)
diff --git a/net/venetcls/venetcls.h b/net/venetcls/venetcls.h
new file mode 100644
index 000000000000..14f02cd962c3
--- /dev/null
+++ b/net/venetcls/venetcls.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _NET_VENETCLS_H
+#define _NET_VENETCLS_H
+#include <linux/if.h>
+#include <linux/mutex.h>
+#include <linux/cpufeature.h>
+
+#define VECLS_MAX_NETDEV_NUM 8
+#define VECLS_MAX_RXQ_NUM_PER_DEV 256
+#define VECLS_MAX_CPU_NUM 1024
+
+#define VECLS_TIMEOUT (5 * HZ)
+#define VECLS_NO_FILTER 0xffff
+#define VECLS_NO_CPU 0xffff
+
+#define RXQ_MAX_USECNT	0xFF
+
+struct vecls_netdev_queue_info {
+	int irq;
+	int affinity_cpu;
+};
+
+struct vecls_netdev_info {
+	char				dev_name[IFNAMSIZ];
+	struct net_device		*netdev;
+	int				rxq_num;
+	struct vecls_netdev_queue_info	rxq[VECLS_MAX_RXQ_NUM_PER_DEV];
+	int				old_filter_state;
+};
+
+struct vecls_rxq {
+	int rxq_id;
+	int status;
+};
+
+struct vecls_numa_clusterinfo {
+	int cluster_id;
+	int cur_freeidx;
+	struct vecls_rxq rxqs[VECLS_MAX_RXQ_NUM_PER_DEV];
+};
+
+struct vecls_numa_bound_dev_info {
+	unsigned char bitmap_rxq[VECLS_MAX_RXQ_NUM_PER_DEV];
+	struct vecls_numa_clusterinfo *cluster_info;
+};
+
+struct vecls_numa_info {
+	DECLARE_BITMAP(avail_cpus, VECLS_MAX_CPU_NUM);
+	struct vecls_numa_bound_dev_info bound_dev[VECLS_MAX_NETDEV_NUM];
+};
+
+struct cmd_context {
+	char netdev[IFNAMSIZ];
+	bool is_ipv6;
+	u32 dip4;
+	u32 dip6[4];
+	u16 dport;
+	u16 action;
+	u32 ruleid;
+	u32 del_ruleid;
+	int ret_loc;
+};
+
+#define VECLS_SK_RULE_HASHSIZE	256
+#define VECLS_SK_RULE_HASHMASK	(VECLS_SK_RULE_HASHSIZE - 1)
+
+struct vecls_sk_rule_list {
+	struct hlist_head hash[VECLS_SK_RULE_HASHSIZE];
+	/* Mutex serializing access to the ntuple rule list (presumably hash[] above; confirm) */
+	struct mutex mutex;
+};
+
+struct vecls_sk_rule {
+	struct hlist_node node;
+	int devid;
+	void *sk;
+	bool is_ipv6;
+	u32 dip4;
+	u32 dip6[4];
+	u16 dport;
+	int action;
+	int ruleid;
+	int nid;
+};
+
+struct vecls_sk_entry {
+	struct hlist_node node;
+	void *sk;
+	u32 sk_rule_hash;
+};
+
+struct vecls_dev_flow {
+	unsigned short cpu;
+	unsigned short filter;
+	unsigned long timeout;
+	int isvalid;
+};
+
+struct vecls_dev_flow_table {
+	unsigned int	mask;
+	struct rcu_head rcu;
+	struct vecls_dev_flow flows[];
+};
+
+struct vecls_sock_flow_table {
+	u32 mask;
+	u32 ents[] ____cacheline_aligned_in_smp;
+};
+
+#define VECLS_DEV_FLOW_TABLE_NUM	0x1000
+#define VECLS_SOCK_FLOW_TABLE_NUM	0x100000
+#define VECLS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct vecls_dev_flow_table) + \
+		((_num) * sizeof(struct vecls_dev_flow)))
+#define VECLS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct vecls_sock_flow_table, ents[_num]))
+
+#define ETH_ALL_FLAGS	(ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \
+			  ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH)
+#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \
+			  NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \
+			  NETIF_F_RXHASH)
+
+struct rmgr_ctrl {
+	int			driver_select;
+	unsigned long		*slot;
+	__u32			n_rules;
+	__u32			size;
+};
+
+struct cfg_param {
+	struct work_struct work;
+	struct cmd_context ctx;
+	struct sock *sk;
+	bool is_del;
+	int nid;
+	int cpu;
+};
+
+extern int match_ip_flag;
+extern int debug;
+extern int vecls_netdev_num;
+extern int vecls_numa_num;
+
+#define vecls_debug(fmt, ...)					\
+	do {							\
+		if (debug)					\
+			pr_info_ratelimited("venetcls [%s:%d]: " fmt,\
+					    __FILE__, __LINE__, ## __VA_ARGS__); \
+	} while (0)
+
+#define vecls_error(fmt, ...) \
+	pr_err_ratelimited("venetcls [%s:%d]: " fmt, __FILE__, __LINE__, ## __VA_ARGS__)
+
+struct vecls_netdev_info *get_vecls_netdev_info(unsigned int index);
+struct vecls_numa_info *get_vecls_numa_info(unsigned int nid);
+
+#ifdef CONFIG_ARM64_SVE	/* NOTE(review): SVE copy uses z0-z1/q0-q7; confirm callers hold kernel_neon_begin() */
+void *__memcpy_aarch64_sve(void *, const void *, size_t);	/* implemented in memcpy-sve.S */
+#define memcpy_r(dst, src, len)					\
+	do {							\
+		void *_dst = dst;				\
+		const void *_src = src;				\
+		size_t _len = len;				\
+		if (system_supports_sve())			\
+			__memcpy_aarch64_sve(_dst, _src, _len);	\
+		else						\
+			memcpy(_dst, _src, _len);			\
+	} while (0)
+#else	/* !CONFIG_ARM64_SVE: memcpy_r() is plain memcpy() */
+#define memcpy_r(dst, src, len) memcpy(dst, src, len)
+#endif	/* CONFIG_ARM64_SVE */
+
+int check_appname(char *task_name);
+int send_ethtool_ioctl(struct cmd_context *ctx, void *cmd);
+int alloc_rxq_id(int nid, int cpu, int devid);
+void free_rxq_id(int nid, int devid, int rxq_id);
+int vecls_ntuple_res_init(void);
+void vecls_ntuple_res_clean(void);
+int venetcls_ntuple_status(struct seq_file *seq, void *v);
+int vecls_flow_res_init(void);
+void vecls_flow_res_clean(void);
+int venetcls_flow_status(struct seq_file *seq, void *v);
+
+#endif	/* _NET_VENETCLS_H */
diff --git a/net/venetcls/venetcls_flow.c b/net/venetcls/venetcls_flow.c
new file mode 100644
index 000000000000..242254b9bfe0
--- /dev/null
+++ b/net/venetcls/venetcls_flow.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/inetdevice.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/inet.h>
+#include <linux/venetcls.h>
+#include <net/sock.h>
+
+#include "venetcls.h"
+
+static u32 vecls_cpu_mask;
+static struct vecls_sock_flow_table __rcu *vecls_sock_flow_table;
+static DEFINE_MUTEX(vecls_sock_flow_mutex);
+static DEFINE_SPINLOCK(vecls_dev_flow_lock);
+
+bool is_vecls_config_netdev(const char *name)
+{
+	struct vecls_netdev_info *info;
+	int idx = 0;
+
+	/* Scan every configured slot for an entry whose dev_name equals name. */
+	while (idx < vecls_netdev_num) {
+		info = get_vecls_netdev_info(idx++);
+		if (info && !strcmp(info->dev_name, name))
+			return true;
+	}
+
+	/* No configured device carries this name. */
+	return false;
+}
+
+static bool _vecls_timeout(struct net_device *dev, u16 rxq_index,
+			   u32 flow_id, u16 filter_id)
+{
+	struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index;	/* rxq_index is not range-checked here */
+	struct vecls_dev_flow_table *flow_table;
+	struct vecls_dev_flow *rflow;
+	bool expire = true;	/* default: report the filter as expirable */
+	unsigned int cpu;
+	/* vecls_timeout hook: true unless the matching flow was refreshed within VECLS_TIMEOUT. */
+	rcu_read_lock();
+	flow_table = rcu_dereference(rxqueue->vecls_ftb);
+	if (flow_table && flow_id <= flow_table->mask) {
+		rflow = &flow_table->flows[flow_id];
+		cpu = READ_ONCE(rflow->cpu);
+		if (rflow->filter == filter_id && cpu < nr_cpu_ids) {	/* entry still owns this filter */
+			if (time_before(jiffies, rflow->timeout + VECLS_TIMEOUT)) {
+				expire = false;	/* recently refreshed: keep it */
+			} else {
+				rflow->isvalid = 0;	/* stale: invalidate before clearing the CPU */
+				WRITE_ONCE(rflow->cpu, VECLS_NO_CPU);
+			}
+		}
+	}
+	rcu_read_unlock();
+	if (expire)
+		vecls_debug("%s, dev:%s, rxq:%d, flow_id:%u, filter_id:%d, expire:%d\n", __func__,
+			    dev->name, rxq_index, flow_id, filter_id, expire);
+	return expire;
+}
+
+static void _vecls_flow_update(struct sock *sk)
+{
+	struct vecls_sock_flow_table *tb;
+	unsigned int hash, index;
+	u32 val;
+	u32 cpu = raw_smp_processor_id();	/* CPU the caller is running on right now */
+	/* vecls_flow_update hook: record the current CPU for sk's rx hash in the sock flow table. */
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return;
+
+	if (check_appname(current->comm))	/* non-zero result: this task is skipped */
+		return;
+
+	rcu_read_lock();
+	tb = rcu_dereference(vecls_sock_flow_table);
+	hash = READ_ONCE(sk->sk_rxhash);
+	if (tb && hash) {	/* a zero hash is never recorded */
+		index = hash & tb->mask;
+		val = hash & ~vecls_cpu_mask;	/* upper hash bits tag the entry ... */
+		val |= cpu;	/* ... low bits carry the CPU number */
+
+		if (READ_ONCE(tb->ents[index]) != val)	/* skip the store when nothing changed */
+			WRITE_ONCE(tb->ents[index], val);
+	}
+	rcu_read_unlock();
+}
+
+static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *skb)
+{
+	struct vecls_numa_bound_dev_info *bound_dev = NULL;
+	struct vecls_netdev_info *vecls_dev;
+	struct vecls_numa_info *numa_info;
+	int i, devid, rxq_num, rxq_id;
+	u32 hash, index;
+	/* Hash skb onto one of the usable RX queue slots recorded for (nid, dev); -1 if none. */
+	numa_info = get_vecls_numa_info(nid);
+	if (!numa_info)
+		return -1;
+
+	for (devid = 0; devid < vecls_netdev_num; devid++) {	/* find dev's slot by name */
+		vecls_dev = get_vecls_netdev_info(devid);
+		if (!vecls_dev)
+			continue;
+		if (strcmp(vecls_dev->dev_name, dev->name) == 0) {
+			bound_dev = &numa_info->bound_dev[devid];
+			break;
+		}
+	}
+	if (!bound_dev)
+		return -1;
+
+	rxq_num = 0;	/* first pass: count slots not equal to RXQ_MAX_USECNT */
+	for (i = 0; i < VECLS_MAX_RXQ_NUM_PER_DEV; i++) {
+		if (bound_dev->bitmap_rxq[i] == RXQ_MAX_USECNT)
+			continue;
+		rxq_num++;
+	}
+	if (rxq_num == 0)
+		return -1;
+	hash = skb_get_hash(skb);
+	index = hash % rxq_num;	/* which of the counted slots to use */
+
+	i = 0;	/* second pass: return the index-th counted slot */
+	for (rxq_id = 0; rxq_id < VECLS_MAX_RXQ_NUM_PER_DEV; rxq_id++) {
+		if (bound_dev->bitmap_rxq[rxq_id] == RXQ_MAX_USECNT)
+			continue;
+		if (i++ == index)
+			return rxq_id;
+	}
+
+	vecls_debug("%s skb:%p, no found rxq\n", __func__, skb);
+	return -1;
+}
+
+static void set_vecls_cpu(struct net_device *dev, struct sk_buff *skb,
+			  struct vecls_dev_flow *old_rflow, int old_rxq_id, u16 next_cpu)
+{
+	struct netdev_rx_queue *rxqueue;
+	struct vecls_dev_flow_table *dtb;
+	struct vecls_dev_flow *rflow;
+	u32 flow_id, hash;
+	int rxq_index, rc;
+	/* Ask the driver to steer skb's flow to an RX queue taken from next_cpu's NUMA node. */
+	if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap ||
+	    !(dev->features & NETIF_F_NTUPLE) || !dev->netdev_ops->ndo_rx_flow_steer)
+		return;
+	/* Reject a missing, out-of-range or unchanged queue before indexing dev->_rx. */
+	rxq_index = flow_get_queue_idx(dev, cpu_to_node(next_cpu), skb);
+	if (rxq_index < 0 || rxq_index >= dev->num_rx_queues ||
+	    rxq_index == skb_get_rx_queue(skb)) {
+		vecls_debug("%s skb:%p, old_rxq:%d, next_cpu:%d new_rxq:%d\n",
+			    __func__, skb, old_rxq_id, next_cpu, rxq_index);
+		return;
+	}
+
+	rxqueue = dev->_rx + rxq_index;
+	dtb = rcu_dereference(rxqueue->vecls_ftb);	/* caller holds rcu_read_lock() */
+	if (!dtb)
+		return;
+
+	hash = skb_get_hash(skb);
+	flow_id = hash & dtb->mask;
+	rflow = &dtb->flows[flow_id];
+	if (rflow->isvalid && cpu_to_node(rflow->cpu) == cpu_to_node(next_cpu)) {
+		rflow->timeout = jiffies;	/* already steered to that node: just refresh */
+		return;
+	}
+
+	rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id);
+	if (rc < 0) {	/* driver refused; leave the entry untouched */
+		vecls_debug("skb:%p rxq:%d hash:0x%x flow_id:%u old_rxq:%d rflow->cpu:%d rflow->isvalid:%d next_cpu:%d rc:%d\n",
+			    skb, rxq_index, hash, flow_id, old_rxq_id, rflow->cpu,
+			    rflow->isvalid, next_cpu, rc);
+		return;
+	}
+
+	rflow->filter = rc;	/* non-negative return value is stored as the filter id */
+	rflow->isvalid = 1;
+	rflow->timeout = jiffies;
+	if (old_rflow->filter == rflow->filter)	/* the old entry no longer owns this filter id */
+		old_rflow->filter = VECLS_NO_FILTER;
+	rflow->cpu = next_cpu;
+}
+
+static int get_cpu_in_numa(int tcpu, u32 hash)
+{
+	const struct cpumask *mask;
+	int nr_cpus, cpu, index;
+	/* Pick, by hash, one CPU out of the cpumask of tcpu's NUMA node; -1 if the mask is empty. */
+	mask = cpumask_of_node(cpu_to_node(tcpu));
+	nr_cpus = cpumask_weight(mask);
+	if (nr_cpus == 0)
+		return -1;
+
+	index = reciprocal_scale(hash, nr_cpus);	/* position within the node's mask */
+	if (index < 0)
+		return -1;
+
+	cpu = cpumask_first(mask);
+	while (--nr_cpus > 0) {	/* advance index steps through the mask */
+		if (index == 0)
+			break;
+		cpu = cpumask_next(cpu, mask);
+		index--;
+	}
+	return cpu;
+}
+
+static void __vecls_set_cpu(struct sk_buff *skb, struct net_device *ndev,
+			    struct vecls_sock_flow_table *tb, struct vecls_dev_flow_table *dtb,
+			    int old_rxq_id, int *rcpu, int *last_qtail)
+{
+	u32 last_recv_cpu, hash, val, cpu, tcpu;
+	struct vecls_dev_flow *rflow;
+	int newcpu;
+	/* Choose a CPU on the node that last read this flow, report it via *rcpu, maybe steer. */
+	cpu = raw_smp_processor_id();
+	skb_reset_network_header(skb);
+	hash = skb_get_hash(skb);
+	if (!hash)
+		return;
+
+	val = READ_ONCE(tb->ents[hash & tb->mask]);
+	last_recv_cpu = val & vecls_cpu_mask;
+	rflow = &dtb->flows[hash & dtb->mask];
+	tcpu = READ_ONCE(rflow->cpu);	/* _vecls_timeout() updates it with WRITE_ONCE() */
+	/* Entry must be tagged with this hash and name a real CPU (entries start as VECLS_NO_CPU). */
+	if (((val ^ hash) & ~vecls_cpu_mask) || last_recv_cpu >= nr_cpu_ids)
+		return;
+
+	newcpu = get_cpu_in_numa(last_recv_cpu, hash);
+	if (newcpu >= 0)
+		*rcpu = newcpu;	/* caller enqueues the skb to this CPU */
+	else
+		newcpu = last_recv_cpu;	/* *rcpu stays unset; only steering is considered */
+
+	if (cpu_to_node(cpu) == cpu_to_node(newcpu))	/* already on the right node */
+		return;
+
+	if (tcpu >= nr_cpu_ids)	/* no CPU recorded for this flow entry yet */
+		set_vecls_cpu(ndev, skb, rflow, old_rxq_id, newcpu);
+}
+
+static void _vecls_set_cpu(struct sk_buff *skb, int *cpu, int *last_qtail)
+{
+	struct net_device *ndev = skb->dev;
+	struct vecls_sock_flow_table *stb;
+	struct vecls_dev_flow_table *dtb;
+	struct netdev_rx_queue *rxqueue;
+	int rxq_id = -1;
+	/* vecls_set_cpu hook: sets *cpu >= 0 when skb should be enqueued to another CPU. */
+	*cpu = -1;
+	*last_qtail = 0;	/* queue-tail tracking is unused; clear the value, not the pointer */
+	if (!ndev)
+		return;
+
+	if (!is_vecls_config_netdev(ndev->name))	/* only configured interfaces are handled */
+		return;
+
+	rxqueue = ndev->_rx;
+	if (skb_rx_queue_recorded(skb)) {
+		rxq_id = skb_get_rx_queue(skb);
+		if (rxq_id >= ndev->real_num_rx_queues) {
+			vecls_debug("%s ndev:%s rxq:%d real_num:%d\n", __func__,
+				    ndev->name, rxq_id, ndev->real_num_rx_queues);
+			return;
+		}
+		rxqueue += rxq_id;
+	}
+
+	if (rxq_id < 0)	/* no RX queue was recorded on the skb */
+		return;
+
+	rcu_read_lock();
+	stb = rcu_dereference(vecls_sock_flow_table);
+	dtb = rcu_dereference(rxqueue->vecls_ftb);
+	if (stb && dtb)	/* both tables are needed to make a decision */
+		__vecls_set_cpu(skb, ndev, stb, dtb, rxq_id, cpu, last_qtail);
+	rcu_read_unlock();
+}
+
+static void vecls_dev_flow_table_free(struct rcu_head *rcu)
+{
+	struct vecls_dev_flow_table *table = container_of(rcu,
+			struct vecls_dev_flow_table, rcu);
+	vfree(table);	/* RCU callback queued by vecls_dev_flow_table_cleanup() */
+}
+
+static void vecls_dev_flow_table_cleanup(struct net_device *netdev, int queues)
+{
+	struct vecls_dev_flow_table *dtb;
+	struct netdev_rx_queue *queue;
+	int i;
+	/* Detach the flow table from the first `queues` RX queues and free each one via RCU. */
+	for (i = 0; i < queues; i++) {
+		queue = netdev->_rx + i;
+		spin_lock(&vecls_dev_flow_lock);
+		dtb = rcu_dereference_protected(queue->vecls_ftb,
+						lockdep_is_held(&vecls_dev_flow_lock));
+		rcu_assign_pointer(queue->vecls_ftb, NULL);	/* unpublish before freeing */
+		spin_unlock(&vecls_dev_flow_lock);
+		if (dtb)
+			call_rcu(&dtb->rcu, vecls_dev_flow_table_free);	/* asynchronous free */
+	}
+}
+
+static int vecls_dev_flow_table_release(void)
+{
+	struct vecls_netdev_info *vecls_dev;
+	struct net_device *netdev;
+	int devid;
+	/* Drop the per-queue flow tables of every configured device; always returns 0. */
+	for (devid = 0; devid < vecls_netdev_num; devid++) {
+		vecls_dev = get_vecls_netdev_info(devid);
+		if (!vecls_dev)
+			continue;
+		netdev = vecls_dev->netdev;
+		if (!netdev)
+			continue;
+		vecls_dev_flow_table_cleanup(netdev, netdev->num_rx_queues);
+	}
+	rcu_barrier();	/* the call_rcu() callbacks live in module text: wait until they ran */
+	return 0;
+}
+
+static int _vecls_dev_flow_table_init(struct net_device *netdev)
+{
+	struct vecls_dev_flow_table *table;
+	int size = VECLS_DEV_FLOW_TABLE_NUM;
+	struct netdev_rx_queue *queue;
+	int i, j, ret = 0;
+	/* Allocate and publish one flow table per RX queue of netdev; 0 or -ENOMEM. */
+	size = roundup_pow_of_two(size);	/* mask below requires a power of two */
+	vecls_debug("%s dev:%s num_rx_queues:%d mask:0x%x\n",
+		    __func__, netdev->name, netdev->num_rx_queues, size - 1);
+
+	for (i = 0; i < netdev->num_rx_queues; i++) {
+		table = vmalloc(VECLS_DEV_FLOW_TABLE_SIZE(size));	/* not zeroed */
+		if (!table) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		table->mask = size - 1;
+		for (j = 0; j < size; j++) {	/* only cpu and isvalid are initialised here */
+			table->flows[j].cpu = VECLS_NO_CPU;
+			table->flows[j].isvalid = 0;
+		}
+
+		queue = netdev->_rx + i;
+
+		spin_lock(&vecls_dev_flow_lock);
+		rcu_assign_pointer(queue->vecls_ftb, table);
+		spin_unlock(&vecls_dev_flow_lock);
+	}
+	return ret;
+fail:
+	vecls_dev_flow_table_cleanup(netdev, i);	/* undo the i queues already set up */
+	return ret;
+}
+
+static int vecls_dev_flow_table_init(void)
+{
+	struct vecls_netdev_info *vecls_dev;
+	struct net_device *ndev;
+	int i, err, devid;
+	/* Set up flow tables for every configured device; on error undo them and return it. */
+	for (devid = 0; devid < vecls_netdev_num; devid++) {
+		vecls_dev = get_vecls_netdev_info(devid);
+		if (!vecls_dev)
+			continue;
+		ndev = vecls_dev->netdev;
+		if (!ndev)
+			continue;
+		err = _vecls_dev_flow_table_init(ndev);	/* cleans up its own device on failure */
+		if (err)
+			goto out;
+	}
+
+	return 0;
+out:
+	for (i = 0; i < devid; i++) {	/* devices before the failing one */
+		vecls_dev = get_vecls_netdev_info(i);
+		ndev = vecls_dev ? vecls_dev->netdev : NULL;	/* slot may be absent, as above */
+		if (ndev)
+			vecls_dev_flow_table_cleanup(ndev, ndev->num_rx_queues);
+	}
+	rcu_barrier();	/* let the queued call_rcu() frees finish before reporting failure */
+	return err;
+}
+
+static const struct vecls_hook_ops vecls_flow_ops = {	/* installed by vecls_flow_res_init() */
+	.vecls_flow_update = _vecls_flow_update,
+	.vecls_set_cpu = _vecls_set_cpu,
+	.vecls_timeout = _vecls_timeout,
+	.vecls_cfg_rxcls = NULL,	/* this ops table provides no rxcls hook */
+};
+
+static int vecls_sock_flow_table_release(void)
+{
+	struct vecls_sock_flow_table *tb;
+	/* Unpublish the sock flow table, wait for RCU readers, then free it; always returns 0. */
+	mutex_lock(&vecls_sock_flow_mutex);
+	tb = rcu_dereference_protected(vecls_sock_flow_table,
+				       lockdep_is_held(&vecls_sock_flow_mutex));
+	if (tb)
+		rcu_assign_pointer(vecls_sock_flow_table, NULL);
+	mutex_unlock(&vecls_sock_flow_mutex);
+	synchronize_rcu();	/* runs even when tb is NULL */
+	vfree(tb);
+
+	return 0;
+}
+
+int venetcls_flow_status(struct seq_file *seq, void *v)
+{
+	struct vecls_netdev_info *vecls_dev;
+	struct vecls_dev_flow_table *dtb;
+	struct netdev_rx_queue *queue;
+	struct net_device *netdev;
+	int devid, i, j;
+	unsigned long timeout;
+	/* Print one row per valid, unexpired flow entry of every configured device. */
+	seq_printf(seq, "%-16s %-6s %-12s %-12s %-12s\n",
+		   "Interface", "rxq", "flowCPU", "filterId", "timeout");
+	spin_lock(&vecls_dev_flow_lock);	/* held across the whole walk */
+	for (devid = 0; devid < vecls_netdev_num; devid++) {
+		vecls_dev = get_vecls_netdev_info(devid);
+		if (!vecls_dev)
+			continue;
+		netdev = vecls_dev->netdev;
+		if (!netdev)
+			continue;
+		for (i = 0; i < netdev->num_rx_queues; i++) {
+			queue = netdev->_rx + i;
+			dtb = rcu_dereference_protected(queue->vecls_ftb,
+							lockdep_is_held(&vecls_dev_flow_lock));
+			if (!dtb)
+				continue;
+			for (j = 0; j < VECLS_DEV_FLOW_TABLE_NUM; j++) {	/* constant, not dtb->mask */
+				if (dtb->flows[j].cpu == VECLS_NO_CPU)
+					continue;
+				if (dtb->flows[j].isvalid == 0)
+					continue;
+				timeout = dtb->flows[j].timeout + VECLS_TIMEOUT;	/* expiry time */
+				if (time_before(jiffies, timeout)) {
+					seq_printf(seq, "%-16s %-6d %-12d %-12d %-12u\n",
+						   vecls_dev->dev_name, i, dtb->flows[j].cpu,
+						   dtb->flows[j].filter,
+						   jiffies_to_msecs(timeout - jiffies));	/* ms left */
+				}
+			}
+		}
+	}
+	spin_unlock(&vecls_dev_flow_lock);
+
+	return 0;
+}
+
+static int vecls_sock_flow_table_init(void)
+{
+	struct vecls_sock_flow_table *table;
+	int size = VECLS_SOCK_FLOW_TABLE_NUM;
+	int i;
+	/* Allocate, fill and publish the global sock flow table; 0 or -ENOMEM. */
+	size = roundup_pow_of_two(size);	/* mask below requires a power of two */
+	table = vmalloc(VECLS_SOCK_FLOW_TABLE_SIZE(size));
+	if (!table)
+		return -ENOMEM;
+
+	vecls_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1;	/* low bits that hold a CPU number */
+	vecls_debug("nr_cpu_ids:%d, vecls_cpu_mask:0x%x\n", nr_cpu_ids, vecls_cpu_mask);
+
+	table->mask = size - 1;
+	for (i = 0; i < size; i++)
+		table->ents[i] = VECLS_NO_CPU;	/* initial value of every entry */
+
+	mutex_lock(&vecls_sock_flow_mutex);
+	rcu_assign_pointer(vecls_sock_flow_table, table);
+	mutex_unlock(&vecls_sock_flow_mutex);
+
+	return 0;
+}
+
+int vecls_flow_res_init(void)
+{
+	int err;
+	/* Create the sock and device flow tables, then install vecls_flow_ops as vecls_ops. */
+	err = vecls_sock_flow_table_init();
+	if (err)
+		return err;
+	err = vecls_dev_flow_table_init();
+	if (err)
+		goto clean;
+
+	RCU_INIT_POINTER(vecls_ops, &vecls_flow_ops);	/* hooks go live only after both tables exist */
+	synchronize_rcu();
+
+	return 0;
+clean:
+	vecls_sock_flow_table_release();	/* undo the first step */
+	return err;
+}
+
+void vecls_flow_res_clean(void)
+{
+	RCU_INIT_POINTER(vecls_ops, NULL);	/* unhook first so no new caller reaches the tables */
+	synchronize_rcu();	/* wait for hook callers already inside an RCU section */
+	vecls_sock_flow_table_release();
+	vecls_dev_flow_table_release();
+}
diff --git a/net/venetcls/venetcls_main.c b/net/venetcls/venetcls_main.c
new file mode 100644
index 000000000000..856ff3b4427e
--- /dev/null
+++ b/net/venetcls/venetcls_main.c
@@ -0,0 +1,1154 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/netdev_features.h>
+#include <linux/ethtool.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/proc_fs.h>
+#include <linux/rtnetlink.h>
+#include <linux/seq_file.h>
+#include "venetcls.h"
+
+int vecls_netdev_num; /* number of slots handed out from vecls_netdev_info_table */
+static struct vecls_netdev_info vecls_netdev_info_table[VECLS_MAX_NETDEV_NUM];
+
+int vecls_numa_num; /* set from num_online_nodes(); bounds vecls_numa_info_table */
+static int vecls_cluster_cpu_num, vecls_cluster_per_numa;
+static struct vecls_numa_info *vecls_numa_info_table;
+
+int debug;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "debug switch");
+
+static int mode; /* 0: ntuple resources, 1: flow resources (see vecls_init) */
+module_param(mode, int, 0444);
+MODULE_PARM_DESC(mode, "mode, default 0");
+
+static char ifname[64] = { 0 }; /* '#'-separated list of interface names */
+module_param_string(ifname, ifname, sizeof(ifname), 0444);
+MODULE_PARM_DESC(ifname, "ifname");
+
+static char appname[64] = "redis-server"; /* '#'-separated task-name prefixes */
+module_param_string(appname, appname, sizeof(appname), 0644);
+MODULE_PARM_DESC(appname, "appname, default redis-server");
+
+int match_ip_flag = 1;
+module_param(match_ip_flag, int, 0644);
+MODULE_PARM_DESC(match_ip_flag, "match ip flag");
+
+static int strategy; /* 1: cluster, 2: numa, 3: custom, anything else: default */
+module_param(strategy, int, 0444);
+MODULE_PARM_DESC(strategy, "strategy, default 0");
+
+static int rxq_multiplex_limit = 1; /* use-count ceiling checked when allocating an rxq */
+module_param(rxq_multiplex_limit, int, 0444);
+MODULE_PARM_DESC(rxq_multiplex_limit, "rxq multiplex limit num, default 1");
+
+static char irqname[64] = "comp"; /* extra substring accepted by check_irq_name() */
+module_param_string(irqname, irqname, sizeof(irqname), 0644);
+MODULE_PARM_DESC(irqname, "nic irq name string, default comp");
+
+/*
+ * Validate the module parameters needed at load time.
+ *
+ * Return: true when mode is 0 or 1 and ifname is not empty.
+ */
+static bool check_params(void)
+{
+	bool mode_ok = (mode == 0 || mode == 1);
+	bool ifname_ok = (ifname[0] != '\0');
+
+	return mode_ok && ifname_ok;
+}
+
+/*
+ * Check @task_name against the appname module parameter.
+ *
+ * appname is a '#'-separated list (app1#app2#appN). Each non-empty token
+ * is compared as a prefix of @task_name. An empty appname matches every
+ * task.
+ *
+ * Return: 0 on a match, -EOPNOTSUPP otherwise.
+ */
+int check_appname(char *task_name)
+{
+	char *cur, *sep;
+	size_t len;
+
+	if (appname[0] == '\0')
+		return 0;
+
+	for (cur = appname; *cur != '\0'; cur = sep + 1) {
+		sep = strchr(cur, '#');
+		len = sep ? (size_t)(sep - cur) : strlen(cur);
+
+		/* len == 0 means a leading or doubled '#': nothing to compare */
+		if (len && !strncmp(task_name, cur, len))
+			return 0;
+
+		/* no separator left: that was the last token */
+		if (!sep)
+			break;
+	}
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Translate the netdev feature bits of @dev into the legacy ETH_FLAG_*
+ * bitmap (LRO, RXVLAN, TXVLAN, NTUPLE, RXHASH).
+ */
+static u32 __ethtool_get_flags(struct net_device *dev)
+{
+	static const struct {
+		netdev_features_t feature;
+		u32 flag;
+	} map[] = {
+		{ NETIF_F_LRO,			ETH_FLAG_LRO },
+		{ NETIF_F_HW_VLAN_CTAG_RX,	ETH_FLAG_RXVLAN },
+		{ NETIF_F_HW_VLAN_CTAG_TX,	ETH_FLAG_TXVLAN },
+		{ NETIF_F_NTUPLE,		ETH_FLAG_NTUPLE },
+		{ NETIF_F_RXHASH,		ETH_FLAG_RXHASH },
+	};
+	unsigned int idx;
+	u32 result = 0;
+
+	for (idx = 0; idx < ARRAY_SIZE(map); idx++) {
+		if (dev->features & map[idx].feature)
+			result |= map[idx].flag;
+	}
+
+	return result;
+}
+
+/*
+ * Apply a legacy ETH_FLAG_* bitmap to @dev.
+ *
+ * @data is converted to netdev feature bits; only bits that differ from
+ * the current features and are present in dev->hw_features may change.
+ *
+ * Return: 0 on success, -EINVAL for unknown flag bits or when only part of
+ * the requested change is supported, -EOPNOTSUPP when none of it is.
+ */
+static int __ethtool_set_flags(struct net_device *dev, u32 data)
+{
+	static const struct {
+		u32 flag;
+		netdev_features_t feature;
+	} map[] = {
+		{ ETH_FLAG_LRO,		NETIF_F_LRO },
+		{ ETH_FLAG_RXVLAN,	NETIF_F_HW_VLAN_CTAG_RX },
+		{ ETH_FLAG_TXVLAN,	NETIF_F_HW_VLAN_CTAG_TX },
+		{ ETH_FLAG_NTUPLE,	NETIF_F_NTUPLE },
+		{ ETH_FLAG_RXHASH,	NETIF_F_RXHASH },
+	};
+	netdev_features_t wanted = 0, diff;
+	unsigned int idx;
+
+	if (data & ~ETH_ALL_FLAGS)
+		return -EINVAL;
+
+	for (idx = 0; idx < ARRAY_SIZE(map); idx++) {
+		if (data & map[idx].flag)
+			wanted |= map[idx].feature;
+	}
+
+	/* allow changing only bits set in hw_features */
+	diff = (wanted ^ dev->features) & ETH_ALL_FEATURES;
+	if (diff & ~dev->hw_features)
+		return (diff & dev->hw_features) ? -EINVAL : -EOPNOTSUPP;
+
+	dev->wanted_features = (dev->wanted_features & ~diff) | (wanted & diff);
+
+	__netdev_update_features(dev);
+
+	return 0;
+}
+
+/*
+ * Copy @size bytes of @rxnfc to @useraddr and, when @rule_buf is given,
+ * rxnfc->rule_cnt rule locations to the rule_locs offset behind it.
+ * Both copies go through memcpy_r().
+ */
+static void ethtool_rxnfc_copy_to_user(void *useraddr,
+				       const struct ethtool_rxnfc *rxnfc,
+				       size_t size, const u32 *rule_buf)
+{
+	void *rule_dst = useraddr + offsetof(struct ethtool_rxnfc, rule_locs);
+
+	memcpy_r(useraddr, rxnfc, size);
+	if (!rule_buf)
+		return;
+
+	memcpy_r(rule_dst, rule_buf, rxnfc->rule_cnt * sizeof(u32));
+}
+
+/*
+ * Forward an rxnfc "set" command to the driver's set_rxnfc() callback.
+ *
+ * For ETHTOOL_SRXFH only the part of struct ethtool_rxnfc up to and
+ * including ->data is read from @useraddr. After a successful
+ * ETHTOOL_SRXCLSRLINS the structure is copied back to @useraddr.
+ *
+ * Return: 0 on success, -EOPNOTSUPP without a set_rxnfc() callback, or the
+ * driver's error code.
+ */
+static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
+						u32 cmd, void *useraddr)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_rxnfc info;
+	size_t len;
+	int err;
+
+	if (!ops->set_rxnfc)
+		return -EOPNOTSUPP;
+
+	len = (cmd == ETHTOOL_SRXFH) ?
+	      offsetof(struct ethtool_rxnfc, data) + sizeof(info.data) :
+	      sizeof(info);
+
+	memcpy_r(&info, useraddr, len);
+	err = ops->set_rxnfc(dev, &info);
+	if (!err && cmd == ETHTOOL_SRXCLSRLINS)
+		ethtool_rxnfc_copy_to_user(useraddr, &info, len, NULL);
+
+	return err;
+}
+
+/*
+ * Forward an rxnfc "get" command to the driver's get_rxnfc() callback and
+ * copy the answer back to @useraddr.
+ *
+ * For ETHTOOL_GRXFH the short form of struct ethtool_rxnfc (up to ->data)
+ * is used unless FLOW_RSS is requested. For ETHTOOL_GRXCLSRLALL a
+ * temporary buffer of info.rule_cnt rule locations is allocated.
+ *
+ * Return: the driver's result (negative on error), -EOPNOTSUPP without a
+ * get_rxnfc() callback, -EINVAL on a command mismatch, -ENOMEM when the
+ * rule buffer cannot be allocated.
+ */
+static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
+						u32 cmd, void *useraddr)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_rxnfc info;
+	u32 *rules = NULL;
+	size_t len;
+	int ret;
+
+	if (!ops->get_rxnfc)
+		return -EOPNOTSUPP;
+
+	len = (cmd == ETHTOOL_GRXFH) ?
+	      offsetof(struct ethtool_rxnfc, data) + sizeof(info.data) :
+	      sizeof(info);
+
+	memcpy_r(&info, useraddr, len);
+
+	/* If FLOW_RSS was requested then user-space must be using the
+	 * new definition, as FLOW_RSS is newer.
+	 */
+	if (cmd == ETHTOOL_GRXFH && (info.flow_type & FLOW_RSS)) {
+		len = sizeof(info);
+		memcpy_r(&info, useraddr, len);
+		/* The source may have changed between the two copies, so
+		 * check again that FLOW_RSS is still requested.
+		 */
+		if (!(info.flow_type & FLOW_RSS))
+			return -EINVAL;
+	}
+
+	if (info.cmd != cmd)
+		return -EINVAL;
+
+	if (info.cmd == ETHTOOL_GRXCLSRLALL && info.rule_cnt > 0) {
+		/* an oversized rule_cnt leaves rules NULL and yields -ENOMEM */
+		if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32))
+			rules = kcalloc(info.rule_cnt, sizeof(u32), GFP_KERNEL);
+		if (!rules)
+			return -ENOMEM;
+	}
+
+	ret = ops->get_rxnfc(dev, &info, rules);
+	if (ret >= 0)
+		ethtool_rxnfc_copy_to_user(useraddr, &info, len, rules);
+
+	kfree(rules);
+
+	return ret;
+}
+
+/*
+ * Query the channel configuration of @dev through get_channels() and copy
+ * the resulting struct ethtool_channels to @useraddr.
+ *
+ * Return: 0 on success, -EOPNOTSUPP without a get_channels() callback.
+ */
+static noinline_for_stack int ethtool_get_channels(struct net_device *dev,
+						   void *useraddr)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct ethtool_channels ch = { .cmd = ETHTOOL_GCHANNELS };
+
+	if (!ops->get_channels)
+		return -EOPNOTSUPP;
+
+	ops->get_channels(dev, &ch);
+	memcpy_r(useraddr, &ch, sizeof(ch));
+
+	return 0;
+}
+
+/*
+ * Run @actor on @dev and store its result, together with @cmd, as a
+ * struct ethtool_value at @useraddr.
+ *
+ * Return: 0 on success, -EOPNOTSUPP when @actor is NULL.
+ */
+static int ethtool_get_value(struct net_device *dev, char *useraddr,
+			     u32 cmd, u32 (*actor)(struct net_device *))
+{
+	struct ethtool_value reply = { .cmd = cmd };
+
+	if (!actor)
+		return -EOPNOTSUPP;
+
+	reply.data = actor(dev);
+	memcpy_r(useraddr, &reply, sizeof(reply));
+
+	return 0;
+}
+
+/*
+ * Read a struct ethtool_value from @useraddr and hand its ->data to
+ * @actor.
+ *
+ * Return: the result of @actor, or -EOPNOTSUPP when @actor is NULL.
+ */
+static int ethtool_set_value(struct net_device *dev, char *useraddr,
+			     int (*actor)(struct net_device *, u32))
+{
+	struct ethtool_value request;
+
+	if (!actor)
+		return -EOPNOTSUPP;
+
+	memcpy_r(&request, useraddr, sizeof(request));
+
+	return actor(dev, request.data);
+}
+
+/*
+ * In-kernel counterpart of the ethtool ioctl for the few commands this
+ * module needs (flags, rxnfc get/set, channels).
+ *
+ * @net: namespace used to look up ifr->ifr_name
+ * @ifr: request; ifr->ifr_data points to a kernel buffer whose first u32 is
+ *       the ethtool command
+ *
+ * The device is looked up with __dev_get_by_name(), so no reference is
+ * taken; send_ethtool_ioctl() calls this with rtnl_lock held.
+ *
+ * Return: the result of the command handler, -ENODEV when the device does
+ * not exist or is not present, -EOPNOTSUPP for unhandled commands, or the
+ * error returned by the driver's begin() callback.
+ */
+static int dev_ethtool_kern(struct net *net, struct ifreq *ifr)
+{
+	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+	void *useraddr = ifr->ifr_data;
+	netdev_features_t old_features;
+	u32 ethcmd;
+	int rc;
+
+	if (!dev || !netif_device_present(dev))
+		return -ENODEV;
+
+	memcpy_r(&ethcmd, useraddr, sizeof(ethcmd));
+
+	if (dev->ethtool_ops->begin) {
+		rc = dev->ethtool_ops->begin(dev);
+		if (rc < 0)
+			return rc;
+	}
+	old_features = dev->features;
+
+	switch (ethcmd) {
+	case ETHTOOL_GFLAGS:
+		rc = ethtool_get_value(dev, useraddr, ethcmd,
+				       __ethtool_get_flags);
+		break;
+	case ETHTOOL_SFLAGS:
+		rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
+		break;
+	case ETHTOOL_GRXFH:
+	case ETHTOOL_GRXRINGS:
+	case ETHTOOL_GRXCLSRLCNT:
+	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_GRXCLSRLALL:
+		rc = ethtool_get_rxnfc(dev, ethcmd, useraddr);
+		break;
+	case ETHTOOL_SRXFH:
+	case ETHTOOL_SRXCLSRLDEL:
+	case ETHTOOL_SRXCLSRLINS:
+		rc = ethtool_set_rxnfc(dev, ethcmd, useraddr);
+		break;
+	case ETHTOOL_GCHANNELS:
+		rc = ethtool_get_channels(dev, useraddr);
+		break;
+	default:
+		/* includes ETHTOOL_PERQUEUE, which is not supported here */
+		rc = -EOPNOTSUPP;
+	}
+
+	if (dev->ethtool_ops->complete)
+		dev->ethtool_ops->complete(dev);
+
+	/* a set command may have toggled features: notify listeners */
+	if (old_features != dev->features)
+		netdev_features_change(dev);
+
+	return rc;
+}
+
+/*
+ * Issue an ethtool command from kernel context.
+ *
+ * @ctx: supplies the interface name (ctx->netdev)
+ * @cmd: kernel buffer holding the ethtool command structure
+ *
+ * The request is run against init_net with rtnl_lock held.
+ *
+ * Return: the result of dev_ethtool_kern().
+ */
+int send_ethtool_ioctl(struct cmd_context *ctx, void *cmd)
+{
+	struct ifreq req;
+	int err;
+
+	memset(&req, 0, sizeof(req));
+	strscpy(req.ifr_name, ctx->netdev, IFNAMSIZ);
+	req.ifr_data = cmd;
+
+	rtnl_lock();
+	err = dev_ethtool_kern(&init_net, &req);
+	rtnl_unlock();
+
+	return err;
+}
+
+/*
+ * Return the vecls_netdev_info_table slot for @index, or NULL when @index
+ * is outside the table. Slots at or beyond vecls_netdev_num are returned
+ * as well; callers bound their loops by vecls_netdev_num.
+ */
+struct vecls_netdev_info *get_vecls_netdev_info(unsigned int index)
+{
+	return index < VECLS_MAX_NETDEV_NUM ? &vecls_netdev_info_table[index] : NULL;
+}
+
+/*
+ * Hand out the next unused slot of vecls_netdev_info_table and bump
+ * vecls_netdev_num. Returns NULL once all VECLS_MAX_NETDEV_NUM slots are
+ * taken.
+ */
+static struct vecls_netdev_info *alloc_vecls_netdev_info(void)
+{
+	struct vecls_netdev_info *slot;
+
+	if (vecls_netdev_num >= VECLS_MAX_NETDEV_NUM)
+		return NULL;
+
+	slot = &vecls_netdev_info_table[vecls_netdev_num];
+	vecls_netdev_num++;
+
+	return slot;
+}
+
+/*
+ * Decide whether the interrupt called @irq_name belongs to @vecls_dev.
+ *
+ * A name is rejected up front when it contains none of "TxRx", "comp" and
+ * "rx", the irqname parameter is non-empty, and irqname does not occur in
+ * it either. Otherwise it is accepted if it contains the interface name or
+ * the name of the netdev's parent device.
+ */
+static bool check_irq_name(const char *irq_name, struct vecls_netdev_info *vecls_dev)
+{
+	struct device *parent = vecls_dev->netdev->dev.parent;
+	bool has_known_tag = strstr(irq_name, "TxRx") || strstr(irq_name, "comp") ||
+			     strstr(irq_name, "rx");
+	bool misses_custom = strlen(irqname) > 0 && !strstr(irq_name, irqname);
+
+	if (!has_known_tag && misses_custom)
+		return false;
+
+	if (strstr(irq_name, vecls_dev->dev_name))
+		return true;
+
+	return parent && strstr(irq_name, dev_name(parent));
+}
+
+/*
+ * Walk all irq descriptors and record, in discovery order, every interrupt
+ * whose action name matches @vecls_dev (see check_irq_name()).
+ *
+ * For each match the irq number and the first CPU of its effective
+ * affinity mask are stored in the next vecls_dev->rxq[] slot. The walk
+ * stops once VECLS_MAX_RXQ_NUM_PER_DEV slots are filled.
+ */
+static void get_netdev_queue_info(struct vecls_netdev_info *vecls_dev)
+{
+	struct vecls_netdev_queue_info *slot;
+	struct irq_desc *desc;
+	int irq, cpu;
+
+	for_each_irq_desc(irq, desc) {
+		const char *name = desc->action ? desc->action->name : NULL;
+
+		if (!name || !check_irq_name(name, vecls_dev))
+			continue;
+		if (vecls_dev->rxq_num >= VECLS_MAX_RXQ_NUM_PER_DEV)
+			break;
+
+		slot = &vecls_dev->rxq[vecls_dev->rxq_num++];
+		cpu = cpumask_first(irq_data_get_effective_affinity_mask(&desc->irq_data));
+		slot->irq = irq;
+		slot->affinity_cpu = cpu;
+		vecls_debug("irq=%d, [%s], rxq_id=%d affinity_cpu:%d\n",
+			    irq, name, vecls_dev->rxq_num - 1, cpu);
+	}
+}
+
+/*
+ * Make sure the ntuple feature is enabled on @dev_name.
+ *
+ * @old_state is set to true only when ntuple was already on; it is left
+ * untouched otherwise. When ntuple is off it is switched on and the flags
+ * are read back to confirm the change.
+ *
+ * Return: 0 on success, the ethtool error on failure, -EOPNOTSUPP when the
+ * flag is still clear after the set request.
+ */
+static int vecls_filter_enable(const char *dev_name, bool *old_state)
+{
+	struct ethtool_value flags = {0};
+	struct cmd_context ctx = {0};
+	int err;
+
+	strscpy(ctx.netdev, dev_name, IFNAMSIZ);
+
+	/* step 1: read the current flags */
+	flags.cmd = ETHTOOL_GFLAGS;
+	err = send_ethtool_ioctl(&ctx, &flags);
+	if (err) {
+		vecls_error("get %s flags fail, ret:%d\n", dev_name, err);
+		return err;
+	}
+	if (flags.data & ETH_FLAG_NTUPLE) {
+		*old_state = true;
+		vecls_debug("%s ntuple is already on\n", dev_name);
+		return 0;
+	}
+
+	/* step 2: request ntuple on top of the flags just read */
+	flags.cmd = ETHTOOL_SFLAGS;
+	flags.data |= ETH_FLAG_NTUPLE;
+	err = send_ethtool_ioctl(&ctx, &flags);
+	if (err) {
+		vecls_error("set %s flags fail, ret:%d\n", dev_name, err);
+		return err;
+	}
+
+	/* step 3: read back and verify */
+	flags.cmd = ETHTOOL_GFLAGS;
+	flags.data = 0;
+	err = send_ethtool_ioctl(&ctx, &flags);
+	if (err) {
+		vecls_error("get %s flags fail, ret:%d\n", dev_name, err);
+		return err;
+	}
+	if (flags.data & ETH_FLAG_NTUPLE)
+		return 0;
+
+	vecls_error("enable ntuple feature fail!\n");
+	return -EOPNOTSUPP;
+}
+
+/*
+ * Put the ntuple feature of @dev_name back to @old_state.
+ *
+ * Nothing is written when the current state already equals @old_state.
+ * Failures are logged and otherwise ignored.
+ */
+static void vecls_filter_restore(const char *dev_name, bool old_state)
+{
+	struct ethtool_value flags = {0};
+	struct cmd_context ctx = {0};
+	bool ntuple_on;
+	int err;
+
+	strscpy(ctx.netdev, dev_name, IFNAMSIZ);
+
+	flags.cmd = ETHTOOL_GFLAGS;
+	err = send_ethtool_ioctl(&ctx, &flags);
+	if (err) {
+		vecls_error("get %s flags fail, ret:%d\n", dev_name, err);
+		return;
+	}
+
+	ntuple_on = !!(flags.data & ETH_FLAG_NTUPLE);
+	if (ntuple_on == old_state)
+		return;
+
+	/* flip only the ntuple bit, keep the other flags as read */
+	flags.cmd = ETHTOOL_SFLAGS;
+	if (old_state)
+		flags.data |= ETH_FLAG_NTUPLE;
+	else
+		flags.data &= ~ETH_FLAG_NTUPLE;
+
+	err = send_ethtool_ioctl(&ctx, &flags);
+	if (err)
+		vecls_error("set %s flags fail, ret:%d\n", dev_name, err);
+}
+
+/*
+ * Register one network device with venetcls.
+ *
+ * @if_name: start of the interface name; need not be NUL-terminated at
+ *           @length because it may point into a '#'-separated list
+ * @length:  number of characters of @if_name that form the name
+ *
+ * The device must exist in init_net, be up and not be a loopback device.
+ * ntuple filtering is enabled on it, a slot in vecls_netdev_info_table is
+ * filled in and its rx interrupts are collected. On success the reference
+ * taken by dev_get_by_name() is kept in the slot and dropped later by
+ * clean_vecls_netdev_info().
+ *
+ * Return: 0 on success; -EINVAL for an empty or over-long name, -ENODEV,
+ * -ENETDOWN, -EOPNOTSUPP, -ENOMEM or the ethtool error otherwise.
+ */
+static int init_single_vecls_dev(char *if_name, unsigned int length)
+{
+	struct vecls_netdev_info *vecls_dev;
+	char dev_name[IFNAMSIZ] = { 0 };
+	struct net_device *netdev;
+	bool old_state = false;
+	int ret;
+
+	if (length == 0 || length >= IFNAMSIZ) {
+		vecls_error("invalid ifname length:%u\n", length);
+		return -EINVAL;
+	}
+
+	/*
+	 * Copy exactly this token: copying up to the next NUL would drag the
+	 * rest of an "eth0#eth1" list into the name. dev_name is zero-filled,
+	 * so the copy stays NUL-terminated.
+	 */
+	memcpy(dev_name, if_name, length);
+
+	netdev = dev_get_by_name(&init_net, dev_name);
+	if (!netdev) {
+		vecls_error("dev [%s] is not exist!\n", dev_name);
+		return -ENODEV;
+	}
+
+	if (!(netdev->flags & IFF_UP)) {
+		ret = -ENETDOWN;
+		vecls_error("dev:%s not up! flags=%d.\n", dev_name, netdev->flags);
+		goto out;
+	}
+
+	if (netdev->flags & IFF_LOOPBACK) {
+		ret = -EOPNOTSUPP;
+		vecls_error("Do not support loopback.\n");
+		goto out;
+	}
+
+	ret = vecls_filter_enable(dev_name, &old_state);
+	if (ret) {
+		vecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret);
+		goto out;
+	}
+
+	vecls_dev = alloc_vecls_netdev_info();
+	if (!vecls_dev) {
+		ret = -ENOMEM;
+		/* no slot will remember old_state, so undo the change now */
+		vecls_filter_restore(dev_name, old_state);
+		vecls_error("alloc vecls_dev fail! vecls_netdev_num:%d\n", vecls_netdev_num);
+		goto out;
+	}
+
+	memcpy_r(vecls_dev->dev_name, dev_name, IFNAMSIZ);
+	vecls_dev->old_filter_state = old_state;
+	vecls_dev->netdev = netdev;
+	get_netdev_queue_info(vecls_dev);
+	return 0;
+
+out:
+	dev_put(netdev);
+	return ret;
+}
+
+/*
+ * Undo init_single_vecls_dev() for every registered device: restore the
+ * saved ntuple state, drop the netdev reference and finally reset
+ * vecls_netdev_num to 0.
+ */
+static void clean_vecls_netdev_info(void)
+{
+	struct vecls_netdev_info *info;
+	struct net_device *held;
+	int idx;
+
+	for (idx = 0; idx < vecls_netdev_num; idx++) {
+		info = get_vecls_netdev_info(idx);
+		if (!info)
+			continue;
+
+		vecls_filter_restore(info->dev_name, info->old_filter_state);
+
+		held = info->netdev;
+		if (!held)
+			continue;
+		/* clear the slot before the reference goes away */
+		info->netdev = NULL;
+		dev_put(held);
+	}
+
+	vecls_netdev_num = 0;
+}
+
+/*
+ * Parse a '#'-separated list of interface names and register each one via
+ * init_single_vecls_dev(). Empty tokens are skipped; parsing stops at the
+ * first failure.
+ *
+ * Return: 0 when the last processed name was registered, the error of the
+ * failing name otherwise, or -ENODEV when the list holds no name at all.
+ */
+static int init_vecls_netdev_info(char *netdev_str)
+{
+	int err = -ENODEV;
+	char *cur, *sep;
+
+	for (cur = netdev_str; *cur != '\0'; cur = sep + 1) {
+		sep = strchr(cur, '#');
+
+		/* leading or doubled '#': nothing between the separators */
+		if (sep == cur)
+			continue;
+
+		err = init_single_vecls_dev(cur, sep ? sep - cur : strlen(cur));
+
+		/* stop on failure or after the last name */
+		if (err || !sep)
+			break;
+	}
+
+	return err;
+}
+
+/*
+ * Return the vecls_numa_info_table entry for node @nid, or NULL when @nid
+ * is not below vecls_numa_num.
+ */
+struct vecls_numa_info *get_vecls_numa_info(unsigned int nid)
+{
+	return nid >= vecls_numa_num ? NULL : &vecls_numa_info_table[nid];
+}
+
+/*
+ * Free vecls_numa_info_table and reset vecls_numa_num.
+ *
+ * The pointer is cleared as well so that a repeated call, or a stray
+ * access after cleanup, cannot touch freed memory.
+ */
+static void clean_vecls_numa_info(void)
+{
+	vecls_numa_num = 0;
+	kfree(vecls_numa_info_table);
+	vecls_numa_info_table = NULL;
+}
+
+/*
+ * Fill numa_info->avail_cpus with the CPUs of node @nid. Only CPU numbers
+ * below VECLS_MAX_CPU_NUM are recorded.
+ */
+static void init_numa_avail_cpus(int nid, struct vecls_numa_info *numa_info)
+{
+	const struct cpumask *node_cpus = cpumask_of_node(nid);
+	int cpu;
+
+	vecls_debug("numa node %d: %*pb, %*pbl\n", nid, cpumask_pr_args(cpumask_of_node(nid)),
+		    cpumask_pr_args(cpumask_of_node(nid)));
+
+	bitmap_zero(numa_info->avail_cpus, VECLS_MAX_CPU_NUM);
+	for_each_cpu(cpu, node_cpus) {
+		/* the first CPU past the bitmap ends the walk */
+		if (cpu >= VECLS_MAX_CPU_NUM)
+			break;
+		set_bit(cpu, numa_info->avail_cpus);
+	}
+}
+
+/*
+ * Free the per-node, per-device cluster_info arrays allocated by
+ * init_numa_rxq_bitmap().
+ *
+ * Each pointer is cleared after it is freed, so the function may be called
+ * more than once (init_numa_rxq_bitmap() calls it on its own failure path)
+ * without double-freeing.
+ */
+static void clean_vecls_rxq(void)
+{
+	struct vecls_numa_bound_dev_info *bound_dev;
+	struct vecls_netdev_info *vecls_dev;
+	struct vecls_numa_info *numa_info;
+	int nid, devid;
+
+	for (nid = 0; nid < vecls_numa_num; nid++) {
+		numa_info = get_vecls_numa_info(nid);
+		if (!numa_info)
+			continue;
+		for (devid = 0; devid < vecls_netdev_num; devid++) {
+			vecls_dev = get_vecls_netdev_info(devid);
+			if (!vecls_dev)
+				continue;
+			bound_dev = &numa_info->bound_dev[devid];
+			kfree(bound_dev->cluster_info);
+			bound_dev->cluster_info = NULL;
+		}
+	}
+}
+
+/*
+ * Build, for node @nid, the per-device view of which rx queues are bound
+ * to it.
+ *
+ * For every registered device the use-count array bitmap_rxq is reset to
+ * RXQ_MAX_USECNT, a cluster_info array of vecls_cluster_per_numa entries
+ * is allocated, and each rx queue whose affinity CPU lies on @nid is
+ * marked free (count 0) and appended to the cluster derived from that CPU.
+ *
+ * Return: 0 on success, -ENOMEM when an allocation fails; in that case all
+ * cluster_info arrays are released through clean_vecls_rxq().
+ */
+static int init_numa_rxq_bitmap(int nid, struct vecls_numa_info *numa_info)
+{
+	int bound_rxq_num, cluster_id, cluster_idx, cur_idx;
+	struct vecls_numa_bound_dev_info *bound_dev;
+	struct vecls_netdev_info *vecls_dev;
+	int c, q, rxq_id, devid, cpu;
+
+	for (devid = 0; devid < vecls_netdev_num; devid++) {
+		vecls_dev = get_vecls_netdev_info(devid);
+		if (!vecls_dev)
+			continue;
+
+		bound_dev = &numa_info->bound_dev[devid];
+		bound_rxq_num = 0;
+		memset(bound_dev->bitmap_rxq, RXQ_MAX_USECNT, sizeof(bound_dev->bitmap_rxq));
+		bound_dev->cluster_info = kcalloc(vecls_cluster_per_numa,
+						  sizeof(*bound_dev->cluster_info), GFP_ATOMIC);
+		if (!bound_dev->cluster_info) {
+			clean_vecls_rxq();
+			return -ENOMEM;
+		}
+
+		/* start with every cluster slot unbound and unusable */
+		for (c = 0; c < vecls_cluster_per_numa; c++) {
+			for (q = 0; q < VECLS_MAX_RXQ_NUM_PER_DEV; q++) {
+				bound_dev->cluster_info[c].rxqs[q].rxq_id = -1;
+				bound_dev->cluster_info[c].rxqs[q].status = RXQ_MAX_USECNT;
+			}
+		}
+
+		for (rxq_id = 0; rxq_id < vecls_dev->rxq_num; rxq_id++) {
+			cpu = vecls_dev->rxq[rxq_id].affinity_cpu;
+			if (cpu_to_node(cpu) != nid)
+				continue;
+
+			bound_dev->bitmap_rxq[rxq_id] = 0;
+			cluster_id = cpu / vecls_cluster_cpu_num;
+			cluster_idx = cluster_id % vecls_cluster_per_numa;
+			bound_dev->cluster_info[cluster_idx].cluster_id = cluster_id;
+			cur_idx = bound_dev->cluster_info[cluster_idx].cur_freeidx++;
+			bound_dev->cluster_info[cluster_idx].rxqs[cur_idx].rxq_id = rxq_id;
+			bound_dev->cluster_info[cluster_idx].rxqs[cur_idx].status = 0;
+			bound_rxq_num++;
+			vecls_debug("cpu:%d cluster_id:%d cluster_idx:%d rxq_id:%d cur_idx:%d\n",
+				    cpu, cluster_id, cluster_idx, rxq_id, cur_idx);
+		}
+
+		vecls_debug("nid:%d, dev_id:%d, dev:%s, rxq_num:%d, bound_rxq_num:%d\n",
+			    nid, devid, vecls_dev->dev_name, vecls_dev->rxq_num, bound_rxq_num);
+	}
+
+	return 0;
+}
+
+/*
+ * Pick an rx queue of @bound_dev that sits in the same cluster as @cpu.
+ *
+ * Among the queues recorded for a matching cluster the least-used one is
+ * taken, provided its use count is still below rxq_multiplex_limit; its
+ * use count is then incremented.
+ *
+ * Return: the rx queue id, or -1 when no cluster matches or every matching
+ * queue has reached the limit.
+ */
+static int get_cluster_rxq(struct vecls_numa_bound_dev_info *bound_dev, int cpu)
+{
+	int cluster_id = cpu / vecls_cluster_cpu_num;
+	int rxq_id = -1;
+	int i, j;
+
+	for (i = 0; i < vecls_cluster_per_numa; i++) {
+		/* per-cluster search state: never carried over between entries */
+		int min_used_count = RXQ_MAX_USECNT;
+		int best = -1;
+
+		if (cluster_id != bound_dev->cluster_info[i].cluster_id)
+			continue;
+
+		for (j = 0; j < VECLS_MAX_RXQ_NUM_PER_DEV; j++) {
+			if (bound_dev->cluster_info[i].rxqs[j].rxq_id == -1)
+				continue;
+			if (bound_dev->cluster_info[i].rxqs[j].status < min_used_count) {
+				min_used_count = bound_dev->cluster_info[i].rxqs[j].status;
+				best = j;
+			}
+		}
+
+		if (best < 0 || min_used_count >= rxq_multiplex_limit) {
+			vecls_debug("cluster:%d no free rxq for cpu:%d\n", cluster_id, cpu);
+			continue;
+		}
+
+		rxq_id = bound_dev->cluster_info[i].rxqs[best].rxq_id;
+		bound_dev->cluster_info[i].rxqs[best].status++;
+		vecls_debug("cluster:%d cpu:%d alloc rxq_id:%d use:%d\n", cluster_id, cpu,
+			    rxq_id, bound_dev->cluster_info[i].rxqs[best].status);
+		/* one queue per request: do not charge further entries */
+		break;
+	}
+	vecls_debug("%s allcluster:%d rxq:%d for cpu:%d\n", __func__, cluster_id, rxq_id, cpu);
+	return rxq_id;
+}
+
+/*
+ * Drop one use of @rxq_id from the cluster bookkeeping of @bound_dev.
+ * The first slot that holds @rxq_id with a positive use count is
+ * decremented.
+ *
+ * Return: 0 when a slot was found, -1 otherwise.
+ */
+static int put_cluster_rxq(struct vecls_numa_bound_dev_info *bound_dev, int rxq_id)
+{
+	int c, q;
+
+	for (c = 0; c < vecls_cluster_per_numa; c++) {
+		for (q = 0; q < VECLS_MAX_RXQ_NUM_PER_DEV; q++) {
+			if (bound_dev->cluster_info[c].rxqs[q].rxq_id != rxq_id ||
+			    bound_dev->cluster_info[c].rxqs[q].status <= 0)
+				continue;
+
+			bound_dev->cluster_info[c].rxqs[q].status--;
+			vecls_debug("free rxq_id:%d use:%d\n", rxq_id,
+				    bound_dev->cluster_info[c].rxqs[q].status);
+			return 0;
+		}
+	}
+
+	vecls_debug("no match malloced rxq_id:%d\n", rxq_id);
+	return -1;
+}
+
+/*
+ * Allocate an rx queue of device @devid on node @nid for a task on @cpu.
+ *
+ * With strategy 1 a queue from the cluster of @cpu is tried first. If
+ * that fails, or with any other strategy, the least-used queue bound to
+ * the node is taken, provided its use count is below both RXQ_MAX_USECNT
+ * and rxq_multiplex_limit. The per-node use count of the chosen queue is
+ * incremented.
+ *
+ * Return: the rx queue id, or -EINVAL for a bad @nid or @devid or when no
+ * queue is available.
+ */
+int alloc_rxq_id(int nid, int cpu, int devid)
+{
+	struct vecls_numa_bound_dev_info *bound_dev;
+	int i, rxq_id = -1, min_used_count = RXQ_MAX_USECNT;
+	struct vecls_numa_info *numa_info;
+
+	numa_info = get_vecls_numa_info(nid);
+	if (!numa_info) {
+		vecls_error("error nid:%d\n", nid);
+		return -EINVAL;
+	}
+
+	/* devid is signed: reject negative values as well as too large ones */
+	if (devid < 0 || devid >= VECLS_MAX_NETDEV_NUM) {
+		vecls_error("error bound_dev index:%d\n", devid);
+		return -EINVAL;
+	}
+	bound_dev = &numa_info->bound_dev[devid];
+
+	if (strategy == 1) {
+		rxq_id = get_cluster_rxq(bound_dev, cpu);
+		if (rxq_id < 0 || rxq_id >= VECLS_MAX_RXQ_NUM_PER_DEV)
+			vecls_debug("failed to get rxq_id:%d in cluster, try numa\n", rxq_id);
+		else
+			goto found;
+	}
+
+	/* node-wide fallback: least-used queue bound to this node */
+	for (i = 0; i < VECLS_MAX_RXQ_NUM_PER_DEV; i++) {
+		if (bound_dev->bitmap_rxq[i] < min_used_count) {
+			min_used_count = bound_dev->bitmap_rxq[i];
+			rxq_id = i;
+		}
+	}
+	if (min_used_count >= RXQ_MAX_USECNT || min_used_count >= rxq_multiplex_limit) {
+		vecls_error("alloc rxq fail! nid:%d, devid:%d\n", nid, devid);
+		return -EINVAL;
+	}
+
+found:
+	bound_dev->bitmap_rxq[rxq_id]++;
+	vecls_debug("alloc nid:%d, dev_id:%d, rxq_id:%d use:%d\n", nid, devid,
+		    rxq_id, bound_dev->bitmap_rxq[rxq_id]);
+	return rxq_id;
+}
+
+/*
+ * Release one use of rx queue @rxq_id of device @devid on node @nid.
+ *
+ * With strategy 1 the cluster bookkeeping is updated first. The per-node
+ * use count is decremented only when it is positive. Invalid arguments,
+ * including negative ids such as an error code returned by
+ * alloc_rxq_id(), are logged and ignored.
+ */
+void free_rxq_id(int nid, int devid, int rxq_id)
+{
+	struct vecls_numa_bound_dev_info *bound_dev;
+	struct vecls_numa_info *numa_info;
+
+	numa_info = get_vecls_numa_info(nid);
+	if (!numa_info) {
+		vecls_error("error nid:%d\n", nid);
+		return;
+	}
+
+	if (devid < 0 || devid >= VECLS_MAX_NETDEV_NUM) {
+		vecls_error("error bound_dev index:%d\n", devid);
+		return;
+	}
+	bound_dev = &numa_info->bound_dev[devid];
+
+	/* rxq_id indexes bitmap_rxq[] below, so both bounds matter */
+	if (rxq_id < 0 || rxq_id >= VECLS_MAX_RXQ_NUM_PER_DEV) {
+		vecls_error("error rxq_id:%d\n", rxq_id);
+		return;
+	}
+
+	if (strategy == 1)
+		put_cluster_rxq(bound_dev, rxq_id);
+
+	if (bound_dev->bitmap_rxq[rxq_id] <= 0) {
+		vecls_error("error nid:%d, devid:%d, rxq_id:%d\n", nid, devid, rxq_id);
+		return;
+	}
+
+	bound_dev->bitmap_rxq[rxq_id]--;
+	vecls_debug("free nid:%d, dev_id:%d, rxq_id:%d use:%d\n", nid, devid,
+		    rxq_id, bound_dev->bitmap_rxq[rxq_id]);
+}
+
+/*
+ * Allocate vecls_numa_info_table (one entry per online node), derive the
+ * cluster geometry and record the CPUs of every node.
+ *
+ * vecls_cluster_cpu_num is the number of CPUs in the cluster of the
+ * current CPU; vecls_cluster_per_numa is derived from nr_cpu_ids. Both are
+ * kept at 1 or more because they are later used as divisors and as an
+ * allocation count.
+ *
+ * Return: 0 on success, -ENOMEM when the table cannot be allocated.
+ */
+static int init_vecls_numa_info(void)
+{
+	struct vecls_numa_info *numa_info;
+	int nid, ret = 0;
+
+	vecls_numa_num = num_online_nodes();
+	vecls_numa_info_table = kcalloc(vecls_numa_num, sizeof(*vecls_numa_info_table), GFP_ATOMIC);
+	if (!vecls_numa_info_table) {
+		ret = -ENOMEM;
+		/* without a table no node index may be considered valid */
+		vecls_numa_num = 0;
+		vecls_error("vecls_numa_info_table alloc failed:%d\n", ret);
+		return ret;
+	}
+
+	vecls_cluster_cpu_num = cpumask_weight(topology_cluster_cpumask(raw_smp_processor_id()));
+	if (vecls_cluster_cpu_num < 1)
+		vecls_cluster_cpu_num = 1;
+
+	/* fewer clusters than nodes would round down to 0 here */
+	vecls_cluster_per_numa = (nr_cpu_ids / vecls_cluster_cpu_num) / vecls_numa_num;
+	if (vecls_cluster_per_numa < 1)
+		vecls_cluster_per_numa = 1;
+
+	vecls_debug("vecls_numa_num=%d cluster_per_numa:%d cluster_cpu_num:%d\n",
+		    vecls_numa_num, vecls_cluster_per_numa, vecls_cluster_cpu_num);
+
+	for (nid = 0; nid < vecls_numa_num; nid++) {
+		numa_info = get_vecls_numa_info(nid);
+		if (!numa_info)
+			continue;
+		init_numa_avail_cpus(nid, numa_info);
+	}
+
+	return ret;
+}
+
+/*
+ * Take the lowest-numbered CPU still marked in numa_info->avail_cpus and
+ * clear its bit.
+ *
+ * Return: the CPU number, or -1 when the bitmap is empty.
+ */
+static int alloc_available_cpu(int nid, struct vecls_numa_info *numa_info)
+{
+	int picked = find_first_bit(numa_info->avail_cpus, VECLS_MAX_CPU_NUM);
+
+	if (picked >= VECLS_MAX_CPU_NUM) {
+		vecls_error("no available cpus: nid=%d, cpu=%d\n", nid, picked);
+		return -1;
+	}
+
+	clear_bit(picked, numa_info->avail_cpus);
+
+	return picked;
+}
+
+/*
+ * Record @cpu as the planned affinity CPU of rx queue @rxq_id. Queue ids
+ * at or beyond VECLS_MAX_RXQ_NUM_PER_DEV are ignored.
+ */
+static void add_netdev_irq_affinity_cpu(struct vecls_netdev_info *vecls_dev, int rxq_id, int cpu)
+{
+	if (rxq_id < VECLS_MAX_RXQ_NUM_PER_DEV)
+		vecls_dev->rxq[rxq_id].affinity_cpu = cpu;
+}
+
+/*
+ * Default strategy: spread the rx queues of @vecls_dev evenly over the
+ * nodes, giving each queue its own CPU from the node's avail_cpus.
+ *
+ * Every node first gets rxq_num / vecls_numa_num queues; the remaining
+ * queues are then handed out one per node, starting with node 0. A node
+ * that runs out of CPUs leaves its remaining queues untouched.
+ */
+static void config_affinity_strategy_default(struct vecls_netdev_info *vecls_dev)
+{
+	int rxq_num = vecls_dev->rxq_num;
+	int rxq_per_numa = rxq_num / vecls_numa_num;
+	int remain = rxq_num - rxq_per_numa * vecls_numa_num;
+	struct vecls_numa_info *numa_info;
+	int n, nid, cpu;
+
+	vecls_debug("dev=%s, rxq_num=%d, rxq_per_numa=%d, remain=%d\n", vecls_dev->dev_name,
+		    rxq_num, rxq_per_numa, remain);
+
+	/* even share: node nid owns queues [nid * per, (nid + 1) * per) */
+	for (nid = 0; nid < vecls_numa_num; nid++) {
+		numa_info = get_vecls_numa_info(nid);
+		if (!numa_info)
+			continue;
+		for (n = 0; n < rxq_per_numa; n++) {
+			cpu = alloc_available_cpu(nid, numa_info);
+			if (cpu < 0)
+				break;
+			add_netdev_irq_affinity_cpu(vecls_dev, rxq_per_numa * nid + n, cpu);
+		}
+	}
+
+	if (remain == 0)
+		return;
+
+	/* leftover queues follow the evenly shared ones, one per node */
+	n = 0;
+	for (nid = 0; nid < vecls_numa_num && n < remain; nid++) {
+		numa_info = get_vecls_numa_info(nid);
+		if (!numa_info)
+			continue;
+		cpu = alloc_available_cpu(nid, numa_info);
+		if (cpu < 0)
+			break;
+		add_netdev_irq_affinity_cpu(vecls_dev, rxq_per_numa * vecls_numa_num + n, cpu);
+		n++;
+	}
+}
+
+/*
+ * Cluster strategy: hand the rx queues of @vecls_dev to clusters in
+ * round-robin order, one CPU per cluster and pass.
+ *
+ * Only the first "round" clusters of each node take part. Each pass uses
+ * the CPU at offset cpu_idx inside every participating cluster, starting
+ * with the last CPU of the cluster.
+ */
+static void config_affinity_strategy_cluster(struct vecls_netdev_info *vecls_dev)
+{
+	int rxq_num = vecls_dev->rxq_num;
+	int rxq_per_numa = rxq_num / vecls_numa_num;
+	int remain = rxq_num - rxq_per_numa * vecls_numa_num;
+	int total_clusters = vecls_cluster_per_numa * vecls_numa_num;
+	int cpu_idx = vecls_cluster_cpu_num - 1;
+	int cluster, cpu, round, rxq_id = 0;
+
+	round = rxq_per_numa;
+	if (round >= vecls_cluster_per_numa)
+		round = vecls_cluster_per_numa;
+	if (remain > 0)
+		round++;
+	vecls_debug("round=%d\n", round);
+
+	while (rxq_id < rxq_num) {
+		for (cluster = 0; cluster < total_clusters; cluster++) {
+			if (cluster % vecls_cluster_per_numa >= round)
+				continue;
+			if (rxq_id >= rxq_num)
+				break;
+			cpu = cluster * vecls_cluster_cpu_num + cpu_idx;
+			add_netdev_irq_affinity_cpu(vecls_dev, rxq_id++, cpu);
+		}
+
+		/*
+		 * The offset moves down by two per pass and wraps to the last
+		 * CPU of the cluster once it drops below zero.
+		 * NOTE(review): a step of two rather than one - confirm intent.
+		 */
+		cpu_idx -= 2;
+		if (cpu_idx < 0)
+			cpu_idx = vecls_cluster_cpu_num - 1;
+	}
+}
+
+/*
+ * NUMA strategy: assign consecutive rx queues of @vecls_dev to consecutive
+ * CPUs of each node, starting at the node's first available CPU and
+ * wrapping after nr_cpu_ids / vecls_numa_num CPUs.
+ *
+ * Each node takes rxq_num / vecls_numa_num queues; the first "remain"
+ * nodes take one queue more.
+ */
+static void config_affinity_strategy_numa(struct vecls_netdev_info *vecls_dev)
+{
+	int rxq_num = vecls_dev->rxq_num;
+	int rxq_per_numa = rxq_num / vecls_numa_num;
+	int cpu_per_numa = nr_cpu_ids / vecls_numa_num;
+	int remain = rxq_num - rxq_per_numa * vecls_numa_num;
+	struct vecls_numa_info *numa_info;
+	int first_cpu, slot, nid;
+	int rxq_id = 0;
+
+	for (nid = 0; nid < vecls_numa_num; nid++) {
+		numa_info = get_vecls_numa_info(nid);
+		if (!numa_info)
+			continue;
+
+		first_cpu = find_first_bit(numa_info->avail_cpus, VECLS_MAX_CPU_NUM);
+		for (slot = 0; slot < rxq_per_numa; slot++) {
+			if (rxq_id >= rxq_num)
+				break;
+			add_netdev_irq_affinity_cpu(vecls_dev, rxq_id++,
+						    first_cpu + (slot % cpu_per_numa));
+		}
+
+		/* one extra queue on this node while leftovers remain */
+		if (remain-- > 0)
+			add_netdev_irq_affinity_cpu(vecls_dev, rxq_id++,
+						    first_cpu + (slot % cpu_per_numa));
+	}
+}
+
+/*
+ * Custom strategy: the planned affinity CPUs are left as collected by
+ * get_netdev_queue_info(); only a debug line is emitted.
+ */
+static void config_affinity_strategy_custom(struct vecls_netdev_info *vecls_dev)
+{
+	vecls_debug("dev=%s\n", vecls_dev->dev_name);
+}
+
+/*
+ * Plan the irq affinity of every registered device according to the
+ * strategy module parameter: 1 cluster, 2 numa, 3 custom, any other value
+ * the default spread.
+ */
+static void config_affinity_strategy(void)
+{
+	void (*plan)(struct vecls_netdev_info *vecls_dev);
+	struct vecls_netdev_info *vecls_dev;
+	int devid;
+
+	if (strategy == 1)
+		plan = config_affinity_strategy_cluster;
+	else if (strategy == 2)
+		plan = config_affinity_strategy_numa;
+	else if (strategy == 3)
+		plan = config_affinity_strategy_custom;
+	else
+		plan = config_affinity_strategy_default;
+
+	for (devid = 0; devid < vecls_netdev_num; devid++) {
+		vecls_dev = get_vecls_netdev_info(devid);
+		if (vecls_dev)
+			plan(vecls_dev);
+	}
+}
+
+/*
+ * Pin @irq to @cpu. The result is only reported through the debug log;
+ * @rxq is used for that message alone.
+ */
+static inline void irq_set_affinity_wrapper(int rxq, int irq, int cpu)
+{
+	int err = irq_set_affinity(irq, get_cpu_mask(cpu));
+
+	vecls_debug("rxq=%d, irq=%d, cpu=%d, err=%d\n", rxq, irq, cpu, err);
+}
+
+/*
+ * Apply the planned affinity: for every rx queue of every registered
+ * device, pin its irq to the recorded affinity CPU.
+ */
+static void enable_affinity_strategy(void)
+{
+	struct vecls_netdev_queue_info *queue;
+	struct vecls_netdev_info *vecls_dev;
+	int devid, q;
+
+	for (devid = 0; devid < vecls_netdev_num; devid++) {
+		vecls_dev = get_vecls_netdev_info(devid);
+		if (!vecls_dev)
+			continue;
+
+		for (q = 0; q < vecls_dev->rxq_num; q++) {
+			queue = &vecls_dev->rxq[q];
+			irq_set_affinity_wrapper(q, queue->irq, queue->affinity_cpu);
+		}
+	}
+}
+
+/*
+ * Set the XPS CPU mask of queue @rxq_id on @netdev. The result is only
+ * reported through the debug log.
+ */
+static inline void netif_set_xps_queue_wrapper(struct net_device *netdev, int rxq_id,
+					       const struct cpumask *cpu_mask)
+{
+	int err = netif_set_xps_queue(netdev, cpu_mask, rxq_id);
+
+	vecls_debug("name=%s, rxq_id=%d, mask=%*pbl, err=%d\n", netdev->name, rxq_id,
+		    cpumask_pr_args(cpu_mask), err);
+}
+
+/*
+ * Configure XPS for every recorded queue of every registered device.
+ *
+ * @enable: true maps queue N to all CPUs of the node that holds the
+ *          queue's affinity CPU; false installs an empty mask.
+ */
+static void set_netdev_xps_queue(bool enable)
+{
+	const struct cpumask clear_mask = { 0 };
+	struct vecls_netdev_info *vecls_dev;
+	const struct cpumask *mask;
+	int devid, q, nid;
+
+	for (devid = 0; devid < vecls_netdev_num; devid++) {
+		vecls_dev = get_vecls_netdev_info(devid);
+		if (!vecls_dev)
+			continue;
+
+		for (q = 0; q < vecls_dev->rxq_num; q++) {
+			nid = cpu_to_node(vecls_dev->rxq[q].affinity_cpu);
+			mask = enable ? cpumask_of_node(nid) : &clear_mask;
+			netif_set_xps_queue_wrapper(vecls_dev->netdev, q, mask);
+		}
+	}
+}
+
+/*
+ * seq_file show callback for the venet_status proc entry: dispatches to
+ * the ntuple (mode 0) or flow (any other mode) status printer.
+ */
+static int __maybe_unused venetcls_status_seq_show(struct seq_file *seq, void *v)
+{
+	if (mode == 0)
+		return venetcls_ntuple_status(seq, v);
+
+	return venetcls_flow_status(seq, v);
+}
+
+/*
+ * Module entry point.
+ *
+ * Validates the parameters, collects NUMA and netdev information, applies
+ * the irq affinity strategy, builds the per-node rx queue bookkeeping,
+ * optionally sets up XPS, initialises the ntuple or flow resources and
+ * registers the venet_status proc entry.
+ *
+ * Every failure unwinds what was set up before it, in reverse order.
+ *
+ * Return: 0 on success, a negative errno otherwise.
+ */
+static __init int vecls_init(void)
+{
+	struct vecls_numa_info *numa_info;
+	int nid, err;
+
+	if (!check_params())
+		return -EINVAL;
+
+	err = init_vecls_numa_info();
+	if (err)
+		return err;
+
+	/* may fail after registering some devices, hence clean_netdev */
+	err = init_vecls_netdev_info(ifname);
+	if (err)
+		goto clean_netdev;
+
+	// Set irq affinity
+	config_affinity_strategy();
+	enable_affinity_strategy();
+
+	// Calculate rxq bounded to one numa
+	for (nid = 0; nid < vecls_numa_num; nid++) {
+		numa_info = get_vecls_numa_info(nid);
+		if (!numa_info)
+			continue;
+		/* on failure this has already released all cluster info */
+		err = init_numa_rxq_bitmap(nid, numa_info);
+		if (err)
+			goto clean_netdev;
+	}
+
+#ifdef CONFIG_XPS
+	set_netdev_xps_queue(true);
+#endif
+
+	if (mode == 0)
+		err = vecls_ntuple_res_init();
+	else
+		err = vecls_flow_res_init();
+
+	if (err)
+		goto clean_rxq;
+
+#ifdef CONFIG_PROC_FS
+	if (!proc_create_net_single("venet_status", 0444, init_net.proc_net,
+				    venetcls_status_seq_show, NULL)) {
+		err = -ENOMEM;
+		/* the mode resources are live at this point: release them */
+		if (mode == 0)
+			vecls_ntuple_res_clean();
+		else
+			vecls_flow_res_clean();
+		goto clean_rxq;
+	}
+#endif
+
+	return 0;
+
+clean_rxq:
+#ifdef CONFIG_XPS
+	set_netdev_xps_queue(false);
+#endif
+	clean_vecls_rxq();
+clean_netdev:
+	clean_vecls_netdev_info();
+	clean_vecls_numa_info();
+	return err;
+}
+
+/*
+ * Module exit: undo vecls_init() in reverse order - proc entry, mode
+ * resources, XPS masks, rx queue bookkeeping, netdev and NUMA tables.
+ */
+static __exit void vecls_exit(void)
+{
+#ifdef CONFIG_PROC_FS
+	remove_proc_entry("venet_status", init_net.proc_net);
+#endif
+
+	/* release whichever resource set vecls_init() created for this mode */
+	if (mode != 0)
+		vecls_flow_res_clean();
+	else
+		vecls_ntuple_res_clean();
+
+#ifdef CONFIG_XPS
+	set_netdev_xps_queue(false);
+#endif
+
+	/* the rxq cleanup iterates by vecls_netdev_num, so it runs first */
+	clean_vecls_rxq();
+	clean_vecls_netdev_info();
+	clean_vecls_numa_info();
+}
+
+module_init(vecls_init);
+module_exit(vecls_exit);
+
+MODULE_DESCRIPTION("venetcls");
+MODULE_LICENSE("GPL");
diff --git a/net/venetcls/venetcls_ntuple.c b/net/venetcls/venetcls_ntuple.c
new file mode 100644
index 000000000000..ad3c10f8ae5f
--- /dev/null
+++ b/net/venetcls/venetcls_ntuple.c
@@ -0,0 +1,713 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/inetdevice.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
+#include <linux/inet.h>
+#include <linux/jhash.h>
+#include <linux/venetcls.h>
+#include <net/addrconf.h>
+#include <net/sock.h>
+
+#include "venetcls.h"
+
+/*
+ * vecls_sk_rules: hash table of installed rules, bucketed by get_hash().
+ * vecls_sk_list:  hash table of socket entries, bucketed by get_sk_hashlist().
+ * Only vecls_sk_rules.mutex is initialised and taken in this file.
+ */
+struct vecls_sk_rule_list vecls_sk_rules, vecls_sk_list;
+/* Workqueue on which cfg_work() items are queued. */
+static struct workqueue_struct *do_cfg_workqueue;
+/* Incremented when a cfg_work() item is queued, decremented when it finishes. */
+static atomic_t vecls_worker_count = ATOMIC_INIT(0);
+
+/* Reset every bucket of vecls_sk_rules and initialise its mutex. */
+static void init_vecls_sk_rules(void)
+{
+	struct hlist_head *bucket = vecls_sk_rules.hash;
+	struct hlist_head *end = bucket + VECLS_SK_RULE_HASHSIZE;
+
+	for (; bucket < end; bucket++)
+		INIT_HLIST_HEAD(bucket);
+
+	mutex_init(&vecls_sk_rules.mutex);
+}
+
+/*
+ * Hash a rule key: destination address (IPv4 word, or a jhash of the 16-byte
+ * IPv6 address) combined with the destination port.
+ */
+static inline u32 get_hash(struct cmd_context ctx)
+{
+	if (!ctx.is_ipv6)
+		return jhash_2words(ctx.dip4, ctx.dport, 0);
+
+	return jhash_2words(jhash(ctx.dip6, 16, 0), ctx.dport, 0);
+}
+
+/* Return the vecls_sk_rules bucket that the key in @ctx hashes to. */
+static inline struct hlist_head *get_rule_hashlist(struct cmd_context ctx)
+{
+	return &vecls_sk_rules.hash[get_hash(ctx) & VECLS_SK_RULE_HASHMASK];
+}
+
+/*
+ * Return the vecls_sk_list bucket for socket pointer @sk.
+ *
+ * The hash is computed over the pointer value itself (&sk), never over the
+ * memory it points to: the bucket must not depend on the socket's contents,
+ * and the pointer may be stale by the time a deferred worker looks it up.
+ */
+static inline struct hlist_head *get_sk_hashlist(void *sk)
+{
+	return vecls_sk_list.hash + (jhash(&sk, sizeof(sk), 0) & VECLS_SK_RULE_HASHMASK);
+}
+
+/*
+ * Record a freshly installed rule: add a vecls_sk_rule to the rule table
+ * and a matching vecls_sk_entry to the socket table. Nothing is recorded
+ * if either allocation fails.
+ */
+static void add_sk_rule(int devid, struct cmd_context ctx, void *sk, int nid)
+{
+	u32 rule_hash = get_hash(ctx);
+	struct hlist_head *sk_bucket = get_sk_hashlist(sk);
+	struct vecls_sk_entry *sk_ent;
+	struct vecls_sk_rule *new_rule;
+
+	new_rule = kzalloc(sizeof(*new_rule), GFP_ATOMIC);
+	if (!new_rule) {
+		vecls_error("alloc rule failed\n");
+		return;
+	}
+
+	sk_ent = kzalloc(sizeof(*sk_ent), GFP_ATOMIC);
+	if (!sk_ent) {
+		vecls_error("alloc entry failed\n");
+		kfree(new_rule);
+		return;
+	}
+
+	/* Rule side: key, owning socket and where the rule was installed. */
+	new_rule->sk = sk;
+	new_rule->devid = devid;
+	new_rule->nid = nid;
+	new_rule->is_ipv6 = ctx.is_ipv6;
+	new_rule->dip4 = ctx.dip4;
+	memcpy(new_rule->dip6, ctx.dip6, sizeof(new_rule->dip6));
+	new_rule->dport = ctx.dport;
+	new_rule->action = ctx.action;
+	new_rule->ruleid = ctx.ret_loc;
+	hlist_add_head(&new_rule->node,
+		       vecls_sk_rules.hash + (rule_hash & VECLS_SK_RULE_HASHMASK));
+
+	/* Socket side: remember the rule hash so the rule can be found from sk. */
+	sk_ent->sk = sk;
+	sk_ent->sk_rule_hash = rule_hash;
+	hlist_add_head(&sk_ent->node, sk_bucket);
+}
+
+/* Look up the socket-table entry for @sk; returns NULL when there is none. */
+static struct vecls_sk_entry *get_sk_entry(void *sk)
+{
+	struct vecls_sk_entry *pos;
+
+	hlist_for_each_entry(pos, get_sk_hashlist(sk), node) {
+		if (pos->sk == sk)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/*
+ * Unlink and free @rule together with one socket-table entry of its socket.
+ * If the socket has no entry, nothing is freed.
+ */
+static void del_sk_rule(struct vecls_sk_rule *rule)
+{
+	struct vecls_sk_entry *sk_ent = get_sk_entry(rule->sk);
+
+	if (sk_ent) {
+		hlist_del_init(&sk_ent->node);
+		kfree(sk_ent);
+
+		vecls_debug("del rule=%p\n", rule);
+		hlist_del_init(&rule->node);
+		kfree(rule);
+	}
+}
+
+/*
+ * Find the rule on device @devid whose destination port and address match
+ * the key in @ctx. The address is compared as IPv4 or IPv6 according to the
+ * stored rule. Returns NULL when no rule matches.
+ */
+static struct vecls_sk_rule *get_sk_rule(int devid, struct cmd_context ctx)
+{
+	struct vecls_sk_rule *pos;
+
+	hlist_for_each_entry(pos, get_rule_hashlist(ctx), node) {
+		if (pos->devid != devid || pos->dport != ctx.dport)
+			continue;
+
+		if (pos->is_ipv6) {
+			if (!memcmp(pos->dip6, ctx.dip6, sizeof(pos->dip6)))
+				return pos;
+		} else if (pos->dip4 == ctx.dip4) {
+			return pos;
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Find the rule that socket @sk owns on device @devid, going through the
+ * socket table to recover the rule's hash. Returns NULL when not found.
+ */
+static struct vecls_sk_rule *get_rule_from_sk(int devid, void *sk)
+{
+	struct vecls_sk_entry *sk_ent = get_sk_entry(sk);
+	struct hlist_head *bucket;
+	struct vecls_sk_rule *pos;
+
+	if (!sk_ent)
+		return NULL;
+
+	bucket = &vecls_sk_rules.hash[sk_ent->sk_rule_hash & VECLS_SK_RULE_HASHMASK];
+	hlist_for_each_entry(pos, bucket, node) {
+		if (pos->sk == sk && pos->devid == devid)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/* True when a rule for the key in @ctx already exists on device @devid. */
+static inline bool reuseport_check(int devid, struct cmd_context ctx)
+{
+	return get_sk_rule(devid, ctx) != NULL;
+}
+
+/*
+ * Return the local IPv4 address of the first matching interface in @net,
+ * or 0 when none is found.
+ *
+ * Loopback devices and devices that are not up are skipped. On each
+ * remaining device only an address whose label equals the device name is
+ * accepted.
+ *
+ * NOTE(review): rtnl_lock() may sleep, while the callers in this file
+ * allocate with GFP_ATOMIC - confirm this is never reached from atomic
+ * context.
+ */
+static u32 get_first_ip4_addr(struct net *net)
+{
+	struct in_device *in_dev;
+	struct net_device *dev;
+	struct in_ifaddr *ifa;
+	u32 dip4 = 0;
+
+	rtnl_lock();
+	rcu_read_lock();
+	for_each_netdev(net, dev) {
+		if (dev->flags & IFF_LOOPBACK || !(dev->flags & IFF_UP))
+			continue;
+		in_dev = __in_dev_get_rcu(dev);
+		if (!in_dev)
+			continue;
+
+		in_dev_for_each_ifa_rcu(ifa, in_dev) {
+			/* accept only the address labelled with the device's own name */
+			if (!strcmp(dev->name, ifa->ifa_label)) {
+				dip4 = ifa->ifa_local;
+				vecls_debug("dev:%s dip:%pI4\n", dev->name, &dip4);
+				goto out;
+			}
+		}
+	}
+out:
+	rcu_read_unlock();
+	rtnl_unlock();
+	return dip4;
+}
+
+/*
+ * Copy the first usable IPv6 address found in @net into @dip6.
+ *
+ * Loopback devices and devices that are not up are skipped, as are
+ * host-scope, tentative and deprecated addresses. @dip6 is left untouched
+ * when no address qualifies.
+ *
+ * NOTE(review): rtnl_lock() may sleep, while the callers in this file
+ * allocate with GFP_ATOMIC - confirm this is never reached from atomic
+ * context.
+ */
+static void get_first_ip6_addr(struct net *net, u32 *dip6)
+{
+	struct inet6_dev *idev;
+	struct net_device *dev;
+	struct inet6_ifaddr *ifp;
+
+	rtnl_lock();
+	rcu_read_lock();
+	for_each_netdev(net, dev) {
+		if (dev->flags & IFF_LOOPBACK || !(dev->flags & IFF_UP))
+			continue;
+		idev = __in6_dev_get(dev);
+		if (!idev)
+			continue;
+		list_for_each_entry_rcu(ifp, &idev->addr_list, if_list) {
+			if (ifp->scope == RT_SCOPE_HOST)
+				continue;
+			if (ifp->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
+				continue;
+			/* first qualifying address wins */
+			memcpy(dip6, &ifp->addr, sizeof(ifp->addr));
+			vecls_debug("dev:%s dip:%pI6\n", dev->name, dip6);
+			goto out;
+		}
+	}
+out:
+	rcu_read_unlock();
+	rtnl_unlock();
+}
+
+/*
+ * Fill the rule key in @ctx_p from listening socket @sk.
+ *
+ * The port is always taken from the socket. When match_ip_flag is clear
+ * the address is zeroed; otherwise the socket's bound address is used and,
+ * if the socket is bound to the wildcard address, the first interface
+ * address of the socket's namespace is used instead.
+ */
+static void get_sk_rule_addr(struct sock *sk, struct cfg_param *ctx_p)
+{
+	u32 *addr6 = &ctx_p->ctx.dip6[0];
+	u32 *addr4 = &ctx_p->ctx.dip4;
+	bool v6 = (sk->sk_family == AF_INET6);
+
+	ctx_p->ctx.dport = htons(sk->sk_num);
+	ctx_p->ctx.is_ipv6 = v6;
+
+	if (!match_ip_flag) {
+		*addr4 = 0;
+		memset(addr6, 0, sizeof(sk->sk_v6_rcv_saddr));
+		return;
+	}
+
+	if (!v6) {
+		*addr4 = sk->sk_rcv_saddr ? sk->sk_rcv_saddr :
+					    get_first_ip4_addr(sock_net(sk));
+		return;
+	}
+
+	if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+		get_first_ip6_addr(sock_net(sk), addr6);
+	else
+		memcpy(addr6, &sk->sk_v6_rcv_saddr, sizeof(sk->sk_v6_rcv_saddr));
+}
+
+/*
+ * Ask the device named in @ctx to delete the RX classification rule stored
+ * at location @loc. Returns the result of send_ethtool_ioctl().
+ */
+static int rxclass_rule_del(struct cmd_context *ctx, __u32 loc)
+{
+	/* Zero the whole command so the driver never sees stack garbage. */
+	struct ethtool_rxnfc nfccmd = { .cmd = ETHTOOL_SRXCLSRLDEL };
+	int err;
+
+	nfccmd.fs.location = loc;
+	err = send_ethtool_ioctl(ctx, &nfccmd);
+	if (err < 0)
+		vecls_debug("rmgr: Cannot delete RX class rule, loc:%u\n", loc);
+	return err;
+}
+
+/*
+ * Mark location @loc as used in the slot bitmap of @rmgr.
+ * Returns 0 on success, -1 when @loc lies outside the table.
+ */
+static int rmgr_ins(struct rmgr_ctrl *rmgr, __u32 loc)
+{
+	if (loc < rmgr->size) {
+		set_bit(loc, rmgr->slot);
+		return 0;
+	}
+
+	vecls_error("rmgr: Location out of range\n");
+	return -1;
+}
+
+/*
+ * Pick the highest free location in the rule table and store it in
+ * @fsp->location, marking it used in @rmgr.
+ *
+ * When the driver selects locations itself (rmgr->driver_select) nothing is
+ * chosen and 0 is returned. Otherwise returns the result of rmgr_ins() for
+ * the chosen location, or -1 when the table has no free location.
+ *
+ * The table is scanned bit by bit from the top. The previous word-skipping
+ * shortcut masked the last word with "~1UL << size % BITS_PER_LONG", which
+ * treated that word as full whenever its bit 0 was set and the table size
+ * was a multiple of BITS_PER_LONG, so free locations were missed.
+ */
+static int rmgr_find_empty_slot(struct rmgr_ctrl *rmgr, struct ethtool_rx_flow_spec *fsp)
+{
+	__u32 loc;
+
+	if (rmgr->driver_select)
+		return 0;
+
+	/* loc counts down from size - 1 to 0; an empty table skips the loop */
+	for (loc = rmgr->size; loc-- > 0; ) {
+		if (!test_bit(loc, rmgr->slot)) {
+			fsp->location = loc;
+			return rmgr_ins(rmgr, loc);
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Query the number of RX classification rules installed on the device
+ * named in @ctx.
+ *
+ * @count receives the reported rule count. @driver_select, when not NULL,
+ * receives whether the reply has RX_CLS_LOC_SPECIAL set in its data field.
+ * Returns the result of send_ethtool_ioctl(); on failure both outputs are
+ * 0 because the command is zero-initialised.
+ */
+static int rxclass_get_dev_info(struct cmd_context *ctx, __u32 *count, int *driver_select)
+{
+	/* Zero-initialised so the outputs are well defined even on failure. */
+	struct ethtool_rxnfc nfccmd = { .cmd = ETHTOOL_GRXCLSRLCNT };
+	int err;
+
+	err = send_ethtool_ioctl(ctx, &nfccmd);
+	*count = nfccmd.rule_cnt;
+	if (driver_select)
+		*driver_select = !!(nfccmd.data & RX_CLS_LOC_SPECIAL);
+	if (err < 0)
+		vecls_debug("rxclass: Cannot get RX class rule count\n");
+
+	return err;
+}
+
+/*
+ * Build the rule-location bitmap of @rmgr from the device named in @ctx.
+ *
+ * @rmgr is zeroed first, so it is safe to pass to rmgr_cleanup() whatever
+ * the outcome. When the driver selects locations itself the function
+ * returns early without allocating a bitmap. Otherwise the locations of all
+ * installed rules are fetched and marked as used.
+ *
+ * Returns 0 on success and a negative value on failure. A bitmap allocated
+ * before a late failure is left in @rmgr for the caller to release.
+ */
+static int rmgr_init(struct cmd_context *ctx, struct rmgr_ctrl *rmgr)
+{
+	struct ethtool_rxnfc *nfccmd;
+	__u32 *rule_locs;
+	int i, err = 0;
+
+	memset(rmgr, 0, sizeof(*rmgr));
+	err = rxclass_get_dev_info(ctx, &rmgr->n_rules, &rmgr->driver_select);
+	if (err < 0)
+		return err;
+
+	if (rmgr->driver_select)
+		return err;
+
+	/* command header followed by one location word per installed rule */
+	nfccmd = kzalloc(sizeof(*nfccmd) + (rmgr->n_rules * sizeof(__u32)), GFP_ATOMIC);
+	if (!nfccmd) {
+		vecls_error("rmgr: Cannot allocate memory for RX class rule locations\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	nfccmd->cmd = ETHTOOL_GRXCLSRLALL;
+	nfccmd->rule_cnt = rmgr->n_rules;
+	err = send_ethtool_ioctl(ctx, nfccmd);
+	if (err < 0) {
+		vecls_debug("rmgr: Cannot get RX class rules\n");
+		goto out;
+	}
+
+	/* the reply's data field is used as the table size */
+	rmgr->size = nfccmd->data;
+	if (rmgr->size == 0 || rmgr->size < rmgr->n_rules) {
+		vecls_error("rmgr: Invalid RX class rules table size\n");
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* one bit per table location */
+	rmgr->slot = kzalloc(BITS_TO_LONGS(rmgr->size) * sizeof(long), GFP_ATOMIC);
+	if (!rmgr->slot) {
+		vecls_error("rmgr: Cannot allocate memory for RX class rules\n");
+		err = -ENOMEM;
+		goto out;
+	}
+
+	/* mark every location reported by the device as used */
+	rule_locs = nfccmd->rule_locs;
+	for (i = 0; i < rmgr->n_rules; i++) {
+		err = rmgr_ins(rmgr, rule_locs[i]);
+		if (err < 0)
+			break;
+	}
+
+out:
+	kfree(nfccmd);
+	return err;
+}
+
+/* Release the slot bitmap of @rmgr and mark the table as empty. */
+static void rmgr_cleanup(struct rmgr_ctrl *rmgr)
+{
+	unsigned long *bitmap = rmgr->slot;
+
+	rmgr->size = 0;
+	rmgr->slot = NULL;
+	kfree(bitmap);
+}
+
+/*
+ * Choose a free rule location for @fsp on the device named in @ctx.
+ * Returns a negative value when the table cannot be read or is full.
+ */
+static int rmgr_set_location(struct cmd_context *ctx,
+			     struct ethtool_rx_flow_spec *fsp)
+{
+	struct rmgr_ctrl table;
+	int rc = rmgr_init(ctx, &table);
+
+	if (rc >= 0)
+		rc = rmgr_find_empty_slot(&table, fsp);
+
+	/* rmgr_init() zeroes the table first, so cleanup is always safe */
+	rmgr_cleanup(&table);
+	return rc;
+}
+
+/*
+ * Install the RX classification rule @fsp on the device named in @ctx.
+ *
+ * When @fsp->location has RX_CLS_LOC_SPECIAL set, a concrete location is
+ * chosen through rmgr_set_location() unless the driver picks one itself.
+ * On success @fsp->location holds the location reported back in the
+ * command, so the caller can later delete the rule by id even when the
+ * driver assigned the location.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int rxclass_rule_ins(struct cmd_context *ctx,
+			    struct ethtool_rx_flow_spec *fsp, u32 rss_context)
+{
+	/* Zero the whole command so no stack garbage reaches the driver. */
+	struct ethtool_rxnfc nfccmd = { .cmd = ETHTOOL_SRXCLSRLINS };
+	u32 loc = fsp->location;
+	int ret;
+
+	if (loc & RX_CLS_LOC_SPECIAL) {
+		ret = rmgr_set_location(ctx, fsp);
+		if (ret < 0)
+			return ret;
+	}
+
+	nfccmd.rss_context = rss_context;
+	nfccmd.fs = *fsp;
+	ret = send_ethtool_ioctl(ctx, &nfccmd);
+	if (ret < 0) {
+		vecls_debug("Can not insert the clasification rule\n");
+		return ret;
+	}
+
+	/* Hand the final location back; it differs from the request when the driver chose it. */
+	fsp->location = nfccmd.fs.location;
+
+	if (loc & RX_CLS_LOC_SPECIAL)
+		vecls_debug("Added rule with ID %d\n", nfccmd.fs.location);
+
+	return 0;
+}
+
+/*
+ * Add or delete one TCP destination rule on the device named in @ctx.
+ *
+ * With @is_del set, the rule at ctx->del_ruleid is deleted. Otherwise a
+ * TCP/IPv4 or TCP/IPv6 rule matching the destination port (and the
+ * destination address when it is non-zero) is installed, steering to ring
+ * ctx->action. On successful insertion ctx->ret_loc is set to the location
+ * left in the flow spec; it is -1 when insertion fails.
+ *
+ * Returns 0 on success or a negative value on failure.
+ */
+static int cfg_ethtool_rule(struct cmd_context *ctx, bool is_del)
+{
+	struct ethtool_rx_flow_spec *fsp, rx_rule_fs;
+	u32 rss_context = 0;
+	bool is_ipv6 = ctx->is_ipv6;
+	int ret, i;
+
+	if (ctx->is_ipv6)
+		vecls_debug("del:%d dev:%s dip:%pI6 dport:%d action:%d ruleid:%u del_ruleid:%u\n",
+			    is_del, ctx->netdev, &ctx->dip6, ntohs(ctx->dport), ctx->action,
+			    ctx->ruleid, ctx->del_ruleid);
+	else
+		vecls_debug("del:%d dev:%s dip:%pI4 dport:%d action:%d ruleid:%u del_ruleid:%u\n",
+			    is_del, ctx->netdev, &ctx->dip4, ntohs(ctx->dport), ctx->action,
+			    ctx->ruleid, ctx->del_ruleid);
+
+	if (is_del)
+		return rxclass_rule_del(ctx, ctx->del_ruleid);
+
+	/* stays -1 unless the insertion below succeeds */
+	ctx->ret_loc = -1;
+
+	fsp = &rx_rule_fs;
+	memset(fsp, 0, sizeof(*fsp));
+	if (is_ipv6) {
+		fsp->flow_type = TCP_V6_FLOW;
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, ctx->dip6, sizeof(ctx->dip6));
+		fsp->h_u.tcp_ip6_spec.pdst = ctx->dport;
+		/* always match the full destination port */
+		fsp->m_u.tcp_ip6_spec.pdst = (u16)~0ULL;
+		/* match the destination address only when one was supplied */
+		if (ctx->dip6[0] | ctx->dip6[1] | ctx->dip6[2] | ctx->dip6[3]) {
+			for (i = 0; i < 4; i++)
+				fsp->m_u.tcp_ip6_spec.ip6dst[i] = (u32)~0ULL;
+		}
+	} else {
+		fsp->flow_type = TCP_V4_FLOW;
+		fsp->h_u.tcp_ip4_spec.ip4dst = ctx->dip4;
+		fsp->h_u.tcp_ip4_spec.pdst = ctx->dport;
+		/* always match the full destination port */
+		fsp->m_u.tcp_ip4_spec.pdst = (u16)~0ULL;
+		/* match the destination address only when one was supplied */
+		if (ctx->dip4)
+			fsp->m_u.tcp_ip4_spec.ip4dst = (u32)~0ULL;
+	}
+	/* default to "any location"; a non-zero ctx->ruleid requests a fixed one */
+	fsp->location = RX_CLS_LOC_ANY;
+	if (ctx->ruleid)
+		fsp->location = ctx->ruleid;
+	fsp->ring_cookie = ctx->action;
+
+	ret = rxclass_rule_ins(ctx, &rx_rule_fs, rss_context);
+	if (!ret)
+		ctx->ret_loc = rx_rule_fs.location;
+	return ret;
+}
+
+/*
+ * Work handler that applies one add or delete request to every managed
+ * device, under vecls_sk_rules.mutex.
+ *
+ * Add: skip devices that already have a rule for the same key, allocate an
+ * rx queue, install the hardware rule and record it; the queue is released
+ * again if installation fails.
+ * Delete: look the rule up by socket, remove it from the device, release
+ * its rx queue and drop the bookkeeping.
+ *
+ * The request is freed and vecls_worker_count decremented at the end.
+ */
+static void cfg_work(struct work_struct *work)
+{
+	struct cfg_param *ctx_p = container_of(work, struct cfg_param, work);
+	struct vecls_netdev_info *vecls_dev;
+	struct vecls_sk_rule *rule;
+	int devid, rxq_id, err;
+
+	mutex_lock(&vecls_sk_rules.mutex);
+	for (devid = 0; devid < vecls_netdev_num; devid++) {
+		vecls_dev = get_vecls_netdev_info(devid);
+		if (!vecls_dev)
+			continue;
+		/* target the ethtool commands at this device */
+		strscpy(ctx_p->ctx.netdev, vecls_dev->dev_name, IFNAMSIZ);
+		if (!(ctx_p->is_del)) {
+			/* a rule for this address/port already exists on the device */
+			if (reuseport_check(devid, ctx_p->ctx)) {
+				if (ctx_p->ctx.is_ipv6)
+					vecls_debug("dip:%pI6, dport:%d reuse!\n",
+						    &ctx_p->ctx.dip6, ntohs(ctx_p->ctx.dport));
+				else
+					vecls_debug("dip:%pI4, dport:%d reuse!\n",
+						    &ctx_p->ctx.dip4, ntohs(ctx_p->ctx.dport));
+				continue;
+			}
+
+			// Calculate the bound queue
+			rxq_id = alloc_rxq_id(ctx_p->nid, ctx_p->cpu, devid);
+			if (rxq_id < 0)
+				continue;
+
+			// Config Ntuple rule to dev
+			ctx_p->ctx.action = (u16)rxq_id;
+			err = cfg_ethtool_rule(&ctx_p->ctx, ctx_p->is_del);
+			if (err) {
+				vecls_debug("Add sk:%p, dev_id:%d, rxq:%d, err:%d\n",
+					    ctx_p->sk, devid, rxq_id, err);
+				/* give the queue back, the rule was not installed */
+				free_rxq_id(ctx_p->nid, devid, rxq_id);
+				continue;
+			}
+			add_sk_rule(devid, ctx_p->ctx, ctx_p->sk, ctx_p->nid);
+		} else {
+			rule = get_rule_from_sk(devid, ctx_p->sk);
+			if (!rule) {
+				vecls_debug("rule not found! sk:%p, devid:%d, dip4:%pI4, dport:%d\n",
+					    ctx_p->sk, devid, &ctx_p->ctx.dip4,
+					    ntohs(ctx_p->ctx.dport));
+				continue;
+			}
+
+			// Config Ntuple rule to dev
+			ctx_p->ctx.del_ruleid = rule->ruleid;
+			/* the result is not checked; the bookkeeping is dropped either way */
+			err = cfg_ethtool_rule(&ctx_p->ctx, ctx_p->is_del);
+			// Free the bound queue
+			free_rxq_id(rule->nid, devid, rule->action);
+			// Delete sk rule
+			del_sk_rule(rule);
+		}
+	}
+	mutex_unlock(&vecls_sk_rules.mutex);
+	kfree(ctx_p);
+	atomic_dec(&vecls_worker_count);
+}
+
+/* True when socket @sk owns a rule on at least one managed device. */
+static bool has_sock_rule(struct sock *sk)
+{
+	int devid = 0;
+
+	while (devid < vecls_netdev_num) {
+		if (get_vecls_netdev_info(devid) && get_rule_from_sk(devid, sk))
+			return true;
+		devid++;
+	}
+
+	return false;
+}
+
+/*
+ * Queue removal of the rules owned by socket @sk.
+ *
+ * Nothing is queued when the socket owns no rule or when the request
+ * cannot be allocated. The request is freed by cfg_work().
+ *
+ * NOTE(review): has_sock_rule() walks the rule tables without taking
+ * vecls_sk_rules.mutex - confirm this is acceptable.
+ */
+static void del_ntuple_rule(struct sock *sk)
+{
+	struct cfg_param *ctx_p;
+
+	if (!has_sock_rule(sk))
+		return;
+
+	ctx_p = kzalloc(sizeof(*ctx_p), GFP_ATOMIC);
+	if (!ctx_p)
+		return;
+	get_sk_rule_addr(sk, ctx_p);
+
+	ctx_p->is_del = true;
+	ctx_p->sk = sk;
+	INIT_WORK(&ctx_p->work, cfg_work);
+	/*
+	 * Count the item before it can run: cfg_work() decrements the counter,
+	 * so incrementing after queue_work() could let the decrement land first
+	 * and hide a pending item from the teardown wait.
+	 */
+	atomic_inc(&vecls_worker_count);
+	queue_work(do_cfg_workqueue, &ctx_p->work);
+}
+
+/*
+ * Queue installation of a rule for listening socket @sk.
+ *
+ * The CPU the caller is running on, and that CPU's NUMA node, are recorded
+ * in the request for rx queue selection. Nothing is queued when
+ * check_appname() returns non-zero for the current task name or when the
+ * request cannot be allocated. The request is freed by cfg_work().
+ */
+static void add_ntuple_rule(struct sock *sk)
+{
+	struct cfg_param *ctx_p;
+	int cpu = raw_smp_processor_id();
+	int nid = cpu_to_node(cpu);
+
+	if (check_appname(current->comm))
+		return;
+
+	ctx_p = kzalloc(sizeof(*ctx_p), GFP_ATOMIC);
+	if (!ctx_p)
+		return;
+	get_sk_rule_addr(sk, ctx_p);
+
+	ctx_p->is_del = false;
+	ctx_p->sk = sk;
+	ctx_p->nid = nid;
+	ctx_p->cpu = cpu;
+	INIT_WORK(&ctx_p->work, cfg_work);
+	/*
+	 * Count the item before it can run: cfg_work() decrements the counter,
+	 * so incrementing after queue_work() could let the decrement land first
+	 * and hide a pending item from the teardown wait.
+	 */
+	atomic_inc(&vecls_worker_count);
+	queue_work(do_cfg_workqueue, &ctx_p->work);
+}
+
+/*
+ * Hook entry: add (@is_del == 0) or remove (@is_del != 0) the rule of a
+ * socket. Only IPv4/IPv6 sockets in TCP_LISTEN state are handled.
+ */
+static void ethtool_cfg_rxcls(struct sock *sk, int is_del)
+{
+	if (sk->sk_state != TCP_LISTEN)
+		return;
+
+	switch (sk->sk_family) {
+	case AF_INET6:
+		vecls_debug("[cpu:%d] app:%s, sk:%p, is_del:%d, IPv6:%pI6, port:%d\n",
+			    raw_smp_processor_id(), current->comm, sk, is_del,
+			    &sk->sk_v6_rcv_saddr, (u16)sk->sk_num);
+		break;
+	case AF_INET:
+		vecls_debug("[cpu:%d] app:%s, sk:%p, is_del:%d, IPv4:%pI4, port:%d\n",
+			    raw_smp_processor_id(), current->comm, sk, is_del,
+			    &sk->sk_rcv_saddr, (u16)sk->sk_num);
+		break;
+	default:
+		return;
+	}
+
+	if (!is_del)
+		add_ntuple_rule(sk);
+	else
+		del_ntuple_rule(sk);
+}
+
+/*
+ * Tear down all bookkeeping: remove every recorded rule from its device,
+ * then free every rule and every socket-table entry.
+ *
+ * A rule whose device can no longer be looked up is still unlinked and
+ * freed; only the hardware removal is skipped. The socket entries in
+ * vecls_sk_list are released here as well so nothing is leaked on unload.
+ */
+static void clean_vecls_sk_rules(void)
+{
+	struct vecls_netdev_info *vecls_dev;
+	struct cmd_context ctx = { 0 };
+	struct vecls_sk_entry *entry;
+	struct vecls_sk_rule *rule;
+	struct hlist_node *n;
+	unsigned int i;
+	int err;
+
+	mutex_lock(&vecls_sk_rules.mutex);
+	for (i = 0; i < VECLS_SK_RULE_HASHSIZE; i++) {
+		hlist_for_each_entry_safe(rule, n, &vecls_sk_rules.hash[i], node) {
+			vecls_dev = get_vecls_netdev_info(rule->devid);
+			if (vecls_dev) {
+				strscpy(ctx.netdev, vecls_dev->dev_name, IFNAMSIZ);
+				ctx.del_ruleid = rule->ruleid;
+				err = cfg_ethtool_rule(&ctx, true);
+				vecls_debug("sk:%p, dev_id:%d, action:%d, ruleid:%d, err:%d\n",
+					    rule->sk, rule->devid, rule->action,
+					    rule->ruleid, err);
+			}
+
+			hlist_del(&rule->node);
+			vecls_debug("clean rule=%p\n", rule);
+			kfree(rule);
+		}
+
+		/* both tables share the same bucket count */
+		hlist_for_each_entry_safe(entry, n, &vecls_sk_list.hash[i], node) {
+			hlist_del(&entry->node);
+			kfree(entry);
+		}
+	}
+	mutex_unlock(&vecls_sk_rules.mutex);
+}
+
+/*
+ * Print one header line and then one line per recorded rule (interface,
+ * destination address and port, rx queue, rule id, NUMA node) to @seq.
+ * Rules whose device cannot be looked up are omitted. Always returns 0.
+ */
+int venetcls_ntuple_status(struct seq_file *seq, void *v)
+{
+	struct vecls_netdev_info *dev_info;
+	struct vecls_sk_rule *pos;
+	unsigned int bucket;
+
+	seq_printf(seq, "%-16s %-42s %-8s %-6s %-6s %-6s\n",
+		   "Interface", "dstIP", "dstPort", "rxq", "ruleId", "NumaID");
+
+	mutex_lock(&vecls_sk_rules.mutex);
+	for (bucket = 0; bucket < VECLS_SK_RULE_HASHSIZE; bucket++) {
+		/* read-only walk: nothing is unlinked while printing */
+		hlist_for_each_entry(pos, &vecls_sk_rules.hash[bucket], node) {
+			dev_info = get_vecls_netdev_info(pos->devid);
+			if (!dev_info)
+				continue;
+
+			if (!pos->is_ipv6) {
+				seq_printf(seq, "%-16s %-42pI4 %-8d %-6d %-6d %-6d\n",
+					   dev_info->dev_name, &pos->dip4, ntohs(pos->dport),
+					   pos->action, pos->ruleid, pos->nid);
+				continue;
+			}
+
+			seq_printf(seq, "%-16s %-42pI6 %-8d %-6d %-6d %-6d\n",
+				   dev_info->dev_name, &pos->dip6, ntohs(pos->dport),
+				   pos->action, pos->ruleid, pos->nid);
+		}
+	}
+	mutex_unlock(&vecls_sk_rules.mutex);
+
+	return 0;
+}
+
+/*
+ * Hook table published while the ntuple mode is active. Only the rx
+ * classification hook is provided; the other three hooks are left NULL.
+ */
+static const struct vecls_hook_ops vecls_ntuple_ops = {
+	.vecls_flow_update = NULL,
+	.vecls_set_cpu = NULL,
+	.vecls_timeout = NULL,
+	.vecls_cfg_rxcls = ethtool_cfg_rxcls,
+};
+
+/*
+ * Set up the ntuple mode: create the ordered configuration workqueue,
+ * initialise the rule table and publish the hook table.
+ * Returns 0 on success, -ENOMEM when the workqueue cannot be created.
+ */
+int vecls_ntuple_res_init(void)
+{
+	do_cfg_workqueue = alloc_ordered_workqueue("vecls_cfg", 0);
+	if (do_cfg_workqueue) {
+		init_vecls_sk_rules();
+		/* publish the hooks only once everything they use is ready */
+		RCU_INIT_POINTER(vecls_ops, &vecls_ntuple_ops);
+		synchronize_rcu();
+		return 0;
+	}
+
+	vecls_debug("alloc_ordered_workqueue fails\n");
+	return -ENOMEM;
+}
+
+/*
+ * Tear down the ntuple mode: unpublish the hook table, wait for all queued
+ * configuration work to finish, destroy the workqueue and remove every
+ * recorded rule.
+ */
+void vecls_ntuple_res_clean(void)
+{
+	RCU_INIT_POINTER(vecls_ops, NULL);
+	synchronize_rcu();
+
+	/*
+	 * This path may sleep (synchronize_rcu() above already does), so yield
+	 * the CPU while waiting instead of busy-looping with mdelay(); the
+	 * pending work items need CPU time to complete.
+	 */
+	while (atomic_read(&vecls_worker_count) != 0)
+		msleep(1);
+	destroy_workqueue(do_cfg_workqueue);
+	clean_vecls_sk_rules();
+}
-- 
2.34.1