[PATCH] Add oenetcls support for velinux 5.15

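This introduces oenetcls, which configures ntuple rules and binds NIC interrupts to netdev queues automatically to achieve socket NUMA affinity. Behaviour is selected entirely through module parameters; a minimal usage sketch (assuming CONFIG_OENETCLS is built as a module and eth0/eth1 stand in for real interface names):

    # mode=0 installs ntuple rules at listen()/close(), mode=1 uses the
    # flow-table steering path; '#' joins multiple interfaces or app names
    modprobe oenetcls ifname=eth0#eth1 mode=0 appname=redis-server strategy=0
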
Signed-off-by: Yue Haibing <yuehaibing@huawei.com> --- ...roduce-oenetcls-for-network-optimiza.patch | 2812 +++++++++++++++++ 1 file changed, 2812 insertions(+) create mode 100644 0001-net-oenetcls-introduce-oenetcls-for-network-optimiza.patch diff --git a/0001-net-oenetcls-introduce-oenetcls-for-network-optimiza.patch b/0001-net-oenetcls-introduce-oenetcls-for-network-optimiza.patch new file mode 100644 index 0000000..a551070 --- /dev/null +++ b/0001-net-oenetcls-introduce-oenetcls-for-network-optimiza.patch @@ -0,0 +1,2812 @@ +From 0d83dcfc81e73470f256d1a48511706376944d11 Mon Sep 17 00:00:00 2001 +From: Yue Haibing <yuehaibing@huawei.com> +Date: Tue, 5 Aug 2025 16:05:52 +0800 +Subject: [PATCH] net/oenetcls: introduce oenetcls for network optimization + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/ICBFCS +CVE: NA + +-------------------------------- + +This introduces a kind of network optimization method named oenetcls. It +can configure the ntuple rule, and bind interrupt to the netdev queue +automatically. + +Signed-off-by: Yue Haibing <yuehaibing@huawei.com> +Signed-off-by: Wang Liang <wangliang74@huawei.com> +Signed-off-by: Liu Jian <liujian56@huawei.com> +Signed-off-by: yuelg <yuelg@chinaunicom.cn> +--- + include/linux/netdevice.h | 3 + + include/linux/oenetcls.h | 80 +++ + kernel/irq/irqdesc.c | 2 +- + net/Kconfig | 1 + + net/Makefile | 1 + + net/core/dev.c | 19 + + net/ipv4/af_inet.c | 6 + + net/ipv4/tcp.c | 9 + + net/oenetcls/Kconfig | 7 + + net/oenetcls/Makefile | 7 + + net/oenetcls/asmdefs.h | 61 ++ + net/oenetcls/memcpy-sve.S | 157 +++++ + net/oenetcls/oenetcls.h | 177 ++++++ + net/oenetcls/oenetcls_flow.c | 406 ++++++++++++ + net/oenetcls/oenetcls_main.c | 1075 ++++++++++++++++++++++++++++++++ + net/oenetcls/oenetcls_ntuple.c | 573 +++++++++++++++++ + 16 files changed, 2583 insertions(+), 1 deletion(-) + create mode 100644 include/linux/oenetcls.h + create mode 100644 net/oenetcls/Kconfig + create mode 100644 net/oenetcls/Makefile + create mode 100644 net/oenetcls/asmdefs.h + create mode 100644 net/oenetcls/memcpy-sve.S + create mode 100644 net/oenetcls/oenetcls.h + create mode 100644 net/oenetcls/oenetcls_flow.c + create mode 100644 net/oenetcls/oenetcls_main.c + create mode 100644 net/oenetcls/oenetcls_ntuple.c + +diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h +index cc1f14f3c..559e59e6b 100644 +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -766,6 +766,9 @@ struct netdev_rx_queue { + struct xsk_buff_pool *pool; + #endif + struct file __rcu *dmabuf_pages; ++#if IS_ENABLED(CONFIG_OENETCLS) ++ void __rcu *oecls_ftb; ++#endif + } ____cacheline_aligned_in_smp; + + struct page * +diff --git a/include/linux/oenetcls.h b/include/linux/oenetcls.h +new file mode 100644 +index 000000000..cf57fc12e +--- /dev/null ++++ b/include/linux/oenetcls.h +@@ -0,0 +1,80 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++#ifndef _LINUX_OENETCLS_H ++#define _LINUX_OENETCLS_H ++ ++struct oecls_hook_ops { ++ void (*oecls_cfg_rxcls)(struct sock *sk, int is_del); ++ void (*oecls_flow_update)(struct sock *sk); ++ void (*oecls_set_cpu)(struct sk_buff *skb); ++ bool (*oecls_timeout)(struct net_device *dev, u16 rxq_index, ++ u32 flow_id, u16 filter_id); ++}; ++ ++extern const struct oecls_hook_ops __rcu *oecls_ops; ++ ++static inline void oenetcls_cfg_rxcls(struct sock *sk, int is_del) ++{ ++ const struct oecls_hook_ops *ops; ++ ++ rcu_read_lock(); ++ ops = rcu_dereference(oecls_ops); ++ if (ops && ops->oecls_cfg_rxcls) ++ 
ops->oecls_cfg_rxcls(sk, is_del); ++ rcu_read_unlock(); ++} ++ ++static inline void oenetcls_flow_update(struct sock *sk) ++{ ++ const struct oecls_hook_ops *ops; ++ ++ rcu_read_lock(); ++ ops = rcu_dereference(oecls_ops); ++ if (ops && ops->oecls_flow_update) ++ ops->oecls_flow_update(sk); ++ rcu_read_unlock(); ++} ++ ++static inline void oenetcls_skb_set_cpu(struct sk_buff *skb) ++{ ++ const struct oecls_hook_ops *ops; ++ ++ rcu_read_lock(); ++ ops = rcu_dereference(oecls_ops); ++ if (ops && ops->oecls_set_cpu) ++ ops->oecls_set_cpu(skb); ++ rcu_read_unlock(); ++} ++ ++static inline void oenetcls_skblist_set_cpu(struct list_head *head) ++{ ++ const struct oecls_hook_ops *ops; ++ struct sk_buff *skb, *next; ++ ++ rcu_read_lock(); ++ ops = rcu_dereference(oecls_ops); ++ if (ops && ops->oecls_set_cpu) { ++ list_for_each_entry_safe(skb, next, head, list) ++ ops->oecls_set_cpu(skb); ++ } ++ rcu_read_unlock(); ++} ++ ++static inline bool oenetcls_may_expire_flow(struct net_device *dev, ++ u16 rxq_index, u32 flow_id, ++ u16 filter_id, bool *expire) ++{ ++ const struct oecls_hook_ops *ops; ++ ++ rcu_read_lock(); ++ ops = rcu_dereference(oecls_ops); ++ if (ops && ops->oecls_timeout) { ++ *expire = ops->oecls_timeout(dev, rxq_index, flow_id, filter_id); ++ rcu_read_unlock(); ++ return true; ++ } ++ rcu_read_unlock(); ++ ++ return false; ++} ++ ++#endif /* _LINUX_OENETCLS_H */ +diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c +index 8202d4a99..d3db25d28 100644 +--- a/kernel/irq/irqdesc.c ++++ b/kernel/irq/irqdesc.c +@@ -366,7 +366,7 @@ struct irq_desc *irq_to_desc(unsigned int irq) + { + return radix_tree_lookup(&irq_desc_tree, irq); + } +-#ifdef CONFIG_KVM_BOOK3S_64_HV_MODULE ++#if defined(CONFIG_KVM_BOOK3S_64_HV_MODULE) || IS_ENABLED(CONFIG_OENETCLS) + EXPORT_SYMBOL_GPL(irq_to_desc); + #endif + +diff --git a/net/Kconfig b/net/Kconfig +index dc8451e75..626d27bf7 100644 +--- a/net/Kconfig ++++ b/net/Kconfig +@@ -72,6 +72,7 @@ source "net/xfrm/Kconfig" + source "net/iucv/Kconfig" + source "net/smc/Kconfig" + source "net/xdp/Kconfig" ++source "net/oenetcls/Kconfig" + + config INET + bool "TCP/IP networking" +diff --git a/net/Makefile b/net/Makefile +index 6a62e5b27..eade2be8e 100644 +--- a/net/Makefile ++++ b/net/Makefile +@@ -78,3 +78,4 @@ obj-$(CONFIG_NET_NCSI) += ncsi/ + obj-$(CONFIG_XDP_SOCKETS) += xdp/ + obj-$(CONFIG_MPTCP) += mptcp/ + obj-$(CONFIG_MCTP) += mctp/ ++obj-$(CONFIG_OENETCLS) += oenetcls/ +diff --git a/net/core/dev.c b/net/core/dev.c +index f628494a1..8abe0dea5 100644 +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -160,6 +160,12 @@ + /* This should be increased if a protocol with a bigger head is added. 
*/ + #define GRO_MAX_HEAD (MAX_HEADER + 128) + ++#if IS_ENABLED(CONFIG_OENETCLS) ++#include <linux/oenetcls.h> ++const struct oecls_hook_ops __rcu *oecls_ops __read_mostly; ++EXPORT_SYMBOL_GPL(oecls_ops); ++#endif ++ + static DEFINE_SPINLOCK(ptype_lock); + static DEFINE_SPINLOCK(offload_lock); + struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; +@@ -4770,6 +4776,10 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, + bool expire = true; + unsigned int cpu; + ++#if IS_ENABLED(CONFIG_OENETCLS) ++ if (oenetcls_may_expire_flow(dev, rxq_index, flow_id, filter_id, &expire)) ++ return expire; ++#endif + rcu_read_lock(); + flow_table = rcu_dereference(rxqueue->rps_flow_table); + if (flow_table && flow_id <= flow_table->mask) { +@@ -5881,6 +5891,9 @@ static int netif_receive_skb_internal(struct sk_buff *skb) + return ret; + } + } ++#endif ++#if IS_ENABLED(CONFIG_OENETCLS) ++ oenetcls_skb_set_cpu(skb); + #endif + ret = __netif_receive_skb(skb); + rcu_read_unlock(); +@@ -5915,6 +5928,9 @@ static void netif_receive_skb_list_internal(struct list_head *head) + } + } + } ++#endif ++#if IS_ENABLED(CONFIG_OENETCLS) ++ oenetcls_skblist_set_cpu(head); + #endif + __netif_receive_skb_list(head); + rcu_read_unlock(); +@@ -10271,6 +10287,9 @@ int __netdev_update_features(struct net_device *dev) + + return err < 0 ? 0 : 1; + } ++#if IS_ENABLED(CONFIG_OENETCLS) ++EXPORT_SYMBOL(__netdev_update_features); ++#endif + + static int netdev_do_alloc_pcpu_stats(struct net_device *dev) + { +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 5dc1955e3..ad4937bc4 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -120,6 +120,9 @@ + #include <net/compat.h> + + #include <trace/events/sock.h> ++#if IS_ENABLED(CONFIG_OENETCLS) ++#include <linux/oenetcls.h> ++#endif + + /* The inetsw table contains everything that inet_create needs to + * build a new socket. +@@ -229,6 +232,9 @@ int inet_listen(struct socket *sock, int backlog) + if (err) + goto out; + tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL); ++#if IS_ENABLED(CONFIG_OENETCLS) ++ oenetcls_cfg_rxcls(sk, 0); ++#endif + } + err = 0; + +diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c +index e8b7f0c5d..9309501c1 100644 +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -281,6 +281,9 @@ + #include <asm/ioctls.h> + #include <net/busy_poll.h> + #include <linux/dma-buf.h> ++#if IS_ENABLED(CONFIG_OENETCLS) ++#include <linux/oenetcls.h> ++#endif + + /* Track pending CMSGs. 
*/ + enum { +@@ -2940,6 +2943,9 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + ++#if IS_ENABLED(CONFIG_OENETCLS) ++ oenetcls_flow_update(sk); ++#endif + if (sk_can_busy_loop(sk) && + skb_queue_empty_lockless(&sk->sk_receive_queue) && + sk->sk_state == TCP_ESTABLISHED) +@@ -3300,6 +3306,9 @@ void __tcp_close(struct sock *sk, long timeout) + void tcp_close(struct sock *sk, long timeout) + { + lock_sock(sk); ++#if IS_ENABLED(CONFIG_OENETCLS) ++ oenetcls_cfg_rxcls(sk, 1); ++#endif + __tcp_close(sk, timeout); + release_sock(sk); + sock_put(sk); +diff --git a/net/oenetcls/Kconfig b/net/oenetcls/Kconfig +new file mode 100644 +index 000000000..1b69c744a +--- /dev/null ++++ b/net/oenetcls/Kconfig +@@ -0,0 +1,7 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++config OENETCLS ++ tristate "Network classification" ++ default n ++ help ++ Allow to bind NIC interrupts and configure ntuple rules to ++ achieve sock numa affinity +diff --git a/net/oenetcls/Makefile b/net/oenetcls/Makefile +new file mode 100644 +index 000000000..f6eeed9e8 +--- /dev/null ++++ b/net/oenetcls/Makefile +@@ -0,0 +1,7 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++ ++obj-$(CONFIG_OENETCLS) = oenetcls.o ++oenetcls-y := oenetcls_main.o oenetcls_ntuple.o oenetcls_flow.o ++ifeq ($(CONFIG_ARM64_SVE),y) ++oenetcls-y += memcpy-sve.o ++endif +diff --git a/net/oenetcls/asmdefs.h b/net/oenetcls/asmdefs.h +new file mode 100644 +index 000000000..8138a94c1 +--- /dev/null ++++ b/net/oenetcls/asmdefs.h +@@ -0,0 +1,61 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++#ifndef _ASMDEFS_H ++#define _ASMDEFS_H ++ ++/* Branch Target Identitication support. */ ++#define BTI_C hint 34 ++#define BTI_J hint 36 ++/* Return address signing support (pac-ret). */ ++#define PACIASP hint 25; .cfi_window_save ++#define AUTIASP hint 29; .cfi_window_save ++ ++/* GNU_PROPERTY_AARCH64_* macros from elf.h. */ ++#define FEATURE_1_AND 0xc0000000 ++#define FEATURE_1_BTI 1 ++#define FEATURE_1_PAC 2 ++ ++/* Add a NT_GNU_PROPERTY_TYPE_0 note. */ ++#define GNU_PROPERTY(type, value) \ ++ .section .note.gnu.property, "a"; \ ++ .p2align 3; \ ++ .word 4; \ ++ .word 16; \ ++ .word 5; \ ++ .asciz "GNU"; \ ++ .word type; \ ++ .word 4; \ ++ .word value; \ ++ .word 0; \ ++ .text ++ ++#ifndef WANT_GNU_PROPERTY ++#define WANT_GNU_PROPERTY 1 ++#endif ++ ++#if WANT_GNU_PROPERTY ++/* Add property note with supported features to all asm files. 
*/ ++GNU_PROPERTY(FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC) ++#endif ++ ++#define ENTRY_ALIGN(name, alignment) \ ++ .global name; \ ++ .type name, %function; \ ++ .align alignment; \ ++name: \ ++ .cfi_startproc; \ ++ BTI_C; ++ ++#define ENTRY(name) ENTRY_ALIGN(name, 6) ++ ++#define ENTRY_ALIAS(name) \ ++ .global name; \ ++ .type name, %function; \ ++ name: ++ ++#define END(name) \ ++ .cfi_endproc; \ ++ .size name, .-name; ++ ++#define L(l) .L ## l ++ ++#endif +diff --git a/net/oenetcls/memcpy-sve.S b/net/oenetcls/memcpy-sve.S +new file mode 100644 +index 000000000..106e4c302 +--- /dev/null ++++ b/net/oenetcls/memcpy-sve.S +@@ -0,0 +1,157 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++#include "asmdefs.h" ++ ++.arch armv8-a+sve ++ ++#define dstin x0 ++#define src x1 ++#define count x2 ++#define dst x3 ++#define srcend x4 ++#define dstend x5 ++#define tmp1 x6 ++#define vlen x6 ++ ++#define A_q q0 ++#define B_q q1 ++#define C_q q2 ++#define D_q q3 ++#define E_q q4 ++#define F_q q5 ++#define G_q q6 ++#define H_q q7 ++ ++/* This implementation handles overlaps and supports both memcpy and memmove ++ from a single entry point. It uses unaligned accesses and branchless ++ sequences to keep the code small, simple and improve performance. ++ SVE vectors are used to speedup small copies. ++ ++ Copies are split into 3 main cases: small copies of up to 32 bytes, medium ++ copies of up to 128 bytes, and large copies. The overhead of the overlap ++ check is negligible since it is only required for large copies. ++ ++ Large copies use a software pipelined loop processing 64 bytes per iteration. ++ The source pointer is 16-byte aligned to minimize unaligned accesses. ++ The loop tail is handled by always copying 64 bytes from the end. ++*/ ++ ++ENTRY_ALIAS (__memmove_aarch64_sve) ++ENTRY (__memcpy_aarch64_sve) ++ cmp count, 128 ++ b.hi L(copy_long) ++ cntb vlen ++ cmp count, vlen, lsl 1 ++ b.hi L(copy32_128) ++ ++ whilelo p0.b, xzr, count ++ whilelo p1.b, vlen, count ++ ld1b z0.b, p0/z, [src, 0, mul vl] ++ ld1b z1.b, p1/z, [src, 1, mul vl] ++ st1b z0.b, p0, [dstin, 0, mul vl] ++ st1b z1.b, p1, [dstin, 1, mul vl] ++ ret ++ ++ /* Medium copies: 33..128 bytes. */ ++L(copy32_128): ++ add srcend, src, count ++ add dstend, dstin, count ++ ldp A_q, B_q, [src] ++ ldp C_q, D_q, [srcend, -32] ++ cmp count, 64 ++ b.hi L(copy128) ++ stp A_q, B_q, [dstin] ++ stp C_q, D_q, [dstend, -32] ++ ret ++ ++ /* Copy 65..128 bytes. */ ++L(copy128): ++ ldp E_q, F_q, [src, 32] ++ cmp count, 96 ++ b.ls L(copy96) ++ ldp G_q, H_q, [srcend, -64] ++ stp G_q, H_q, [dstend, -64] ++L(copy96): ++ stp A_q, B_q, [dstin] ++ stp E_q, F_q, [dstin, 32] ++ stp C_q, D_q, [dstend, -32] ++ ret ++ ++ /* Copy more than 128 bytes. */ ++L(copy_long): ++ add srcend, src, count ++ add dstend, dstin, count ++ ++ /* Use backwards copy if there is an overlap. */ ++ sub tmp1, dstin, src ++ cmp tmp1, count ++ b.lo L(copy_long_backwards) ++ ++ /* Copy 16 bytes and then align src to 16-byte alignment. */ ++ ldr D_q, [src] ++ and tmp1, src, 15 ++ bic src, src, 15 ++ sub dst, dstin, tmp1 ++ add count, count, tmp1 /* Count is now 16 too large. */ ++ ldp A_q, B_q, [src, 16] ++ str D_q, [dstin] ++ ldp C_q, D_q, [src, 48] ++ subs count, count, 128 + 16 /* Test and readjust count. 
*/ ++ b.ls L(copy64_from_end) ++L(loop64): ++ stp A_q, B_q, [dst, 16] ++ ldp A_q, B_q, [src, 80] ++ stp C_q, D_q, [dst, 48] ++ ldp C_q, D_q, [src, 112] ++ add src, src, 64 ++ add dst, dst, 64 ++ subs count, count, 64 ++ b.hi L(loop64) ++ ++ /* Write the last iteration and copy 64 bytes from the end. */ ++L(copy64_from_end): ++ ldp E_q, F_q, [srcend, -64] ++ stp A_q, B_q, [dst, 16] ++ ldp A_q, B_q, [srcend, -32] ++ stp C_q, D_q, [dst, 48] ++ stp E_q, F_q, [dstend, -64] ++ stp A_q, B_q, [dstend, -32] ++ ret ++ ++ /* Large backwards copy for overlapping copies. ++ Copy 16 bytes and then align srcend to 16-byte alignment. */ ++L(copy_long_backwards): ++ cbz tmp1, L(return) ++ ldr D_q, [srcend, -16] ++ and tmp1, srcend, 15 ++ bic srcend, srcend, 15 ++ sub count, count, tmp1 ++ ldp A_q, B_q, [srcend, -32] ++ str D_q, [dstend, -16] ++ ldp C_q, D_q, [srcend, -64] ++ sub dstend, dstend, tmp1 ++ subs count, count, 128 ++ b.ls L(copy64_from_start) ++ ++L(loop64_backwards): ++ str B_q, [dstend, -16] ++ str A_q, [dstend, -32] ++ ldp A_q, B_q, [srcend, -96] ++ str D_q, [dstend, -48] ++ str C_q, [dstend, -64]! ++ ldp C_q, D_q, [srcend, -128] ++ sub srcend, srcend, 64 ++ subs count, count, 64 ++ b.hi L(loop64_backwards) ++ ++ /* Write the last iteration and copy 64 bytes from the start. */ ++L(copy64_from_start): ++ ldp E_q, F_q, [src, 32] ++ stp A_q, B_q, [dstend, -32] ++ ldp A_q, B_q, [src] ++ stp C_q, D_q, [dstend, -64] ++ stp E_q, F_q, [dstin, 32] ++ stp A_q, B_q, [dstin] ++L(return): ++ ret ++ ++END (__memcpy_aarch64_sve) +diff --git a/net/oenetcls/oenetcls.h b/net/oenetcls/oenetcls.h +new file mode 100644 +index 000000000..215ae3e7e +--- /dev/null ++++ b/net/oenetcls/oenetcls.h +@@ -0,0 +1,177 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++#ifndef _NET_OENETCLS_H ++#define _NET_OENETCLS_H ++#include <linux/if.h> ++#include <linux/mutex.h> ++#include <linux/cpufeature.h> ++ ++#define OECLS_MAX_NETDEV_NUM 8 ++#define OECLS_MAX_RXQ_NUM_PER_DEV 256 ++#define OECLS_MAX_CPU_NUM 1024 ++ ++#define OECLS_TIMEOUT (5 * HZ) ++#define OECLS_NO_FILTER 0xffff ++#define OECLS_NO_CPU 0xffff ++ ++struct oecls_netdev_queue_info { ++ int irq; ++ int affinity_cpu; ++}; ++ ++struct oecls_netdev_info { ++ char dev_name[IFNAMSIZ]; ++ struct net_device *netdev; ++ int rxq_num; ++ struct oecls_netdev_queue_info rxq[OECLS_MAX_RXQ_NUM_PER_DEV]; ++ int old_filter_state; ++}; ++ ++struct oecls_rxq { ++ int rxq_id; ++ int status; ++}; ++ ++struct oecls_numa_clusterinfo { ++ int cluster_id; ++ int cur_freeidx; ++ struct oecls_rxq rxqs[OECLS_MAX_RXQ_NUM_PER_DEV]; ++}; ++ ++struct oecls_numa_bound_dev_info { ++ DECLARE_BITMAP(bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV); ++ struct oecls_numa_clusterinfo *cluster_info; ++}; ++ ++struct oecls_numa_info { ++ DECLARE_BITMAP(avail_cpus, OECLS_MAX_CPU_NUM); ++ struct oecls_numa_bound_dev_info bound_dev[OECLS_MAX_NETDEV_NUM]; ++}; ++ ++struct cmd_context { ++ char netdev[IFNAMSIZ]; ++ u32 dip4; ++ u16 dport; ++ u16 action; ++ u32 ruleid; ++ u32 del_ruleid; ++ int ret_loc; ++}; ++ ++#define OECLS_SK_RULE_HASHSIZE 256 ++#define OECLS_SK_RULE_HASHMASK (OECLS_SK_RULE_HASHSIZE - 1) ++ ++struct oecls_sk_rule_list { ++ struct hlist_head hash[OECLS_SK_RULE_HASHSIZE]; ++ /* Mutex to synchronize access to ntuple rule locking */ ++ struct mutex mutex; ++}; ++ ++struct oecls_sk_rule { ++ struct hlist_node node; ++ int devid; ++ void *sk; ++ int dip4; ++ int dport; ++ int action; ++ int ruleid; ++ int nid; ++}; ++ ++struct oecls_sk_entry { ++ struct hlist_node node; ++ void *sk; ++ u32 sk_rule_hash; ++}; 
++ ++struct oecls_dev_flow { ++ unsigned short cpu; ++ unsigned short filter; ++ unsigned int last_qtail; ++ int isvalid; ++ unsigned long timeout; ++}; ++ ++struct oecls_dev_flow_table { ++ unsigned int mask; ++ struct rcu_head rcu; ++ struct oecls_dev_flow flows[]; ++}; ++ ++struct oecls_sock_flow_table { ++ u32 mask; ++ u32 ents[] ____cacheline_aligned_in_smp; ++}; ++ ++#define OECLS_DEV_FLOW_TABLE_NUM 0x1000 ++#define OECLS_SOCK_FLOW_TABLE_NUM 0x100000 ++#define OECLS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct oecls_dev_flow_table) + \ ++ ((_num) * sizeof(struct oecls_dev_flow))) ++#define OECLS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct oecls_sock_flow_table, ents[_num])) ++ ++#define ETH_ALL_FLAGS (ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | \ ++ ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH) ++#define ETH_ALL_FEATURES (NETIF_F_LRO | NETIF_F_HW_VLAN_CTAG_RX | \ ++ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_NTUPLE | \ ++ NETIF_F_RXHASH) ++ ++struct rmgr_ctrl { ++ int driver_select; ++ unsigned long *slot; ++ __u32 n_rules; ++ __u32 size; ++}; ++ ++extern int match_ip_flag; ++extern int debug; ++extern int oecls_netdev_num; ++extern int oecls_numa_num; ++ ++#define oecls_debug(fmt, ...) \ ++ do { \ ++ if (debug) \ ++ trace_printk(fmt, ## __VA_ARGS__); \ ++ } while (0) ++ ++#define oecls_error(fmt, ...) \ ++ do { \ ++ pr_err("oenetcls [%s:%d]: " fmt, __FILE__, __LINE__, ## __VA_ARGS__); \ ++ trace_printk(fmt, ## __VA_ARGS__); \ ++ } while (0) ++ ++struct oecls_netdev_info *get_oecls_netdev_info(unsigned int index); ++ ++#define for_each_oecls_netdev(devid, oecls_dev) \ ++ for (devid = 0, oecls_dev = get_oecls_netdev_info(devid); \ ++ (devid < oecls_netdev_num) && oecls_dev; \ ++ devid++, oecls_dev = get_oecls_netdev_info(devid)) ++ ++struct oecls_numa_info *get_oecls_numa_info(unsigned int nid); ++ ++#define for_each_oecls_numa(nid, numa_info) \ ++ for (nid = 0, numa_info = get_oecls_numa_info(nid); \ ++ (nid < oecls_numa_num) && numa_info; \ ++ nid++, numa_info = get_oecls_numa_info(nid)) ++ ++#ifdef CONFIG_ARM64_SVE ++void *__memcpy_aarch64_sve(void *, const void *, size_t); ++#define memcpy_r(dst, src, len) \ ++ do { \ ++ if (system_supports_sve()) \ ++ __memcpy_aarch64_sve(dst, src, len); \ ++ else \ ++ memcpy(dst, src, len); \ ++ } while (0) ++#else ++#define memcpy_r(dst, src, len) memcpy(dst, src, len) ++#endif ++ ++int check_appname(char *task_name); ++int send_ethtool_ioctl(struct cmd_context *ctx, void *cmd); ++int alloc_rxq_id(int nid, int devid); ++void free_rxq_id(int nid, int devid, int rxq_id); ++void oecls_ntuple_res_init(void); ++void oecls_ntuple_res_clean(void); ++void oecls_flow_res_init(void); ++void oecls_flow_res_clean(void); ++ ++#endif /* _NET_OENETCLS_H */ +diff --git a/net/oenetcls/oenetcls_flow.c b/net/oenetcls/oenetcls_flow.c +new file mode 100644 +index 000000000..15ee13405 +--- /dev/null ++++ b/net/oenetcls/oenetcls_flow.c +@@ -0,0 +1,406 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#include <linux/inetdevice.h> ++#include <linux/netdevice.h> ++#include <linux/rtnetlink.h> ++#include <linux/irq.h> ++#include <linux/irqdesc.h> ++#include <linux/inet.h> ++#include <linux/oenetcls.h> ++#include <net/sock.h> ++ ++#include "oenetcls.h" ++ ++static u32 oecls_cpu_mask; ++static struct oecls_sock_flow_table __rcu *oecls_sock_flow_table; ++static DEFINE_MUTEX(oecls_sock_flow_mutex); ++static DEFINE_SPINLOCK(oecls_dev_flow_lock); ++ ++bool is_oecls_config_netdev(const char *name) ++{ ++ struct oecls_netdev_info *netdev_info; ++ int netdev_loop; ++ ++ 
for_each_oecls_netdev(netdev_loop, netdev_info) ++ if (strcmp(netdev_info->dev_name, name) == 0) ++ return true; ++ ++ return false; ++} ++ ++static bool _oecls_timeout(struct net_device *dev, u16 rxq_index, ++ u32 flow_id, u16 filter_id) ++{ ++ struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; ++ struct oecls_dev_flow_table *flow_table; ++ struct oecls_dev_flow *rflow; ++ bool expire = true; ++ unsigned int cpu; ++ ++ rcu_read_lock(); ++ flow_table = rcu_dereference(rxqueue->oecls_ftb); ++ if (flow_table && flow_id <= flow_table->mask) { ++ rflow = &flow_table->flows[flow_id]; ++ cpu = READ_ONCE(rflow->cpu); ++ oecls_debug("dev:%s, rxq:%d, flow_id:%u, filter_id:%d/%d, cpu:%d\n", dev->name, ++ rxq_index, flow_id, filter_id, rflow->filter, cpu); ++ ++ if (rflow->filter == filter_id && cpu < nr_cpu_ids) { ++ if (time_before(jiffies, rflow->timeout + OECLS_TIMEOUT)) { ++ expire = false; ++ } else { ++ rflow->isvalid = 0; ++ WRITE_ONCE(rflow->cpu, OECLS_NO_CPU); ++ } ++ } ++ } ++ rcu_read_unlock(); ++ oecls_debug("%s, dev:%s, rxq:%d, flow_id:%u, filter_id:%d, expire:%d\n", __func__, ++ dev->name, rxq_index, flow_id, filter_id, expire); ++ return expire; ++} ++ ++static void _oecls_flow_update(struct sock *sk) ++{ ++ struct oecls_sock_flow_table *tb; ++ unsigned int hash, index; ++ u32 val; ++ u32 cpu = raw_smp_processor_id(); ++ ++ if (sk->sk_state != TCP_ESTABLISHED) ++ return; ++ ++ if (check_appname(current->comm)) ++ return; ++ ++ rcu_read_lock(); ++ tb = rcu_dereference(oecls_sock_flow_table); ++ hash = READ_ONCE(sk->sk_rxhash); ++ if (tb && hash) { ++ index = hash & tb->mask; ++ val = hash & ~oecls_cpu_mask; ++ val |= cpu; ++ ++ if (READ_ONCE(tb->ents[index]) != val) { ++ WRITE_ONCE(tb->ents[index], val); ++ ++ oecls_debug("[%s] sk:%p, hash:0x%x, index:0x%x, val:0x%x, cpu:%d\n", ++ current->comm, sk, hash, index, val, cpu); ++ } ++ } ++ rcu_read_unlock(); ++} ++ ++static int flow_get_queue_idx(struct net_device *dev, int nid, struct sk_buff *skb) ++{ ++ struct oecls_netdev_info *netdev_info; ++ int netdev_loop; ++ u32 hash, index; ++ struct oecls_numa_info *numa_info; ++ struct oecls_numa_bound_dev_info *bound_dev = NULL; ++ int rxq_id, rxq_num, i; ++ ++ numa_info = get_oecls_numa_info(nid); ++ if (!numa_info) ++ return -1; ++ ++ for_each_oecls_netdev(netdev_loop, netdev_info) { ++ if (strcmp(netdev_info->dev_name, dev->name) == 0) { ++ bound_dev = &numa_info->bound_dev[netdev_loop]; ++ break; ++ } ++ } ++ ++ if (!bound_dev) ++ return -1; ++ rxq_num = bitmap_weight(bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV); ++ if (rxq_num == 0) ++ return -1; ++ ++ hash = skb_get_hash(skb); ++ index = hash % rxq_num; ++ ++ i = 0; ++ for_each_set_bit(rxq_id, bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV) ++ if (index == i++) ++ return rxq_id; ++ ++ return -1; ++} ++ ++static void set_oecls_cpu(struct net_device *dev, struct sk_buff *skb, ++ struct oecls_dev_flow *old_rflow, int old_rxq_id, u16 next_cpu) ++{ ++ struct netdev_rx_queue *rxqueue; ++ struct oecls_dev_flow_table *dtb; ++ struct oecls_dev_flow *rflow; ++ u32 flow_id, hash; ++ u16 rxq_index; ++ int rc; ++ ++ if (!skb_rx_queue_recorded(skb) || !dev->rx_cpu_rmap || ++ !(dev->features & NETIF_F_NTUPLE)) ++ return; ++ ++ rxq_index = flow_get_queue_idx(dev, cpu_to_node(next_cpu), skb); ++ if (rxq_index == skb_get_rx_queue(skb) || rxq_index < 0) ++ return; ++ ++ rxqueue = dev->_rx + rxq_index; ++ dtb = rcu_dereference(rxqueue->oecls_ftb); ++ if (!dtb) ++ return; ++ ++ hash = skb_get_hash(skb); ++ flow_id = hash & dtb->mask; ++ rflow = 
&dtb->flows[flow_id]; ++ if (rflow->isvalid && rflow->cpu == next_cpu) { ++ rflow->timeout = jiffies; ++ return; ++ } ++ ++ rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); ++ oecls_debug("skb:%p, rxq:%d, hash:0x%x, flow_id:%u, old_rxq_id:%d, next_cpu:%d, rc:%d\n", ++ skb, rxq_index, hash, flow_id, old_rxq_id, next_cpu, rc); ++ if (rc < 0) ++ return; ++ ++ rflow->filter = rc; ++ rflow->isvalid = 1; ++ rflow->timeout = jiffies; ++ if (old_rflow->filter == rflow->filter) ++ old_rflow->filter = OECLS_NO_FILTER; ++ rflow->cpu = next_cpu; ++} ++ ++static void __oecls_set_cpu(struct sk_buff *skb, struct net_device *ndev, ++ struct oecls_sock_flow_table *tb, struct oecls_dev_flow_table *dtb, ++ int old_rxq_id) ++{ ++ struct oecls_dev_flow *rflow; ++ u32 last_recv_cpu, hash, val; ++ u32 tcpu = 0; ++ u32 cpu = raw_smp_processor_id(); ++ ++ skb_reset_network_header(skb); ++ hash = skb_get_hash(skb); ++ if (!hash) ++ return; ++ ++ val = READ_ONCE(tb->ents[hash & tb->mask]); ++ last_recv_cpu = val & oecls_cpu_mask; ++ rflow = &dtb->flows[hash & dtb->mask]; ++ tcpu = rflow->cpu; ++ ++ if ((val ^ hash) & ~oecls_cpu_mask) ++ return; ++ ++ if (cpu_to_node(cpu) == cpu_to_node(last_recv_cpu)) ++ return; ++ ++ if (tcpu >= nr_cpu_ids) ++ set_oecls_cpu(ndev, skb, rflow, old_rxq_id, last_recv_cpu); ++} ++ ++static void _oecls_set_cpu(struct sk_buff *skb) ++{ ++ struct net_device *ndev = skb->dev; ++ struct oecls_sock_flow_table *stb; ++ struct oecls_dev_flow_table *dtb; ++ struct netdev_rx_queue *rxqueue; ++ int rxq_id = -1; ++ ++ if (!ndev) ++ return; ++ ++ if (!is_oecls_config_netdev(ndev->name)) ++ return; ++ ++ rxqueue = ndev->_rx; ++ if (skb_rx_queue_recorded(skb)) { ++ rxq_id = skb_get_rx_queue(skb); ++ if (rxq_id >= ndev->real_num_rx_queues) { ++ oecls_debug("ndev:%s, rxq:%d, real_num:%d\n", ndev->name, ++ rxq_id, ndev->real_num_rx_queues); ++ return; ++ } ++ rxqueue += rxq_id; ++ } ++ ++ // oecls_debug("skb:%px, dev:%s, rxq_id:%d\n", skb, ndev->name, rxq_id); ++ if (rxq_id < 0) ++ return; ++ ++ rcu_read_lock(); ++ stb = rcu_dereference(oecls_sock_flow_table); ++ dtb = rcu_dereference(rxqueue->oecls_ftb); ++ if (stb && dtb) ++ __oecls_set_cpu(skb, ndev, stb, dtb, rxq_id); ++ ++ rcu_read_unlock(); ++} ++ ++static void oecls_dev_flow_table_free(struct rcu_head *rcu) ++{ ++ struct oecls_dev_flow_table *table = container_of(rcu, ++ struct oecls_dev_flow_table, rcu); ++ vfree(table); ++} ++ ++static void oecls_dev_flow_table_cleanup(struct net_device *netdev, int qid) ++{ ++ struct oecls_dev_flow_table *dtb; ++ struct netdev_rx_queue *queue; ++ int i; ++ ++ spin_lock(&oecls_dev_flow_lock); ++ for (i = 0; i < qid; i++) { ++ queue = netdev->_rx + i; ++ dtb = rcu_dereference_protected(queue->oecls_ftb, ++ lockdep_is_held(&oecls_dev_flow_lock)); ++ rcu_assign_pointer(queue->oecls_ftb, NULL); ++ } ++ spin_unlock(&oecls_dev_flow_lock); ++ call_rcu(&dtb->rcu, oecls_dev_flow_table_free); ++} ++ ++static int oecls_dev_flow_table_release(void) ++{ ++ struct oecls_netdev_info *netdev_info; ++ int netdev_loop; ++ struct net_device *netdev; ++ ++ for_each_oecls_netdev(netdev_loop, netdev_info) { ++ netdev = netdev_info->netdev; ++ if (!netdev) ++ continue; ++ oecls_dev_flow_table_cleanup(netdev, netdev->num_rx_queues); ++ } ++ ++ return 0; ++} ++ ++static int _oecls_dev_flow_table_init(struct net_device *netdev) ++{ ++ struct oecls_dev_flow_table *table; ++ int size = OECLS_DEV_FLOW_TABLE_NUM; ++ struct netdev_rx_queue *queue; ++ int i, j, ret = 0; ++ ++ size = roundup_pow_of_two(size); ++ 
oecls_debug("dev:%s, num_rx_queues:%d, mask:0x%x\n", netdev->name, netdev->num_rx_queues, ++ size - 1); ++ ++ for (i = 0; i < netdev->num_rx_queues; i++) { ++ table = vmalloc(OECLS_DEV_FLOW_TABLE_SIZE(size)); ++ if (!table) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ ++ table->mask = size - 1; ++ for (j = 0; j < size; j++) { ++ table->flows[j].cpu = OECLS_NO_CPU; ++ table->flows[j].isvalid = 0; ++ } ++ ++ queue = netdev->_rx + i; ++ ++ spin_lock(&oecls_dev_flow_lock); ++ rcu_assign_pointer(queue->oecls_ftb, table); ++ spin_unlock(&oecls_dev_flow_lock); ++ } ++ return ret; ++fail: ++ oecls_dev_flow_table_cleanup(netdev, i); ++ return ret; ++} ++ ++static int oecls_dev_flow_table_init(void) ++{ ++ struct oecls_netdev_info *netdev_info; ++ int netdev_loop; ++ struct net_device *ndev; ++ int i, err; ++ ++ for_each_oecls_netdev(netdev_loop, netdev_info) { ++ ndev = netdev_info->netdev; ++ if (!ndev) ++ continue; ++ err = _oecls_dev_flow_table_init(ndev); ++ if (err) ++ goto out; ++ } ++ ++ return 0; ++out: ++ for (i = 0; i < netdev_loop; i++) { ++ netdev_info = get_oecls_netdev_info(i); ++ ndev = netdev_info->netdev; ++ if (!ndev) ++ continue; ++ oecls_dev_flow_table_cleanup(ndev, ndev->num_rx_queues); ++ } ++ return err; ++} ++ ++static const struct oecls_hook_ops oecls_flow_ops = { ++ .oecls_flow_update = _oecls_flow_update, ++ .oecls_set_cpu = _oecls_set_cpu, ++ .oecls_timeout = _oecls_timeout, ++ .oecls_cfg_rxcls = NULL, ++}; ++ ++static int oecls_sock_flow_table_release(void) ++{ ++ struct oecls_sock_flow_table *tb; ++ ++ mutex_lock(&oecls_sock_flow_mutex); ++ tb = rcu_dereference_protected(oecls_sock_flow_table, ++ lockdep_is_held(&oecls_sock_flow_mutex)); ++ if (tb) ++ rcu_assign_pointer(oecls_sock_flow_table, NULL); ++ mutex_unlock(&oecls_sock_flow_mutex); ++ synchronize_rcu(); ++ vfree(tb); ++ ++ return 0; ++} ++ ++static int oecls_sock_flow_table_init(void) ++{ ++ struct oecls_sock_flow_table *table; ++ int size = OECLS_SOCK_FLOW_TABLE_NUM; ++ int i; ++ ++ size = roundup_pow_of_two(size); ++ table = vmalloc(OECLS_SOCK_FLOW_TABLE_SIZE(size)); ++ if (!table) ++ return -ENOMEM; ++ ++ oecls_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; ++ oecls_debug("nr_cpu_ids:%d, oecls_cpu_mask:0x%x\n", nr_cpu_ids, oecls_cpu_mask); ++ ++ table->mask = size - 1; ++ for (i = 0; i < size; i++) ++ table->ents[i] = OECLS_NO_CPU; ++ ++ mutex_lock(&oecls_sock_flow_mutex); ++ rcu_assign_pointer(oecls_sock_flow_table, table); ++ mutex_unlock(&oecls_sock_flow_mutex); ++ ++ return 0; ++} ++ ++void oecls_flow_res_init(void) ++{ ++ oecls_sock_flow_table_init(); ++ oecls_dev_flow_table_init(); ++ RCU_INIT_POINTER(oecls_ops, &oecls_flow_ops); ++} ++ ++void oecls_flow_res_clean(void) ++{ ++ RCU_INIT_POINTER(oecls_ops, NULL); ++ oecls_sock_flow_table_release(); ++ oecls_dev_flow_table_release(); ++} +diff --git a/net/oenetcls/oenetcls_main.c b/net/oenetcls/oenetcls_main.c +new file mode 100644 +index 000000000..b69d10036 +--- /dev/null ++++ b/net/oenetcls/oenetcls_main.c +@@ -0,0 +1,1075 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++#include <linux/module.h> ++#include <linux/netdevice.h> ++#include <linux/netdev_features.h> ++#include <linux/ethtool.h> ++#include <linux/irq.h> ++#include <linux/irqdesc.h> ++#include <linux/rtnetlink.h> ++#include "oenetcls.h" ++ ++int oecls_netdev_num; ++static struct oecls_netdev_info oecls_netdev_info_table[OECLS_MAX_NETDEV_NUM]; ++ ++int oecls_numa_num; ++static int oecls_cluster_cpu_num, oecls_cluster_per_numa; ++static struct oecls_numa_info *oecls_numa_info_table; ++ ++int 
debug; ++module_param(debug, int, 0644); ++MODULE_PARM_DESC(debug, "debug switch"); ++ ++static int mode; ++module_param(mode, int, 0444); ++MODULE_PARM_DESC(mode, "mode, default 0"); ++ ++static char ifname[64] = { 0 }; ++module_param_string(ifname, ifname, sizeof(ifname), 0444); ++MODULE_PARM_DESC(ifname, "ifname"); ++ ++static char appname[64] = "redis-server"; ++module_param_string(appname, appname, sizeof(appname), 0644); ++MODULE_PARM_DESC(appname, "appname, default redis-server"); ++ ++int match_ip_flag = 1; ++module_param(match_ip_flag, int, 0644); ++MODULE_PARM_DESC(match_ip_flag, "match ip flag"); ++ ++static int strategy; ++module_param(strategy, int, 0444); ++MODULE_PARM_DESC(strategy, "strategy, default 0"); ++ ++static bool check_params(void) ++{ ++ if (mode != 0 && mode != 1) ++ return false; ++ ++ if (strlen(ifname) == 0) ++ return false; ++ ++ return true; ++} ++ ++int check_appname(char *task_name) ++{ ++ char *start = appname, *end; ++ ++ if (!strlen(appname)) ++ return 0; ++ ++ // support appname: app1#app2#appN ++ while (*start != '\0') { ++ end = strchr(start, '#'); ++ if (end == start) { ++ start++; ++ continue; ++ } ++ ++ if (!end) { ++ if (!strncmp(task_name, start, strlen(start))) ++ return 0; ++ break; ++ } ++ ++ if (!strncmp(task_name, start, end - start)) ++ return 0; ++ start = end + 1; ++ } ++ return -EOPNOTSUPP; ++} ++ ++static u32 __ethtool_get_flags(struct net_device *dev) ++{ ++ u32 flags = 0; ++ ++ if (dev->features & NETIF_F_LRO) ++ flags |= ETH_FLAG_LRO; ++ if (dev->features & NETIF_F_HW_VLAN_CTAG_RX) ++ flags |= ETH_FLAG_RXVLAN; ++ if (dev->features & NETIF_F_HW_VLAN_CTAG_TX) ++ flags |= ETH_FLAG_TXVLAN; ++ if (dev->features & NETIF_F_NTUPLE) ++ flags |= ETH_FLAG_NTUPLE; ++ if (dev->features & NETIF_F_RXHASH) ++ flags |= ETH_FLAG_RXHASH; ++ ++ return flags; ++} ++ ++static int __ethtool_set_flags(struct net_device *dev, u32 data) ++{ ++ netdev_features_t features = 0, changed; ++ ++ if (data & ~ETH_ALL_FLAGS) ++ return -EINVAL; ++ ++ if (data & ETH_FLAG_LRO) ++ features |= NETIF_F_LRO; ++ if (data & ETH_FLAG_RXVLAN) ++ features |= NETIF_F_HW_VLAN_CTAG_RX; ++ if (data & ETH_FLAG_TXVLAN) ++ features |= NETIF_F_HW_VLAN_CTAG_TX; ++ if (data & ETH_FLAG_NTUPLE) ++ features |= NETIF_F_NTUPLE; ++ if (data & ETH_FLAG_RXHASH) ++ features |= NETIF_F_RXHASH; ++ ++ /* allow changing only bits set in hw_features */ ++ changed = (features ^ dev->features) & ETH_ALL_FEATURES; ++ if (changed & ~dev->hw_features) ++ return (changed & dev->hw_features) ? 
-EINVAL : -EOPNOTSUPP; ++ ++ dev->wanted_features = ++ (dev->wanted_features & ~changed) | (features & changed); ++ ++ __netdev_update_features(dev); ++ ++ return 0; ++} ++ ++static void ethtool_rxnfc_copy_to_user(void *useraddr, ++ const struct ethtool_rxnfc *rxnfc, ++ size_t size, const u32 *rule_buf) ++{ ++ memcpy_r(useraddr, rxnfc, size); ++ useraddr += offsetof(struct ethtool_rxnfc, rule_locs); ++ ++ if (rule_buf) ++ memcpy_r(useraddr, rule_buf, rxnfc->rule_cnt * sizeof(u32)); ++} ++ ++static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, ++ u32 cmd, void *useraddr) ++{ ++ struct ethtool_rxnfc info; ++ size_t info_size = sizeof(info); ++ int rc; ++ ++ if (!dev->ethtool_ops->set_rxnfc) ++ return -EOPNOTSUPP; ++ ++ if (cmd == ETHTOOL_SRXFH) ++ info_size = (offsetof(struct ethtool_rxnfc, data) + ++ sizeof(info.data)); ++ ++ memcpy_r(&info, useraddr, info_size); ++ rc = dev->ethtool_ops->set_rxnfc(dev, &info); ++ if (rc) ++ return rc; ++ ++ if (cmd == ETHTOOL_SRXCLSRLINS) ++ ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, NULL); ++ ++ return 0; ++} ++ ++static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, ++ u32 cmd, void *useraddr) ++{ ++ struct ethtool_rxnfc info; ++ size_t info_size = sizeof(info); ++ const struct ethtool_ops *ops = dev->ethtool_ops; ++ int ret; ++ void *rule_buf = NULL; ++ ++ if (!ops->get_rxnfc) ++ return -EOPNOTSUPP; ++ ++ if (cmd == ETHTOOL_GRXFH) ++ info_size = (offsetof(struct ethtool_rxnfc, data) + ++ sizeof(info.data)); ++ ++ memcpy_r(&info, useraddr, info_size); ++ ++ /* If FLOW_RSS was requested then user-space must be using the ++ * new definition, as FLOW_RSS is newer. ++ */ ++ if (cmd == ETHTOOL_GRXFH && info.flow_type & FLOW_RSS) { ++ info_size = sizeof(info); ++ memcpy_r(&info, useraddr, info_size); ++ /* Since malicious users may modify the original data, ++ * we need to check whether FLOW_RSS is still requested. 
++ */ ++ if (!(info.flow_type & FLOW_RSS)) ++ return -EINVAL; ++ } ++ ++ if (info.cmd != cmd) ++ return -EINVAL; ++ ++ if (info.cmd == ETHTOOL_GRXCLSRLALL) { ++ if (info.rule_cnt > 0) { ++ if (info.rule_cnt <= KMALLOC_MAX_SIZE / sizeof(u32)) ++ rule_buf = kcalloc(info.rule_cnt, sizeof(u32), ++ GFP_KERNEL); ++ if (!rule_buf) ++ return -ENOMEM; ++ } ++ } ++ ++ ret = ops->get_rxnfc(dev, &info, rule_buf); ++ if (ret < 0) ++ goto err_out; ++ ++ ethtool_rxnfc_copy_to_user(useraddr, &info, info_size, rule_buf); ++err_out: ++ kfree(rule_buf); ++ ++ return ret; ++} ++ ++static noinline_for_stack int ethtool_get_channels(struct net_device *dev, ++ void *useraddr) ++{ ++ struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; ++ ++ if (!dev->ethtool_ops->get_channels) ++ return -EOPNOTSUPP; ++ ++ dev->ethtool_ops->get_channels(dev, &channels); ++ ++ memcpy_r(useraddr, &channels, sizeof(channels)); ++ return 0; ++} ++ ++static int ethtool_get_value(struct net_device *dev, char *useraddr, ++ u32 cmd, u32 (*actor)(struct net_device *)) ++{ ++ struct ethtool_value edata = { .cmd = cmd }; ++ ++ if (!actor) ++ return -EOPNOTSUPP; ++ ++ edata.data = actor(dev); ++ ++ memcpy_r(useraddr, &edata, sizeof(edata)); ++ return 0; ++} ++ ++static int ethtool_set_value(struct net_device *dev, char *useraddr, ++ int (*actor)(struct net_device *, u32)) ++{ ++ struct ethtool_value edata; ++ ++ if (!actor) ++ return -EOPNOTSUPP; ++ ++ memcpy_r(&edata, useraddr, sizeof(edata)); ++ ++ return actor(dev, edata.data); ++} ++ ++static int dev_ethtool_kern(struct net *net, struct ifreq *ifr) ++{ ++ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name); ++ void *useraddr = ifr->ifr_data; ++ u32 ethcmd, sub_cmd; ++ int rc; ++ netdev_features_t old_features; ++ ++ if (!dev || !netif_device_present(dev)) ++ return -ENODEV; ++ ++ memcpy_r(ðcmd, useraddr, sizeof(ethcmd)); ++ ++ if (ethcmd == ETHTOOL_PERQUEUE) ++ memcpy_r(&sub_cmd, useraddr + sizeof(ethcmd), sizeof(sub_cmd)); ++ else ++ sub_cmd = ethcmd; ++ ++ /* Allow some commands to be done by anyone */ ++ switch (sub_cmd) { ++ case ETHTOOL_GFLAGS: ++ case ETHTOOL_GRXFH: ++ case ETHTOOL_GRXRINGS: ++ case ETHTOOL_GRXCLSRLCNT: ++ case ETHTOOL_GRXCLSRULE: ++ case ETHTOOL_GRXCLSRLALL: ++ case ETHTOOL_GCHANNELS: ++ break; ++ default: ++ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) ++ return -EPERM; ++ } ++ ++ if (dev->ethtool_ops->begin) { ++ rc = dev->ethtool_ops->begin(dev); ++ if (rc < 0) ++ return rc; ++ } ++ old_features = dev->features; ++ ++ switch (ethcmd) { ++ case ETHTOOL_GFLAGS: ++ rc = ethtool_get_value(dev, useraddr, ethcmd, ++ __ethtool_get_flags); ++ break; ++ case ETHTOOL_SFLAGS: ++ rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags); ++ break; ++ case ETHTOOL_GRXFH: ++ case ETHTOOL_GRXRINGS: ++ case ETHTOOL_GRXCLSRLCNT: ++ case ETHTOOL_GRXCLSRULE: ++ case ETHTOOL_GRXCLSRLALL: ++ rc = ethtool_get_rxnfc(dev, ethcmd, useraddr); ++ break; ++ case ETHTOOL_SRXFH: ++ case ETHTOOL_SRXCLSRLDEL: ++ case ETHTOOL_SRXCLSRLINS: ++ rc = ethtool_set_rxnfc(dev, ethcmd, useraddr); ++ break; ++ case ETHTOOL_GCHANNELS: ++ rc = ethtool_get_channels(dev, useraddr); ++ break; ++ default: ++ rc = -EOPNOTSUPP; ++ } ++ ++ if (dev->ethtool_ops->complete) ++ dev->ethtool_ops->complete(dev); ++ ++ if (old_features != dev->features) ++ netdev_features_change(dev); ++ ++ return rc; ++} ++ ++int send_ethtool_ioctl(struct cmd_context *ctx, void *cmd) ++{ ++ struct ifreq ifr = {0}; ++ int ret; ++ ++ strncpy(ifr.ifr_name, ctx->netdev, IFNAMSIZ); ++ ifr.ifr_data = cmd; ++ ++ 
rtnl_lock(); ++ ret = dev_ethtool_kern(&init_net, &ifr); ++ rtnl_unlock(); ++ ++ return ret; ++} ++ ++struct oecls_netdev_info *get_oecls_netdev_info(unsigned int index) ++{ ++ if (index >= OECLS_MAX_NETDEV_NUM) ++ return NULL; ++ return &oecls_netdev_info_table[index]; ++} ++ ++static struct oecls_netdev_info *alloc_oecls_netdev_info(void) ++{ ++ if (oecls_netdev_num >= OECLS_MAX_NETDEV_NUM) ++ return NULL; ++ ++ return &oecls_netdev_info_table[oecls_netdev_num++]; ++} ++ ++static bool check_irq_name(const char *irq_name, struct oecls_netdev_info *oecls_dev) ++{ ++ if (!strstr(irq_name, "TxRx") && !strstr(irq_name, "comp") && !strstr(irq_name, "rx")) ++ return false; ++ ++ if (strstr(irq_name, oecls_dev->dev_name)) ++ return true; ++ ++ if (oecls_dev->netdev->dev.parent && ++ strstr(irq_name, dev_name(oecls_dev->netdev->dev.parent))) ++ return true; ++ ++ return false; ++} ++ ++static void get_netdev_queue_info(struct oecls_netdev_info *oecls_dev) ++{ ++ struct oecls_netdev_queue_info *rxq_info; ++ struct irq_desc *desc; ++ int irq, cpu; ++ ++ for_each_irq_desc(irq, desc) { ++ if (!desc->action) ++ continue; ++ if (!desc->action->name) ++ continue; ++ if (!check_irq_name(desc->action->name, oecls_dev)) ++ continue; ++ if (oecls_dev->rxq_num >= OECLS_MAX_RXQ_NUM_PER_DEV) ++ break; ++ rxq_info = &oecls_dev->rxq[oecls_dev->rxq_num++]; ++ rxq_info->irq = irq; ++ cpu = cpumask_first(irq_data_get_effective_affinity_mask(&desc->irq_data)); ++ rxq_info->affinity_cpu = cpu; ++ oecls_debug("irq=%d, [%s], rxq_id=%d affinity_cpu:%d\n", ++ irq, desc->action->name, oecls_dev->rxq_num, cpu); ++ } ++} ++ ++static int oecls_filter_enable(const char *dev_name, bool *old_state) ++{ ++ struct ethtool_value eval = {0}; ++ struct cmd_context ctx = {0}; ++ int ret; ++ ++ strncpy(ctx.netdev, dev_name, IFNAMSIZ); ++ ++ eval.cmd = ETHTOOL_GFLAGS; ++ ret = send_ethtool_ioctl(&ctx, &eval); ++ if (ret != 0) { ++ oecls_error("get %s flags fail, ret:%d\n", dev_name, ret); ++ return ret; ++ } ++ if (eval.data & ETH_FLAG_NTUPLE) { ++ *old_state = true; ++ oecls_debug("%s ntuple is already on\n", dev_name); ++ return 0; ++ } ++ ++ // Set ntuple feature ++ eval.cmd = ETHTOOL_SFLAGS; ++ eval.data |= ETH_FLAG_NTUPLE; ++ ret = send_ethtool_ioctl(&ctx, &eval); ++ if (ret != 0) { ++ oecls_error("set %s flags fail, ret:%d\n", dev_name, ret); ++ return ret; ++ } ++ ++ // Get ntuple feature ++ eval.cmd = ETHTOOL_GFLAGS; ++ eval.data = 0; ++ ret = send_ethtool_ioctl(&ctx, &eval); ++ if (ret != 0) { ++ oecls_error("get %s flags fail, ret:%d\n", dev_name, ret); ++ return ret; ++ } ++ if (!(eval.data & ETH_FLAG_NTUPLE)) { ++ oecls_error("enable ntuple feature fail!\n"); ++ return -EOPNOTSUPP; ++ } ++ ++ return 0; ++} ++ ++static void oecls_filter_restore(const char *dev_name, bool old_state) ++{ ++ struct ethtool_value eval = {0}; ++ struct cmd_context ctx = {0}; ++ bool cur_filter_state; ++ int ret; ++ ++ strncpy(ctx.netdev, dev_name, IFNAMSIZ); ++ ++ eval.cmd = ETHTOOL_GFLAGS; ++ ret = send_ethtool_ioctl(&ctx, &eval); ++ if (ret != 0) { ++ oecls_error("get %s flags fail, ret:%d\n", dev_name, ret); ++ return; ++ } ++ ++ cur_filter_state = (eval.data & ETH_FLAG_NTUPLE) ? 
true : false; ++ if (cur_filter_state == old_state) ++ return; ++ ++ // Set ntuple feature ++ eval.cmd = ETHTOOL_SFLAGS; ++ if (old_state) ++ eval.data |= ETH_FLAG_NTUPLE; ++ else ++ eval.data &= ~ETH_FLAG_NTUPLE; ++ ret = send_ethtool_ioctl(&ctx, &eval); ++ if (ret != 0) { ++ oecls_error("set %s flags fail, ret:%d\n", dev_name, ret); ++ return; ++ } ++} ++ ++static int init_single_oecls_dev(char *if_name, unsigned int length) ++{ ++ struct oecls_netdev_info *oecls_dev; ++ char dev_name[IFNAMSIZ] = { 0 }; ++ struct net_device *netdev; ++ int cpy_len = length < IFNAMSIZ ? length : IFNAMSIZ; ++ bool old_state = false; ++ int ret; ++ ++ strncpy(dev_name, if_name, cpy_len); ++ netdev = dev_get_by_name(&init_net, dev_name); ++ if (!netdev) { ++ oecls_error("dev [%s] is not exist!\n", dev_name); ++ return -ENODEV; ++ } ++ ++ if (!(netdev->flags & IFF_UP)) { ++ ret = -ENETDOWN; ++ oecls_error("dev:%s not up! flags=%d.\n", dev_name, netdev->flags); ++ goto out; ++ } ++ ++ if (netdev->flags & IFF_LOOPBACK) { ++ ret = -EOPNOTSUPP; ++ oecls_error("Do not support loopback.\n"); ++ goto out; ++ } ++ ++ ret = oecls_filter_enable(dev_name, &old_state); ++ if (ret) { ++ oecls_error("dev [%s] not support ntuple! ret=%d\n", dev_name, ret); ++ goto out; ++ } ++ ++ oecls_dev = alloc_oecls_netdev_info(); ++ if (!oecls_dev) { ++ ret = -ENOMEM; ++ oecls_filter_restore(dev_name, old_state); ++ oecls_error("alloc oecls_dev fail! oecls_netdev_num:%d\n", oecls_netdev_num); ++ goto out; ++ } ++ ++ memcpy_r(oecls_dev->dev_name, dev_name, IFNAMSIZ); ++ oecls_dev->old_filter_state = old_state; ++ oecls_dev->netdev = netdev; ++ get_netdev_queue_info(oecls_dev); ++ return 0; ++ ++out: ++ dev_put(netdev); ++ return ret; ++} ++ ++static void clean_oecls_netdev_info(void) ++{ ++ struct oecls_netdev_info *oecls_dev; ++ struct net_device *netdev; ++ int devid; ++ ++ for_each_oecls_netdev(devid, oecls_dev) { ++ oecls_filter_restore(oecls_dev->dev_name, oecls_dev->old_filter_state); ++ netdev = oecls_dev->netdev; ++ if (netdev) { ++ oecls_dev->netdev = NULL; ++ dev_put(netdev); ++ } ++ } ++ ++ oecls_netdev_num = 0; ++} ++ ++static int init_oecls_netdev_info(char *netdev_str) ++{ ++ char *start = netdev_str, *end; ++ int err = -ENODEV; ++ ++ while (*start != '\0') { ++ // skip start # ++ end = strchr(start, '#'); ++ if (end == start) { ++ start++; ++ continue; ++ } ++ ++ // find the last ifname ++ if (!end) { ++ err = init_single_oecls_dev(start, strlen(start)); ++ break; ++ } ++ ++ err = init_single_oecls_dev(start, end - start); ++ if (err) ++ break; ++ start = end + 1; ++ } ++ ++ return err; ++} ++ ++struct oecls_numa_info *get_oecls_numa_info(unsigned int nid) ++{ ++ if (nid >= oecls_numa_num) ++ return NULL; ++ return &oecls_numa_info_table[nid]; ++} ++ ++static void clean_oecls_numa_info(void) ++{ ++ oecls_numa_num = 0; ++ kfree(oecls_numa_info_table); ++} ++ ++static void init_numa_avail_cpus(int nid, struct oecls_numa_info *numa_info) ++{ ++ int cpu; ++ ++ oecls_debug("numa node %d: %*pb, %*pbl\n", nid, cpumask_pr_args(cpumask_of_node(nid)), ++ cpumask_pr_args(cpumask_of_node(nid))); ++ ++ bitmap_zero(numa_info->avail_cpus, OECLS_MAX_CPU_NUM); ++ for_each_cpu(cpu, cpumask_of_node(nid)) { ++ if (cpu >= OECLS_MAX_CPU_NUM) ++ return; ++ set_bit(cpu, numa_info->avail_cpus); ++ } ++} ++ ++static void clean_oecls_rxq(void) ++{ ++ struct oecls_numa_bound_dev_info *bound_dev; ++ struct oecls_netdev_info *oecls_dev; ++ struct oecls_numa_info *numa_info; ++ int nid, devid; ++ ++ for_each_oecls_numa(nid, numa_info) { ++ 
for_each_oecls_netdev(devid, oecls_dev) { ++ bound_dev = &numa_info->bound_dev[devid]; ++ kfree(bound_dev->cluster_info); ++ } ++ } ++} ++ ++static int init_numa_rxq_bitmap(int nid, struct oecls_numa_info *numa_info) ++{ ++ int bound_rxq_num, cluster_id, cluster_idx, cur_idx; ++ struct oecls_numa_bound_dev_info *bound_dev; ++ struct oecls_netdev_info *oecls_dev; ++ int rxq_id, devid, cpu, ret = 0; ++ ++ for_each_oecls_netdev(devid, oecls_dev) { ++ bound_rxq_num = 0; ++ bound_dev = &numa_info->bound_dev[devid]; ++ bitmap_zero(bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV); ++ bound_dev->cluster_info = kzalloc(sizeof(struct oecls_numa_clusterinfo) ++ * oecls_cluster_per_numa, GFP_ATOMIC); ++ if (!bound_dev->cluster_info) { ++ ret = -ENOMEM; ++ goto out; ++ } ++ ++ for (rxq_id = 0; rxq_id < oecls_dev->rxq_num; rxq_id++) { ++ cpu = oecls_dev->rxq[rxq_id].affinity_cpu; ++ if (cpu_to_node(cpu) == nid) { ++ set_bit(rxq_id, bound_dev->bitmap_rxq); ++ cluster_id = cpu / oecls_cluster_cpu_num; ++ cluster_idx = cluster_id % oecls_cluster_per_numa; ++ bound_dev->cluster_info[cluster_idx].cluster_id = cluster_id; ++ cur_idx = bound_dev->cluster_info[cluster_idx].cur_freeidx++; ++ bound_dev->cluster_info[cluster_idx].rxqs[cur_idx].rxq_id = rxq_id; ++ bound_dev->cluster_info[cluster_idx].rxqs[cur_idx].status = 1; ++ bound_rxq_num++; ++ oecls_debug("cpu:%d cluster_id:%d cluster_idx:%d rxq_id:%d cur_idx:%d\n", ++ cpu, cluster_id, cluster_idx, rxq_id, cur_idx); ++ } ++ } ++ ++ oecls_debug("nid:%d, dev_id:%d, dev:%s, rxq_num:%d, bit_num:%d, bitmap_rxq:%*pbl\n", ++ nid, devid, oecls_dev->dev_name, oecls_dev->rxq_num, ++ bound_rxq_num, OECLS_MAX_RXQ_NUM_PER_DEV, bound_dev->bitmap_rxq); ++ } ++ return ret; ++ ++out: ++ clean_oecls_rxq(); ++ return ret; ++} ++ ++static int get_cluster_rxq(struct oecls_numa_bound_dev_info *bound_dev) ++{ ++ int cpu = smp_processor_id(); ++ int cluster_id = cpu / oecls_cluster_cpu_num; ++ int i, j, rxq_id; ++ ++ for (i = 0; i < oecls_cluster_per_numa; i++) { ++ if (cluster_id != bound_dev->cluster_info[i].cluster_id) ++ continue; ++ for (j = 0; j < OECLS_MAX_RXQ_NUM_PER_DEV; j++) { ++ if (bound_dev->cluster_info[i].rxqs[j].status == 1) { ++ bound_dev->cluster_info[i].rxqs[j].status = 2; ++ rxq_id = bound_dev->cluster_info[i].rxqs[j].rxq_id; ++ oecls_debug("cluster:%d cpu:%d alloc rxq_id:%d\n", ++ cluster_id, cpu, rxq_id); ++ return rxq_id; ++ } ++ } ++ } ++ oecls_debug("cluster:%d no free rxq for cpu:%d\n", cluster_id, cpu); ++ return -1; ++} ++ ++static int put_cluster_rxq(struct oecls_numa_bound_dev_info *bound_dev, int rxq_id) ++{ ++ int i, j; ++ ++ for (i = 0; i < oecls_cluster_per_numa; i++) { ++ for (j = 0; j < OECLS_MAX_RXQ_NUM_PER_DEV; j++) { ++ if (bound_dev->cluster_info[i].rxqs[j].status == 2 && ++ bound_dev->cluster_info[i].rxqs[j].rxq_id == rxq_id) { ++ bound_dev->cluster_info[i].rxqs[j].status = 1; ++ oecls_debug("free rxq_id:%d\n", rxq_id); ++ return 0; ++ } ++ } ++ } ++ oecls_debug("no match malloced rxq_id:%d\n", rxq_id); ++ return -1; ++} ++ ++int alloc_rxq_id(int nid, int devid) ++{ ++ struct oecls_numa_bound_dev_info *bound_dev; ++ struct oecls_numa_info *numa_info; ++ int rxq_id; ++ ++ numa_info = get_oecls_numa_info(nid); ++ if (!numa_info) { ++ oecls_error("error nid:%d\n", nid); ++ return -EINVAL; ++ } ++ ++ if (devid >= OECLS_MAX_NETDEV_NUM) { ++ oecls_error("error bound_dev index:%d\n", devid); ++ return -EINVAL; ++ } ++ bound_dev = &numa_info->bound_dev[devid]; ++ ++ if (strategy == 1) { ++ rxq_id = get_cluster_rxq(bound_dev); ++ if (rxq_id < 0 || 
rxq_id >= OECLS_MAX_RXQ_NUM_PER_DEV) ++ pr_info("failed to get rxq_id:%d in cluster, try numa\n", rxq_id); ++ else ++ goto found; ++ } ++ ++ rxq_id = find_first_bit(bound_dev->bitmap_rxq, OECLS_MAX_RXQ_NUM_PER_DEV); ++ if (rxq_id >= OECLS_MAX_RXQ_NUM_PER_DEV) { ++ oecls_error("error rxq_id:%d\n", rxq_id); ++ return -EINVAL; ++ } ++ ++found: ++ clear_bit(rxq_id, bound_dev->bitmap_rxq); ++ oecls_debug("alloc nid:%d, dev_id:%d, rxq_id:%d\n", nid, devid, rxq_id); ++ return rxq_id; ++} ++ ++void free_rxq_id(int nid, int devid, int rxq_id) ++{ ++ struct oecls_numa_bound_dev_info *bound_dev; ++ struct oecls_numa_info *numa_info; ++ ++ numa_info = get_oecls_numa_info(nid); ++ if (!numa_info) { ++ oecls_error("error nid:%d\n", nid); ++ return; ++ } ++ ++ if (devid >= OECLS_MAX_NETDEV_NUM) { ++ oecls_error("error bound_dev index:%d\n", devid); ++ return; ++ } ++ bound_dev = &numa_info->bound_dev[devid]; ++ ++ if (rxq_id >= OECLS_MAX_RXQ_NUM_PER_DEV) { ++ oecls_error("error rxq_id:%d\n", rxq_id); ++ return; ++ } ++ ++ if (strategy == 1) ++ put_cluster_rxq(bound_dev, rxq_id); ++ ++ if (test_bit(rxq_id, bound_dev->bitmap_rxq)) { ++ oecls_error("error nid:%d, devid:%d, rxq_id:%d\n", nid, devid, rxq_id); ++ return; ++ } ++ ++ set_bit(rxq_id, bound_dev->bitmap_rxq); ++ oecls_debug("free nid:%d, dev_id:%d, rxq_id:%d\n", nid, devid, rxq_id); ++} ++ ++static int init_oecls_numa_info(void) ++{ ++ struct oecls_numa_info *numa_info; ++ int nid, ret = 0; ++ ++ oecls_numa_num = num_online_nodes(); ++ oecls_numa_info_table = kzalloc(sizeof(struct oecls_numa_info) * oecls_numa_num, GFP_ATOMIC); ++ if (!oecls_numa_info_table) { ++ ret = -ENOMEM; ++ oecls_error("oecls_numa_info_table alloc failed:%d\n", ret); ++ return ret; ++ } ++ ++ oecls_cluster_cpu_num = cpumask_weight(topology_cluster_cpumask(smp_processor_id())); ++ oecls_cluster_per_numa = (nr_cpu_ids / oecls_cluster_cpu_num) / oecls_numa_num; ++ oecls_debug("oecls_numa_num=%d cluster_cpu_num:%d cluster_cpu_num:%d\n", ++ oecls_numa_num, oecls_cluster_per_numa, oecls_cluster_cpu_num); ++ ++ for_each_oecls_numa(nid, numa_info) ++ init_numa_avail_cpus(nid, numa_info); ++ ++ return ret; ++} ++ ++static int alloc_available_cpu(int nid, struct oecls_numa_info *numa_info) ++{ ++ int cpu; ++ ++ cpu = find_first_bit(numa_info->avail_cpus, OECLS_MAX_CPU_NUM); ++ if (cpu >= OECLS_MAX_CPU_NUM) { ++ oecls_error("no available cpus: nid=%d, cpu=%d\n", nid, cpu); ++ return -1; ++ } ++ ++ clear_bit(cpu, numa_info->avail_cpus); ++ return cpu; ++} ++ ++static void add_netdev_irq_affinity_cpu(struct oecls_netdev_info *oecls_dev, int rxq_id, int cpu) ++{ ++ struct oecls_netdev_queue_info *rxq_info; ++ ++ if (rxq_id >= OECLS_MAX_RXQ_NUM_PER_DEV) ++ return; ++ ++ rxq_info = &oecls_dev->rxq[rxq_id]; ++ rxq_info->affinity_cpu = cpu; ++} ++ ++static void config_affinity_strategy_default(struct oecls_netdev_info *oecls_dev) ++{ ++ struct oecls_numa_info *numa_info; ++ int rxq_num = oecls_dev->rxq_num; ++ int rxq_per_numa = rxq_num / oecls_numa_num; ++ int remain = rxq_num - rxq_per_numa * oecls_numa_num; ++ int numa_rxq_id, rxq_id, nid, cpu; ++ ++ oecls_debug("dev=%s, rxq_num=%d, rxq_per_numa=%d, remain=%d\n", oecls_dev->dev_name, ++ rxq_num, rxq_per_numa, remain); ++ ++ // average config rxq to every numa ++ for_each_oecls_numa(nid, numa_info) { ++ for (numa_rxq_id = 0; numa_rxq_id < rxq_per_numa; numa_rxq_id++) { ++ cpu = alloc_available_cpu(nid, numa_info); ++ if (cpu < 0) ++ break; ++ ++ rxq_id = rxq_per_numa * nid + numa_rxq_id; ++ add_netdev_irq_affinity_cpu(oecls_dev, rxq_id, 
cpu);
++		}
++	}
++
++	if (!remain)
++		return;
++
++	// Assign the remaining rxqs, one per numa node
++	numa_rxq_id = 0;
++	for_each_oecls_numa(nid, numa_info) {
++		if (numa_rxq_id >= remain)
++			break;
++		cpu = alloc_available_cpu(nid, numa_info);
++		if (cpu < 0)
++			break;
++
++		rxq_id = rxq_per_numa * oecls_numa_num + numa_rxq_id;
++		numa_rxq_id++;
++		add_netdev_irq_affinity_cpu(oecls_dev, rxq_id, cpu);
++	}
++}
++
++static void config_affinity_strategy_cluster(struct oecls_netdev_info *oecls_dev)
++{
++	int rxq_num = oecls_dev->rxq_num;
++	int rxq_per_numa = rxq_num / oecls_numa_num;
++	int remain = rxq_num - rxq_per_numa * oecls_numa_num;
++	int cpu_idx = oecls_cluster_cpu_num - 1;
++	int cluster, cpu, rxq_id = 0, round;
++
++	round = rxq_per_numa < oecls_cluster_per_numa ? rxq_per_numa : oecls_cluster_per_numa;
++	if (remain > 0)
++		round++;
++	oecls_debug("round=%d\n", round);
++
++	while (rxq_id < oecls_dev->rxq_num) {
++		for (cluster = 0; cluster < oecls_cluster_per_numa * oecls_numa_num; cluster++) {
++			if (cluster % oecls_cluster_per_numa >= round)
++				continue;
++			cpu = cluster * oecls_cluster_cpu_num + cpu_idx;
++			if (rxq_id >= oecls_dev->rxq_num)
++				break;
++			add_netdev_irq_affinity_cpu(oecls_dev, rxq_id++, cpu);
++		}
++		cpu_idx--;
++		if (cpu_idx < 0)
++			cpu_idx = oecls_cluster_cpu_num - 1;
++	}
++}
++
++static void config_affinity_strategy_numa(struct oecls_netdev_info *oecls_dev)
++{
++	int rxq_num = oecls_dev->rxq_num;
++	int rxq_per_numa = rxq_num / oecls_numa_num;
++	int cpu_per_numa = nr_cpu_ids / oecls_numa_num;
++	int remain = rxq_num - rxq_per_numa * oecls_numa_num;
++	struct oecls_numa_info *numa_info;
++	int numa_start_cpu, numa_cpu_id;
++	int rxq_id = 0, nid, cpu;
++
++	for_each_oecls_numa(nid, numa_info) {
++		numa_start_cpu = find_first_bit(numa_info->avail_cpus, OECLS_MAX_CPU_NUM);
++		for (numa_cpu_id = 0; numa_cpu_id < rxq_per_numa; numa_cpu_id++) {
++			cpu = numa_start_cpu + (numa_cpu_id % cpu_per_numa);
++			if (rxq_id >= oecls_dev->rxq_num)
++				break;
++			add_netdev_irq_affinity_cpu(oecls_dev, rxq_id++, cpu);
++		}
++		if (remain-- > 0) {
++			cpu = numa_start_cpu + (numa_cpu_id % cpu_per_numa);
++			add_netdev_irq_affinity_cpu(oecls_dev, rxq_id++, cpu);
++		}
++	}
++}
++
++static void config_affinity_strategy_custom(struct oecls_netdev_info *oecls_dev)
++{
++	oecls_debug("dev=%s\n", oecls_dev->dev_name);
++}
++
++static void config_affinity_strategy(void)
++{
++	struct oecls_netdev_info *oecls_dev;
++	int devid;
++
++	for_each_oecls_netdev(devid, oecls_dev) {
++		switch (strategy) {
++		case 1:
++			config_affinity_strategy_cluster(oecls_dev);
++			break;
++		case 2:
++			config_affinity_strategy_numa(oecls_dev);
++			break;
++		case 3:
++			config_affinity_strategy_custom(oecls_dev);
++			break;
++		case 0:
++		default:
++			config_affinity_strategy_default(oecls_dev);
++			break;
++		}
++	}
++}
++
++static inline void irq_set_affinity_wrapper(int rxq, int irq, int cpu)
++{
++	int err = 0;
++
++	err = irq_set_affinity(irq, get_cpu_mask(cpu));
++	oecls_debug("rxq=%d, irq=%d, cpu=%d, err=%d\n", rxq, irq, cpu, err);
++}
++
++static void enable_affinity_strategy(void)
++{
++	struct oecls_netdev_queue_info *rxq_info;
++	struct oecls_netdev_info *oecls_dev;
++	int rxq_id, devid;
++
++	for_each_oecls_netdev(devid, oecls_dev) {
++		for (rxq_id = 0; rxq_id < oecls_dev->rxq_num; rxq_id++) {
++			rxq_info = &oecls_dev->rxq[rxq_id];
++			irq_set_affinity_wrapper(rxq_id, rxq_info->irq, rxq_info->affinity_cpu);
++		}
++	}
++}
++
++static inline void netif_set_xps_queue_wrapper(struct net_device *netdev, int rxq_id,
++					       const struct cpumask *cpu_mask)
++{
++	int err = 0;
++
++	err = netif_set_xps_queue(netdev, cpu_mask, rxq_id);
++	oecls_debug("name=%s, rxq_id=%d, mask=%*pbl, err=%d\n", netdev->name, rxq_id,
++		    cpumask_pr_args(cpu_mask), err);
++}
++
++static void set_netdev_xps_queue(bool enable)
++{
++	const struct cpumask clear_mask = { 0 };
++	struct oecls_netdev_info *oecls_dev;
++	const struct cpumask *cpu_mask;
++	int rxq_id, devid, cpu, nid;
++
++	for_each_oecls_netdev(devid, oecls_dev) {
++		for (rxq_id = 0; rxq_id < oecls_dev->rxq_num; rxq_id++) {
++			cpu = oecls_dev->rxq[rxq_id].affinity_cpu;
++			nid = cpu_to_node(cpu);
++			if (enable)
++				cpu_mask = cpumask_of_node(nid);
++			else
++				cpu_mask = &clear_mask;
++
++			netif_set_xps_queue_wrapper(oecls_dev->netdev, rxq_id, cpu_mask);
++		}
++	}
++}
++
++static __init int oecls_init(void)
++{
++	struct oecls_numa_info *numa_info;
++	int nid, err;
++
++	if (!check_params())
++		return -EINVAL;
++
++	err = init_oecls_numa_info();
++	if (err)
++		return err;
++
++	err = init_oecls_netdev_info(ifname);
++	if (err)
++		goto clean_numa;
++
++	// Set irq affinity
++	config_affinity_strategy();
++	enable_affinity_strategy();
++
++	// Record which rxqs are bound to each numa node
++	for_each_oecls_numa(nid, numa_info) {
++		err = init_numa_rxq_bitmap(nid, numa_info);
++		if (err)
++			goto clean_rxq;
++	}
++
++#ifdef CONFIG_XPS
++	set_netdev_xps_queue(true);
++#endif
++
++	if (mode == 0)
++		oecls_ntuple_res_init();
++	else
++		oecls_flow_res_init();
++
++	return 0;
++
++clean_rxq:
++clean_numa:
++	clean_oecls_netdev_info();
++	clean_oecls_numa_info();
++	return err;
++}
++
++static __exit void oecls_exit(void)
++{
++	if (mode == 0)
++		oecls_ntuple_res_clean();
++	else
++		oecls_flow_res_clean();
++
++#ifdef CONFIG_XPS
++	set_netdev_xps_queue(false);
++#endif
++
++	clean_oecls_rxq();
++	clean_oecls_netdev_info();
++	clean_oecls_numa_info();
++}
++
++module_init(oecls_init);
++module_exit(oecls_exit);
++
++MODULE_DESCRIPTION("oenetcls");
++MODULE_LICENSE("GPL v2");
+diff --git a/net/oenetcls/oenetcls_ntuple.c b/net/oenetcls/oenetcls_ntuple.c
+new file mode 100644
+index 000000000..2845a5756
+--- /dev/null
++++ b/net/oenetcls/oenetcls_ntuple.c
+@@ -0,0 +1,573 @@
++// SPDX-License-Identifier: GPL-2.0-only
++#include <linux/inetdevice.h>
++#include <linux/ethtool.h>
++#include <linux/netdevice.h>
++#include <linux/rtnetlink.h>
++#include <linux/irq.h>
++#include <linux/irqdesc.h>
++#include <linux/inet.h>
++#include <linux/jhash.h>
++#include <linux/oenetcls.h>
++#include <net/sock.h>
++
++#include "oenetcls.h"
++
++struct oecls_sk_rule_list oecls_sk_rules, oecls_sk_list;
++
++static void init_oecls_sk_rules(void)
++{
++	unsigned int i;
++
++	for (i = 0; i < OECLS_SK_RULE_HASHSIZE; i++)
++		INIT_HLIST_HEAD(oecls_sk_rules.hash + i);
++	mutex_init(&oecls_sk_rules.mutex);
++}
++
++static inline struct hlist_head *get_rule_hashlist(u32 dip4, u16 dport)
++{
++	return oecls_sk_rules.hash + (jhash_2words(dip4, dport, 0) & OECLS_SK_RULE_HASHMASK);
++}
++
++static inline struct hlist_head *get_sk_hashlist(void *sk)
++{
++	return oecls_sk_list.hash + (jhash(sk, sizeof(sk), 0) & OECLS_SK_RULE_HASHMASK);
++}
++
++static void add_sk_rule(int devid, u32 dip4, u16 dport, void *sk, int action,
++			int ruleid, int nid)
++{
++	struct hlist_head *hlist = get_rule_hashlist(dip4, dport);
++	struct hlist_head *sk_hlist = get_sk_hashlist(sk);
++	struct oecls_sk_rule *rule;
++	struct oecls_sk_entry *entry;
++
++	rule = kzalloc(sizeof(struct oecls_sk_rule), GFP_ATOMIC);
++	entry = kzalloc(sizeof(struct oecls_sk_entry), GFP_ATOMIC);
++	if (!rule || !entry)
++		goto out;
++
++	rule->sk = sk;
++	rule->dip4 = dip4;
++	rule->dport = dport;
++	rule->devid = devid;
++	rule->action = action;
++	rule->ruleid = ruleid;
++	rule->nid = nid;
++	hlist_add_head(&rule->node, hlist);
++
++	entry->sk = sk;
++	entry->sk_rule_hash = jhash_2words(dip4, dport, 0);
++	hlist_add_head(&entry->node, sk_hlist);
++	return;
++out:
++	oecls_debug("alloc failed rule:%p entry:%p\n", rule, entry);
++	kfree(entry);
++	kfree(rule);
++}
++
++static struct oecls_sk_entry *get_sk_entry(void *sk)
++{
++	struct hlist_head *sk_hlist = get_sk_hashlist(sk);
++	struct oecls_sk_entry *entry = NULL;
++
++	hlist_for_each_entry(entry, sk_hlist, node) {
++		if (entry->sk == sk)
++			break;
++	}
++	return entry;
++}
++
++static void del_sk_rule(struct oecls_sk_rule *rule)
++{
++	struct oecls_sk_entry *entry;
++
++	entry = get_sk_entry(rule->sk);
++	if (!entry)
++		return;
++	hlist_del_init(&entry->node);
++	kfree(entry);
++
++	oecls_debug("del rule=%p\n", rule);
++	hlist_del_init(&rule->node);
++	kfree(rule);
++}
++
++static struct oecls_sk_rule *get_sk_rule(int devid, u32 dip4, u16 dport)
++{
++	struct hlist_head *hlist = get_rule_hashlist(dip4, dport);
++	struct oecls_sk_rule *rule = NULL;
++
++	hlist_for_each_entry(rule, hlist, node) {
++		if (rule->devid == devid && rule->dip4 == dip4 && rule->dport == dport)
++			break;
++	}
++	return rule;
++}
++
++static struct oecls_sk_rule *get_rule_from_sk(int devid, void *sk)
++{
++	struct oecls_sk_rule *rule = NULL;
++	struct oecls_sk_entry *entry;
++	struct hlist_head *hlist;
++
++	entry = get_sk_entry(sk);
++	if (!entry)
++		return NULL;
++
++	hlist = oecls_sk_rules.hash + (entry->sk_rule_hash & OECLS_SK_RULE_HASHMASK);
++	hlist_for_each_entry(rule, hlist, node) {
++		if (rule->devid == devid && rule->sk == sk)
++			break;
++	}
++	return rule;
++}
++
++static inline bool reuseport_check(int devid, u32 dip4, u16 dport)
++{
++	return !!get_sk_rule(devid, dip4, dport);
++}
++
++static u32 get_first_ip4_addr(struct net *net)
++{
++	struct in_device *in_dev;
++	struct net_device *dev;
++	struct in_ifaddr *ifa;
++	u32 dip4 = 0;
++
++	rtnl_lock();
++	rcu_read_lock();
++	for_each_netdev(net, dev) {
++		if (dev->flags & IFF_LOOPBACK || !(dev->flags & IFF_UP))
++			continue;
++		in_dev = __in_dev_get_rcu(dev);
++		if (!in_dev)
++			continue;
++
++		in_dev_for_each_ifa_rcu(ifa, in_dev) {
++			if (!strcmp(dev->name, ifa->ifa_label)) {
++				dip4 = ifa->ifa_local;
++				oecls_debug("dev: %s, dip4:%pI4\n", dev->name, &dip4);
++				goto out;
++			}
++		}
++	}
++out:
++	rcu_read_unlock();
++	rtnl_unlock();
++	return dip4;
++}
++
++static void get_sk_rule_addr(struct sock *sk, u32 *dip4, u16 *dport)
++{
++	*dport = htons(sk->sk_num);
++
++	if (!match_ip_flag) {
++		*dip4 = 0;
++		return;
++	}
++
++	if (sk->sk_rcv_saddr)
++		*dip4 = sk->sk_rcv_saddr;
++	else
++		*dip4 = get_first_ip4_addr(sock_net(sk));
++}
++
++static int rxclass_rule_del(struct cmd_context *ctx, __u32 loc)
++{
++	struct ethtool_rxnfc nfccmd;
++	int err;
++
++	nfccmd.cmd = ETHTOOL_SRXCLSRLDEL;
++	nfccmd.fs.location = loc;
++	err = send_ethtool_ioctl(ctx, &nfccmd);
++	if (err < 0)
++		oecls_debug("rmgr: Cannot delete RX class rule, loc:%u\n", loc);
++	return err;
++}
++
++static int rmgr_ins(struct rmgr_ctrl *rmgr, __u32 loc)
++{
++	if (loc >= rmgr->size) {
++		oecls_error("rmgr: Location out of range\n");
++		return -1;
++	}
++
++	set_bit(loc, rmgr->slot);
++	return 0;
++}
++
++static int rmgr_find_empty_slot(struct rmgr_ctrl *rmgr, struct ethtool_rx_flow_spec *fsp)
++{
++	__u32 loc, slot_num;
++
++	if (rmgr->driver_select)
++		return 0;
++
++	loc = rmgr->size - 1;
++	slot_num = loc / BITS_PER_LONG;
++	if (!~(rmgr->slot[slot_num] | (~1UL << rmgr->size % BITS_PER_LONG))) {
++		loc -= 1 + (loc % BITS_PER_LONG);
++		slot_num--;
++	}
++
++	while (loc < rmgr->size && !~(rmgr->slot[slot_num])) {
++		loc -= BITS_PER_LONG;
++		slot_num--;
++	}
++
++	while (loc < rmgr->size && test_bit(loc, rmgr->slot))
++		loc--;
++
++	if (loc < rmgr->size) {
++		fsp->location = loc;
++		return rmgr_ins(rmgr, loc);
++	}
++
++	return -1;
++}
++
++static int rxclass_get_dev_info(struct cmd_context *ctx, __u32 *count, int *driver_select)
++{
++	struct ethtool_rxnfc nfccmd;
++	int err;
++
++	nfccmd.cmd = ETHTOOL_GRXCLSRLCNT;
++	nfccmd.data = 0;
++	err = send_ethtool_ioctl(ctx, &nfccmd);
++	*count = nfccmd.rule_cnt;
++	if (driver_select)
++		*driver_select = !!(nfccmd.data & RX_CLS_LOC_SPECIAL);
++	if (err < 0)
++		oecls_debug("rxclass: Cannot get RX class rule count\n");
++
++	return err;
++}
++
++static int rmgr_init(struct cmd_context *ctx, struct rmgr_ctrl *rmgr)
++{
++	struct ethtool_rxnfc *nfccmd;
++	__u32 *rule_locs;
++	int i, err = 0;
++
++	memset(rmgr, 0, sizeof(*rmgr));
++	err = rxclass_get_dev_info(ctx, &rmgr->n_rules, &rmgr->driver_select);
++	if (err < 0)
++		return err;
++
++	if (rmgr->driver_select)
++		return err;
++
++	nfccmd = kzalloc(sizeof(*nfccmd) + (rmgr->n_rules * sizeof(__u32)), GFP_ATOMIC);
++	if (!nfccmd) {
++		oecls_error("rmgr: Cannot allocate memory for RX class rule locations\n");
++		err = -ENOMEM;
++		goto out;
++	}
++
++	nfccmd->cmd = ETHTOOL_GRXCLSRLALL;
++	nfccmd->rule_cnt = rmgr->n_rules;
++	err = send_ethtool_ioctl(ctx, nfccmd);
++	if (err < 0) {
++		oecls_debug("rmgr: Cannot get RX class rules\n");
++		goto out;
++	}
++
++	rmgr->size = nfccmd->data;
++	if (rmgr->size == 0 || rmgr->size < rmgr->n_rules) {
++		oecls_error("rmgr: Invalid RX class rules table size\n");
++		err = -EINVAL;
++		goto out;
++	}
++
++	rmgr->slot = kzalloc(BITS_TO_LONGS(rmgr->size) * sizeof(long), GFP_ATOMIC);
++	if (!rmgr->slot) {
++		oecls_error("rmgr: Cannot allocate memory for RX class rules\n");
++		err = -ENOMEM;
++		goto out;
++	}
++
++	rule_locs = nfccmd->rule_locs;
++	for (i = 0; i < rmgr->n_rules; i++) {
++		err = rmgr_ins(rmgr, rule_locs[i]);
++		if (err < 0)
++			break;
++	}
++
++out:
++	kfree(nfccmd);
++	return err;
++}
++
++static void rmgr_cleanup(struct rmgr_ctrl *rmgr)
++{
++	kfree(rmgr->slot);
++	rmgr->slot = NULL;
++	rmgr->size = 0;
++}
++
++static int rmgr_set_location(struct cmd_context *ctx,
++			     struct ethtool_rx_flow_spec *fsp)
++{
++	struct rmgr_ctrl rmgr;
++	int ret;
++
++	ret = rmgr_init(ctx, &rmgr);
++	if (ret < 0)
++		goto out;
++
++	ret = rmgr_find_empty_slot(&rmgr, fsp);
++out:
++	rmgr_cleanup(&rmgr);
++	return ret;
++}
++
++static int rxclass_rule_ins(struct cmd_context *ctx,
++			    struct ethtool_rx_flow_spec *fsp, u32 rss_context)
++{
++	struct ethtool_rxnfc nfccmd;
++	u32 loc = fsp->location;
++	int ret;
++
++	if (loc & RX_CLS_LOC_SPECIAL) {
++		ret = rmgr_set_location(ctx, fsp);
++		if (ret < 0)
++			return ret;
++	}
++
++	nfccmd.cmd = ETHTOOL_SRXCLSRLINS;
++	nfccmd.rss_context = rss_context;
++	nfccmd.fs = *fsp;
++	ret = send_ethtool_ioctl(ctx, &nfccmd);
++	if (ret < 0) {
++		oecls_debug("Cannot insert the classification rule\n");
++		return ret;
++	}
++
++	if (loc & RX_CLS_LOC_SPECIAL)
++		oecls_debug("Added rule with ID %d\n", nfccmd.fs.location);
++
++	return 0;
++}
++
++static void flow_spec_to_ntuple(struct ethtool_rx_flow_spec *fsp,
++				struct ethtool_rx_ntuple_flow_spec *ntuple)
++{
++	int i;
++
++	memset(ntuple, ~0, sizeof(*ntuple));
++	ntuple->flow_type = fsp->flow_type;
++	ntuple->action = fsp->ring_cookie;
++	memcpy_r(&ntuple->h_u, &fsp->h_u, sizeof(fsp->h_u));
++	memcpy_r(&ntuple->m_u, &fsp->m_u, sizeof(fsp->m_u));
++	for (i = 0; i < sizeof(ntuple->m_u); i++)
++		ntuple->m_u.hdata[i] ^= 0xFF;
++	ntuple->flow_type &= ~FLOW_EXT;
++}
++
++static int do_srxntuple(struct cmd_context *ctx, struct ethtool_rx_flow_spec *fsp)
++{
++	struct ethtool_rx_ntuple ntuplecmd;
++	struct ethtool_value eval;
++	int ret = 0;
++
++	flow_spec_to_ntuple(fsp, &ntuplecmd.fs);
++
++	eval.cmd = ETHTOOL_GFLAGS;
++	ret = send_ethtool_ioctl(ctx, &eval);
++	if (ret || !(eval.data & ETH_FLAG_NTUPLE))
++		return -1;
++
++	ntuplecmd.cmd = ETHTOOL_SRXNTUPLE;
++	ret = send_ethtool_ioctl(ctx, &ntuplecmd);
++	if (ret)
++		oecls_debug("Cannot add new rule via N-tuple, ret:%d\n", ret);
++
++	return ret;
++}
++
++static int cfg_ethtool_rule(struct cmd_context *ctx, bool is_del)
++{
++	struct ethtool_rx_flow_spec *fsp, rx_rule_fs;
++	u32 rss_context = 0;
++	int ret;
++
++	oecls_debug("is_del:%d netdev:%s, dip4:%pI4, dport:%d, action:%d, ruleid:%u, del_ruleid:%u\n",
++		    is_del, ctx->netdev, &ctx->dip4, ntohs(ctx->dport), ctx->action, ctx->ruleid,
++		    ctx->del_ruleid);
++
++	if (is_del)
++		return rxclass_rule_del(ctx, ctx->del_ruleid);
++
++	ctx->ret_loc = -1;
++
++	fsp = &rx_rule_fs;
++	memset(fsp, 0, sizeof(*fsp));
++	fsp->flow_type = TCP_V4_FLOW;
++	fsp->location = RX_CLS_LOC_ANY;
++	fsp->h_u.tcp_ip4_spec.ip4dst = ctx->dip4;
++	fsp->h_u.tcp_ip4_spec.pdst = ctx->dport;
++	if (ctx->dip4)
++		fsp->m_u.tcp_ip4_spec.ip4dst = (u32)~0ULL;
++	fsp->m_u.tcp_ip4_spec.pdst = (u16)~0ULL;
++	if (ctx->ruleid)
++		fsp->location = ctx->ruleid;
++	fsp->ring_cookie = ctx->action;
++
++	ret = do_srxntuple(ctx, &rx_rule_fs);
++	if (!ret)
++		return 0;
++
++	ret = rxclass_rule_ins(ctx, &rx_rule_fs, rss_context);
++	if (!ret)
++		ctx->ret_loc = rx_rule_fs.location;
++	return ret;
++}
++
++static void del_ntuple_rule(struct sock *sk)
++{
++	struct oecls_netdev_info *oecls_dev;
++	struct cmd_context ctx = { 0 };
++	struct oecls_sk_rule *rule;
++	int devid;
++	u16 dport;
++	u32 dip4;
++	int err;
++
++	get_sk_rule_addr(sk, &dip4, &dport);
++
++	mutex_lock(&oecls_sk_rules.mutex);
++	for_each_oecls_netdev(devid, oecls_dev) {
++		strncpy(ctx.netdev, oecls_dev->dev_name, IFNAMSIZ);
++		rule = get_rule_from_sk(devid, sk);
++		if (!rule) {
++			oecls_debug("rule not found! sk:%p, devid:%d, dip4:%pI4, dport:%d\n",
++				    sk, devid, &dip4, ntohs(dport));
++			continue;
++		}
++
++		// Config Ntuple rule to dev
++		ctx.del_ruleid = rule->ruleid;
++		err = cfg_ethtool_rule(&ctx, true);
++		if (err) {
++			oecls_error("del sk:%p, nid:%d, devid:%d, action:%d, ruleid:%d, err:%d\n",
++				    sk, rule->nid, devid, rule->action, rule->ruleid, err);
++		}
++
++		// Free the bound queue
++		free_rxq_id(rule->nid, devid, rule->action);
++
++		// Delete sk rule
++		del_sk_rule(rule);
++	}
++	mutex_unlock(&oecls_sk_rules.mutex);
++}
++
++static void add_ntuple_rule(struct sock *sk)
++{
++	struct oecls_netdev_info *oecls_dev;
++	struct cmd_context ctx = { 0 };
++	int cpu = smp_processor_id();
++	int nid = cpu_to_node(cpu);
++	int rxq_id;
++	int devid;
++	int err;
++
++	if (check_appname(current->comm))
++		return;
++	get_sk_rule_addr(sk, &ctx.dip4, &ctx.dport);
++
++	mutex_lock(&oecls_sk_rules.mutex);
++	for_each_oecls_netdev(devid, oecls_dev) {
++		strncpy(ctx.netdev, oecls_dev->dev_name, IFNAMSIZ);
++		if (reuseport_check(devid, ctx.dip4, ctx.dport)) {
++			oecls_error("dip4:%pI4, dport:%d reuse!\n", &ctx.dip4, ctx.dport);
++			continue;
++		}
++
++		// Calculate the bound queue
++		rxq_id = alloc_rxq_id(nid, devid);
++		if (rxq_id < 0)
++			continue;
++
++		// Config Ntuple rule to dev
++		ctx.action = (u16)rxq_id;
++		err = cfg_ethtool_rule(&ctx, false);
++		if (err) {
++			oecls_error("add sk:%p, nid:%d, devid:%d, action:%d, ruleid:%d, err:%d\n",
++				    sk, nid, devid, ctx.action, ctx.ret_loc, err);
++			continue;
++		}
++
++		// Add sk rule
++		add_sk_rule(devid, ctx.dip4, ctx.dport, sk, ctx.action, ctx.ret_loc, nid);
++	}
++	mutex_unlock(&oecls_sk_rules.mutex);
++}
++
++static void ethtool_cfg_rxcls(struct sock *sk, int is_del)
++{
++	if (sk->sk_state != TCP_LISTEN)
++		return;
++
++	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
++		return;
++
++	oecls_debug("[cpu:%d] app:%s, sk:%p, is_del:%d, ip:%pI4, port:%d\n", smp_processor_id(),
++		    current->comm, sk, is_del, &sk->sk_rcv_saddr, (u16)sk->sk_num);
++
++	if (is_del)
++		del_ntuple_rule(sk);
++	else
++		add_ntuple_rule(sk);
++}
++
++static void clean_oecls_sk_rules(void)
++{
++	struct oecls_netdev_info *oecls_dev;
++	struct cmd_context ctx = { 0 };
++	struct oecls_sk_rule *rule;
++	struct hlist_head *hlist;
++	struct hlist_node *n;
++	unsigned int i;
++	int err;
++
++	mutex_lock(&oecls_sk_rules.mutex);
++	for (i = 0; i < OECLS_SK_RULE_HASHSIZE; i++) {
++		hlist = &oecls_sk_rules.hash[i];
++
++		hlist_for_each_entry_safe(rule, n, hlist, node) {
++			oecls_dev = get_oecls_netdev_info(rule->devid);
++			if (!oecls_dev)
++				continue;
++			strncpy(ctx.netdev, oecls_dev->dev_name, IFNAMSIZ);
++			ctx.del_ruleid = rule->ruleid;
++			err = cfg_ethtool_rule(&ctx, true);
++			oecls_debug("sk:%p, dev_id:%d, action:%d, ruleid:%d, err:%d\n", rule->sk,
++				    rule->devid, rule->action, rule->ruleid, err);
++
++			hlist_del(&rule->node);
++			oecls_debug("clean rule=%p\n", rule);
++			kfree(rule);
++		}
++	}
++	mutex_unlock(&oecls_sk_rules.mutex);
++}
++
++static const struct oecls_hook_ops oecls_ntuple_ops = {
++	.oecls_flow_update = NULL,
++	.oecls_set_cpu = NULL,
++	.oecls_timeout = NULL,
++	.oecls_cfg_rxcls = ethtool_cfg_rxcls,
++};
++
++void oecls_ntuple_res_init(void)
++{
++	init_oecls_sk_rules();
++	RCU_INIT_POINTER(oecls_ops, &oecls_ntuple_ops);
++}
++
++void oecls_ntuple_res_clean(void)
++{
++	RCU_INIT_POINTER(oecls_ops, NULL);
++	clean_oecls_sk_rules();
++}
+-- 
+2.20.1
+
-- 
2.34.1
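
Editor's note (not part of the patch): the ntuple path above programs the same ETHTOOL_SRXCLSRLINS interface that userspace normally reaches through `ethtool -N <dev> flow-type tcp4 ...`. A minimal userspace sketch of that ioctl is shown below for reference only; the interface name "eth0", the destination port 80 and the queue index 2 are placeholders, not values taken from the patch.

/* Hypothetical userspace sketch: insert one TCP/IPv4 n-tuple steering rule,
 * roughly what cfg_ethtool_rule() automates from kernel context. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <arpa/inet.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_rxnfc nfc = { .cmd = ETHTOOL_SRXCLSRLINS };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	nfc.fs.flow_type = TCP_V4_FLOW;
	nfc.fs.h_u.tcp_ip4_spec.pdst = htons(80);	/* match destination port 80 */
	nfc.fs.m_u.tcp_ip4_spec.pdst = 0xffff;		/* exact-match the port */
	nfc.fs.ring_cookie = 2;				/* steer matching flows to RX queue 2 */
	nfc.fs.location = RX_CLS_LOC_ANY;		/* let the driver pick a free slot */

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* placeholder interface name */
	ifr.ifr_data = (void *)&nfc;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		perror("ETHTOOL_SRXCLSRLINS");
	else
		printf("rule inserted at location %u\n", nfc.fs.location);

	close(fd);
	return 0;
}

The module performs the equivalent of this per listening socket, then frees the slot again (ETHTOOL_SRXCLSRLDEL) when the socket goes away.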