
From: Coco Li <lixiaoyan@google.com> mainline inclusion from mainline-v6.8-rc1 commit 43a71cd66b9c0a4af3d15d8644359fde35bdbed0 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBVXC0 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Reorganize fast path variables on tx-txrx-rx order Fastpath variables end after npinfo. Below data generated with pahole on x86 architecture. Fast path variables span cache lines before change: 12 Fast path variables span cache lines after change: 4 Suggested-by: Eric Dumazet <edumazet@google.com> Signed-off-by: Coco Li <lixiaoyan@google.com> Reviewed-by: Eric Dumazet <edumazet@google.com> Reviewed-by: David Ahern <dsahern@kernel.org> Link: https://lore.kernel.org/r/20231204201232.520025-2-lixiaoyan@google.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- include/linux/netdevice.h | 107 ++++++++++++++++++++++---------------- net/core/dev.c | 54 +++++++++++++++++++ 2 files changed, 117 insertions(+), 44 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76220f070127..6a64547b83d6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1958,6 +1958,69 @@ struct net_device_extended_resvd { */ struct net_device { + /* Cacheline organization can be found documented in + * Documentation/networking/net_cachelines/net_device.rst. + * Please update the document when adding new fields. + */ + + /* TX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_tx); + unsigned long long priv_flags; + const struct net_device_ops *netdev_ops; + const struct header_ops *header_ops; + struct netdev_queue *_tx; + unsigned int real_num_tx_queues; + unsigned int gso_max_size; + unsigned int gso_ipv4_max_size; + u16 gso_max_segs; + s16 num_tc; + /* Note : dev->mtu is often read without holding a lock. + * Writers usually hold RTNL. + * It is recommended to use READ_ONCE() to annotate the reads, + * and to use WRITE_ONCE() to annotate the writes. + */ + unsigned int mtu; + unsigned short needed_headroom; + struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; +#ifdef CONFIG_XPS + struct xps_dev_maps __rcu *xps_cpus_map; + struct xps_dev_maps __rcu *xps_rxqs_map; +#endif +#ifdef CONFIG_NET_CLS_ACT + struct mini_Qdisc __rcu *miniq_egress; +#endif + + __cacheline_group_end(net_device_read_tx); + + /* TXRX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_txrx); + unsigned int flags; + unsigned short hard_header_len; + netdev_features_t features; + struct inet6_dev __rcu *ip6_ptr; + __cacheline_group_end(net_device_read_txrx); + + /* RX read-mostly hotpath */ + __cacheline_group_begin(net_device_read_rx); + struct list_head ptype_specific; + int ifindex; + unsigned int real_num_rx_queues; + struct netdev_rx_queue *_rx; + unsigned long gro_flush_timeout; + int napi_defer_hard_irqs; + unsigned int gro_max_size; + unsigned int gro_ipv4_max_size; + rx_handler_func_t __rcu *rx_handler; + void __rcu *rx_handler_data; + possible_net_t nd_net; +#ifdef CONFIG_NETPOLL + struct netpoll_info __rcu *npinfo; +#endif +#ifdef CONFIG_NET_XGRESS + struct bpf_mprog_entry __rcu *tcx_ingress; +#endif + __cacheline_group_end(net_device_read_rx); + char name[IFNAMSIZ]; struct netdev_name_node *name_node; struct dev_ifalias __rcu *ifalias; @@ -1983,14 +2046,12 @@ struct net_device { struct list_head unreg_list; struct list_head close_list; struct list_head ptype_all; - struct list_head ptype_specific; struct { struct list_head upper; struct list_head lower; } adj_list; - netdev_features_t features; netdev_features_t hw_features; netdev_features_t wanted_features; netdev_features_t vlan_features; @@ -1998,7 +2059,6 @@ struct net_device { netdev_features_t mpls_features; netdev_features_t gso_partial_features; - int ifindex; int group; struct net_device_stats stats; @@ -2015,7 +2075,6 @@ struct net_device { const struct iw_handler_def *wireless_handlers; struct iw_public_data *wireless_data; #endif - const struct net_device_ops *netdev_ops; const struct ethtool_ops *ethtool_ops; #ifdef CONFIG_NET_L3_MASTER_DEV const struct l3mdev_ops *l3mdev_ops; @@ -2032,11 +2091,6 @@ struct net_device { const struct tlsdev_ops *tlsdev_ops; #endif - const struct header_ops *header_ops; - - unsigned int flags; - unsigned int priv_flags; - unsigned short gflags; unsigned short padded; @@ -2046,20 +2100,12 @@ struct net_device { unsigned char if_port; unsigned char dma; - /* Note : dev->mtu is often read without holding a lock. - * Writers usually hold RTNL. - * It is recommended to use READ_ONCE() to annotate the reads, - * and to use WRITE_ONCE() to annotate the writes. - */ - unsigned int mtu; unsigned int min_mtu; unsigned int max_mtu; unsigned short type; - unsigned short hard_header_len; unsigned char min_header_len; unsigned char name_assign_type; - unsigned short needed_headroom; unsigned short needed_tailroom; /* Interface address info. */ @@ -2107,7 +2153,6 @@ struct net_device { void *atalk_ptr; #endif struct in_device __rcu *ip_ptr; - struct inet6_dev __rcu *ip6_ptr; #if IS_ENABLED(CONFIG_AX25) void *ax25_ptr; #endif @@ -2123,15 +2168,9 @@ struct net_device { /* Interface address info used in eth_type_trans() */ unsigned char *dev_addr; - struct netdev_rx_queue *_rx; unsigned int num_rx_queues; - unsigned int real_num_rx_queues; struct bpf_prog __rcu *xdp_prog; - unsigned long gro_flush_timeout; - int napi_defer_hard_irqs; - rx_handler_func_t __rcu *rx_handler; - void __rcu *rx_handler_data; #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_ingress; @@ -2150,23 +2189,13 @@ struct net_device { /* * Cache lines mostly used on transmit path */ - struct netdev_queue *_tx ____cacheline_aligned_in_smp; unsigned int num_tx_queues; - unsigned int real_num_tx_queues; struct Qdisc __rcu *qdisc; unsigned int tx_queue_len; spinlock_t tx_global_lock; struct xdp_dev_bulk_queue __percpu *xdp_bulkq; -#ifdef CONFIG_XPS - struct xps_dev_maps __rcu *xps_cpus_map; - struct xps_dev_maps __rcu *xps_rxqs_map; -#endif -#ifdef CONFIG_NET_CLS_ACT - struct mini_Qdisc __rcu *miniq_egress; -#endif - #ifdef CONFIG_NET_SCHED DECLARE_HASHTABLE (qdisc_hash, 4); #endif @@ -2199,12 +2228,6 @@ struct net_device { bool needs_free_netdev; void (*priv_destructor)(struct net_device *dev); -#ifdef CONFIG_NETPOLL - struct netpoll_info __rcu *npinfo; -#endif - - possible_net_t nd_net; - /* mid-layer private */ void *ml_priv; enum netdev_ml_priv_type ml_priv_type; @@ -2230,15 +2253,11 @@ struct net_device { /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 - unsigned int gso_max_size; #define GSO_MAX_SEGS 65535 - u16 gso_max_segs; #ifdef CONFIG_DCB const struct dcbnl_rtnl_ops *dcbnl_ops; #endif - s16 num_tc; - struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; u8 prio_tc_map[TC_BITMASK + 1]; #if IS_ENABLED(CONFIG_FCOE) diff --git a/net/core/dev.c b/net/core/dev.c index 9084da34213a..51379a4de04a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11532,6 +11532,58 @@ static struct pernet_operations __net_initdata default_device_ops = { .exit_batch = default_device_exit_batch, }; +static void __init net_dev_struct_check(void) +{ + /* TX read-mostly hotpath */ + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, real_num_tx_queues); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tc_to_txq); +#ifdef CONFIG_XPS + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_cpus_map); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_rxqs_map); +#endif +#ifdef CONFIG_NET_CLS_ACT + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, miniq_egress); +#endif + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 152); + + /* TXRX read-mostly hotpath */ + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 30); + + /* RX read-mostly hotpath */ + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler_data); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, nd_net); +#ifdef CONFIG_NETPOLL + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, npinfo); +#endif +#ifdef CONFIG_NET_XGRESS + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); +#endif + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 96); +} + /* * Initialize the DEV module. At boot time this walks the device list and * unhooks any devices that fail to initialise (normally hardware not @@ -11549,6 +11601,8 @@ static int __init net_dev_init(void) BUG_ON(!dev_boot_phase); + net_dev_struct_check(); + if (dev_proc_init()) goto out; -- 2.34.1