[PATCH OLK-5.10] kernel: Optimize the kernel performance under pressure

hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICA4Z5

--------------------------------

Optimize the kernel performance under pressure:

- ext4: replace the single stream-allocation global goal protected by
  s_md_lock with an array of goals hashed by inode number, skip block
  groups whose group lock is already held during cr 0-2 scans, and add
  a stream_goal_hits counter to the mballoc statistics.
- net: precompute the connect() ehash value for source port 0, walk the
  bind and established hash chains under RCU in __inet_hash_connect()
  before taking the bucket locks, and free inet_bind_bucket objects via
  RCU, to reduce lock contention during port selection.
- sched: add an IRQ_AVG scheduler feature (default off); when enabled,
  rq->idle_stamp and the avg_idle delta are computed from
  rq_clock_task() so that IRQ time is excluded from the average idle
  duration.

Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com>
---
 fs/ext4/ext4.h                | 31 ++++++++------
 fs/ext4/mballoc.c             | 54 ++++++++++++++++++------
 include/net/inet_hashtables.h |  9 ++--
 include/net/ip.h              |  2 +-
 kernel/sched/core.c           |  7 +++-
 kernel/sched/fair.c           |  5 ++-
 kernel/sched/features.h       |  1 +
 net/ipv4/inet_hashtables.c    | 78 +++++++++++++++++++++++++++--------
 net/ipv4/inet_timewait_sock.c |  2 +-
 net/ipv6/inet6_hashtables.c   | 38 +++++++++++++----
 10 files changed, 170 insertions(+), 57 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf5b21b5fed4..5a383f1a0843 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1548,12 +1548,13 @@ struct ext4_sb_info {
 	unsigned int s_mb_group_prealloc;
 	unsigned int s_mb_max_inode_prealloc;
 	unsigned int s_max_dir_size_kb;
-	/* where last allocation was done - for stream allocation */
-	unsigned long s_mb_last_group;
-	unsigned long s_mb_last_start;
 	unsigned int s_mb_prefetch;
 	unsigned int s_mb_prefetch_limit;
 
+	/* where last allocation was done - for stream allocation */
+	ext4_group_t *s_mb_last_groups;
+	unsigned int s_mb_nr_global_goals;
+
 	/* stats for buddy allocator */
 	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
 	atomic_t s_bal_success;	/* we found long enough chunks */
@@ -1561,6 +1562,7 @@ struct ext4_sb_info {
 	atomic_t s_bal_ex_scanned;	/* total extents scanned */
 	atomic_t s_bal_groups_scanned;	/* number of groups scanned */
 	atomic_t s_bal_goals;	/* goal hits */
+	atomic_t s_bal_stream_goals;	/* stream allocation global goal hits */
 	atomic_t s_bal_breaks;	/* too long searches */
 	atomic_t s_bal_2orders;	/* 2^order hits */
 	atomic64_t s_bal_cX_groups_considered[4];
@@ -3395,23 +3397,28 @@ static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
 	return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
 }
 
+static inline bool ext4_try_lock_group(struct super_block *sb, ext4_group_t group)
+{
+	if (!spin_trylock(ext4_group_lock_ptr(sb, group)))
+		return false;
+	/*
+	 * We're able to grab the lock right away, so drop the lock
+	 * contention counter.
+	 */
+	atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
+	return true;
+}
+
 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
 {
-	spinlock_t *lock = ext4_group_lock_ptr(sb, group);
-	if (spin_trylock(lock))
-		/*
-		 * We're able to grab the lock right away, so drop the
-		 * lock contention counter.
-		 */
-		atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
-	else {
+	if (!ext4_try_lock_group(sb, group)) {
 		/*
 		 * The lock is busy, so bump the contention counter,
 		 * and then wait on the spin lock.
 		 */
 		atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
 				  EXT4_MAX_CONTENTION);
-		spin_lock(lock);
+		spin_lock(ext4_group_lock_ptr(sb, group));
 	}
 }
 
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 522d2ec128ef..270856e6efc6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1741,11 +1741,11 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
 	get_page(ac->ac_buddy_page);
 	/* store last allocated for subsequent stream allocation */
 	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
-		spin_lock(&sbi->s_md_lock);
-		sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
-		sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
-		spin_unlock(&sbi->s_md_lock);
+		int hash = ac->ac_inode->i_ino % sbi->s_mb_nr_global_goals;
+
+		WRITE_ONCE(sbi->s_mb_last_groups[hash], ac->ac_f_ex.fe_group);
 	}
+
 	/*
 	 * As we've just preallocated more space than
 	 * user requested originally, we store allocated
@@ -2378,11 +2378,11 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 
 	/* if stream allocation is enabled, use global goal */
 	if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
-		/* TBD: may be hot point */
-		spin_lock(&sbi->s_md_lock);
-		ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
-		ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
-		spin_unlock(&sbi->s_md_lock);
+		int hash = ac->ac_inode->i_ino % sbi->s_mb_nr_global_goals;
+
+		ac->ac_g_ex.fe_group = READ_ONCE(sbi->s_mb_last_groups[hash]);
+		ac->ac_g_ex.fe_start = -1;
+		ac->ac_flags &= ~EXT4_MB_HINT_TRY_GOAL;
 	}
 
 	/* Let's just scan groups to find more-less suitable blocks */
@@ -2434,6 +2434,11 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 				nr = 0;
 			}
 
+			/* prevent unnecessary buddy loading. */
+			if (cr < 3 &&
+			    spin_is_locked(ext4_group_lock_ptr(sb, group)))
+				continue;
+
 			/* This now checks without needing the buddy page */
 			ret = ext4_mb_good_group_nolock(ac, group, cr);
 			if (ret <= 0) {
@@ -2446,7 +2451,13 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 			if (err)
 				goto out;
 
-			ext4_lock_group(sb, group);
+			/* skip busy group */
+			if (cr >= 3) {
+				ext4_lock_group(sb, group);
+			} else if (!ext4_try_lock_group(sb, group)) {
+				ext4_mb_unload_buddy(&e4b);
+				continue;
+			}
 
 			/*
 			 * We need to check again after locking the
@@ -2507,8 +2518,12 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 		}
 	}
 
-	if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND)
+	if (sbi->s_mb_stats && ac->ac_status == AC_STATUS_FOUND) {
 		atomic64_inc(&sbi->s_bal_cX_hits[ac->ac_criteria]);
+		if (ac->ac_flags & EXT4_MB_STREAM_ALLOC &&
+		    ac->ac_b_ex.fe_group == ac->ac_g_ex.fe_group)
+			atomic_inc(&sbi->s_bal_stream_goals);
+	}
 out:
 	if (!err && ac->ac_status != AC_STATUS_FOUND && first_err)
 		err = first_err;
@@ -2658,6 +2673,8 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset)
 		   atomic64_read(&sbi->s_bal_cX_failed[3]));
 	seq_printf(seq, "\textents_scanned: %u\n", atomic_read(&sbi->s_bal_ex_scanned));
 	seq_printf(seq, "\t\tgoal_hits: %u\n", atomic_read(&sbi->s_bal_goals));
+	seq_printf(seq, "\t\tstream_goal_hits: %u\n",
+		   atomic_read(&sbi->s_bal_stream_goals));
 	seq_printf(seq, "\t\t2^n_hits: %u\n", atomic_read(&sbi->s_bal_2orders));
 	seq_printf(seq, "\t\tbreaks: %u\n", atomic_read(&sbi->s_bal_breaks));
 	seq_printf(seq, "\t\tlost: %u\n", atomic_read(&sbi->s_mb_lost_chunks));
@@ -3014,10 +3031,19 @@ int ext4_mb_init(struct super_block *sb)
 			sbi->s_mb_group_prealloc, sbi->s_stripe);
 	}
 
+	sbi->s_mb_nr_global_goals = min_t(unsigned int, num_possible_cpus(),
+					  DIV_ROUND_UP(sbi->s_groups_count, 4));
+	sbi->s_mb_last_groups = kcalloc(sbi->s_mb_nr_global_goals,
+					sizeof(ext4_group_t), GFP_KERNEL);
+	if (sbi->s_mb_last_groups == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
 	if (sbi->s_locality_groups == NULL) {
 		ret = -ENOMEM;
-		goto out;
+		goto out_free_last_groups;
 	}
 	for_each_possible_cpu(i) {
 		struct ext4_locality_group *lg;
@@ -3038,6 +3064,9 @@ int ext4_mb_init(struct super_block *sb)
 out_free_locality_groups:
 	free_percpu(sbi->s_locality_groups);
 	sbi->s_locality_groups = NULL;
+out_free_last_groups:
+	kfree(sbi->s_mb_last_groups);
+	sbi->s_mb_last_groups = NULL;
 out:
 	kfree(sbi->s_mb_offsets);
 	sbi->s_mb_offsets = NULL;
@@ -3126,6 +3155,7 @@ int ext4_mb_release(struct super_block *sb)
 	}
 
 	free_percpu(sbi->s_locality_groups);
+	kfree(sbi->s_mb_last_groups);
 
 	return 0;
 }
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index c9e387d174c6..fa002c62d985 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -88,6 +88,7 @@ struct inet_bind_bucket {
 	bool			fast_ipv6_only;
 	struct hlist_node	node;
 	struct hlist_head	owners;
+	struct rcu_head		rcu;
 };
 
 static inline struct net *ib_net(struct inet_bind_bucket *ib)
@@ -201,8 +202,7 @@ struct inet_bind_bucket *
 inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net,
 			struct inet_bind_hashbucket *head,
 			const unsigned short snum, int l3mdev);
-void inet_bind_bucket_destroy(struct kmem_cache *cachep,
-			      struct inet_bind_bucket *tb);
+void inet_bind_bucket_destroy(struct inet_bind_bucket *tb);
 
 static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
 			       const u32 bhash_size)
@@ -404,9 +404,12 @@ static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
 
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			struct sock *sk, u64 port_offset,
+			u32 hash_port0,
 			int (*check_established)(struct inet_timewait_death_row *,
 						 struct sock *, __u16,
-						 struct inet_timewait_sock **));
+						 struct inet_timewait_sock **,
+						 bool rcu_lookup,
+						 u32 hash));
 
 int inet_hash_connect(struct inet_timewait_death_row *death_row,
 		      struct sock *sk);
diff --git a/include/net/ip.h b/include/net/ip.h
index 12cb525572f0..b36ce4d97904 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -343,7 +343,7 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o
 void inet_get_local_port_range(struct net *net, int *low, int *high);
 
 #ifdef CONFIG_SYSCTL
-static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)
+static inline bool inet_is_local_reserved_port(const struct net *net, unsigned short port)
 {
 	if (!net->ipv4.sysctl_local_reserved_ports)
 		return false;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5d2937935b47..127ea3119b4b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2848,7 +2848,12 @@ static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags,
 	}
 
 	if (rq->idle_stamp) {
-		u64 delta = rq_clock(rq) - rq->idle_stamp;
+		u64 delta;
+
+		if (sched_feat(IRQ_AVG))
+			delta = rq_clock_task(rq) - rq->idle_stamp;
+		else
+			delta = rq_clock(rq) - rq->idle_stamp;
 		u64 max = 2*rq->max_idle_balance_cost;
 
 		update_avg(&rq->avg_idle, delta);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 848d45f2bc49..bfdcdad02028 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4473,7 +4473,10 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 
 static inline void rq_idle_stamp_update(struct rq *rq)
 {
-	rq->idle_stamp = rq_clock(rq);
+	if (sched_feat(IRQ_AVG))
+		rq->idle_stamp = rq_clock_task(rq);
+	else
+		rq->idle_stamp = rq_clock(rq);
 }
 
 static inline void rq_idle_stamp_clear(struct rq *rq)
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index c4637bbba4cd..24c546dedbc3 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -115,6 +115,7 @@ SCHED_FEAT(WA_BIAS, true)
  */
 SCHED_FEAT(UTIL_EST, true)
 SCHED_FEAT(UTIL_EST_FASTUP, true)
+SCHED_FEAT(IRQ_AVG, false)
 
 SCHED_FEAT(ALT_PERIOD, true)
 SCHED_FEAT(BASE_SLICE, true)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 1894cf014129..81b8e5cfe686 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -38,7 +38,7 @@ static u32 inet_ehashfn(const struct net *net, const __be32 laddr,
 
 	net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret));
 
-	return __inet_ehashfn(laddr, lport, faddr, fport,
+	return lport + __inet_ehashfn(laddr, 0, faddr, fport,
 			      inet_ehash_secret + net_hash_mix(net));
 }
 
@@ -86,11 +86,11 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
 /*
  * Caller must hold hashbucket lock for this tb with local BH disabled
  */
-void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
+void inet_bind_bucket_destroy(struct inet_bind_bucket *tb)
 {
 	if (hlist_empty(&tb->owners)) {
-		__hlist_del(&tb->node);
-		kmem_cache_free(cachep, tb);
+		hlist_del_rcu(&tb->node);
+		kfree_rcu(tb, rcu);
 	}
 }
 
@@ -118,7 +118,7 @@ static void __inet_put_port(struct sock *sk)
 	__sk_del_bind_node(sk);
 	inet_csk(sk)->icsk_bind_hash = NULL;
 	inet_sk(sk)->inet_num = 0;
-	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+	inet_bind_bucket_destroy(tb);
 	spin_unlock(&head->lock);
 }
 
@@ -440,7 +440,9 @@ EXPORT_SYMBOL_GPL(__inet_lookup_established);
 /* called with local bh disabled */
 static int __inet_check_established(struct inet_timewait_death_row *death_row,
 				    struct sock *sk, __u16 lport,
-				    struct inet_timewait_sock **twp)
+				    struct inet_timewait_sock **twp,
+				    bool rcu_lookup,
+				    u32 hash)
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
@@ -451,14 +453,25 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	int sdif = l3mdev_master_ifindex_by_index(net, dif);
 	INET_ADDR_COOKIE(acookie, saddr, daddr);
 	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
-	unsigned int hash = inet_ehashfn(net, daddr, lport,
-					 saddr, inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
-	struct sock *sk2;
-	const struct hlist_nulls_node *node;
 	struct inet_timewait_sock *tw = NULL;
+	const struct hlist_nulls_node *node;
+	struct sock *sk2;
+	spinlock_t *lock;
+	if (rcu_lookup) {
+		sk_nulls_for_each(sk2, node, &head->chain) {
+			if (sk2->sk_hash != hash ||
+			    !INET_MATCH(net, sk2, acookie, ports, dif, sdif))
+				continue;
+			if (sk2->sk_state == TCP_TIME_WAIT)
+				break;
+			return -EADDRNOTAVAIL;
+		}
+		return 0;
+	}
+
+	lock = inet_ehash_lockp(hinfo, hash);
 
 	spin_lock(lock);
 
 	sk_nulls_for_each(sk2, node, &head->chain) {
@@ -734,8 +747,10 @@ static u32 *table_perturb;
 
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		struct sock *sk, u64 port_offset,
+		u32 hash_port0,
 		int (*check_established)(struct inet_timewait_death_row *,
-			struct sock *, __u16, struct inet_timewait_sock **))
+			struct sock *, __u16, struct inet_timewait_sock **,
+			bool rcu_lookup, u32 hash))
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_timewait_sock *tw = NULL;
@@ -750,7 +765,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 
 	if (port) {
 		local_bh_disable();
-		ret = check_established(death_row, sk, port, NULL);
+		ret = check_established(death_row, sk, port, NULL, false,
+					hash_port0 + port);
 		local_bh_enable();
 		return ret;
 	}
@@ -787,6 +803,23 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			continue;
 		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
+		rcu_read_lock();
+		hlist_for_each_entry_rcu(tb, &head->chain, node) {
+			if (!net_eq(ib_net(tb), net) || tb->l3mdev != l3mdev ||
+			    tb->port != port)
+				continue;
+			if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) {
+				rcu_read_unlock();
+				goto next_port;
+			}
+			if (!check_established(death_row, sk, port, &tw, true,
+					       hash_port0 + port))
+				break;
+			rcu_read_unlock();
+			goto next_port;
+		}
+		rcu_read_unlock();
+
 		spin_lock_bh(&head->lock);
 
 		/* Does not bother with rcv_saddr checks, because
@@ -797,12 +830,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			    tb->port == port) {
 				if (tb->fastreuse >= 0 ||
 				    tb->fastreuseport >= 0)
-					goto next_port;
+					goto next_port_unlock;
 				WARN_ON(hlist_empty(&tb->owners));
 				if (!check_established(death_row, sk,
-						       port, &tw))
+						       port, &tw, false,
+						       hash_port0 + port))
 					goto ok;
-				goto next_port;
+				goto next_port_unlock;
 			}
 		}
 
@@ -815,8 +849,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		tb->fastreuse = -1;
 		tb->fastreuseport = -1;
 		goto ok;
-next_port:
+next_port_unlock:
 		spin_unlock_bh(&head->lock);
+next_port:
 		cond_resched();
 	}
 
@@ -856,11 +891,18 @@
 int inet_hash_connect(struct inet_timewait_death_row *death_row,
 		      struct sock *sk)
 {
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct net *net = sock_net(sk);
 	u64 port_offset = 0;
+	u32 hash_port0;
 
 	if (!inet_sk(sk)->inet_num)
 		port_offset = inet_sk_port_offset(sk);
-	return __inet_hash_connect(death_row, sk, port_offset,
+
+	hash_port0 = inet_ehashfn(net, inet->inet_rcv_saddr, 0,
+				  inet->inet_daddr, inet->inet_dport);
+
+	return __inet_hash_connect(death_row, sk, port_offset, hash_port0,
 				   __inet_check_established);
 }
 EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index cb28e2fb6c7c..f2601522bb6a 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -36,7 +36,7 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
 
 	__hlist_del(&tw->tw_bind_node);
 	tw->tw_tb = NULL;
-	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+	inet_bind_bucket_destroy(tb);
 	__sock_put((struct sock *)tw);
 }
 
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b4a5e01e1201..3ce85db9624c 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -38,7 +38,7 @@ u32 inet6_ehashfn(const struct net *net,
 	lhash = (__force u32)laddr->s6_addr32[3];
 	fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret);
 
-	return __inet6_ehashfn(lhash, lport, fhash, fport,
+	return lport + __inet6_ehashfn(lhash, 0, fhash, fport,
 			       inet6_ehash_secret + net_hash_mix(net));
 }
 
@@ -245,7 +245,9 @@ EXPORT_SYMBOL_GPL(inet6_lookup);
 
 static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 				     struct sock *sk, const __u16 lport,
-				     struct inet_timewait_sock **twp)
+				     struct inet_timewait_sock **twp,
+				     bool rcu_lookup,
+				     u32 hash)
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
@@ -255,14 +257,26 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	struct net *net = sock_net(sk);
 	const int sdif = l3mdev_master_ifindex_by_index(net, dif);
 	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
-	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
-						inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
-	struct sock *sk2;
-	const struct hlist_nulls_node *node;
 	struct inet_timewait_sock *tw = NULL;
+	const struct hlist_nulls_node *node;
+	struct sock *sk2;
+	spinlock_t *lock;
+
+	if (rcu_lookup) {
+		sk_nulls_for_each(sk2, node, &head->chain) {
+			if (sk2->sk_hash != hash ||
+			    !inet6_match(net, sk2, saddr, daddr,
+					 ports, dif, sdif))
+				continue;
+			if (sk2->sk_state == TCP_TIME_WAIT)
+				break;
+			return -EADDRNOTAVAIL;
+		}
+		return 0;
+	}
+	lock = inet_ehash_lockp(hinfo, hash);
 
 	spin_lock(lock);
 
 	sk_nulls_for_each(sk2, node, &head->chain) {
@@ -320,11 +334,19 @@ static u64 inet6_sk_port_offset(const struct sock *sk)
 int inet6_hash_connect(struct inet_timewait_death_row *death_row,
 		       struct sock *sk)
 {
+	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
+	const struct in6_addr *saddr = &sk->sk_v6_daddr;
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct net *net = sock_net(sk);
 	u64 port_offset = 0;
+	u32 hash_port0;
 
 	if (!inet_sk(sk)->inet_num)
 		port_offset = inet6_sk_port_offset(sk);
-	return __inet_hash_connect(death_row, sk, port_offset,
+
+	hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport);
+
+	return __inet_hash_connect(death_row, sk, port_offset, hash_port0,
 				   __inet6_check_established);
 }
 EXPORT_SYMBOL_GPL(inet6_hash_connect);
-- 
2.25.1

FeedBack:
The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully!
Pull request link:
https://gitee.com/openeuler/kernel/pulls/18082
Mailing list address:
https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/25R...