From: Jakub Kicinski <kuba(a)kernel.org>
mainline inclusion
from mainline-v6.8-rc6
commit 52f671db18823089a02f07efc04efdb2272ddc17
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9E2LT
CVE: CVE-2024-26740
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
The test Davide added in commit ca22da2fbd69 ("act_mirred: use the backlog
for nested calls to mirred ingress") hangs our testing VMs every 10 or so
runs, with the familiar tcp_v4_rcv -> tcp_v4_rcv deadlock reported by
lockdep.
The problem as previously described by Davide (see Link) is that
if we reverse flow of traffic with the redirect (egress -> ingress)
we may reach the same socket which generated the packet. And we may
still be holding its socket lock. The common solution to such deadlocks
is to put the packet in the Rx backlog, rather than run the Rx path
inline. Do that for all egress -> ingress reversals, not just once
we started to nest mirred calls.
In the past there was a concern that the backlog indirection will
lead to loss of error reporting / less accurate stats. But the current
workaround does not seem to address the issue.
Fixes: 53592b364001 ("net/sched: act_mirred: Implement ingress actions")
Cc: Marcelo Ricardo Leitner <marcelo.leitner(a)gmail.com>
Suggested-by: Davide Caratti <dcaratti(a)redhat.com>
Link: https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.166…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Acked-by: Jamal Hadi Salim <jhs(a)mojatatu.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Conflicts:
net/sched/act_mirred.c
Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com>
---
net/sched/act_mirred.c | 15 ++++++---------
.../selftests/net/forwarding/tc_actions.sh | 3 ---
2 files changed, 6 insertions(+), 12 deletions(-)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index febf06b8bbdf..336db2c938b5 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -197,18 +197,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return ret;
}
-static bool is_mirred_nested(void)
-{
- return unlikely(__this_cpu_read(mirred_rec_level) > 1);
-}
-
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int
+tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
{
int err;
if (!want_ingress)
err = dev_queue_xmit(skb);
- else if (is_mirred_nested())
+ else if (!at_ingress)
err = netif_rx(skb);
else
err = netif_receive_skb(skb);
@@ -300,14 +296,15 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
if (use_reinsert) {
res->ingress = want_ingress;
res->qstats = this_cpu_ptr(m->common.cpu_qstats);
- if (tcf_mirred_forward(want_ingress, skb) && res->qstats)
+ if (tcf_mirred_forward(skb_at_tc_ingress(skb), want_ingress, skb)
+ && res->qstats)
qstats_overlimit_inc(res->qstats);
__this_cpu_dec(mirred_rec_level);
return TC_ACT_CONSUMED;
}
}
- err = tcf_mirred_forward(want_ingress, skb2);
+ err = tcf_mirred_forward(skb_at_tc_ingress(skb), want_ingress, skb2);
if (err) {
out:
qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index aaa1ea10ac83..221a023ee5d6 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -183,9 +183,6 @@ mirred_egress_to_ingress_tcp_test()
check_err $? "didn't mirred redirect ICMP"
tc_check_packets "dev $h1 ingress" 102 10
check_err $? "didn't drop mirred ICMP"
- local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
- test ${overlimits} = 10
- check_err $? "wrong overlimits, expected 10 got ${overlimits}"
tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
--
2.34.1
From: Jakub Kicinski <kuba(a)kernel.org>
mainline inclusion
from mainline-v6.8-rc6
commit 52f671db18823089a02f07efc04efdb2272ddc17
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9E2LT
CVE: CVE-2024-26740
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
The test Davide added in commit ca22da2fbd69 ("act_mirred: use the backlog
for nested calls to mirred ingress") hangs our testing VMs every 10 or so
runs, with the familiar tcp_v4_rcv -> tcp_v4_rcv deadlock reported by
lockdep.
The problem as previously described by Davide (see Link) is that
if we reverse flow of traffic with the redirect (egress -> ingress)
we may reach the same socket which generated the packet. And we may
still be holding its socket lock. The common solution to such deadlocks
is to put the packet in the Rx backlog, rather than run the Rx path
inline. Do that for all egress -> ingress reversals, not just once
we started to nest mirred calls.
In the past there was a concern that the backlog indirection will
lead to loss of error reporting / less accurate stats. But the current
workaround does not seem to address the issue.
Fixes: 53592b364001 ("net/sched: act_mirred: Implement ingress actions")
Cc: Marcelo Ricardo Leitner <marcelo.leitner(a)gmail.com>
Suggested-by: Davide Caratti <dcaratti(a)redhat.com>
Link: https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.166…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Acked-by: Jamal Hadi Salim <jhs(a)mojatatu.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Conflicts:
net/sched/act_mirred.c
Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com>
---
net/sched/act_mirred.c | 15 ++++++---------
.../selftests/net/forwarding/tc_actions.sh | 3 ---
2 files changed, 6 insertions(+), 12 deletions(-)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index febf06b8bbdf..336db2c938b5 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -197,18 +197,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return ret;
}
-static bool is_mirred_nested(void)
-{
- return unlikely(__this_cpu_read(mirred_rec_level) > 1);
-}
-
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int
+tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
{
int err;
if (!want_ingress)
err = dev_queue_xmit(skb);
- else if (is_mirred_nested())
+ else if (!at_ingress)
err = netif_rx(skb);
else
err = netif_receive_skb(skb);
@@ -300,14 +296,15 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
if (use_reinsert) {
res->ingress = want_ingress;
res->qstats = this_cpu_ptr(m->common.cpu_qstats);
- if (tcf_mirred_forward(want_ingress, skb) && res->qstats)
+ if (tcf_mirred_forward(skb_at_tc_ingress(skb), want_ingress, skb)
+ && res->qstats)
qstats_overlimit_inc(res->qstats);
__this_cpu_dec(mirred_rec_level);
return TC_ACT_CONSUMED;
}
}
- err = tcf_mirred_forward(want_ingress, skb2);
+ err = tcf_mirred_forward(skb_at_tc_ingress(skb), want_ingress, skb2);
if (err) {
out:
qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index aaa1ea10ac83..221a023ee5d6 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -183,9 +183,6 @@ mirred_egress_to_ingress_tcp_test()
check_err $? "didn't mirred redirect ICMP"
tc_check_packets "dev $h1 ingress" 102 10
check_err $? "didn't drop mirred ICMP"
- local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
- test ${overlimits} = 10
- check_err $? "wrong overlimits, expected 10 got ${overlimits}"
tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
--
2.34.1
From: Josef Bacik <josef(a)toxicpanda.com>
stable inclusion
from stable-v6.1.79
commit 02f2b95b00bf57d20320ee168b30fb7f3db8e555
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9E2F7
CVE: CVE-2024-26726
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade upstream.
While running the CI for an unrelated change I hit the following panic
with generic/648 on btrfs_holes_spacecache.
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1385!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1
RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0
Call Trace:
<TASK>
extent_write_cache_pages+0x2ac/0x8f0
extent_writepages+0x87/0x110
do_writepages+0xd5/0x1f0
filemap_fdatawrite_wbc+0x63/0x90
__filemap_fdatawrite_range+0x5c/0x80
btrfs_fdatawrite_range+0x1f/0x50
btrfs_write_out_cache+0x507/0x560
btrfs_write_dirty_block_groups+0x32a/0x420
commit_cowonly_roots+0x21b/0x290
btrfs_commit_transaction+0x813/0x1360
btrfs_sync_file+0x51a/0x640
__x64_sys_fdatasync+0x52/0x90
do_syscall_64+0x9c/0x190
entry_SYSCALL_64_after_hwframe+0x6e/0x76
This happens because we fail to write out the free space cache in one
instance, come back around and attempt to write it again. However on
the second pass through we go to call btrfs_get_extent() on the inode to
get the extent mapping. Because this is a new block group, and with the
free space inode we always search the commit root to avoid deadlocking
with the tree, we find nothing and return an EXTENT_MAP_HOLE for the
requested range.
This happens because the first time we try to write the space cache out
we hit an error, and on an error we drop the extent mapping. This is
normal for normal files, but the free space cache inode is special. We
always expect the extent map to be correct. Thus the second time
through we end up with a bogus extent map.
Since we're deprecating this feature, the most straightforward way to
fix this is to simply skip dropping the extent map range for this failed
range.
I shortened the test by using error injection to stress the area to make
it easier to reproduce. With this patch in place we no longer panic
with my error injection test.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Conflicts:
fs/btrfs/inode.c
Signed-off-by: Zizhi Wo <wozizhi(a)huawei.com>
---
fs/btrfs/inode.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b12fc82e34ba..03670d4cd6ed 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2775,8 +2775,22 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
unwritten_start += logical_len;
clear_extent_uptodate(io_tree, unwritten_start, end, NULL);
- /* Drop the cache for the part of the extent we didn't write. */
- btrfs_drop_extent_cache(BTRFS_I(inode), unwritten_start, end, 0);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(BTRFS_I(inode)))
+ btrfs_drop_extent_cache(BTRFS_I(inode), unwritten_start, end, 0);
/*
* If the ordered extent had an IOERR or something else went
--
2.39.2
From: Josef Bacik <josef(a)toxicpanda.com>
stable inclusion
from stable-v6.1.79
commit 02f2b95b00bf57d20320ee168b30fb7f3db8e555
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9E2F7
CVE: CVE-2024-26726
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 5571e41ec6e56e35f34ae9f5b3a335ef510e0ade upstream.
While running the CI for an unrelated change I hit the following panic
with generic/648 on btrfs_holes_spacecache.
assertion failed: block_start != EXTENT_MAP_HOLE, in fs/btrfs/extent_io.c:1385
------------[ cut here ]------------
kernel BUG at fs/btrfs/extent_io.c:1385!
invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
CPU: 1 PID: 2695096 Comm: fsstress Kdump: loaded Tainted: G W 6.8.0-rc2+ #1
RIP: 0010:__extent_writepage_io.constprop.0+0x4c1/0x5c0
Call Trace:
<TASK>
extent_write_cache_pages+0x2ac/0x8f0
extent_writepages+0x87/0x110
do_writepages+0xd5/0x1f0
filemap_fdatawrite_wbc+0x63/0x90
__filemap_fdatawrite_range+0x5c/0x80
btrfs_fdatawrite_range+0x1f/0x50
btrfs_write_out_cache+0x507/0x560
btrfs_write_dirty_block_groups+0x32a/0x420
commit_cowonly_roots+0x21b/0x290
btrfs_commit_transaction+0x813/0x1360
btrfs_sync_file+0x51a/0x640
__x64_sys_fdatasync+0x52/0x90
do_syscall_64+0x9c/0x190
entry_SYSCALL_64_after_hwframe+0x6e/0x76
This happens because we fail to write out the free space cache in one
instance, come back around and attempt to write it again. However on
the second pass through we go to call btrfs_get_extent() on the inode to
get the extent mapping. Because this is a new block group, and with the
free space inode we always search the commit root to avoid deadlocking
with the tree, we find nothing and return an EXTENT_MAP_HOLE for the
requested range.
This happens because the first time we try to write the space cache out
we hit an error, and on an error we drop the extent mapping. This is
normal for normal files, but the free space cache inode is special. We
always expect the extent map to be correct. Thus the second time
through we end up with a bogus extent map.
Since we're deprecating this feature, the most straightforward way to
fix this is to simply skip dropping the extent map range for this failed
range.
I shortened the test by using error injection to stress the area to make
it easier to reproduce. With this patch in place we no longer panic
with my error injection test.
CC: stable(a)vger.kernel.org # 4.14+
Reviewed-by: Filipe Manana <fdmanana(a)suse.com>
Signed-off-by: Josef Bacik <josef(a)toxicpanda.com>
Signed-off-by: David Sterba <dsterba(a)suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Conflicts:
fs/btrfs/inode.c
Signed-off-by: Zizhi Wo <wozizhi(a)huawei.com>
---
fs/btrfs/inode.c | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 51a119ac91cd..676cce61cad9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3145,8 +3145,22 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
end = ordered_extent->file_offset + ordered_extent->len - 1;
clear_extent_uptodate(io_tree, start, end, NULL);
- /* Drop the cache for the part of the extent we didn't write. */
- btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
+ /*
+ * Drop extent maps for the part of the extent we didn't write.
+ *
+ * We have an exception here for the free_space_inode, this is
+ * because when we do btrfs_get_extent() on the free space inode
+ * we will search the commit root. If this is a new block group
+ * we won't find anything, and we will trip over the assert in
+ * writepage where we do ASSERT(em->block_start !=
+ * EXTENT_MAP_HOLE).
+ *
+ * Theoretically we could also skip this for any NOCOW extent as
+ * we don't mess with the extent map tree in the NOCOW case, but
+ * for now simply skip this if we are the free space inode.
+ */
+ if (!btrfs_is_free_space_inode(BTRFS_I(inode)))
+ btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
/*
* If the ordered extent had an IOERR or something else went
--
2.39.2
From: Jakub Kicinski <kuba(a)kernel.org>
mainline inclusion
from mainline-v6.8-rc6
commit 52f671db18823089a02f07efc04efdb2272ddc17
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I9E2LT
CVE: CVE-2024-26740
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
The test Davide added in commit ca22da2fbd69 ("act_mirred: use the backlog
for nested calls to mirred ingress") hangs our testing VMs every 10 or so
runs, with the familiar tcp_v4_rcv -> tcp_v4_rcv deadlock reported by
lockdep.
The problem as previously described by Davide (see Link) is that
if we reverse flow of traffic with the redirect (egress -> ingress)
we may reach the same socket which generated the packet. And we may
still be holding its socket lock. The common solution to such deadlocks
is to put the packet in the Rx backlog, rather than run the Rx path
inline. Do that for all egress -> ingress reversals, not just once
we started to nest mirred calls.
In the past there was a concern that the backlog indirection will
lead to loss of error reporting / less accurate stats. But the current
workaround does not seem to address the issue.
Fixes: 53592b364001 ("net/sched: act_mirred: Implement ingress actions")
Cc: Marcelo Ricardo Leitner <marcelo.leitner(a)gmail.com>
Suggested-by: Davide Caratti <dcaratti(a)redhat.com>
Link: https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.166…
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Acked-by: Jamal Hadi Salim <jhs(a)mojatatu.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Conflicts:
net/sched/act_mirred.c
Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com>
---
net/sched/act_mirred.c | 15 ++++++---------
1 file changed, 6 insertions(+), 9 deletions(-)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index febf06b8bbdf..336db2c938b5 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -197,18 +197,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return ret;
}
-static bool is_mirred_nested(void)
-{
- return unlikely(__this_cpu_read(mirred_rec_level) > 1);
-}
-
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int
+tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
{
int err;
if (!want_ingress)
err = dev_queue_xmit(skb);
- else if (is_mirred_nested())
+ else if (!at_ingress)
err = netif_rx(skb);
else
err = netif_receive_skb(skb);
@@ -300,14 +296,15 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
if (use_reinsert) {
res->ingress = want_ingress;
res->qstats = this_cpu_ptr(m->common.cpu_qstats);
- if (tcf_mirred_forward(want_ingress, skb) && res->qstats)
+ if (tcf_mirred_forward(skb_at_tc_ingress(skb), want_ingress, skb)
+ && res->qstats)
qstats_overlimit_inc(res->qstats);
__this_cpu_dec(mirred_rec_level);
return TC_ACT_CONSUMED;
}
}
- err = tcf_mirred_forward(want_ingress, skb2);
+ err = tcf_mirred_forward(skb_at_tc_ingress(skb), want_ingress, skb2);
if (err) {
out:
qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
--
2.34.1
From: Zeng Heng <zengheng4(a)huawei.com>
hulk inclusion
category: performance
bugzilla: https://gitee.com/openeuler/kernel/issues/I8MV01
--------------------------------
Using the UnixBench test suite, the perf tool clearly shows that osq_lock()
causes extremely high overhead in the File Copy items:
Overhead Shared Object Symbol
94.25% [kernel] [k] osq_lock
0.74% [kernel] [k] rwsem_spin_on_owner
0.32% [kernel] [k] filemap_get_read_batch
In response to this, we conducted an analysis and made some gains:
In the prologue of osq_lock(), the `cpu` member of the percpu struct
optimistic_spin_node is set to the local CPU id; after that, the value of
that member never actually changes. Based on that, we can regard the `cpu`
member as a constant.
Meanwhile, other members of the percpu struct, such as next, prev and
locked, are frequently modified by osq_lock() and osq_unlock(), which are
called by rwsem, mutex and so on. Because the cpu member shares a cache
line with them, those writes invalidate its cached copy on other CPUs.
Therefore, we can place padding here and split them into different cache
lines, avoiding cache misses when the next CPU is spinning and checks
another node's cpu member via vcpu_is_preempted().
The UnixBench full-core test results are shown below:
Machine Intel(R) Xeon(R) Gold 6248 CPU, 40 cores, 80 threads
Run the command of "./Run -c 80 -i 3" 10 times and take the average.
System Benchmarks Index Values Without Patch With Patch Diff
Dhrystone 2 using register variables 185876.43 185945.41 0.04%
Double-Precision Whetstone 79637.27 79659.29 0.03%
Execl Throughput 9909.61 10576.06 6.73%
File Copy 1024 bufsize 2000 maxblocks 1723.01 2086.08 21.07%
File Copy 256 bufsize 500 maxblocks 1150.24 1338.21 16.34%
File Copy 4096 bufsize 8000 maxblocks 3719.19 4011.99 7.87%
Pipe Throughput 66184.84 66025.25 -0.24%
Pipe-based Context Switching 30606.18 31074.21 1.53%
Process Creation 9442.48 9450.77 0.09%
Shell Scripts (1 concurrent) 44526.52 46548.54 4.54%
Shell Scripts (8 concurrent) 42903.96 45718.56 6.56%
System Call Overhead 3645.20 3717.42 1.98%
========
System Benchmarks Index Score 15126.87 15931.29 5.32%
Signed-off-by: Zeng Heng <zengheng4(a)huawei.com>
Signed-off-by: liwei <liwei728(a)huawei.com>
---
include/linux/osq_lock.h | 2 +-
kernel/locking/osq_lock.c | 8 +++++++-
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h
index 5581dbd3bd34..deb90ad5f560 100644
--- a/include/linux/osq_lock.h
+++ b/include/linux/osq_lock.h
@@ -9,7 +9,7 @@
struct optimistic_spin_node {
struct optimistic_spin_node *next, *prev;
int locked; /* 1 if lock acquired */
- int cpu; /* encoded CPU # + 1 value */
+ int cpu ____cacheline_aligned; /* encoded CPU # + 1 value */
};
struct optimistic_spin_queue {
diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c
index d5610ad52b92..17618d62343f 100644
--- a/kernel/locking/osq_lock.c
+++ b/kernel/locking/osq_lock.c
@@ -96,7 +96,13 @@ bool osq_lock(struct optimistic_spin_queue *lock)
node->locked = 0;
node->next = NULL;
- node->cpu = curr;
+ /*
+ * Once the cpu member has been initialized it never actually changes,
+ * so skip the redundant store: that avoids cache misses on other CPUs
+ * that read this node's cpu member while spinning.
+ */
+ if (node->cpu != curr)
+ node->cpu = curr;
/*
* We need both ACQUIRE (pairs with corresponding RELEASE in
--
2.25.1