From: Xiongfeng Wang wangxiongfeng2@huawei.com
mainline inclusion from mainline-v5.16-rc3 commit d5624bb29f49b849ac8d1e9783dbf9c65cf33457 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4QKQ3 CVE: NA
----------------------
For memory accesses with write-combining attributes (e.g. those returned by ioremap_wc()), the CPU may wait for prior accesses to be merged with subsequent ones. But in some situations, such waiting is bad for performance.
We introduce io_stop_wc() to prevent the merging of write-combining memory accesses before this macro with those after it.
We add an implementation for ARM64 using the DGH instruction and provide a NOP implementation for other architectures.
Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Suggested-by: Will Deacon will@kernel.org Suggested-by: Catalin Marinas catalin.marinas@arm.com Acked-by: Arnd Bergmann arnd@arndb.de Link: https://lore.kernel.org/r/20211221035556.60346-1-wangxiongfeng2@huawei.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com
Conflicts: arch/arm64/include/asm/barrier.h Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- Documentation/memory-barriers.txt | 8 ++++++++ arch/arm64/include/asm/barrier.h | 10 ++++++++++ include/asm-generic/barrier.h | 11 +++++++++++ 3 files changed, 29 insertions(+)
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 17c8e0c2deb4..e0ebb5747d95 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1950,6 +1950,14 @@ There are some more advanced barrier functions: For load from persistent memory, existing read memory barriers are sufficient to ensure read ordering.
+ (*) io_stop_wc(); + + For memory accesses with write-combining attributes (e.g. those returned + by ioremap_wc()), the CPU may wait for prior accesses to be merged with + subsequent ones. io_stop_wc() can be used to prevent the merging of + write-combining memory accesses before this macro with those after it when + such wait has performance implications. + =============================== IMPLICIT KERNEL MEMORY BARRIERS =============================== diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 37d891af8ea5..2a13b1c2c390 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -25,6 +25,14 @@ #define psb_csync() asm volatile("hint #17" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory")
+/* + * Data Gathering Hint (DGH), emitted below through its "hint #6" encoding: + * This instruction prevents merging memory accesses with Normal-NC or + * Device-GRE attributes before the hint instruction with any memory accesses + * appearing after the hint instruction. The "memory" clobber also acts as a compiler barrier. + */ +#define dgh() asm volatile("hint #6" : : : "memory") + #define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \ SB_BARRIER_INSN"nop\n", \ ARM64_HAS_SB)) @@ -49,6 +57,8 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst)
+#define io_stop_wc() dgh() /* arm64: stop write-combining merges across this point via dgh() */ + /* * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz * and 0 otherwise. diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 640f09479bdf..4c2c1b830344 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -251,5 +251,16 @@ do { \ #define pmem_wmb() wmb() #endif
+/* + * ioremap_wc() maps I/O memory as memory with write-combining attributes. For + * this kind of memory access, the CPU may wait for prior accesses to be + * merged with subsequent ones. In some situations, such waiting is bad for + * performance. io_stop_wc() can be used to prevent the merging of + * write-combining memory accesses before this macro with those after it. + */ +#ifndef io_stop_wc +#define io_stop_wc() do { } while (0) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */