Cache false sharing occurs in struct file during the syscall test case of UnixBench. On a system with a 128B cacheline size, we force it to 64B alignment to get better performance.
With this alignment, up to ~192 bytes are wasted per file struct in the worst case.
If unsure, say N.
Xie XiuQi (2): fs: mitigate cacheline false sharing in struct file; fs: enable CONFIG_FILE_MITIGATION_FALSE_SHARING by default on arm64
arch/arm64/configs/openeuler_defconfig | 1 + fs/Kconfig | 13 +++++++++ fs/file_table.c | 37 +++++++++++++++++++++++--- 3 files changed, 48 insertions(+), 3 deletions(-)
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3547 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/C...
Feedback: The patch(es) that you sent to the kernel@openeuler.org mailing list have been successfully converted to a pull request! Pull request link: https://gitee.com/openeuler/kernel/pulls/3547 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/C...
hulk inclusion category: performance bugzilla: https://gitee.com/openeuler/kernel/issues/I8QS6H
--------------------------------
Cache false sharing occurs in struct file during the syscall test case of UnixBench. On a system with a 128B cacheline size, we force it to 64B alignment to get better performance.
With this alignment, up to ~192 bytes are wasted per file struct in the worst case.
If unsure, say N.
Signed-off-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Ruan Jinjie ruanjinjie@huawei.com --- fs/Kconfig | 13 +++++++++++++ fs/file_table.c | 37 ++++++++++++++++++++++++++++++++++--- 2 files changed, 47 insertions(+), 3 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig index d3fff13ca13d..3d8c3663677c 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -398,6 +398,19 @@ source "fs/unicode/Kconfig" config IO_WQ bool
+config FILE_MITIGATION_FALSE_SHARING + bool "mitigation false sharing in file struct" + depends on ARCH_LLC_128_LINE_SIZE + default n + help + Enable this to mitigation cacheline false sharing in file struct. + + Cache false share might cause performance decrease. So adjust alignment + of file struct. This would waste ~192 bytes at worst case for each file + struct. + + + If unsure, say N. endmenu
config RESCTRL diff --git a/fs/file_table.c b/fs/file_table.c index 7a3b4a7f6808..32d33a7b3852 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -42,12 +42,43 @@ static struct kmem_cache *filp_cachep __read_mostly;
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
+#ifdef CONFIG_FILE_MITIGATION_FALSE_SHARING +struct file_wrap { + u64 pad[8]; + struct file f; +}; + +#define GET_FILE_WRAP(fp) container_of(fp, struct file_wrap, f) +#define FILE_SZ sizeof(struct file_wrap) +#define FILE_ALIGN 128 + +static inline struct file *kmem_cache_zalloc_file(void) +{ + struct file_wrap *fw; + + fw = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); + if (unlikely(!fw)) + return NULL; + + return &fw->f; +} +#else +#define GET_FILE_WRAP(fp) fp +#define FILE_SZ sizeof(struct file) +#define FILE_ALIGN 0 + +static inline struct file *kmem_cache_zalloc_file(void) +{ + return kmem_cache_zalloc(filp_cachep, GFP_KERNEL); +} +#endif + static void file_free_rcu(struct rcu_head *head) { struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
put_cred(f->f_cred); - kmem_cache_free(filp_cachep, f); + kmem_cache_free(filp_cachep, GET_FILE_WRAP(f)); }
static inline void file_free(struct file *f) @@ -98,7 +129,7 @@ static struct file *__alloc_file(int flags, const struct cred *cred) struct file *f; int error;
- f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL); + f = kmem_cache_zalloc_file(); if (unlikely(!f)) return ERR_PTR(-ENOMEM);
@@ -380,7 +411,7 @@ EXPORT_SYMBOL(__fput_sync);
void __init files_init(void) { - filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, + filp_cachep = kmem_cache_create("filp", FILE_SZ, FILE_ALIGN, SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT, NULL); percpu_counter_init(&nr_files, 0, GFP_KERNEL); }
hulk inclusion category: performance bugzilla: https://gitee.com/openeuler/kernel/issues/I8QS6H
--------------------------------
Cache false sharing occurs in struct file during the syscall test case of UnixBench. On a system with a 128B cacheline size, we force it to 64B alignment to get better performance.
With this alignment, up to ~192 bytes are wasted per file struct in the worst case.
We enable it on openEuler arm64 platform.
If unsure, say N.
Signed-off-by: Xie XiuQi xiexiuqi@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 158a9f3bb8fd..0d0d7e04d4b5 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -6592,6 +6592,7 @@ CONFIG_NLS_UTF8=m # CONFIG_DLM is not set # CONFIG_UNICODE is not set CONFIG_IO_WQ=y +CONFIG_FILE_MITIGATION_FALSE_SHARING=y # end of File systems
CONFIG_RESCTRL=y