Hulk 6.6 did not use OPTIMIZE_INLINING. It used the gnu_inline attribute, which caused the inline functions to not be actually inlined, introducing performance issues. Therefore, we set it to be editable (disabled by default) and made adjustments to some functions that had linking conflicts.
Guo Xuenan (2): Revert "compiler: remove CONFIG_OPTIMIZE_INLINING entirely"; make OPTIMIZE_INLINING config editable
arch/arm64/kvm/sys_regs.h | 7 ++++- arch/x86/configs/i386_defconfig | 1 + arch/x86/configs/x86_64_defconfig | 1 + .../net/ethernet/mellanox/mlx5/core/health.c | 30 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/mlx5_core.h | 24 --------------- .../pci/hive_isp_css_include/print_support.h | 4 +++ include/linux/compiler_types.h | 8 +++++ include/trace/trace_events.h | 15 ++++++++++ kernel/configs/tiny.config | 1 + lib/Kconfig.debug | 13 ++++++++ 10 files changed, 79 insertions(+), 25 deletions(-)
From: Guo Xuenan guoxuenan@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8PGC4
--------------------------------
This reverts commit 889b3c1245de48ed0cacf7aebb25c489d3e4a3e9.
Signed-off-by: Guo Xuenan guoxuenan@huawei.com Signed-off-by: Yuntao Liu liuyuntao12@huawei.com --- arch/x86/configs/i386_defconfig | 1 + arch/x86/configs/x86_64_defconfig | 1 + include/linux/compiler_types.h | 8 ++++++++ kernel/configs/tiny.config | 1 + lib/Kconfig.debug | 12 ++++++++++++ 5 files changed, 23 insertions(+)
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 1b411bbf3cb0..6a505b3b4b4d 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -280,5 +280,6 @@ CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_BOOT_PARAMS=y +CONFIG_OPTIMIZE_INLINING=y CONFIG_UNWINDER_FRAME_POINTER=y # CONFIG_64BIT is not set diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 409e9182bd29..5bd1fcabbe7d 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -276,3 +276,4 @@ CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_BOOT_PARAMS=y +CONFIG_OPTIMIZE_INLINING=y diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index c523c6683789..476c54a6fb12 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -206,13 +206,21 @@ struct ftrace_likely_data { #define __naked __attribute__((__naked__)) notrace
/* + * Force always-inline if the user requests it so via the .config. * Prefer gnu_inline, so that extern inline functions do not emit an * externally visible function. This makes extern inline behave as per gnu89 * semantics rather than c99. This prevents multiple symbol definition errors * of extern inline functions at link time. * A lot of inline functions can cause havoc with function tracing. + * Do not use __always_inline here, since currently it expands to inline again + * (which would break users of __always_inline). */ +#if !defined(CONFIG_OPTIMIZE_INLINING) +#define inline inline __attribute__((__always_inline__)) __gnu_inline \ + __inline_maybe_unused notrace +#else #define inline inline __gnu_inline __inline_maybe_unused notrace +#endif
/* * gcc provides both __inline__ and __inline as alternate spellings of diff --git a/kernel/configs/tiny.config b/kernel/configs/tiny.config index 00009f7d0835..d0655ba4df4c 100644 --- a/kernel/configs/tiny.config +++ b/kernel/configs/tiny.config @@ -6,5 +6,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_KERNEL_XZ=y # CONFIG_KERNEL_LZO is not set # CONFIG_KERNEL_LZ4 is not set +CONFIG_OPTIMIZE_INLINING=y CONFIG_SLUB=y CONFIG_SLUB_TINY=y diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7241c304cb5c..89582a93257a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -474,6 +474,18 @@ config HEADERS_INSTALL user-space program samples. It is also needed by some features such as uapi header sanity checks.
+config OPTIMIZE_INLINING + def_bool y + help + This option determines if the kernel forces gcc to inline the functions + developers have marked 'inline'. Doing so takes away freedom from gcc to + do what it thinks is best, which is desirable for the gcc 3.x series of + compilers. The gcc 4.x series have a rewritten inlining algorithm and + enabling this option will generate a smaller kernel there. Hopefully + this algorithm is so good that allowing gcc 4.x and above to make the + decision will become the default in the future. Until then this option + is there to test gcc for this. + config DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" depends on CC_IS_GCC
From: Guo Xuenan guoxuenan@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8PGC4
--------------------------------
For performance reasons, hulk 6.6 does not use OPTIMIZE_INLINING. Relying on the gnu_inline attribute alone means that functions marked inline are not necessarily inlined, which introduces performance issues. So make the option editable (disabled by default) and adapt some functions that had link conflicts.
Signed-off-by: Guo Xuenan guoxuenan@huawei.com Signed-off-by: Yuntao Liu liuyuntao12@huawei.com --- arch/arm64/kvm/sys_regs.h | 7 ++++- .../net/ethernet/mellanox/mlx5/core/health.c | 30 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/mlx5_core.h | 24 --------------- .../pci/hive_isp_css_include/print_support.h | 4 +++ include/trace/trace_events.h | 15 ++++++++++ lib/Kconfig.debug | 3 +- 6 files changed, 57 insertions(+), 26 deletions(-)
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index c65c129b3500..3080693719d2 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -100,8 +100,13 @@ struct sys_reg_desc { #define REG_USER_WI (1 << 3) /* WI from userspace only */
static __printf(2, 3) +#if defined(CONFIG_OPTIMIZE_INLINING) inline void print_sys_reg_msg(const struct sys_reg_params *p, - char *fmt, ...) + char *fmt, ...) +#else +void print_sys_reg_msg(const struct sys_reg_params *p, + char *fmt, ...) +#endif { va_list va;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 2fb2598b775e..71de8578271b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -45,6 +45,36 @@ #include "diag/fw_tracer.h" #include "diag/reporter_vnic.h"
+#if defined(CONFIG_OPTIMIZE_INLINING) +static inline void mlx5_printk(struct mlx5_core_dev *dev, int level, + const char *format, ...) +#else +static void mlx5_printk(struct mlx5_core_dev *dev, int level, + const char *format, ...) +#endif +{ + struct device *device = dev->device; + struct va_format vaf; + va_list args; + + if (WARN_ONCE(level < LOGLEVEL_EMERG || level > LOGLEVEL_DEBUG, + "Level %d is out of range, set to default level\n", level)) + level = LOGLEVEL_DEFAULT; + + va_start(args, format); + vaf.fmt = format; + vaf.va = &args; + + dev_printk_emit(level, device, "%s %s: %pV", dev_driver_string(device), dev_name(device), + &vaf); + va_end(args); +} + +#define mlx5_log(__dev, level, format, ...) \ + mlx5_printk(__dev, level, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ + ##__VA_ARGS__) + enum { MAX_MISSES = 3, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 124352459c23..ca6736987fb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -97,30 +97,6 @@ do { \ __func__, __LINE__, current->pid, \ ##__VA_ARGS__)
-static inline void mlx5_printk(struct mlx5_core_dev *dev, int level, const char *format, ...) -{ - struct device *device = dev->device; - struct va_format vaf; - va_list args; - - if (WARN_ONCE(level < LOGLEVEL_EMERG || level > LOGLEVEL_DEBUG, - "Level %d is out of range, set to default level\n", level)) - level = LOGLEVEL_DEFAULT; - - va_start(args, format); - vaf.fmt = format; - vaf.va = &args; - - dev_printk_emit(level, device, "%s %s: %pV", dev_driver_string(device), dev_name(device), - &vaf); - va_end(args); -} - -#define mlx5_log(__dev, level, format, ...) \ - mlx5_printk(__dev, level, "%s:%d:(pid %d): " format, \ - __func__, __LINE__, current->pid, \ - ##__VA_ARGS__) - static inline struct device *mlx5_core_dma_dev(struct mlx5_core_dev *dev) { return &dev->pdev->dev; diff --git a/drivers/staging/media/atomisp/pci/hive_isp_css_include/print_support.h b/drivers/staging/media/atomisp/pci/hive_isp_css_include/print_support.h index a3c7f3de6d17..916529e10bd1 100644 --- a/drivers/staging/media/atomisp/pci/hive_isp_css_include/print_support.h +++ b/drivers/staging/media/atomisp/pci/hive_isp_css_include/print_support.h @@ -20,7 +20,11 @@
extern int (*sh_css_printf)(const char *fmt, va_list args); /* depends on host supplied print function in ia_css_init() */ +#if defined(CONFIG_OPTIMIZE_INLINING) static inline __printf(1, 2) void ia_css_print(const char *fmt, ...) +#else +static __printf(1, 2) void ia_css_print(const char *fmt, ...) +#endif { va_list ap;
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index c2f9cabf154d..618e2d0f22a0 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -252,6 +252,7 @@ static struct trace_event_fields trace_event_fields_##call[] = { \ #include "stages/stage5_get_offsets.h"
#undef DECLARE_EVENT_CLASS +#if defined(CONFIG_OPTIMIZE_INLINING) #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ static inline notrace int trace_event_get_offsets_##call( \ struct trace_event_data_offsets_##call *__data_offsets, proto) \ @@ -264,6 +265,20 @@ static inline notrace int trace_event_get_offsets_##call( \ \ return __data_size; \ } +#else +#define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \ +static notrace int trace_event_get_offsets_##call( \ + struct trace_event_data_offsets_##call *__data_offsets, proto) \ +{ \ + int __data_size = 0; \ + int __maybe_unused __item_length; \ + struct trace_event_raw_##call __maybe_unused *entry; \ + \ + tstruct; \ + \ + return __data_size; \ +} +#endif
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 89582a93257a..b0cecd03ceba 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -475,7 +475,8 @@ config HEADERS_INSTALL as uapi header sanity checks.
config OPTIMIZE_INLINING - def_bool y + bool "Optimize inlining" + def_bool n help This option determines if the kernel forces gcc to inline the functions developers have marked 'inline'. Doing so takes away freedom from gcc to
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3971 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/3...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3971 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/3...