[PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX

These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58 Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64 arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+) -- 2.20.1

Assuming that struct thread_struct won't be accessed directly by drivers, use the __GENKSYMS__ macro to avoid the kabi change caused by commit 9782a712eb97 ("x86/fpu: Add PKRU storage outside of task XSAVE buffer"). Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> --- arch/x86/include/asm/processor.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 008237b10e1f..765f4dbc9a7a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -534,6 +534,7 @@ struct thread_struct { unsigned int iopl_warn:1; unsigned int sig_on_uaccess_err:1; +#ifndef __GENKSYMS__ /* * Protection Keys Register for Userspace. Loaded immediately on * context switch. Store it in thread_struct to avoid a lookup in @@ -542,6 +543,7 @@ struct thread_struct { * PKRU is the hardware itself. */ u32 pkru; +#endif /* Floating point and extended processor state */ struct fpu fpu; -- 2.20.1

Assuming that struct exception_table_entry won't be accessed directly by drivers, use the __GENKSYMS__ macro to avoid the kabi change caused by commit 46d28947d987 ("x86/extable: Rework the exception table mechanics"). Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> --- arch/x86/include/asm/extable.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h index 93f400eb728f..56730f7e728a 100644 --- a/arch/x86/include/asm/extable.h +++ b/arch/x86/include/asm/extable.h @@ -21,7 +21,11 @@ */ struct exception_table_entry { +#ifndef __GENKSYMS__ int insn, fixup, type; +#else + int insn, fixup, handler; +#endif }; struct pt_regs; -- 2.20.1

Assuming that struct fpu won't be accessed directly by drivers, use the __GENKSYMS__ macro to avoid the kabi change caused by the following commits: 87d0e5be0fac ("x86/fpu: Provide struct fpstate") 2f27b5034244 ("x86/fpu: Remove fpu::state") 75c52dad5e32 ("x86/fpu: Prepare for sanitizing KVM FPU code") 6f6a7c09c406 ("x86/fpu: Add members to struct fpu to cache permission information") Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> --- arch/x86/include/asm/fpu/types.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 3c06c82ab355..941a9abc2735 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -453,6 +453,7 @@ struct fpu { */ unsigned long avx512_timestamp; +#ifndef __GENKSYMS__ /* * @fpstate: * @@ -485,6 +486,9 @@ struct fpu { * are not longer containing the tasks FPU register state. */ struct fpstate __fpstate; +#else + union fpregs_state state; +#endif /* * WARNING: '__fpstate' is dynamically-sized. Do not put * anything after it here. -- 2.20.1

To fix kabi change caused by commit 1c33bb050750 ("x86/elf: Support a new ELF aux vector AT_MINSIGSTKSZ"), add the same size padding to the original position of saved_auxv, then move saved_auxv[] with the new size to the end of mm_struct, and use the __GENKSYMS__ macro to avoid kabi change. All the modifications are applied to x86_64 only. Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> --- arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/x86/include/uapi/asm/auxvec.h b/arch/x86/include/uapi/asm/auxvec.h index 6beb55bbefa4..bc116e7dc143 100644 --- a/arch/x86/include/uapi/asm/auxvec.h +++ b/arch/x86/include/uapi/asm/auxvec.h @@ -12,6 +12,7 @@ /* entries in ARCH_DLINFO: */ #if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) +# define AT_VECTOR_SIZE_ARCH_OLD 2 # define AT_VECTOR_SIZE_ARCH 3 #else /* else it's non-compat x86-64 */ # define AT_VECTOR_SIZE_ARCH 2 diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 1c22e294f083..71df1586e36f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -22,6 +22,11 @@ #ifndef AT_VECTOR_SIZE_ARCH #define AT_VECTOR_SIZE_ARCH 0 #endif + +#ifdef CONFIG_X86_64 +#define AT_VECTOR_SIZE_OLD (2*(AT_VECTOR_SIZE_ARCH_OLD + AT_VECTOR_SIZE_BASE + 1)) +#endif + #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) #define INIT_PASID 0 @@ -508,7 +513,14 @@ struct mm_struct { unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; +#if defined(CONFIG_X86_64) && !defined(__GENKSYMS__) + unsigned long saved_auxv_padding[AT_VECTOR_SIZE_OLD]; /* for /proc/PID/auxv */ +#elif defined(CONFIG_X86_64) + unsigned long saved_auxv[AT_VECTOR_SIZE_OLD]; /* for /proc/PID/auxv */ +#else unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ +#endif + /* * Special counters, in some configurations protected by the @@ -608,6 +620,9 @@ struct 
mm_struct { struct sp_group_master *sp_group_master; #endif +#if defined(CONFIG_X86_64) && !defined(__GENKSYMS__) + unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ +#endif /* * The mm_cpumask needs to be at the end of mm_struct, because it * is dynamically sized based on nr_cpu_ids. -- 2.20.1

代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束; 兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报; 另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员; -----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zhoukang (A) <zhoukang7@huawei.com>; jason.zeng@intel.com; lin.x.wang@intel.com; jun.j.tian@intel.com; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58 Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64 arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+) -- 2.20.1

目前看为了统一KABI的检查,最好还是利用__GENKSYMS__包含对应已有修改的kernel struct和ABI。 这样防止SPR代码合并后check-kabi持续误报的问题,我们也会对SPR相应的KABI change的部分统一 通过这个方式处理以保持一致。当然前提是大家已经审视过SPR相应change并没有被引用或者影响可控。 修改KABI CRC工具未来也可能造成其他潜在问题,比如不参与checksum检查的KABI未来可能有 第三方module会引用。 Thanks, Jun Tian
-----Original Message----- From: Zhoukang (A) <zhoukang7@huawei.com> Sent: Saturday, October 22, 2022 3:54 PM To: Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Tian, Jun J <jun.j.tian@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束;
兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver 目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报;
另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员;
-----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zhoukang (A) <zhoukang7@huawei.com>; jason.zeng@intel.com; lin.x.wang@intel.com; jun.j.tian@intel.com; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58
Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64
arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+)
-- 2.20.1

好的,谢谢!我们也再评估下, 另外 uapi目录修改的那个头文件, 是给用户态libc库用的吗? 是否有libc的对应补丁; -----邮件原件----- 发件人: Tian, Jun J [mailto:jun.j.tian@intel.com] 发送时间: 2022年10月24日 15:41 收件人: Zhoukang (A) <zhoukang7@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org 抄送: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> 主题: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 目前看为了统一KABI的检查,最好还是利用__GENKSYMS__包含对应已有修改的kernel struct和ABI。 这样防止SPR代码合并后check-kabi持续误报的问题,我们也会对SPR相应的KABI change的部分统一 通过这个方式处理以保持一致。当然前提是大家已经审视过SPR相应change并没有被引用或者影响可控。 修改KABI CRC工具未来也可能造成其他潜在问题,比如不参与checksum检查的KABI未来可能有 第三方module会引用。 Thanks, Jun Tian
-----Original Message----- From: Zhoukang (A) <zhoukang7@huawei.com> Sent: Saturday, October 22, 2022 3:54 PM To: Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Tian, Jun J <jun.j.tian@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束;
兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver 目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报;
另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员;
-----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zhoukang (A) <zhoukang7@huawei.com>; jason.zeng@intel.com; lin.x.wang@intel.com; jun.j.tian@intel.com; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58
Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64
arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+)
-- 2.20.1

uapi/auxvec: Define the aux vector AT_MINSIGSTKSZ 这个patch是“Improve Minimum Alternate Stack Size”这组patch中的一个,实现的功能之一就是动态计算出当前用户栈所需要满足的最小要求,用户可以直接调用用户态接口而无需依赖libc: getauxval(AT_MINSIGSTKSZ); libc在2.34之前给出的MINSIGSTKSZ是一个固定值,可能会造成栈溢出(比如使用了AVX512:https://bugzilla.kernel.org/show_bug.cgi?id=153531),在AMX这类动态特性加入后就完全不能满足要求了,所以2.34+也实现了类似功能,通过#include <signal.h>就可以获得deliver signal所需要满足的要求。 Br, Lin -----Original Message----- From: Zhengzengkai <zhengzengkai@huawei.com> Sent: Monday, October 24, 2022 5:10 PM To: Tian, Jun J <jun.j.tian@intel.com>; Zhoukang (A) <zhoukang7@huawei.com>; kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: 答复: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 好的,谢谢!我们也再评估下, 另外 uapi目录修改的那个头文件, 是给用户态libc库用的吗? 是否有libc的对应补丁; -----邮件原件----- 发件人: Tian, Jun J [mailto:jun.j.tian@intel.com] 发送时间: 2022年10月24日 15:41 收件人: Zhoukang (A) <zhoukang7@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org 抄送: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> 主题: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 目前看为了统一KABI的检查,最好还是利用__GENKSYMS__包含对应已有修改的kernel struct和ABI。 这样防止SPR代码合并后check-kabi持续误报的问题,我们也会对SPR相应的KABI change的部分统一 通过这个方式处理以保持一致。当然前提是大家已经审视过SPR相应change并没有被引用或者影响可控。 修改KABI CRC工具未来也可能造成其他潜在问题,比如不参与checksum检查的KABI未来可能有 第三方module会引用。 Thanks, Jun Tian
-----Original Message----- From: Zhoukang (A) <zhoukang7@huawei.com> Sent: Saturday, October 22, 2022 3:54 PM To: Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Tian, Jun J <jun.j.tian@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束;
兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver 目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报;
另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员;
-----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zhoukang (A) <zhoukang7@huawei.com>; jason.zeng@intel.com; lin.x.wang@intel.com; jun.j.tian@intel.com; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58
Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64
arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+)
-- 2.20.1

好的,感谢解答! @Zhoukang, 康总,你看下呢? -----邮件原件----- 发件人: Wang, Lin X [mailto:lin.x.wang@intel.com] 发送时间: 2022年10月24日 19:19 收件人: Zhengzengkai <zhengzengkai@huawei.com>; Tian, Jun J <jun.j.tian@intel.com>; Zhoukang (A) <zhoukang7@huawei.com>; kernel@openeuler.org 抄送: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> 主题: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX uapi/auxvec: Define the aux vector AT_MINSIGSTKSZ 这个patch是“Improve Minimum Alternate Stack Size”这组patch中的一个,实现的功能之一就是动态计算出当前用户栈所需要满足的最小要求,用户可以直接调用用户态接口而无需依赖libc: getauxval(AT_MINSIGSTKSZ); libc在2.34之前给出的MINSIGSTKSZ是一个固定值,可能会造成栈溢出(比如使用了AVX512:https://bugzilla.kernel.org/show_bug.cgi?id=153531),在AMX这类动态特性加入后就完全不能满足要求了,所以2.34+也实现了类似功能,通过#include <signal.h>就可以获得deliver signal所需要满足的要求。 Br, Lin -----Original Message----- From: Zhengzengkai <zhengzengkai@huawei.com> Sent: Monday, October 24, 2022 5:10 PM To: Tian, Jun J <jun.j.tian@intel.com>; Zhoukang (A) <zhoukang7@huawei.com>; kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: 答复: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 好的,谢谢!我们也再评估下, 另外 uapi目录修改的那个头文件, 是给用户态libc库用的吗? 
是否有libc的对应补丁; -----邮件原件----- 发件人: Tian, Jun J [mailto:jun.j.tian@intel.com] 发送时间: 2022年10月24日 15:41 收件人: Zhoukang (A) <zhoukang7@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org 抄送: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> 主题: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 目前看为了统一KABI的检查,最好还是利用__GENKSYMS__包含对应已有修改的kernel struct和ABI。 这样防止SPR代码合并后check-kabi持续误报的问题,我们也会对SPR相应的KABI change的部分统一 通过这个方式处理以保持一致。当然前提是大家已经审视过SPR相应change并没有被引用或者影响可控。 修改KABI CRC工具未来也可能造成其他潜在问题,比如不参与checksum检查的KABI未来可能有 第三方module会引用。 Thanks, Jun Tian
-----Original Message----- From: Zhoukang (A) <zhoukang7@huawei.com> Sent: Saturday, October 22, 2022 3:54 PM To: Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Tian, Jun J <jun.j.tian@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束;
兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver 目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报;
另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员;
-----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zhoukang (A) <zhoukang7@huawei.com>; jason.zeng@intel.com; lin.x.wang@intel.com; jun.j.tian@intel.com; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58
Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64
arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+)
-- 2.20.1

请教下,假设内核合入了对应补丁。 如果用户态程序是用 2.34之前的glibc编译出来的,代码中使用固定的 MINSIGSTKSZ。这种程序在与新内核交互时是否可能出现前面说的栈溢出情况? 因为客户的程序很多并不是在最新版本上编译出来使用的。
-----邮件原件----- 发件人: Wang, Lin X <lin.x.wang@intel.com> 发送时间: 2022年10月24日 19:19 收件人: Zhengzengkai <zhengzengkai@huawei.com>; Tian, Jun J <jun.j.tian@intel.com>; Zhoukang (A) <zhoukang7@huawei.com>; kernel@openeuler.org 抄送: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> 主题: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
uapi/auxvec: Define the aux vector AT_MINSIGSTKSZ
这个patch是“Improve Minimum Alternate Stack Size”这组patch中的一个, 实现的功能之一就是动态计算出当前用户栈所需要满足的最小要求,用户可以直接调用用户态接口而无需依赖libc:
getauxval(AT_MINSIGSTKSZ);
libc在2.34之前给出的MINSIGSTKSZ是一个固定值,可能会造成栈溢出(比如使用了AVX512:https://bugzilla.kernel.org/show_bug.cgi?id=153531),在AMX这类动态特性加入后就完全不能满足要求了,所以2.34+也实现了类似功能,通过#include <signal.h>就可以获得deliver signal所需要满足的要求。
Br,
Lin
-----Original Message----- From: Zhengzengkai <zhengzengkai@huawei.com> Sent: Monday, October 24, 2022 5:10 PM To: Tian, Jun J <jun.j.tian@intel.com>; Zhoukang (A) <zhoukang7@huawei.com>; kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: 答复: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
好的,谢谢!我们也再评估下, 另外 uapi目录修改的那个头文件, 是给用户态libc库用的吗? 是否有libc的对应补丁;
-----邮件原件----- 发件人: Tian, Jun J [mailto:jun.j.tian@intel.com] 发送时间: 2022年10月24日 15:41 收件人: Zhoukang (A) <zhoukang7@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org 抄送: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> 主题: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
目前看为了统一KABI的检查,最好还是利用__GENKSYMS__包含对应已有修改的kernel struct和ABI。 这样防止SPR代码合并后check-kabi持续误报的问题,我们也会对SPR相应的KABI change的部分统一通过这个方式处理以保持一致。当然前提是大家已经审视过SPR相应change并没有被引用或者影响可控。
修改KABI CRC工具未来也可能造成其他潜在问题,比如不参与checksum检查的KABI未来可能有第三方module会引用。
Thanks, Jun Tian
-----Original Message----- From: Zhoukang (A) <zhoukang7@huawei.com> Sent: Saturday, October 22, 2022 3:54 PM To: Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Tian, Jun J <jun.j.tian@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束;
兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报;
另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员;
-----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zhoukang (A) <zhoukang7@huawei.com>; jason.zeng@intel.com; lin.x.wang@intel.com; jun.j.tian@intel.com; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58
Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64
arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+)
-- 2.20.1
_______________________________________________ Kernel mailing list -- kernel@openeuler.org To unsubscribe send an email to kernel-leave@openeuler.org

补充一下,是这个头文件,arch/x86/include/uapi/asm/auxvec.h 请问这个头文件是给用户态libc库用的吗? 是否有libc的对应补丁; 谢谢! -----邮件原件----- 发件人: Zhengzengkai 发送时间: 2022年10月24日 17:10 收件人: 'Tian, Jun J' <jun.j.tian@intel.com>; Zhoukang (A) <zhoukang7@huawei.com>; kernel@openeuler.org 抄送: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> 主题: 答复: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 好的,谢谢!我们也再评估下, 另外 uapi目录修改的那个头文件, 是给用户态libc库用的吗? 是否有libc的对应补丁; -----邮件原件----- 发件人: Tian, Jun J [mailto:jun.j.tian@intel.com] 发送时间: 2022年10月24日 15:41 收件人: Zhoukang (A) <zhoukang7@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org 抄送: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> 主题: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 目前看为了统一KABI的检查,最好还是利用__GENKSYMS__包含对应已有修改的kernel struct和ABI。 这样防止SPR代码合并后check-kabi持续误报的问题,我们也会对SPR相应的KABI change的部分统一 通过这个方式处理以保持一致。当然前提是大家已经审视过SPR相应change并没有被引用或者影响可控。 修改KABI CRC工具未来也可能造成其他潜在问题,比如不参与checksum检查的KABI未来可能有 第三方module会引用。 Thanks, Jun Tian
-----Original Message----- From: Zhoukang (A) <zhoukang7@huawei.com> Sent: Saturday, October 22, 2022 3:54 PM To: Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Tian, Jun J <jun.j.tian@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束;
兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver 目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报;
另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员;
-----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zhoukang (A) <zhoukang7@huawei.com>; jason.zeng@intel.com; lin.x.wang@intel.com; jun.j.tian@intel.com; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58
Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64
arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+)
-- 2.20.1

不建议所有的KABI变更(不论是否SPR相关的补丁)都用直接加__GENKSYMS__宏的方式规避KABI变更。 这组补丁的意图是想让Intel一起评估下这样改的收益和风险,说白了是否值得? 或者说有没有更好的思路? ________________________________ 郑增凯 Zheng Zengkai Mobile: +86-50000020998(For Welink,eSpace Calls) Email: zhengzengkai@huawei.com 发件人:Tian, Jun J <jun.j.tian@intel.com> 收件人:Zhoukang (A) <zhoukang7@huawei.com>;Zhengzengkai <zhengzengkai@huawei.com>;kernel <kernel@openeuler.org> 抄 送:Xiexiuqi <xiexiuqi@huawei.com>;Zeng, Jason <jason.zeng@intel.com>;Wang, Lin X <lin.x.wang@intel.com>;Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>;Huxinwei <huxinwei@huawei.com>;Hushiyuan <hushiyuan@huawei.com>;Xuhanbing <xuhanbing@huawei.com> 时 间:2022-10-24 15:42:04 主 题:RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 目前看为了统一KABI的检查,最好还是利用__GENKSYMS__包含对应已有修改的kernel struct和ABI。 这样防止SPR代码合并后check-kabi持续误报的问题,我们也会对SPR相应的KABI change的部分统一 通过这个方式处理以保持一致。当然前提是大家已经审视过SPR相应change并没有被引用或者影响可控。 修改KABI CRC工具未来也可能造成其他潜在问题,比如不参与checksum检查的KABI未来可能有 第三方module会引用。 Thanks, Jun Tian
-----Original Message----- From: Zhoukang (A) <zhoukang7@huawei.com> Sent: Saturday, October 22, 2022 3:54 PM To: Zhengzengkai <zhengzengkai@huawei.com>; kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Tian, Jun J <jun.j.tian@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束;
兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver 目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报;
另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员;
-----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zhoukang (A) <zhoukang7@huawei.com>; jason.zeng@intel.com; lin.x.wang@intel.com; jun.j.tian@intel.com; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Zhengzengkai <zhengzengkai@huawei.com> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58
Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64
arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+)
-- 2.20.1

KABI的问题主要还是在openEuler LTS update上合入新平台的策略问题。目前openEuler正在快速引入各类多样性平台和完善各平台生态,合入大型的主流新平台很难避免大量的KABI change。在这个前提下更多的应该是检视KABI change是否影响到第三方module,即使有影响的相关module能否适配KABI的变化。KABI的严格要求也是为了避免这类问题发生,但类似fpu, task_thread这种内核底层的修改很少会被driver module引用,如果openEuler检视的结果是没有发现类似的兼容问题,同时其他主流OSV已经合并而且暂时也没有类似的问题,那应该有一个策略来决定是否引入对应的平台的KABI的变化。 另外通过宏来规避检查工具或者通过修改现有patch的实现来统一KABI都不是理想的解决办法。修改patch实现来保持KABI的一致也会造成当前openEuler的base和kernel upstream以及其他主流的OSV的代码存在差异性,对未来rebase或者kernel upgrade会造成conflict的问题,对通用的三方module的维护也可能有潜在问题。实际上大量加入__GENKSYMS__宏也会造成未来维护和rebase的负担。 所以这个问题的本质是需要大家探讨出相应的策略,并建立一套评估流程,在保证KABI稳定的前提下也能灵活的支持更广泛的平台和生态。 Thanks, Jun Tian From: Zhengzengkai <zhengzengkai@huawei.com> Sent: Monday, October 24, 2022 7:06 PM To: Tian, Jun J <jun.j.tian@intel.com>; Zhoukang (A) <zhoukang7@huawei.com>; kernel <kernel@openeuler.org> Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 不建议所有的KABI变更(不论是否SPR相关的补丁)都用直接加__GENKSYMS__宏的方式规避KABI变更。 这组补丁的意图是想让Intel一起评估下这样改的收益和风险,说白了是否值得? 或者说有没有更好的思路? 
________________________________ 郑增凯 Zheng Zengkai Mobile: +86-50000020998(For Welink,eSpace Calls) Email: zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com> 发件人:Tian, Jun J <jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>> 收件人:Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>>;Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>>;kernel <kernel@openeuler.org<mailto:kernel@openeuler.org>> 抄 送:Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>;Zeng, Jason <jason.zeng@intel.com<mailto:jason.zeng@intel.com>>;Wang, Lin X <lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>>;Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>;Huxinwei <huxinwei@huawei.com<mailto:huxinwei@huawei.com>>;Hushiyuan <hushiyuan@huawei.com<mailto:hushiyuan@huawei.com>>;Xuhanbing <xuhanbing@huawei.com<mailto:xuhanbing@huawei.com>> 时 间:2022-10-24 15:42:04 主 题:RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 目前看为了统一KABI的检查,最好还是利用__GENKSYMS__包含对应已有修改的kernel struct和ABI。 这样防止SPR代码合并后check-kabi持续误报的问题,我们也会对SPR相应的KABI change的部分统一 通过这个方式处理以保持一致。当然前提是大家已经审视过SPR相应change并没有被引用或者影响可控。 修改KABI CRC工具未来也可能造成其他潜在问题,比如不参与checksum检查的KABI未来可能有 第三方module会引用。 Thanks, Jun Tian
-----Original Message----- From: Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>> Sent: Saturday, October 22, 2022 3:54 PM To: Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>>; kernel@openeuler.org<mailto:kernel@openeuler.org> Cc: Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>; Zeng, Jason <jason.zeng@intel.com<mailto:jason.zeng@intel.com>>; Wang, Lin X <lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>>; Tian, Jun J <jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>; Huxinwei <huxinwei@huawei.com<mailto:huxinwei@huawei.com>>; Hushiyuan <hushiyuan@huawei.com<mailto:hushiyuan@huawei.com>>; Xuhanbing <xuhanbing@huawei.com<mailto:xuhanbing@huawei.com>> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束;
兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver 目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报;
另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员;
-----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org<mailto:kernel@openeuler.org> Cc: Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>; Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>>; jason.zeng@intel.com<mailto:jason.zeng@intel.com>; lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>; jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>; Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58
Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64
arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+)
-- 2.20.1

Hi 田俊, 附件为TC例会KABI修复建议评估材料 请查收。 谢谢! 发件人: Tian, Jun J [mailto:jun.j.tian@intel.com] 发送时间: 2022年10月25日 10:43 收件人: Zhengzengkai <zhengzengkai@huawei.com>; Zhoukang (A) <zhoukang7@huawei.com>; kernel <kernel@openeuler.org> 抄送: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> 主题: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX KABI的问题主要还是在openEuler LTS update上合入新平台的策略问题。目前openEuler正在快速引入各类多样性平台和完善各平台生态,合入大型的主流新平台很难避免大量的KABI change。在这个前提下更多的应该是检视KABI change是否影响到第三方module,即使有影响的相关module能否适配KABI的变化。KABI的严格要求也是为了避免这类问题发生,但类似fpu, task_thread这种内核底层的修改很少会被driver module引用,如果openEuler检视的结果是没有发现类似的兼容问题,同时其他主流OSV已经合并而且暂时也没有类似的问题,那应该有一个策略来决定是否引入对应的平台的KABI的变化。 另外通过宏来规避检查工具或者通过修改现有patch的实现来统一KABI都不是理想的解决办法。修改patch实现来保持KABI的一致也会造成当前openEuler的base和kernel upstream以及其他主流的OSV的代码存在差异性,对未来rebase或者kernel upgrade会造成conflict的问题,对通用的三方module的维护也可能有潜在问题。实际上大量加入__GENKSYMS__宏也会造成未来维护和rebase的负担。 所以这个问题的本质是需要大家探讨出相应的策略,并建立一套评估流程,在保证KABI稳定的前提下也能灵活的支持更广泛的平台和生态。 Thanks, Jun Tian From: Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>> Sent: Monday, October 24, 2022 7:06 PM To: Tian, Jun J <jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>>; Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>>; kernel <kernel@openeuler.org<mailto:kernel@openeuler.org>> Cc: Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>; Zeng, Jason <jason.zeng@intel.com<mailto:jason.zeng@intel.com>>; Wang, Lin X <lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>; Huxinwei <huxinwei@huawei.com<mailto:huxinwei@huawei.com>>; Hushiyuan <hushiyuan@huawei.com<mailto:hushiyuan@huawei.com>>; Xuhanbing <xuhanbing@huawei.com<mailto:xuhanbing@huawei.com>> 
Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 不建议所有的KABI变更(不论是否SPR相关的补丁)都用直接加__GENKSYMS__宏的方式规避KABI变更。 这组补丁的意图是想让Intel一起评估下这样改的收益和风险,说白了是否值得? 或者说有没有更好的思路? ________________________________ 郑增凯 Zheng Zengkai Mobile: +86-50000020998(For Welink,eSpace Calls) Email: zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com> 发件人:Tian, Jun J <jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>> 收件人:Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>>;Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>>;kernel <kernel@openeuler.org<mailto:kernel@openeuler.org>> 抄 送:Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>;Zeng, Jason <jason.zeng@intel.com<mailto:jason.zeng@intel.com>>;Wang, Lin X <lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>>;Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>;Huxinwei <huxinwei@huawei.com<mailto:huxinwei@huawei.com>>;Hushiyuan <hushiyuan@huawei.com<mailto:hushiyuan@huawei.com>>;Xuhanbing <xuhanbing@huawei.com<mailto:xuhanbing@huawei.com>> 时 间:2022-10-24 15:42:04 主 题:RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 目前看为了统一KABI的检查,最好还是利用__GENKSYMS__包含对应已有修改的kernel struct和ABI。 这样防止SPR代码合并后check-kabi持续误报的问题,我们也会对SPR相应的KABI change的部分统一 通过这个方式处理以保持一致。当然前提是大家已经审视过SPR相应change并没有被引用或者影响可控。 修改KABI CRC工具未来也可能造成其他潜在问题,比如不参与checksum检查的KABI未来可能有 第三方module会引用。 Thanks, Jun Tian
-----Original Message----- From: Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>> Sent: Saturday, October 22, 2022 3:54 PM To: Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>>; kernel@openeuler.org<mailto:kernel@openeuler.org> Cc: Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>; Zeng, Jason <jason.zeng@intel.com<mailto:jason.zeng@intel.com>>; Wang, Lin X <lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>>; Tian, Jun J <jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>; Huxinwei <huxinwei@huawei.com<mailto:huxinwei@huawei.com>>; Hushiyuan <hushiyuan@huawei.com<mailto:hushiyuan@huawei.com>>; Xuhanbing <xuhanbing@huawei.com<mailto:xuhanbing@huawei.com>> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束;
兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver 目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报;
另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员;
-----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org<mailto:kernel@openeuler.org> Cc: Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>; Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>>; jason.zeng@intel.com<mailto:jason.zeng@intel.com>; lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>; jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>; Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58
Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64
arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+)
-- 2.20.1

多谢增凯,接下来我们会根据TC会议的思路逐步提交SPR剩余的PR到OLK-5.10。还麻烦各位帮忙review。 Thanks, Jun Tian From: Zhengzengkai <zhengzengkai@huawei.com> Sent: Wednesday, October 26, 2022 2:36 PM To: Tian, Jun J <jun.j.tian@intel.com>; Zhoukang (A) <zhoukang7@huawei.com>; kernel <kernel@openeuler.org> Cc: Xiexiuqi <xiexiuqi@huawei.com>; Zeng, Jason <jason.zeng@intel.com>; Wang, Lin X <lin.x.wang@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com>; Huxinwei <huxinwei@huawei.com>; Hushiyuan <hushiyuan@huawei.com>; Xuhanbing <xuhanbing@huawei.com> Subject: 答复: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX Hi 田俊, 附件为TC例会KABI修复建议评估材料 请查收。 谢谢! 发件人: Tian, Jun J [mailto:jun.j.tian@intel.com] 发送时间: 2022年10月25日 10:43 收件人: Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>>; Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>>; kernel <kernel@openeuler.org<mailto:kernel@openeuler.org>> 抄送: Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>; Zeng, Jason <jason.zeng@intel.com<mailto:jason.zeng@intel.com>>; Wang, Lin X <lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>; Huxinwei <huxinwei@huawei.com<mailto:huxinwei@huawei.com>>; Hushiyuan <hushiyuan@huawei.com<mailto:hushiyuan@huawei.com>>; Xuhanbing <xuhanbing@huawei.com<mailto:xuhanbing@huawei.com>> 主题: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX KABI的问题主要还是在openEuler LTS update上合入新平台的策略问题。目前openEuler正在快速引入各类多样性平台和完善各平台生态,合入大型的主流新平台很难避免大量的KABI change。在这个前提下更多的应该是检视KABI change是否影响到第三方module,即使有影响的相关module能否适配KABI的变化。KABI的严格要求也是为了避免这类问题发生,但类似fpu, task_thread这种内核底层的修改很少会被driver module引用,如果openEuler检视的结果是没有发现类似的兼容问题,同时其他主流OSV已经合并而且暂时也没有类似的问题,那应该有一个策略来决定是否引入对应的平台的KABI的变化。 另外通过宏来规避检查工具或者通过修改现有patch的实现来统一KABI都不是理想的解决办法。修改patch实现来保持KABI的一致也会造成当前openEuler的base和kernel upstream以及其他主流的OSV的代码存在差异性,对未来rebase或者kernel 
upgrade会造成conflict的问题,对通用的三方module的维护也可能有潜在问题。实际上大量加入__GENKSYMS__宏也会造成未来维护和rebase的负担。 所以这个问题的本质是需要大家探讨出相应的策略,并建立一套评估流程,在保证KABI稳定的前提下也能灵活的支持更广泛的平台和生态。 Thanks, Jun Tian From: Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>> Sent: Monday, October 24, 2022 7:06 PM To: Tian, Jun J <jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>>; Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>>; kernel <kernel@openeuler.org<mailto:kernel@openeuler.org>> Cc: Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>; Zeng, Jason <jason.zeng@intel.com<mailto:jason.zeng@intel.com>>; Wang, Lin X <lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>; Huxinwei <huxinwei@huawei.com<mailto:huxinwei@huawei.com>>; Hushiyuan <hushiyuan@huawei.com<mailto:hushiyuan@huawei.com>>; Xuhanbing <xuhanbing@huawei.com<mailto:xuhanbing@huawei.com>> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 不建议所有的KABI变更(不论是否SPR相关的补丁)都用直接加__GENKSYMS__宏的方式规避KABI变更。 这组补丁的意图是想让Intel一起评估下这样改的收益和风险,说白了是否值得? 或者说有没有更好的思路? 
________________________________ 郑增凯 Zheng Zengkai Mobile: +86-50000020998(For Welink,eSpace Calls) Email: zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com> 发件人:Tian, Jun J <jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>> 收件人:Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>>;Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>>;kernel <kernel@openeuler.org<mailto:kernel@openeuler.org>> 抄 送:Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>;Zeng, Jason <jason.zeng@intel.com<mailto:jason.zeng@intel.com>>;Wang, Lin X <lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>>;Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>;Huxinwei <huxinwei@huawei.com<mailto:huxinwei@huawei.com>>;Hushiyuan <hushiyuan@huawei.com<mailto:hushiyuan@huawei.com>>;Xuhanbing <xuhanbing@huawei.com<mailto:xuhanbing@huawei.com>> 时 间:2022-10-24 15:42:04 主 题:RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX 目前看为了统一KABI的检查,最好还是利用__GENKSYMS__包含对应已有修改的kernel struct和ABI。 这样防止SPR代码合并后check-kabi持续误报的问题,我们也会对SPR相应的KABI change的部分统一 通过这个方式处理以保持一致。当然前提是大家已经审视过SPR相应change并没有被引用或者影响可控。 修改KABI CRC工具未来也可能造成其他潜在问题,比如不参与checksum检查的KABI未来可能有 第三方module会引用。 Thanks, Jun Tian
-----Original Message----- From: Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>> Sent: Saturday, October 22, 2022 3:54 PM To: Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>>; kernel@openeuler.org<mailto:kernel@openeuler.org> Cc: Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>; Zeng, Jason <jason.zeng@intel.com<mailto:jason.zeng@intel.com>>; Wang, Lin X <lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>>; Tian, Jun J <jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>; Huxinwei <huxinwei@huawei.com<mailto:huxinwei@huawei.com>>; Hushiyuan <hushiyuan@huawei.com<mailto:hushiyuan@huawei.com>>; Xuhanbing <xuhanbing@huawei.com<mailto:xuhanbing@huawei.com>> Subject: RE: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
代码注释有点少, 无法体现KABI兼容思路与驱动的使用约束;
兼容补丁基于下面设计约束进行设计: 内核KABI白名单主要是为第3方驱动提供稳定的运行环境; 因此fpu, task_thread, 中断数据结构内部成员, 禁止驱动代码使用; 已经排查过driver 目录确实没有驱动使用的情况; 基于上面约束, 因此当前fix kabi的补丁仅是避免了检查工具的误报;
另外一种解决思路: 直接修改kabi CRC计算工具, 将特定数据结构的CRC值清除; 同时需要在编译阶段检查驱动没有使用特定的数据结构成员;
-----Original Message----- From: Zhengzengkai Sent: Saturday, October 22, 2022 3:39 PM To: kernel@openeuler.org<mailto:kernel@openeuler.org> Cc: Xiexiuqi <xiexiuqi@huawei.com<mailto:xiexiuqi@huawei.com>>; Zhoukang (A) <zhoukang7@huawei.com<mailto:zhoukang7@huawei.com>>; jason.zeng@intel.com<mailto:jason.zeng@intel.com>; lin.x.wang@intel.com<mailto:lin.x.wang@intel.com>; jun.j.tian@intel.com<mailto:jun.j.tian@intel.com>; Dukaitian (Dukaitian, Intelligent Computing R&D) <dukaitian@huawei.com<mailto:dukaitian@huawei.com>>; Zhengzengkai <zhengzengkai@huawei.com<mailto:zhengzengkai@huawei.com>> Subject: [PATCH openEuler-5.10 0/4] Try to fix kabi change caused by Intel AMX
These four patches try to avoid or fix kabi change caused by Intel AMX PR: https://gitee.com/openeuler/kernel/pulls/58
Zheng Zengkai (4): x86: Avoid kabi change caused by adding pkru element in thread_struct x86/extable: Avoid kabi change caused by exception table rework x86/fpu: Avoid kabi change caused by struct fpu mm: Fix kabi change caused by saved_auxv[] in mm_struct for x86_64
arch/x86/include/asm/extable.h | 4 ++++ arch/x86/include/asm/fpu/types.h | 4 ++++ arch/x86/include/asm/processor.h | 2 ++ arch/x86/include/uapi/asm/auxvec.h | 1 + include/linux/mm_types.h | 15 +++++++++++++++ 5 files changed, 26 insertions(+)
-- 2.20.1
participants (6)
-
Tian, Jun J
-
Wang, Lin X
-
yangbin
-
Zheng Zengkai
-
Zhengzengkai
-
Zhoukang (A)