[PATCH OLK-6.6] sched: troubleshooting for EEVDF null pointers

Offering: HULK hulk inclusion category: other bugzilla: https://gitee.com/openeuler/kernel/issues/ICDF44 -------------------------------- This patch is intended for debugging purposes only. It focuses on the null pointer issue caused by the vruntime_eligible() function consistently returning false due to overflow in s64 casting and operations related to cfs_rq->avg_vruntime within EEVDF. When CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER is enabled, it will record the calculation factors required by each sched_entity's vruntime in place_entity() and related contexts. When CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON is enabled, in addition to recording, it will trigger BUG_ON for some suspicious values. Note that this BUG_ON may produce false positives; it is intended only for EEVDF troubleshooting. Signed-off-by: Zicheng Qu <quzicheng@huawei.com> --- include/linux/sched.h | 34 +++++++++++++++++++++++++++ init/init_task.c | 9 +++++++ kernel/Kconfig.preempt | 15 ++++++++++++ kernel/fork.c | 15 ++++++++++++ kernel/sched/fair.c | 53 ++++++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 34 +++++++++++++++++++++++++++ 6 files changed, 160 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index b6bc8d72309a..9d089094f1e6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -572,6 +572,36 @@ struct sched_statistics { #endif /* CONFIG_SCHEDSTATS */ } ____cacheline_aligned; +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER +struct sched_entity_resvd { + /* pointer back to the main sched_entity */ + struct sched_entity *se; + + /* + * CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER is only designed + * to verify EEVDF null pointer issues.
+ */ + /* attrs for cfs_rq */ + s64 cfs_rq_avg_vruntime; + u64 cfs_rq_avg_load; + u64 cfs_rq_min_vruntime; + unsigned long cfs_rq_load_weight; + u64 cfs_rq_load_inv_weight; + + /* attrs for cfs_rq->curr */ + struct sched_entity *curr_address; + unsigned int curr_on_rq; + u64 curr_vruntime; + u64 curr_min_vruntime; + unsigned long curr_load_weight; + u32 curr_load_inv_weight; + + /* calculators for place_entity() */ + u64 function_place_entity_vruntime; + s64 function_place_entity_lag; +}; +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER */ + struct sched_entity { /* For load-balancing: */ struct load_weight load; @@ -611,8 +641,12 @@ struct sched_entity { */ struct sched_avg avg; #endif +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER + KABI_REPLACE(KABI_RESERVE(1); KABI_RESERVE(2), KABI_AUX_PTR(sched_entity)) +#else KABI_RESERVE(1) KABI_RESERVE(2) +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER */ KABI_RESERVE(3) KABI_RESERVE(4) }; diff --git a/init/init_task.c b/init/init_task.c index 1adc17149558..7e4bec74f241 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -61,6 +61,12 @@ static struct task_struct_resvd init_task_struct_resvd = { .task = &init_task, }; +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER +static struct sched_entity_resvd init_sched_entity_resvd = { + .se = &init_task.se, +}; +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER */ + /* * Set up the first task table, touch at your own risk!. 
Base=0, * limit=0x1fffff (=2MB) @@ -94,6 +100,9 @@ struct task_struct init_task }, .se = { .group_node = LIST_HEAD_INIT(init_task.se.group_node), +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER + ._resvd = &init_sched_entity_resvd, +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER */ }, .rt = { .run_list = LIST_HEAD_INIT(init_task.rt.run_list), diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index dc2a630f200a..090f588a88ee 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -134,3 +134,18 @@ config SCHED_CORE be no measurable impact on performance. +config TEMP_EEVDF_NULL_POINTER_CHECKER + bool "temp eevdf null pointer" + default n + help + This option can record the attrs of the cfs_rq and cfs_rq->curr + when a sched_entity in place_entity(). + +config TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON + bool "temp eevdf null pointer bugon" + depends on TEMP_EEVDF_NULL_POINTER_CHECKER + default n + help + This option can record the attrs of the cfs_rq and cfs_rq->curr + when a sched_entity in place_entity(). Also, it will bug_on when + necessary. 
\ No newline at end of file diff --git a/kernel/fork.c b/kernel/fork.c index 7f7c297d5f48..a056de39f327 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -184,6 +184,9 @@ static inline struct task_struct *alloc_task_struct_node(int node) static inline void free_task_struct(struct task_struct *tsk) { kfree(tsk->_resvd); +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER + kfree(tsk->se._resvd); +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER */ kmem_cache_free(task_struct_cachep, tsk); } #endif @@ -1158,6 +1161,15 @@ static bool dup_resvd_task_struct(struct task_struct *dst, if (!dst->_resvd) return false; +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER + dst->se._resvd = kzalloc_node(sizeof(struct sched_entity_resvd), + GFP_KERNEL, node); + if (!dst->se._resvd) + return false; + + dst->se._resvd->se = &dst->se; +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER */ + dst->_resvd->task = dst; return true; } @@ -1178,6 +1190,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) * a double-free for task_struct_resvd extension object. 
*/ WRITE_ONCE(tsk->_resvd, NULL); +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER + WRITE_ONCE(tsk->se._resvd, NULL); +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER */ err = arch_dup_task_struct(tsk, orig); if (err || !dup_resvd_task_struct(tsk, orig, node)) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f619dd53cc49..8cf3311a672c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -330,6 +330,10 @@ static int __init sched_fair_sysctl_init(void) late_initcall(sched_fair_sysctl_init); #endif +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON +static inline void avg_vruntime_validate(struct cfs_rq *cfs_rq); +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON */ + static inline void update_load_add(struct load_weight *lw, unsigned long inc) { lw->weight += inc; @@ -799,6 +803,11 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se) unsigned long weight = scale_load_down(se->load.weight); s64 key = entity_key(cfs_rq, se); +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON + /* not yet added to tree */ + avg_vruntime_validate(cfs_rq); +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON */ + cfs_rq->avg_vruntime += key * weight; cfs_rq->avg_load += weight; } @@ -811,6 +820,11 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se) cfs_rq->avg_vruntime -= key * weight; cfs_rq->avg_load -= weight; + +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON + /* already removed from tree */ + avg_vruntime_validate(cfs_rq); +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON */ } static inline @@ -912,6 +926,10 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime) load += weight; } +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON + WARN_ON_ONCE(!(avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load) + && (avg - (s64)(vruntime - cfs_rq->min_vruntime) * load >= 0)); +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON */ return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load; } @@ -3818,6 
+3836,32 @@ static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { } #endif +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON + +#define TEMP_EEVDF_MAX_LAG (1ULL << 50) +#define TEMP_EEVDF_NULL_ABS(x) ((x) < 0 ? -(x) : (x)) + +static inline void avg_vruntime_validate(struct cfs_rq *cfs_rq) +{ + unsigned long load = 0; + s64 vruntime = 0; + struct rb_node *node = rb_first_cached(&cfs_rq->tasks_timeline); + + for (; node; node = rb_next(node)) { + struct sched_entity *se = __node_2_se(node); + unsigned long weight = scale_load_down(se->load.weight); + s64 key = entity_key(cfs_rq, se); + /* vruntime += key * weight; */ + WARN_ON_ONCE(__builtin_mul_overflow(key, weight, &key)); + WARN_ON_ONCE(__builtin_add_overflow(vruntime, key, &vruntime)); + load += weight; + } + + WARN_ON_ONCE(cfs_rq->avg_load != load); + WARN_ON_ONCE(cfs_rq->avg_vruntime != vruntime); +} +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON */ + static void reweight_eevdf(struct sched_entity *se, u64 avruntime, unsigned long weight) { @@ -3905,6 +3949,9 @@ static void reweight_eevdf(struct sched_entity *se, u64 avruntime, vlag = entity_lag(avruntime, se); vlag = div_s64(vlag * old_weight, weight); se->vruntime = avruntime - vlag; +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON + BUG_ON(TEMP_EEVDF_NULL_ABS(vlag) > TEMP_EEVDF_MAX_LAG); +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON */ } /* @@ -5417,6 +5464,12 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) } se->vruntime = vruntime - lag; +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER + temp_save_info_for_eevdf_nullpointer(cfs_rq, se, vruntime, lag); +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON + BUG_ON(TEMP_EEVDF_NULL_ABS(lag) > TEMP_EEVDF_MAX_LAG); +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER_BUGON */ +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER */ /* * When joining the competition; the exisiting tasks will be, diff --git a/kernel/sched/sched.h 
b/kernel/sched/sched.h index f6a3f93d1f75..d3560256cd1b 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3788,4 +3788,38 @@ static inline int destroy_soft_domain(struct task_group *tg) #endif +#ifdef CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER +static inline void temp_save_info_for_eevdf_nullpointer( + struct cfs_rq *cfs_rq, + struct sched_entity *se, + u64 function_place_entity_vruntime, + s64 function_place_entity_lag) +{ + // attrs for cfs_rq + se->_resvd->cfs_rq_avg_vruntime = cfs_rq->avg_vruntime; + se->_resvd->cfs_rq_avg_load = cfs_rq->avg_load; + se->_resvd->cfs_rq_min_vruntime = cfs_rq->min_vruntime; + se->_resvd->cfs_rq_load_weight = cfs_rq->load.weight; + se->_resvd->cfs_rq_load_inv_weight = cfs_rq->load.inv_weight; + + // attrs for cfs_rq->curr + struct sched_entity *curr = cfs_rq->curr; + + if (curr) { + se->_resvd->curr_address = curr; + se->_resvd->curr_on_rq = curr->on_rq; + se->_resvd->curr_vruntime = curr->vruntime; + se->_resvd->curr_min_vruntime = curr->min_vruntime; + se->_resvd->curr_load_weight = curr->load.weight; + se->_resvd->curr_load_inv_weight = curr->load.inv_weight; + } else { + se->_resvd->curr_address = NULL; + } + + // calculators for place_entity() + se->_resvd->function_place_entity_vruntime = function_place_entity_vruntime; + se->_resvd->function_place_entity_lag = function_place_entity_lag; +} +#endif /* CONFIG_TEMP_EEVDF_NULL_POINTER_CHECKER */ + #endif /* _KERNEL_SCHED_SCHED_H */ -- 2.34.1

反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/17497 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/4H4... Feedback: The patch(es) which you sent to the kernel@openeuler.org mailing list have been successfully converted to a pull request! Pull request link: https://gitee.com/openeuler/kernel/pulls/17497 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/4H4...
participants (2)
-
patchwork bot
-
Zicheng Qu