[PATCH 0/2] EEVDF null pointer
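Patch 1 adds an out-of-tree reproducer (null_reproduction_test/ plus some debug
instrumentation in kernel/sched/fair.c) for an EEVDF null pointer dereference;
patch 2 replaces cfs_rq->min_vruntime with a zero_vruntime reference point that
tracks avg_vruntime().

How to reproduce -- a rough sketch only; the authoritative steps are make.sh and
test.sh from patch 1. The commands below assume the kernel built from patch 1 is
already installed and booted (so /sys/module/fair/parameters/se_schedule_pid and
the qzc_* switches exist) and are run from the kernel source tree; "dmesg -w" is
just one way to follow the log:

    cd null_reproduction_test
    ./make.sh     # builds test_sched.ko and the fullcpu busy-loop helper
    ./test.sh     # loads the module, sets se_schedule_pid/qzc_vlag_switch,
                  # pins fullcpu and the helper threads into the test0 cgroup
    dmesg -w      # watch sched_debug_cfs_rq_info() output and any oops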
Zicheng Qu (2):
  EEVDF null pointer reproduction
  zero_vruntime

 kernel/sched/debug.c                |   8 +-
 kernel/sched/fair.c                 | 170 +++++++++++++++++-----------
 kernel/sched/sched.h                |   2 +-
 null_reproduction_test/Makefile     |   9 ++
 null_reproduction_test/fullcpu.c    |  12 ++
 null_reproduction_test/make.sh      |  17 +++
 null_reproduction_test/test.sh      |  34 ++++++
 null_reproduction_test/test_sched.c | 141 +++++++++++++++++++++++
 8 files changed, 325 insertions(+), 68 deletions(-)
 create mode 100644 null_reproduction_test/Makefile
 create mode 100644 null_reproduction_test/fullcpu.c
 create mode 100755 null_reproduction_test/make.sh
 create mode 100755 null_reproduction_test/test.sh
 create mode 100644 null_reproduction_test/test_sched.c

-- 
2.34.1
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICDF44?from=project-issue

----------------------------------------

EEVDF null pointer reproduction code.

Signed-off-by: Zicheng Qu <quzicheng@huawei.com>
Signed-off-by: wulibin163 <wulibin163@126.com>
---
 kernel/sched/fair.c                 | 101 +++++++++++++++++++-
 null_reproduction_test/Makefile     |   9 ++
 null_reproduction_test/fullcpu.c    |  12 +++
 null_reproduction_test/make.sh      |  17 ++++
 null_reproduction_test/test.sh      |  34 +++++++
 null_reproduction_test/test_sched.c | 141 ++++++++++++++++++++++++++++
 6 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 null_reproduction_test/Makefile
 create mode 100644 null_reproduction_test/fullcpu.c
 create mode 100755 null_reproduction_test/make.sh
 create mode 100755 null_reproduction_test/test.sh
 create mode 100644 null_reproduction_test/test_sched.c

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c4c3afa6e7b4..ebfbdc1a4ce4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -75,6 +75,17 @@
 #endif
 #include <linux/sched/grid_qos.h>
 
+static int se_schedule_pid = 0; // the pid of task `test_sched_0` started in test_sched.c.
+module_param(se_schedule_pid, int, 0644);
+static int qzc_vlag_switch = 0; // switch to control the vlag for test_sched_0 in place_entity()
+module_param(qzc_vlag_switch, int, 0644);
+static int qzc_fixed_switch = 0; // switch to control whether to apply the old fix patch, not zero_vruntime patch
+module_param(qzc_fixed_switch, int, 0644);
+#define __FILENAME__ (__builtin_strrchr(__FILE__, '/') ? __builtin_strrchr(__FILE__, '/') + 1 : __FILE__)
+#define ENQUEUE_ENTITY_NONE 0
+#define ENQUEUE_ENTITY_BEGIN 1
+#define ENQUEUE_ENTITY_END 2
+
 /*
  * The initial- and re-scaling of tunables is configurable
  *
@@ -3930,6 +3941,14 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	bool curr = cfs_rq->curr == se;
 	u64 avruntime;
 
+	if (qzc_fixed_switch && curr && se->on_rq && cfs_rq->nr_running == 1 &&
+	    se->vruntime < cfs_rq->min_vruntime) {
+		s64 rel_deadline = se->deadline - se->vruntime;
+
+		se->vruntime = cfs_rq->min_vruntime;
+		se->deadline = se->vruntime + rel_deadline;
+	}
+
 	if (se->on_rq) {
 		/* commit outstanding execution time */
 		update_curr(cfs_rq);
@@ -4106,7 +4125,7 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
  * Recomputes the group entity based on the current state of its group
  * runqueue.
  */
-static void update_cfs_group(struct sched_entity *se)
+static void __update_cfs_group(struct sched_entity *se, int flag)
 {
 	struct cfs_rq *gcfs_rq = group_cfs_rq(se);
 	long shares;
@@ -4126,10 +4145,21 @@ static void update_cfs_group(struct sched_entity *se)
 #else
 	shares = calc_group_shares(gcfs_rq);
 #endif
+
+	if (flag == ENQUEUE_ENTITY_BEGIN) // enqueue begin
+		shares = 111616;
+	else if (flag == ENQUEUE_ENTITY_END) // enqueue end
+		shares = 395264;
+
 	if (unlikely(se->load.weight != shares))
 		reweight_entity(cfs_rq_of(se), se, shares);
 }
 
+static void update_cfs_group(struct sched_entity *se)
+{
+	__update_cfs_group(se, ENQUEUE_ENTITY_NONE);
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 static inline void update_cfs_group(struct sched_entity *se)
 {
@@ -5352,6 +5382,16 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	struct sched_entity *curr = cfs_rq->curr;
 	unsigned long load;
 
+	/*
+	 * To make the avg_vruntime() and cfs_rq->avg_vruntime lower and lower:
+	 * The original goal is to migrate a large number (countless) of test_sched_0 type tasks
+	 * with very positive high vlag one by one to a specific cfs_rq.
+	 * However, it is difficult to control from user space,
+	 * so we directly simulate it here to achieve this.
+	 */
+	if (qzc_vlag_switch != 0 && se_schedule_pid > 0 && entity_is_task(se) && (task_of(se)->pid == se_schedule_pid))
+		se->vlag = qzc_vlag_switch == 1 ? calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se) : qzc_vlag_switch;
+
 	lag = se->vlag;
 
 	/*
@@ -5442,6 +5482,19 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	bool curr = cfs_rq->curr == se;
 
+	/*
+	 * At the very beginning and end of the enqueue process for task `test_sched_0`,
+	 * we want to adjust the weight/shares of cfs_rq->curr simultaneously,
+	 * which is actually the task `fullcpu` from test.sh in most cases.
+	 *
+	 * However, it is quite challenging to control from user space,
+	 * so we intend to simulate this behavior here instead.
+	 */
+	if (se_schedule_pid > 0 && entity_is_task(se) && (task_of(se)->pid == se_schedule_pid)) {
+		if (cfs_rq->curr)
+			__update_cfs_group(cfs_rq->curr, ENQUEUE_ENTITY_BEGIN);
+	}
+
 	/*
 	 * If we're the current task, we must renormalise before calling
 	 * update_curr().
@@ -5509,6 +5562,11 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 #endif
 		}
 	}
+
+	if (se_schedule_pid > 0 && entity_is_task(se) && (task_of(se)->pid == se_schedule_pid)) {
+		if (cfs_rq->curr)
+			__update_cfs_group(cfs_rq->curr, ENQUEUE_ENTITY_END);
+	}
 }
 
 static void __clear_buddies_next(struct sched_entity *se)
@@ -14819,6 +14877,15 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	cfs_rq->tasks_timeline = RB_ROOT_CACHED;
 	u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20)));
+	/*
+	 * We suppose the original intention of (u64)(-(1LL << 20)) was likely to
+	 * force cfs_rq->min_vruntime to overflow as quickly as possible,
+	 * thereby exposing related overflow issues early during the kernel initial phase.
+	 *
+	 * To accelerate the reproduction of these issues,
+	 * we have temporarily modified the initial value of cfs_rq->min_vruntime.
+	 */
+	cfs_rq->min_vruntime = (u64)(4596393947272479);
 #ifdef CONFIG_SMP
 	raw_spin_lock_init(&cfs_rq->removed.lock);
 #endif
@@ -15269,3 +15336,35 @@ __init void init_sched_fair_class(void)
 #endif /* SMP */
 
 }
+u64 sched_debug_min_vruntime(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->min_vruntime;
+}
+EXPORT_SYMBOL(sched_debug_min_vruntime);
+
+void sched_debug_cfs_rq_info(struct cfs_rq *cfs_rq)
+{
+	u64 qzc_avruntime = avg_vruntime(cfs_rq);
+
+	printk("%s:%s:%d, cfs_rq=[%p]\tcfs_rq->nr_running=[%d]\tcfs_rq->avg_vruntime=[%lld]\tcfs_rq->min_vruntime=[%llu]\tcfs_rq->avg_load=[%llu]\tavg_vruntime(cfs_rq)=[%llu]\n",
+		__FILENAME__,__FUNCTION__, __LINE__,
+		cfs_rq, cfs_rq->nr_running, cfs_rq->avg_vruntime, cfs_rq->min_vruntime, cfs_rq->avg_load, qzc_avruntime);
+
+	if (cfs_rq->curr) {
+		printk("%s:%s:%d, curr=[%p]\tpid=[%d]\ttgid=[%d]\tcurr->vruntime=[%llu]\tcurr->load.weight=[%lu]\tcurr->vlag=[%lld]\tcurr->slice=[%llu]\tcurr->deadline=[%llu]\tcurr->my_q=[%p]\treal_vlag=[%lld]\tvruntime_eligible=[%d]\n",
+			__FILENAME__,__FUNCTION__, __LINE__,
+			cfs_rq->curr, entity_is_task(cfs_rq->curr) ? task_of(cfs_rq->curr)->pid : -1, entity_is_task(cfs_rq->curr) ? task_of(cfs_rq->curr)->tgid : -1,
+			cfs_rq->curr->vruntime, cfs_rq->curr->load.weight, cfs_rq->curr->vlag, cfs_rq->curr->slice, cfs_rq->curr->deadline, cfs_rq->curr->my_q, entity_lag(qzc_avruntime, cfs_rq->curr), vruntime_eligible(cfs_rq, cfs_rq->curr->vruntime));
+	}
+
+	struct rb_node *node = rb_first_cached(&cfs_rq->tasks_timeline);
+
+	for (; node; node = rb_next(node)) {
+		struct sched_entity *rb_se = __node_2_se(node);
+		printk("%s:%s:%d, rb_se=[%p]\tpid=[%d]\ttgid=[%d]\trb_se->vruntime=[%llu]\trb_se->load.weight=[%lu]\trb_se->vlag=[%lld]\trb_se->slice=[%llu]\trb_se->deadline=[%llu]\trb_se->my_q=[%p]\treal_vlag=[%lld]\tvruntime_eligible=[%d]\n",
+			__FILENAME__,__FUNCTION__, __LINE__,
+			rb_se, entity_is_task(rb_se) ? task_of(rb_se)->pid : -1, entity_is_task(rb_se) ? task_of(rb_se)->tgid : -1,
+			rb_se->vruntime, rb_se->load.weight, rb_se->vlag, rb_se->slice, rb_se->deadline, rb_se->my_q, entity_lag(qzc_avruntime, rb_se), vruntime_eligible(cfs_rq, rb_se->vruntime));
+	}
+}
+EXPORT_SYMBOL(sched_debug_cfs_rq_info);
diff --git a/null_reproduction_test/Makefile b/null_reproduction_test/Makefile
new file mode 100644
index 000000000000..48feb459e5ff
--- /dev/null
+++ b/null_reproduction_test/Makefile
@@ -0,0 +1,9 @@
+obj-m += test_sched.o
+KDIR := /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+
+all:
+	$(MAKE) -C $(KDIR) M=$(PWD) modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(PWD) clean
\ No newline at end of file
diff --git a/null_reproduction_test/fullcpu.c b/null_reproduction_test/fullcpu.c
new file mode 100644
index 000000000000..136c73671035
--- /dev/null
+++ b/null_reproduction_test/fullcpu.c
@@ -0,0 +1,12 @@
+#include <string.h>
+#include <unistd.h>
+
+int main()
+{
+	int a=9;
+	while(1) {
+		a*=9;
+	}
+
+	return 0;
+}
\ No newline at end of file
diff --git a/null_reproduction_test/make.sh b/null_reproduction_test/make.sh
new file mode 100755
index 000000000000..002385d17046
--- /dev/null
+++ b/null_reproduction_test/make.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+make clean
+
+cd ..
+
+make modules_prepare
+
+cd ./null_reproduction_test
+
+make -C ../ M=$(pwd)
+
+gcc fullcpu.c -o fullcpu
+
+echo "===================="
+echo 'please run test.sh'
+echo "===================="
\ No newline at end of file
diff --git a/null_reproduction_test/test.sh b/null_reproduction_test/test.sh
new file mode 100755
index 000000000000..a6cac6d2d7c2
--- /dev/null
+++ b/null_reproduction_test/test.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+test() {
+	cpu=$1
+	cgroup=test0
+
+	mkdir /sys/fs/cgroup/cpu/$cgroup/
+	mkdir /sys/fs/cgroup/memory/$cgroup/
+	echo 10000000 > /sys/fs/cgroup/memory/$cgroup/memory.limit_in_bytes
+
+	taskset -c $cpu ./fullcpu &
+	pid=$!
+
+	echo $pid > /sys/fs/cgroup/cpu/$cgroup/tasks
+	echo $pid > /sys/fs/cgroup/memory/$cgroup/tasks
+
+	let cpu1_count=0
+	for pid in $(ps -auxf | grep test_sched | grep -v grep | grep -v test_sched_0 | grep -v test_sched_1 | awk '{print($2)}'); do
+		echo $pid > /sys/fs/cgroup/cpu/$cgroup/tasks
+	done
+}
+
+killall fullcpu
+rmmod test_sched
+insmod ./test_sched.ko bind_cpu=1 test_count=15
+
+pid0=$(ps -auxf | grep 'test_sched_0' | grep -v grep | awk '{print($2)}')
+echo $pid0 > /sys/module/fair/parameters/se_schedule_pid
+
+# echo 1 > /sys/module/fair/parameters/qzc_fixed_switch
+
+echo 1 > /sys/module/fair/parameters/qzc_vlag_switch
+
+test 1
diff --git a/null_reproduction_test/test_sched.c b/null_reproduction_test/test_sched.c
new file mode 100644
index 000000000000..7a33fa77c923
--- /dev/null
+++ b/null_reproduction_test/test_sched.c
@@ -0,0 +1,141 @@
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/cpumask.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/sched/task.h>
+
+static DECLARE_COMPLETION(comp);
+
+#define THREAD_NUM 100000
+static struct task_struct *schedule_threads[THREAD_NUM];
+static int bind_cpu = 0;
+module_param(bind_cpu, int, 0644);
+MODULE_PARM_DESC(bind_cpu, "CPU core to bind the thread to");
+
+static int test_count = 1;
+module_param(test_count, int, 0644);
+MODULE_PARM_DESC(test_count, "test thread count (default: 1)");
+
+static int sched_debug_cfs_rq_info_print_cnt = 0;
+
+static int thread_function(void *data);
+static void start_one_thread(int id, int cpu);
+
+static int __init schedule_driver_init(void)
+{
+	printk(KERN_INFO "Schedule driver: Initializing\n");
+
+	start_one_thread(0, bind_cpu);
+	start_one_thread(1, bind_cpu);
+	for (int i=2; i<test_count; i++)
+		start_one_thread(i, -1);
+
+	return 0;
+}
+
+struct thread_data {
+	int id;
+};
+
+static void start_one_thread(int id, int cpu)
+{
+	char name[255];
+	sprintf(name, "test_sched_%u/%d", id, cpu);
+
+	struct thread_data *tdata = kmalloc(sizeof(struct thread_data), GFP_KERNEL);
+	tdata->id = id;
+
+	// create kthread but not run immediately
+	schedule_threads[id] = kthread_create(thread_function, tdata, name);
+	if (IS_ERR(schedule_threads[id])) {
+		schedule_threads[id] = 0;
+		printk("Failed to create %s, %ld\n", name, PTR_ERR(schedule_threads[id]));
+		return;
+	}
+
+	if (cpu > 0)
+		kthread_bind(schedule_threads[id], cpu);
+	// run the kthread
+	wake_up_process(schedule_threads[id]);
+
+	printk(KERN_INFO "create %s success\n", name);
+	return;
+}
+
+u64 sched_debug_min_vruntime(struct cfs_rq *cfs);
+void sched_debug_cfs_rq_info(struct cfs_rq *cfs_rq);
+
+static int thread_function(void *data)
+{
+	printk(KERN_INFO "Schedule thread: Started on CPU %d\n", smp_processor_id());
+	struct task_struct *task = current;
+
+	set_current_state(TASK_RUNNING);
+
+	struct thread_data *tdata = data;
+	// test_sched_1 wait
+	if (tdata->id == 1) {
+		set_user_nice(task, 8);
+		wait_for_completion_interruptible(&comp);
+	}
+
+	while (!kthread_should_stop()) {
+		// test_sched_0 check the condition
+		if (tdata->id == 0) {
+			struct sched_entity *se = &task->se;
+			struct cfs_rq *cfs = se->cfs_rq;
+			u64 vruntime = se->vruntime;
+			u64 min_vruntime = sched_debug_min_vruntime(cfs);
+
+			if (sched_debug_cfs_rq_info_print_cnt % 10000 == 0) {
+				sched_debug_cfs_rq_info(cfs);
+			}
+			sched_debug_cfs_rq_info_print_cnt += 1;
+
+			if (-102743846405689 > (s64)(vruntime - min_vruntime)) {
+				int old_nice = task_nice(task);
+				set_user_nice(task, -20);
+
+				complete(&comp); // wake up test_sched_1
+				printk("vruntime: %llu, min_vruntime: %llu, renice: %d->%d\n",
+					vruntime, min_vruntime, old_nice, -20);
+			}
+		} else if (tdata->id == 1) {
+			int a = 1;
+			for (int i=0; i<1000000; i++) {
+				a += tdata->id;
+			}
+		}
+
+		if (tdata->id == 1)
+			cond_resched();
+		else {
+			schedule_timeout_uninterruptible(1);
+		}
+	}
+
+	printk(KERN_INFO "Schedule thread: Exiting from CPU %d\n", smp_processor_id());
+	return 0;
+}
+
+static void __exit schedule_driver_exit(void)
+{
+	for (int i=0; i<test_count; i++) {
+		if (schedule_threads[i]) {
+			kthread_stop(schedule_threads[i]);
+			printk(KERN_INFO "Schedule driver: Thread stopped\n");
+		}
+	}
+}
+
+module_init(schedule_driver_init);
+module_exit(schedule_driver_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Your Name");
+MODULE_DESCRIPTION("A driver that creates a thread calling schedule() in a loop with CPU binding");
+MODULE_VERSION("1.0");
-- 
2.34.1
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICDF44?from=project-issue

----------------------------------------

zero vruntime code

Signed-off-by: Zicheng Qu <quzicheng@huawei.com>
Signed-off-by: wulibin163 <wulibin163@126.com>
---
 kernel/sched/debug.c |  8 ++--
 kernel/sched/fair.c  | 93 +++++++++-----------------------------------
 kernel/sched/sched.h |  2 +-
 3 files changed, 24 insertions(+), 79 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 7a9e6896c699..af3cb2bb6a9a 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -628,7 +628,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
-	s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, left_deadline = -1, spread;
+	s64 left_vruntime = -1, zero_vruntime, right_vruntime = -1, left_deadline = -1, spread;
 	struct sched_entity *last, *first, *root;
 	struct rq *rq = cpu_rq(cpu);
 	unsigned long flags;
@@ -653,15 +653,15 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	last = __pick_last_entity(cfs_rq);
 	if (last)
 		right_vruntime = last->vruntime;
-	min_vruntime = cfs_rq->min_vruntime;
+	zero_vruntime = cfs_rq->zero_vruntime;
 	raw_spin_rq_unlock_irqrestore(rq, flags);
 
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_deadline",
			SPLIT_NS(left_deadline));
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_vruntime",
			SPLIT_NS(left_vruntime));
-	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
-			SPLIT_NS(min_vruntime));
+	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "zero_vruntime",
+			SPLIT_NS(zero_vruntime));
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime",
			SPLIT_NS(avg_vruntime(cfs_rq)));
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "right_vruntime",
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ebfbdc1a4ce4..533ee220b4b2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -740,7 +740,7 @@ static inline bool entity_before(const struct sched_entity *a,
 
 static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	return (s64)(se->vruntime - cfs_rq->min_vruntime);
+	return (s64)(se->vruntime - cfs_rq->zero_vruntime);
 }
 
 #define __node_2_se(node) \
@@ -792,13 +792,13 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
  *
  * Which we track using:
  *
- *                    v0 := cfs_rq->min_vruntime
+ *                    v0 := cfs_rq->zero_vruntime
  * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
  *              \Sum w_i := cfs_rq->avg_load
  *
- * Since min_vruntime is a monotonic increasing variable that closely tracks
- * the per-task service, these deltas: (v_i - v), will be in the order of the
- * maximal (virtual) lag induced in the system due to quantisation.
+ * Since zero_vruntime closely tracks the per-task service, these
+ * deltas: (v_i - v), will be in the order of the maximal (virtual) lag
+ * induced in the system due to quantisation.
 *
 * Also, we use scale_load_down() to reduce the size.
 *
@@ -857,7 +857,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
 		avg = div_s64(avg, load);
 	}
 
-	return cfs_rq->min_vruntime + avg;
+	return cfs_rq->zero_vruntime + avg;
 }
 
 /*
@@ -923,7 +923,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
 		load += weight;
 	}
 
-	return avg >= (s64)(vruntime - cfs_rq->min_vruntime) * load;
+	return avg >= (s64)(vruntime - cfs_rq->zero_vruntime) * load;
 }
 
 int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -938,43 +938,13 @@ int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return vruntime_eligible(cfs_rq, se->vruntime);
 }
 
-static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
+static void update_zero_vruntime(struct cfs_rq *cfs_rq)
 {
-	u64 min_vruntime = cfs_rq->min_vruntime;
-	/*
-	 * open coded max_vruntime() to allow updating avg_vruntime
-	 */
-	s64 delta = (s64)(vruntime - min_vruntime);
-	if (delta > 0) {
-		avg_vruntime_update(cfs_rq, delta);
-		min_vruntime = vruntime;
-	}
-	return min_vruntime;
-}
-
-static void update_min_vruntime(struct cfs_rq *cfs_rq)
-{
-	struct sched_entity *se = __pick_root_entity(cfs_rq);
-	struct sched_entity *curr = cfs_rq->curr;
-	u64 vruntime = cfs_rq->min_vruntime;
-
-	if (curr) {
-		if (curr->on_rq)
-			vruntime = curr->vruntime;
-		else
-			curr = NULL;
-	}
+	u64 vruntime = avg_vruntime(cfs_rq);
+	s64 delta = (s64)(vruntime - cfs_rq->zero_vruntime);
 
-	if (se) {
-		if (!curr)
-			vruntime = se->min_vruntime;
-		else
-			vruntime = min_vruntime(vruntime, se->min_vruntime);
-	}
-
-	/* ensure we never gain time by being placed backwards. */
-	u64_u32_store(cfs_rq->min_vruntime,
-		      __update_min_vruntime(cfs_rq, vruntime));
+	avg_vruntime_update(cfs_rq, delta);
+	cfs_rq->zero_vruntime = vruntime;
 }
 
 static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
@@ -1017,6 +987,7 @@ RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity,
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	avg_vruntime_add(cfs_rq, se);
+	update_zero_vruntime(cfs_rq);
 	se->min_vruntime = se->vruntime;
 	rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
				__entity_less, &min_vruntime_cb);
@@ -1027,6 +998,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
				  &min_vruntime_cb);
 	avg_vruntime_sub(cfs_rq, se);
+	update_zero_vruntime(cfs_rq);
 }
 
 struct sched_entity *__pick_root_entity(struct cfs_rq *cfs_rq)
@@ -1343,7 +1315,6 @@ static void update_curr(struct cfs_rq *cfs_rq)
 
 	curr->vruntime += calc_delta_fair(delta_exec, curr);
 	update_deadline(cfs_rq, curr);
-	update_min_vruntime(cfs_rq);
 
 	if (entity_is_task(curr)) {
 		struct task_struct *curtask = task_of(curr);
@@ -3941,14 +3912,6 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	bool curr = cfs_rq->curr == se;
 	u64 avruntime;
 
-	if (qzc_fixed_switch && curr && se->on_rq && cfs_rq->nr_running == 1 &&
-	    se->vruntime < cfs_rq->min_vruntime) {
-		s64 rel_deadline = se->deadline - se->vruntime;
-
-		se->vruntime = cfs_rq->min_vruntime;
-		se->deadline = se->vruntime + rel_deadline;
-	}
-
 	if (se->on_rq) {
 		/* commit outstanding execution time */
 		update_curr(cfs_rq);
@@ -3984,15 +3947,6 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		update_load_add(&cfs_rq->load, se->load.weight);
 		if (!curr)
 			__enqueue_entity(cfs_rq, se);
-
-		/*
-		 * The entity's vruntime has been adjusted, so let's check
-		 * whether the rq-wide min_vruntime needs updated too. Since
-		 * the calculations above require stable min_vruntime rather
-		 * than up-to-date one, we do the update at the end of the
-		 * reweight process.
-		 */
-		update_min_vruntime(cfs_rq);
 	}
 }
 
@@ -5628,15 +5582,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	update_cfs_group(se);
 
-	/*
-	 * Now advance min_vruntime if @se was the entity holding it back,
-	 * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
-	 * put back on, and if we advance min_vruntime, we'll be placed back
-	 * further than we started -- ie. we'll be penalized.
-	 */
-	if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
-		update_min_vruntime(cfs_rq);
-
 	if (cfs_rq->nr_running == 0)
 		update_idle_cfs_rq_clock_pelt(cfs_rq);
 }
@@ -14876,7 +14821,7 @@ static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
 void init_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	cfs_rq->tasks_timeline = RB_ROOT_CACHED;
-	u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20)));
+	u64_u32_store(cfs_rq->zero_vruntime, (u64)(-(1LL << 20)));
 	/*
 	 * We suppose the original intention of (u64)(-(1LL << 20)) was likely to
 	 * force cfs_rq->min_vruntime to overflow as quickly as possible,
@@ -14885,7 +14830,7 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 	 * To accelerate the reproduction of these issues,
 	 * we have temporarily modified the initial value of cfs_rq->min_vruntime.
 	 */
-	cfs_rq->min_vruntime = (u64)(4596393947272479);
+	cfs_rq->zero_vruntime = (u64)(4596393947272479);
 #ifdef CONFIG_SMP
 	raw_spin_lock_init(&cfs_rq->removed.lock);
 #endif
@@ -15338,7 +15283,7 @@ __init void init_sched_fair_class(void)
 }
 u64 sched_debug_min_vruntime(struct cfs_rq *cfs_rq)
 {
-	return cfs_rq->min_vruntime;
+	return cfs_rq->zero_vruntime;
 }
 EXPORT_SYMBOL(sched_debug_min_vruntime);
 
@@ -15346,9 +15291,9 @@ void sched_debug_cfs_rq_info(struct cfs_rq *cfs_rq)
 {
 	u64 qzc_avruntime = avg_vruntime(cfs_rq);
 
-	printk("%s:%s:%d, cfs_rq=[%p]\tcfs_rq->nr_running=[%d]\tcfs_rq->avg_vruntime=[%lld]\tcfs_rq->min_vruntime=[%llu]\tcfs_rq->avg_load=[%llu]\tavg_vruntime(cfs_rq)=[%llu]\n",
+	printk("%s:%s:%d, cfs_rq=[%p]\tcfs_rq->nr_running=[%d]\tcfs_rq->avg_vruntime=[%lld]\tcfs_rq->zero_vruntime=[%llu]\tcfs_rq->avg_load=[%llu]\tavg_vruntime(cfs_rq)=[%llu]\n",
 		__FILENAME__,__FUNCTION__, __LINE__,
-		cfs_rq, cfs_rq->nr_running, cfs_rq->avg_vruntime, cfs_rq->min_vruntime, cfs_rq->avg_load, qzc_avruntime);
+		cfs_rq, cfs_rq->nr_running, cfs_rq->avg_vruntime, cfs_rq->zero_vruntime, cfs_rq->avg_load, qzc_avruntime);
 
 	if (cfs_rq->curr) {
 		printk("%s:%s:%d, curr=[%p]\tpid=[%d]\ttgid=[%d]\tcurr->vruntime=[%llu]\tcurr->load.weight=[%lu]\tcurr->vlag=[%lld]\tcurr->slice=[%llu]\tcurr->deadline=[%llu]\tcurr->my_q=[%p]\treal_vlag=[%lld]\tvruntime_eligible=[%d]\n",
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f6a3f93d1f75..0f4d4445657e 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -663,7 +663,7 @@ struct cfs_rq {
 	u64			avg_load;
 
 	u64			exec_clock;
-	u64			min_vruntime;
+	u64			zero_vruntime;
 #ifdef CONFIG_SCHED_CORE
 	unsigned int		forceidle_seq;
 	u64			min_vruntime_fi;
-- 
2.34.1