hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/ICDF44?from=project-issue

----------------------------------------

Reproduction code for the EEVDF NULL pointer dereference. Build it with
null_reproduction_test/make.sh, then run null_reproduction_test/test.sh
to drive the reproduction.

Signed-off-by: Zicheng Qu <quzicheng@huawei.com>
Signed-off-by: wulibin163 <wulibin163@126.com>
---
 kernel/sched/fair.c                 | 101 +++++++++++++++++++-
 null_reproduction_test/Makefile     |   9 ++
 null_reproduction_test/fullcpu.c    |  12 +++
 null_reproduction_test/make.sh      |  17 ++++
 null_reproduction_test/test.sh      |  34 +++++++
 null_reproduction_test/test_sched.c | 141 ++++++++++++++++++++++++++++
 6 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 null_reproduction_test/Makefile
 create mode 100644 null_reproduction_test/fullcpu.c
 create mode 100755 null_reproduction_test/make.sh
 create mode 100755 null_reproduction_test/test.sh
 create mode 100644 null_reproduction_test/test_sched.c

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c4c3afa6e7b4..ebfbdc1a4ce4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -75,6 +75,17 @@
 #endif
 #include <linux/sched/grid_qos.h>
 
+static int se_schedule_pid; /* pid of the task `test_sched_0` started by test_sched.c */
+module_param(se_schedule_pid, int, 0644);
+static int qzc_vlag_switch; /* controls the vlag forced onto test_sched_0 in place_entity() */
+module_param(qzc_vlag_switch, int, 0644);
+static int qzc_fixed_switch; /* apply the old fix patch instead of the zero_vruntime patch */
+module_param(qzc_fixed_switch, int, 0644);
+#define __FILENAME__ (__builtin_strrchr(__FILE__, '/') ? __builtin_strrchr(__FILE__, '/') + 1 : __FILE__)
+#define ENQUEUE_ENTITY_NONE	0
+#define ENQUEUE_ENTITY_BEGIN	1
+#define ENQUEUE_ENTITY_END	2
+
 /*
  * The initial- and re-scaling of tunables is configurable
  *
@@ -3930,6 +3941,14 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 	bool curr = cfs_rq->curr == se;
 	u64 avruntime;
 
+	if (qzc_fixed_switch && curr && se->on_rq && cfs_rq->nr_running == 1 &&
+	    se->vruntime < cfs_rq->min_vruntime) {
+		s64 rel_deadline = se->deadline - se->vruntime;
+
+		se->vruntime = cfs_rq->min_vruntime;
+		se->deadline = se->vruntime + rel_deadline;
+	}
+
 	if (se->on_rq) {
 		/* commit outstanding execution time */
 		update_curr(cfs_rq);
@@ -4106,7 +4125,7 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
  * Recomputes the group entity based on the current state of its group
  * runqueue.
  */
-static void update_cfs_group(struct sched_entity *se)
+static void __update_cfs_group(struct sched_entity *se, int flag)
 {
 	struct cfs_rq *gcfs_rq = group_cfs_rq(se);
 	long shares;
@@ -4126,10 +4145,21 @@ static void update_cfs_group(struct sched_entity *se)
 #else
 	shares = calc_group_shares(gcfs_rq);
 #endif
+
+	if (flag == ENQUEUE_ENTITY_BEGIN)	/* enqueue begin */
+		shares = 111616;
+	else if (flag == ENQUEUE_ENTITY_END)	/* enqueue end */
+		shares = 395264;
+
 	if (unlikely(se->load.weight != shares))
 		reweight_entity(cfs_rq_of(se), se, shares);
 }
 
+static void update_cfs_group(struct sched_entity *se)
+{
+	__update_cfs_group(se, ENQUEUE_ENTITY_NONE);
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 static inline void update_cfs_group(struct sched_entity *se)
 {
@@ -5352,6 +5382,16 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	struct sched_entity *curr = cfs_rq->curr;
 	unsigned long load;
 
+	/*
+	 * Make avg_vruntime() and cfs_rq->avg_vruntime sink lower and lower:
+	 * the original scenario migrates a huge number of test_sched_0-style
+	 * tasks, each with a large positive vlag, one by one onto a specific
+	 * cfs_rq. That is difficult to control from user space, so we simulate
+	 * it directly here.
+	 */
+	if (qzc_vlag_switch != 0 && se_schedule_pid > 0 && entity_is_task(se) && (task_of(se)->pid == se_schedule_pid))
+		se->vlag = qzc_vlag_switch == 1 ? calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se) : qzc_vlag_switch;
+
 	lag = se->vlag;
 
 	/*
@@ -5442,6 +5482,19 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	bool curr = cfs_rq->curr == se;
 
+	/*
+	 * At the very beginning and at the very end of enqueueing the task
+	 * `test_sched_0`, we want to adjust the weight/shares of cfs_rq->curr
+	 * at the same time; with test.sh this is usually the task `fullcpu`.
+	 *
+	 * That is quite challenging to control from user space, so we simulate
+	 * the behavior here instead.
+	 */
+	if (se_schedule_pid > 0 && entity_is_task(se) && (task_of(se)->pid == se_schedule_pid)) {
+		if (cfs_rq->curr)
+			__update_cfs_group(cfs_rq->curr, ENQUEUE_ENTITY_BEGIN);
+	}
+
 	/*
 	 * If we're the current task, we must renormalise before calling
 	 * update_curr().
@@ -5509,6 +5562,11 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 #endif
 		}
 	}
+
+	if (se_schedule_pid > 0 && entity_is_task(se) && (task_of(se)->pid == se_schedule_pid)) {
+		if (cfs_rq->curr)
+			__update_cfs_group(cfs_rq->curr, ENQUEUE_ENTITY_END);
+	}
 }
 
 static void __clear_buddies_next(struct sched_entity *se)
@@ -14819,6 +14877,15 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	cfs_rq->tasks_timeline = RB_ROOT_CACHED;
 	u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20)));
+	/*
+	 * We suppose the original intent of (u64)(-(1LL << 20)) was to force
+	 * cfs_rq->min_vruntime to overflow as early as possible, exposing
+	 * related overflow issues already during the kernel's initial phase.
+	 *
+	 * To accelerate the reproduction of these issues, we temporarily
+	 * modify the initial value of cfs_rq->min_vruntime.
+	 */
+	cfs_rq->min_vruntime = (u64)(4596393947272479);
 #ifdef CONFIG_SMP
 	raw_spin_lock_init(&cfs_rq->removed.lock);
 #endif
@@ -15269,3 +15336,35 @@ __init void init_sched_fair_class(void)
 
 #endif /* SMP */
 }
+u64 sched_debug_min_vruntime(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->min_vruntime;
+}
+EXPORT_SYMBOL(sched_debug_min_vruntime);
+
+void sched_debug_cfs_rq_info(struct cfs_rq *cfs_rq)
+{
+	u64 qzc_avruntime = avg_vruntime(cfs_rq);
+
+	printk("%s:%s:%d, cfs_rq=[%p]\tcfs_rq->nr_running=[%u]\tcfs_rq->avg_vruntime=[%lld]\tcfs_rq->min_vruntime=[%llu]\tcfs_rq->avg_load=[%llu]\tavg_vruntime(cfs_rq)=[%llu]\n",
+		__FILENAME__, __func__, __LINE__,
+		cfs_rq, cfs_rq->nr_running, cfs_rq->avg_vruntime, cfs_rq->min_vruntime, cfs_rq->avg_load, qzc_avruntime);
+
+	if (cfs_rq->curr) {
+		printk("%s:%s:%d, curr=[%p]\tpid=[%d]\ttgid=[%d]\tcurr->vruntime=[%llu]\tcurr->load.weight=[%lu]\tcurr->vlag=[%lld]\tcurr->slice=[%llu]\tcurr->deadline=[%llu]\tcurr->my_q=[%p]\treal_vlag=[%lld]\tvruntime_eligible=[%d]\n",
+			__FILENAME__, __func__, __LINE__,
+			cfs_rq->curr, entity_is_task(cfs_rq->curr) ? task_of(cfs_rq->curr)->pid : -1, entity_is_task(cfs_rq->curr) ? task_of(cfs_rq->curr)->tgid : -1,
+			cfs_rq->curr->vruntime, cfs_rq->curr->load.weight, cfs_rq->curr->vlag, cfs_rq->curr->slice, cfs_rq->curr->deadline, cfs_rq->curr->my_q, entity_lag(qzc_avruntime, cfs_rq->curr), vruntime_eligible(cfs_rq, cfs_rq->curr->vruntime));
+	}
+
+	struct rb_node *node = rb_first_cached(&cfs_rq->tasks_timeline);
+
+	for (; node; node = rb_next(node)) {
+		struct sched_entity *rb_se = __node_2_se(node);
+		printk("%s:%s:%d, rb_se=[%p]\tpid=[%d]\ttgid=[%d]\trb_se->vruntime=[%llu]\trb_se->load.weight=[%lu]\trb_se->vlag=[%lld]\trb_se->slice=[%llu]\trb_se->deadline=[%llu]\trb_se->my_q=[%p]\treal_vlag=[%lld]\tvruntime_eligible=[%d]\n",
+			__FILENAME__, __func__, __LINE__,
+			rb_se, entity_is_task(rb_se) ? task_of(rb_se)->pid : -1, entity_is_task(rb_se) ? task_of(rb_se)->tgid : -1,
+			rb_se->vruntime, rb_se->load.weight, rb_se->vlag, rb_se->slice, rb_se->deadline, rb_se->my_q, entity_lag(qzc_avruntime, rb_se), vruntime_eligible(cfs_rq, rb_se->vruntime));
+	}
+}
+EXPORT_SYMBOL(sched_debug_cfs_rq_info);
diff --git a/null_reproduction_test/Makefile b/null_reproduction_test/Makefile
new file mode 100644
index 000000000000..48feb459e5ff
--- /dev/null
+++ b/null_reproduction_test/Makefile
@@ -0,0 +1,9 @@
+obj-m += test_sched.o
+KDIR := /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+
+all:
+	$(MAKE) -C $(KDIR) M=$(PWD) modules
+
+clean:
+	$(MAKE) -C $(KDIR) M=$(PWD) clean
\ No newline at end of file
diff --git a/null_reproduction_test/fullcpu.c b/null_reproduction_test/fullcpu.c
new file mode 100644
index 000000000000..136c73671035
--- /dev/null
+++ b/null_reproduction_test/fullcpu.c
@@ -0,0 +1,12 @@
+#include <string.h>
+#include <unistd.h>
+
+int main(void)
+{
+	volatile unsigned int a = 9;	/* volatile/unsigned: keep the spin loop, avoid signed-overflow UB */
+	while (1) {
+		a *= 9;
+	}
+
+	return 0;
+}
\ No newline at end of file
diff --git a/null_reproduction_test/make.sh b/null_reproduction_test/make.sh
new file mode 100755
index 000000000000..002385d17046
--- /dev/null
+++ b/null_reproduction_test/make.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+make clean
+
+cd ..
+
+make modules_prepare
+
+cd ./null_reproduction_test
+
+make -C ../ M=$(pwd)
+
+gcc fullcpu.c -o fullcpu
+
+echo "===================="
+echo 'please run test.sh'
+echo "===================="
\ No newline at end of file
diff --git a/null_reproduction_test/test.sh b/null_reproduction_test/test.sh
new file mode 100755
index 000000000000..a6cac6d2d7c2
--- /dev/null
+++ b/null_reproduction_test/test.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+test() {
+	cpu=$1
+	cgroup=test0
+
+	mkdir /sys/fs/cgroup/cpu/$cgroup/
+	mkdir /sys/fs/cgroup/memory/$cgroup/
+	echo 10000000 > /sys/fs/cgroup/memory/$cgroup/memory.limit_in_bytes
+
+	taskset -c $cpu ./fullcpu &
+	pid=$!
+
+	echo $pid > /sys/fs/cgroup/cpu/$cgroup/tasks
+	echo $pid > /sys/fs/cgroup/memory/$cgroup/tasks
+
+	# move every other test_sched kthread into the cgroup as well
+	for pid in $(ps -auxf | grep test_sched | grep -v grep | grep -v test_sched_0 | grep -v test_sched_1 | awk '{print($2)}'); do
+		echo $pid > /sys/fs/cgroup/cpu/$cgroup/tasks
+	done
+}

+killall fullcpu
+rmmod test_sched
+insmod ./test_sched.ko bind_cpu=1 test_count=15
+
+pid0=$(ps -auxf | grep 'test_sched_0' | grep -v grep | awk '{print($2)}')
+echo $pid0 > /sys/module/fair/parameters/se_schedule_pid
+
+# echo 1 > /sys/module/fair/parameters/qzc_fixed_switch
+
+echo 1 > /sys/module/fair/parameters/qzc_vlag_switch
+
+test 1
diff --git a/null_reproduction_test/test_sched.c b/null_reproduction_test/test_sched.c
new file mode 100644
index 000000000000..7a33fa77c923
--- /dev/null
+++ b/null_reproduction_test/test_sched.c
@@ -0,0 +1,141 @@
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/cpumask.h>
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/sched/task.h>
+
+static DECLARE_COMPLETION(comp);
+
+#define THREAD_NUM 100000
+static struct task_struct *schedule_threads[THREAD_NUM];
+static int bind_cpu = 0;
+module_param(bind_cpu, int, 0644);
+MODULE_PARM_DESC(bind_cpu, "CPU core to bind the thread to");
+
+static int test_count = 1;
+module_param(test_count, int, 0644);
+MODULE_PARM_DESC(test_count, "test thread count (default: 1)");
+
+static int sched_debug_cfs_rq_info_print_cnt;
+
+static int thread_function(void *data);
+static void start_one_thread(int id, int cpu);
+
+static int __init schedule_driver_init(void)
+{
+	printk(KERN_INFO "Schedule driver: Initializing\n");
+
+	start_one_thread(0, bind_cpu);
+	start_one_thread(1, bind_cpu);
+	for (int i = 2; i < test_count; i++)
+		start_one_thread(i, -1);
+
+	return 0;
+}
+
+struct thread_data {
+	int id;
+};
+
+static void start_one_thread(int id, int cpu)
+{
+	char name[255];
+	snprintf(name, sizeof(name), "test_sched_%d/%d", id, cpu);
+
+	struct thread_data *tdata = kmalloc(sizeof(struct thread_data), GFP_KERNEL);
+	tdata->id = id;
+
+	// create the kthread but do not run it yet
+	schedule_threads[id] = kthread_create(thread_function, tdata, "%s", name);
+	if (IS_ERR(schedule_threads[id])) {
+		printk("Failed to create %s, %ld\n", name, PTR_ERR(schedule_threads[id]));
+		schedule_threads[id] = NULL;	// clear only AFTER printing the error code
+		return;
+	}
+
+	if (cpu >= 0)	// -1 means "do not bind"; CPU 0 is a valid target
+		kthread_bind(schedule_threads[id], cpu);
+	// run the kthread
+	wake_up_process(schedule_threads[id]);
+
+	printk(KERN_INFO "create %s success\n", name);
+	return;
+}
+
+u64 sched_debug_min_vruntime(struct cfs_rq *cfs);
+void sched_debug_cfs_rq_info(struct cfs_rq *cfs_rq);
+
+static int thread_function(void *data)
+{
+	printk(KERN_INFO "Schedule thread: Started on CPU %d\n", smp_processor_id());
+	struct task_struct *task = current;
+
+	set_current_state(TASK_RUNNING);
+
+	struct thread_data *tdata = data;
+	// test_sched_1 waits until test_sched_0 signals it
+	if (tdata->id == 1) {
+		set_user_nice(task, 8);
+		wait_for_completion_interruptible(&comp);
+	}
+
+	while (!kthread_should_stop()) {
+		// test_sched_0 checks the trigger condition
+		if (tdata->id == 0) {
+			struct sched_entity *se = &task->se;
+			struct cfs_rq *cfs = se->cfs_rq;
+			u64 vruntime = se->vruntime;
+			u64 min_vruntime = sched_debug_min_vruntime(cfs);
+
+			if (sched_debug_cfs_rq_info_print_cnt % 10000 == 0) {
+				sched_debug_cfs_rq_info(cfs);
+			}
+			sched_debug_cfs_rq_info_print_cnt += 1;
+
+			if (-102743846405689LL > (s64)(vruntime - min_vruntime)) {
+				int old_nice = task_nice(task);
+				set_user_nice(task, -20);
+
+				complete(&comp); // wake up test_sched_1
+				printk("vruntime: %llu, min_vruntime: %llu, renice: %d->%d\n",
+					vruntime, min_vruntime, old_nice, -20);
+			}
+		} else if (tdata->id == 1) {
+			int a = 1;
+			for (int i = 0; i < 1000000; i++) {
+				a += tdata->id;
+			}
+		}
+
+		if (tdata->id == 1)
+			cond_resched();
+		else {
+			schedule_timeout_uninterruptible(1);
+		}
+	}
+
+	printk(KERN_INFO "Schedule thread: Exiting from CPU %d\n", smp_processor_id());
+	return 0;
+}
+
+static void __exit schedule_driver_exit(void)
+{
+	for (int i = 0; i < THREAD_NUM; i++) {	// threads 0 and 1 exist even if test_count < 2
+		if (schedule_threads[i]) {
+			kthread_stop(schedule_threads[i]);
+			printk(KERN_INFO "Schedule driver: Thread stopped\n");
+		}
+	}
+}
+
+module_init(schedule_driver_init);
+module_exit(schedule_driver_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Zicheng Qu <quzicheng@huawei.com>");
+MODULE_DESCRIPTION("A driver that creates threads calling schedule() in a loop with CPU binding");
+MODULE_VERSION("1.0");
-- 
2.34.1
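
Note on the mechanism (our reading of the patch's own comments and debug
output; not confirmed upstream): the hard-coded shares, the injected positive
vlag, and the large initial min_vruntime all push cfs_rq->avg_vruntime, the
load-scaled sum of entity keys, toward s64 overflow. Once that sum wraps,
vruntime_eligible() presumably misjudges every queued entity as ineligible and
pick_eevdf() can return NULL, which its caller then dereferences. A minimal
userspace sketch of the wraparound, assuming gcc/clang for
__builtin_add_overflow(); the key magnitude is hypothetical, the weight is the
one the patch hard-codes, scaled down as scale_load_down() would on 64-bit:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* hypothetical entity key (vruntime - min_vruntime), ~ -1e14 ns */
		int64_t key = -102743846405689LL;
		int64_t weight = 111616 >> 10;	/* scale_load_down(111616) = 109 */
		int64_t term = key * weight;	/* ~ -1.1e16, still fits in s64 */
		int64_t avg = 0;

		/* accumulate up to THREAD_NUM (test_sched.c) such entities */
		for (int i = 0; i < 100000; i++) {
			if (__builtin_add_overflow(avg, term, &avg)) {
				printf("avg_vruntime-style sum wraps after %d entities\n", i + 1);
				return 0;
			}
		}
		printf("no overflow\n");
		return 0;
	}

With these numbers the accumulator wraps after roughly 824 entities, long
before the 100000 kthreads the module can create.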
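The watchdog in thread_function() fires on the signed comparison
-102743846405689 > (s64)(vruntime - min_vruntime). A brief sketch of why the
cast makes the check wraparound-safe; the constants come from the patch, while
the assumed lag of 2e14 ns is made up for illustration:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* initial min_vruntime from the patched init_cfs_rq() */
		uint64_t min_vruntime = 4596393947272479ULL;
		/* hypothetical task that has fallen ~2e14 ns of vruntime behind */
		uint64_t vruntime = min_vruntime - 200000000000000ULL;

		/* comparing the raw u64 values breaks as soon as either counter
		 * wraps; the signed difference stays correct while the two are
		 * within 2^63 of each other */
		if (-102743846405689LL > (int64_t)(vruntime - min_vruntime))
			printf("trigger: task is > ~28.5 hours of vruntime behind min_vruntime\n");
		return 0;
	}

The threshold of 102743846405689 ns is about 28.5 hours of virtual runtime,
so the renice-to--20 trigger only fires once the queue state is already far
out of range.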
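The qzc_fixed_switch path in reweight_entity() pulls a lone current entity's
vruntime up to min_vruntime while keeping its deadline the same distance away.
A toy illustration of that clamp with made-up values:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* made-up values: the only runnable entity lags behind min_vruntime */
		uint64_t vruntime = 1000, deadline = 4000, min_vruntime = 3000;
		int64_t rel_deadline = (int64_t)(deadline - vruntime);	/* 3000 */

		vruntime = min_vruntime;		/* clamp up to min_vruntime */
		deadline = vruntime + rel_deadline;	/* 6000: relative deadline kept */

		printf("vruntime=%llu deadline=%llu rel_deadline=%lld\n",
		       (unsigned long long)vruntime, (unsigned long long)deadline,
		       (long long)rel_deadline);
		return 0;
	}

Preserving the relative deadline keeps the entity's remaining slice intact, so
the clamp only shifts the entity forward in virtual time without changing how
soon EEVDF considers its request due.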