[PATCH OLK-6.6 15/15] [Huawei] sched/fair: Use separate qos_throttled and qos_throttle_count

13 Apr 2026

Offering: HULK
hulk inclusion
category: feature
bugzilla: NA

--------------------------------

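Track QoS throttling in dedicated cfs_rq fields, qos_throttled and
qos_throttle_count, instead of overloading cfs_rq->throttled (with
QOS_THROTTLED) and cfs_rq->throttle_count. This avoids conflicts with
the task-based model of the CFS bandwidth throttle.

As a result, enqueue_entity() no longer needs a QoS special case around
the PELT clock handling, while the leaf list add/del operations in
tg_qos_throttle_down() and tg_qos_unthrottle_up() are kept.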

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
---
 kernel/sched/fair.c  | 118 ++++++++++++++++++++++++++-----------------
 kernel/sched/sched.h |   5 ++
 2 files changed, 78 insertions(+), 45 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a7d07ec86cb3..603c67be3b5b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -163,6 +163,20 @@ static int __unthrottle_qos_cfs_rqs(int cpu);
 static int unthrottle_qos_cfs_rqs(int cpu);
 static bool qos_smt_expelled(int this_cpu);
 static bool is_offline_task(struct task_struct *p);
+static inline int qos_cfs_rq_throttled(struct cfs_rq *cfs_rq);
+static inline int qos_throttled_hierarchy(struct cfs_rq *cfs_rq);
+
+#else // !CONFIG_QOS_SCHED
+
+static inline int qos_cfs_rq_throttled(struct cfs_rq *cfs_rq)
+{
+	return false;	/* !CONFIG_QOS_SCHED stub: never QoS throttled */
+}
+
+static inline int qos_throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+	return false;	/* !CONFIG_QOS_SCHED stub: no QoS throttle count */
+}
 #endif
 
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
@@ -5506,20 +5520,22 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
 	if (cfs_rq->nr_running == 1) {
 		check_enqueue_throttle(cfs_rq);
-		list_add_leaf_cfs_rq(cfs_rq);
-#ifdef CONFIG_QOS_SCHED
-		if (cfs_rq->throttled != QOS_THROTTLED) {
-#endif
+
+		/*
+		 * Skip adding to leaf list when qos throttled. The cfs_rq is
+		 * not in the leaf list while throttled, and will be added
+		 * back when unthrottled via tg_qos_unthrottle_up().
+		 */
+		if (!qos_throttled_hierarchy(cfs_rq))
+			list_add_leaf_cfs_rq(cfs_rq);
+
 #ifdef CONFIG_CFS_BANDWIDTH
-			if (cfs_rq->pelt_clock_throttled) {
-				struct rq *rq = rq_of(cfs_rq);
+		if (cfs_rq->pelt_clock_throttled) {
+			struct rq *rq = rq_of(cfs_rq);
 
-				cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
-					cfs_rq->throttled_clock_pelt;
-				cfs_rq->pelt_clock_throttled = 0;
-			}
-#endif
-#ifdef CONFIG_QOS_SCHED
+			cfs_rq->throttled_clock_pelt_time += rq_clock_pelt(rq) -
+				cfs_rq->throttled_clock_pelt;
+			cfs_rq->pelt_clock_throttled = 0;
 		}
 #endif
 	}
@@ -6201,14 +6217,6 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	list_del_init(&cfs_rq->soft_quota_throttled_list);
 #endif
 
-#ifdef CONFIG_QOS_SCHED
-	/*
-	 * if this cfs_rq throttled by qos, not need unthrottle it.
-	 */
-	if (cfs_rq->throttled == QOS_THROTTLED)
-		return;
-#endif
-
 	cfs_rq->throttled = 0;
 
 	update_rq_clock(rq);
@@ -6373,7 +6381,7 @@ static bool distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
 		 * so no longer allocate time to cfs_rq in this scenario.
 		 */
 #ifdef CONFIG_QOS_SCHED
-		if (cfs_rq->throttled == QOS_THROTTLED &&
+		if (qos_cfs_rq_throttled(cfs_rq) &&
 			cfs_rq->runtime_remaining > 0)
 			goto next;
 #endif
@@ -7682,6 +7690,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 		if (cfs_rq_is_idle(cfs_rq))
 			idle_h_nr_running = 1;
+		/* end evaluation on encountering a QoS throttled cfs_rq */
+		if (qos_cfs_rq_throttled(cfs_rq))
+			goto enqueue_throttle;
 
 		flags = ENQUEUE_WAKEUP;
 	}
@@ -7700,6 +7711,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 #endif
 		if (cfs_rq_is_idle(cfs_rq))
 			idle_h_nr_running = 1;
+		/* end evaluation on encountering a QoS throttled cfs_rq */
+		if (qos_cfs_rq_throttled(cfs_rq))
+			goto enqueue_throttle;
 	}
 
 	/* At this point se is NULL and we are at root level*/
@@ -7724,6 +7738,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	if (!task_new)
 		check_update_overutilized_status(rq);
 
+enqueue_throttle:
 	assert_list_leaf_cfs_rq(rq);
 
 	hrtick_update(rq);
@@ -7769,6 +7784,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		if (cfs_rq_is_idle(cfs_rq))
 			idle_h_nr_running = 1;
 
+		/* end evaluation on encountering a QoS throttled cfs_rq */
+		if (qos_cfs_rq_throttled(cfs_rq))
+			goto dequeue_throttle;
+
 		if (throttled_hierarchy(cfs_rq) && task_throttled)
 			record_throttle_clock(cfs_rq);
 
@@ -7802,6 +7821,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		if (cfs_rq_is_idle(cfs_rq))
 			idle_h_nr_running = 1;
 
+		/* end evaluation on encountering a QoS throttled cfs_rq */
+		if (qos_cfs_rq_throttled(cfs_rq))
+			goto dequeue_throttle;
+
 		if (throttled_hierarchy(cfs_rq) && task_throttled)
 			record_throttle_clock(cfs_rq);
 	}
@@ -7815,6 +7838,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
 		rq->next_balance = jiffies;
 
+dequeue_throttle:
 	util_est_update(&rq->cfs, p, task_sleep);
 	hrtick_update(rq);
 }
@@ -9727,10 +9751,10 @@ static int tg_qos_throttle_down(struct task_group *tg, void *data)
 	struct rq *rq = data;
 	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
 
-	if (!cfs_rq->throttle_count)
+	if (!cfs_rq->qos_throttle_count)
 		list_del_leaf_cfs_rq(cfs_rq);
 
-	cfs_rq->throttle_count++;
+	cfs_rq->qos_throttle_count++;
 
 	return 0;
 }
@@ -9740,14 +9764,23 @@ static int tg_qos_unthrottle_up(struct task_group *tg, void *data)
 	struct rq *rq = data;
 	struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
 
-	cfs_rq->throttle_count--;
-
-	if (!cfs_rq->throttle_count && !cfs_rq_is_decayed(cfs_rq))
+	cfs_rq->qos_throttle_count--;
+	if (!cfs_rq->qos_throttle_count && !cfs_rq_is_decayed(cfs_rq))
 		list_add_leaf_cfs_rq(cfs_rq);
 
 	return 0;
 }
 
+static inline int qos_cfs_rq_throttled(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->qos_throttled;	/* set in throttle_qos_cfs_rq() */
+}
+
+static inline int qos_throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+	return cfs_rq->qos_throttle_count;	/* bumped in tg_qos_throttle_down() */
+}
+
 static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
@@ -9819,7 +9852,7 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	if (!qos_timer_is_activated(cpu_of(rq)))
 		start_qos_hrtimer(cpu_of(rq));
 
-	cfs_rq->throttled = QOS_THROTTLED;
+	cfs_rq->qos_throttled = 1;
 
 	list_add(&cfs_rq->qos_throttled_list,
 		 &per_cpu(qos_throttled_cfs_rq, cpu_of(rq)));
@@ -9837,10 +9870,7 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
 
-	if (cfs_rq->throttled != QOS_THROTTLED)
-		return;
-
-	cfs_rq->throttled = 0;
+	cfs_rq->qos_throttled = 0;
 
 	update_rq_clock(rq);
 	list_del_init(&cfs_rq->qos_throttled_list);
@@ -9882,7 +9912,7 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 		cfs_rq->qos_idle_h_nr_running += qos_idle_delta;
 #endif
 
-		if (cfs_rq_throttled(cfs_rq))
+		if (qos_cfs_rq_throttled(cfs_rq))
 			goto unthrottle_throttle;
 	}
 
@@ -9899,7 +9929,7 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 #endif
 
 		/* end evaluation on encountering a throttled cfs_rq */
-		if (cfs_rq_throttled(cfs_rq))
+		if (qos_cfs_rq_throttled(cfs_rq))
 			goto unthrottle_throttle;
 	}
 
@@ -9915,30 +9945,26 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 static int __unthrottle_qos_cfs_rqs(int cpu)
 {
 	struct cfs_rq *cfs_rq, *tmp_rq;
-	int cfs_bandwidth_throttle = 0;
 
 	list_for_each_entry_safe(cfs_rq, tmp_rq, &per_cpu(qos_throttled_cfs_rq, cpu),
 				 qos_throttled_list) {
-		if (cfs_rq_throttled(cfs_rq)) {
+		if (qos_cfs_rq_throttled(cfs_rq)) {
 			unthrottle_qos_cfs_rq(cfs_rq);
 		}
-
-		if (throttled_hierarchy(cfs_rq))
-			cfs_bandwidth_throttle = 1;
 	}
 
-	return cfs_bandwidth_throttle;
+	return 0;
 }
 
 static int unthrottle_qos_cfs_rqs(int cpu)
 {
-	int throttled = __unthrottle_qos_cfs_rqs(cpu);
+	__unthrottle_qos_cfs_rqs(cpu);
 
 	/*
 	 * We should not cancel the timer if there is still a cfs_rq
 	 * throttling after __unthrottle_qos_cfs_rqs().
 	 */
-	if (qos_timer_is_activated(cpu) && !(qos_smt_expelled(cpu) || throttled))
+	if (qos_timer_is_activated(cpu) && !qos_smt_expelled(cpu))
 		cancel_qos_timer(cpu);
 
 	return cpu_rq(cpu)->cfs.h_nr_running;
@@ -9949,6 +9975,9 @@ static bool check_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	if (unlikely(__this_cpu_read(qos_cpu_overload)))
 		return false;
 
+	if (cfs_rq && throttled_hierarchy(cfs_rq))
+		return false;
+
 	if (unlikely(cfs_rq && is_offline_level(cfs_rq->tg->qos_level) &&
 		     !sched_idle_cpu(smp_processor_id()) &&
 		     cfs_rq->h_nr_running == cfs_rq->idle_h_nr_running)) {
@@ -9969,7 +9998,7 @@ static inline void unthrottle_qos_sched_group(struct cfs_rq *cfs_rq)
 	struct rq_flags rf;
 
 	rq_lock_irqsave(rq, &rf);
-	if (is_offline_level(cfs_rq->tg->qos_level) && cfs_rq_throttled(cfs_rq))
+	if (is_offline_level(cfs_rq->tg->qos_level) && qos_cfs_rq_throttled(cfs_rq))
 		unthrottle_qos_cfs_rq(cfs_rq);
 	rq_unlock_irqrestore(rq, &rf);
 }
@@ -10085,7 +10114,6 @@ static bool qos_smt_expelled(int this_cpu)
 	return false;
 }
 #endif
-
 #endif
 
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
@@ -15162,7 +15190,7 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
 	 * change, make sure this cfs_rq stays on leaf cfs_rq list to have
 	 * that removed load decayed or it can cause faireness problem.
 	 */
-	if (!cfs_rq_pelt_clock_throttled(cfs_rq))
+	if (!cfs_rq_pelt_clock_throttled(cfs_rq) && !qos_throttled_hierarchy(cfs_rq))
 		list_add_leaf_cfs_rq(cfs_rq);
 
 	/* Start to propagate at parent */
@@ -15173,7 +15201,7 @@ static void propagate_entity_cfs_rq(struct sched_entity *se)
 
 		update_load_avg(cfs_rq, se, UPDATE_TG);
 
-		if (!cfs_rq_pelt_clock_throttled(cfs_rq))
+		if (!cfs_rq_pelt_clock_throttled(cfs_rq) && !qos_throttled_hierarchy(cfs_rq))
 			list_add_leaf_cfs_rq(cfs_rq);
 	}
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 850baeab1085..1e1fcfd37b02 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -794,7 +794,12 @@ struct cfs_rq {
 #endif
 	KABI_REPLACE(_KABI_RESERVE(5); _KABI_RESERVE(6),
 		struct list_head throttled_limbo_list)
+#ifdef CONFIG_QOS_SCHED
+	KABI_USE2(7, int qos_throttled, int qos_throttle_count)
+#else
 	KABI_RESERVE(7)
+#endif
+
 	KABI_RESERVE(8)
 };
 
-- 
2.18.0