On 20/04/2021 02:18, Barry Song wrote:
[...]
@@ -5786,11 +5786,12 @@ static void record_wakee(struct task_struct *p)
  * whatever is irrelevant, spread criteria is apparent partner count exceeds
  * socket size.
  */
-static int wake_wide(struct task_struct *p)
+static int wake_wide(struct task_struct *p, int cluster)
 {
 	unsigned int master = current->wakee_flips;
 	unsigned int slave = p->wakee_flips;
-	int factor = __this_cpu_read(sd_llc_size);
+	int factor = cluster ? __this_cpu_read(sd_cluster_size) :
+			       __this_cpu_read(sd_llc_size);
I don't see that the wake_wide() change has any effect here. None of the sched domains has SD_BALANCE_WAKE set, so a wakeup (WF_TTWU) can never end up in the slow path. Have you seen a difference in what wake_wide() returns when running your `lmbench stream` workload with `sd cluster size` instead of `sd llc size` as the factor?
I guess for you, wakeups are now subdivided into faster (cluster = 4 CPUs) and fast (llc = 24 CPUs) via sis(), not into fast (sis()) and slow (find_idlest_cpu()).
 	if (master < slave)
 		swap(master, slave);
[...]
@@ -6745,6 +6748,12 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	int want_affine = 0;
 	/* SD_flags and WF_flags share the first nibble */
 	int sd_flag = wake_flags & 0xF;
+	/*
+	 * if cpu and prev_cpu share LLC, consider cluster sibling rather
+	 * than llc. this is typically true while tasks are bound within
+	 * one numa
+	 */
+	int cluster = sched_cluster_active() && cpus_share_cache(cpu, prev_cpu, 0);
So you changed from scanning cluster before LLC to scan either cluster or LLC.
And this is based on whether `this_cpu` and `prev_cpu` are sharing LLC or not. So you only see an effect when running the workload with `numactl -N X ...`.
 	if (wake_flags & WF_TTWU) {
 		record_wakee(p);
@@ -6756,7 +6765,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 			new_cpu = prev_cpu;
 		}
-		want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
+		want_affine = !wake_wide(p, cluster) && cpumask_test_cpu(cpu, p->cpus_ptr);
}
rcu_read_lock();
@@ -6768,7 +6777,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
 			if (cpu != prev_cpu)
-				new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
+				new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync, cluster);
 			sd = NULL; /* Prefer wake_affine over balance flags */
 			break;
@@ -6785,7 +6794,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
 	} else if (wake_flags & WF_TTWU) { /* XXX always ? */
 		/* Fast path */
-		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
+		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu, cluster);
 		if (want_affine)
 			current->recent_used_cpu = cpu;
[...]