
After merging SNC support, we found that ARM MPAM and x86 RDT concepts
diverge significantly. Attempts to maintain a shared resctrl layer
resulted in excessive compatibility shims and fragile code. Therefore we
now split the compilation dependencies between ARM MPAM and x86 RDT and
maintain separate code paths for each architecture. This preparatory
patch restores the x86 RDT implementation to its original state (commit
543fe1576126), while allowing ARM MPAM to evolve independently.

Signed-off-by: Zeng Heng <zengheng4@huawei.com>
---
(Stand-alone usage sketches for the new header follow the patch trailer;
they are illustrations only, not part of the change.)

 arch/x86/Kconfig                       |   2 -
 arch/x86/kernel/cpu/resctrl/rdtgroup.c |   4 +-
 fs/resctrl/Kconfig                     |   1 +
 include/linux/resctrl.h                |   7 +
 include/linux/resctrl_mpam.h           | 452 +++++++++++++++++++++++++
 5 files changed, 462 insertions(+), 4 deletions(-)
 create mode 100644 include/linux/resctrl_mpam.h

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4ba29ccfaa9d..c2bdb7b95176 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -482,8 +482,6 @@ config X86_CPU_RESCTRL
 	depends on MISC_FILESYSTEMS
 	select KERNFS
 	select ARCH_HAS_CPU_RESCTRL
-	select RESCTRL_FS
-	select RESCTRL_FS_PSEUDO_LOCK
 	help
 	  Enable x86 CPU resource control support.

diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index d7163b764c62..be67cb47a888 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -3036,7 +3036,7 @@ static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
 	char name[32];

 	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
-	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+	sprintf(name, "mon_%s_%02ld", r->name, snc_mode ? d->ci->id : d->hdr.id);
 	if (snc_mode)
 		sprintf(subname, "mon_sub_%s_%02d", r->name, d->hdr.id);

@@ -3088,7 +3088,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
 	lockdep_assert_held(&rdtgroup_mutex);

 	snc_mode = r->mon_scope == RESCTRL_L3_NODE;
-	sprintf(name, "mon_%s_%02d", r->name, snc_mode ? d->ci->id : d->hdr.id);
+	sprintf(name, "mon_%s_%02ld", r->name, snc_mode ? d->ci->id : d->hdr.id);
 	kn = kernfs_find_and_get(parent_kn, name);
 	if (kn) {
 		/*
diff --git a/fs/resctrl/Kconfig b/fs/resctrl/Kconfig
index 5bbf5a1798cd..540da76e1189 100644
--- a/fs/resctrl/Kconfig
+++ b/fs/resctrl/Kconfig
@@ -1,6 +1,7 @@
 config RESCTRL_FS
 	bool "CPU Resource Control Filesystem (resctrl)"
 	depends on ARCH_HAS_CPU_RESCTRL
+	depends on ARM_CPU_RESCTRL
 	select KERNFS
 	select PROC_CPU_RESCTRL if PROC_FS
 	help
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index d94abba1c716..d86f0b1d3f60 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -2,6 +2,12 @@
 #ifndef _RESCTRL_H
 #define _RESCTRL_H

+#ifdef CONFIG_ARM_CPU_RESCTRL
+
+#include <linux/resctrl_mpam.h>
+
+#else
+
 #include <linux/cacheinfo.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
@@ -343,4 +349,5 @@ void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *
 extern unsigned int resctrl_rmid_realloc_threshold;
 extern unsigned int resctrl_rmid_realloc_limit;

+#endif /* ARM_CPU_RESCTRL */
 #endif /* _RESCTRL_H */
diff --git a/include/linux/resctrl_mpam.h b/include/linux/resctrl_mpam.h
new file mode 100644
index 000000000000..449620cf04b3
--- /dev/null
+++ b/include/linux/resctrl_mpam.h
@@ -0,0 +1,452 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _RESCTRL_MPAM_H
+#define _RESCTRL_MPAM_H
+
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/pid.h>
+#include <linux/resctrl_types.h>
+
+#ifdef CONFIG_ARCH_HAS_CPU_RESCTRL
+#include <asm/resctrl.h>
+#endif
+
+/* CLOSID, RMID value used by the default control group */
+#define RESCTRL_RESERVED_CLOSID		0
+#define RESCTRL_RESERVED_RMID		0
+
+#define RESCTRL_PICK_ANY_CPU		-1
+
+#ifdef CONFIG_PROC_CPU_RESCTRL
+
+int proc_resctrl_show(struct seq_file *m,
+		      struct pid_namespace *ns,
+		      struct pid *pid,
+		      struct task_struct *tsk);
+
+#endif
+
+/* max value for struct rdt_domain's mbps_val */
+#define MBA_MAX_MBPS		U32_MAX
+
+/*
+ * Resctrl uses u32 to hold the user-space config. The maximum bitmap size is
+ * 32.
+ */
+#define RESCTRL_MAX_CBM		32
+
+extern unsigned int resctrl_rmid_realloc_limit;
+extern unsigned int resctrl_rmid_realloc_threshold;
+
+/**
+ * struct pseudo_lock_region - pseudo-lock region information
+ * @s:			Resctrl schema for the resource to which this
+ *			pseudo-locked region belongs
+ * @closid:		The closid that this pseudo-locked region uses
+ * @d:			RDT domain to which this pseudo-locked region
+ *			belongs
+ * @cbm:		bitmask of the pseudo-locked region
+ * @lock_thread_wq:	waitqueue used to wait on the pseudo-locking thread
+ *			completion
+ * @thread_done:	variable used by waitqueue to test if pseudo-locking
+ *			thread completed
+ * @cpu:		core associated with the cache on which the setup code
+ *			will be run
+ * @line_size:		size of the cache lines
+ * @size:		size of pseudo-locked region in bytes
+ * @kmem:		the kernel memory associated with pseudo-locked region
+ * @minor:		minor number of character device associated with this
+ *			region
+ * @debugfs_dir:	pointer to this region's directory in the debugfs
+ *			filesystem
+ * @pm_reqs:		Power management QoS requests related to this region
+ */
+struct pseudo_lock_region {
+	struct resctrl_schema	*s;
+	u32			closid;
+	struct rdt_domain	*d;
+	u32			cbm;
+	wait_queue_head_t	lock_thread_wq;
+	int			thread_done;
+	int			cpu;
+	unsigned int		line_size;
+	unsigned int		size;
+	void			*kmem;
+	unsigned int		minor;
+	struct dentry		*debugfs_dir;
+	struct list_head	pm_reqs;
+};
+
+/**
+ * struct resctrl_staged_config - parsed configuration to be applied
+ * @new_ctrl:		new ctrl value to be loaded
+ * @have_new_ctrl:	whether the user provided new_ctrl is valid
+ */
+struct resctrl_staged_config {
+	u32			new_ctrl;
+	bool			have_new_ctrl;
+};
+
+/**
+ * struct rdt_domain - group of CPUs sharing a resctrl resource
+ * @list:		all instances of this resource
+ * @id:			unique id for this instance
+ * @cpu_mask:		which CPUs share this resource
+ * @rmid_busy_llc:	bitmap of which limbo RMIDs are above threshold
+ * @mbm_total:		saved state for MBM total bandwidth
+ * @mbm_local:		saved state for MBM local bandwidth
+ * @mbm_over:		worker to periodically read MBM h/w counters
+ * @cqm_limbo:		worker to periodically read CQM h/w counters
+ * @mbm_work_cpu:	worker CPU for MBM h/w counters
+ * @cqm_work_cpu:	worker CPU for CQM h/w counters
+ * @plr:		pseudo-locked region (if any) associated with domain
+ * @staged_config:	parsed configuration to be applied
+ * @mbps_val:		When mba_sc is enabled, this holds the array of user
+ *			specified control values for mba_sc in MBps, indexed
+ *			by closid
+ */
+struct rdt_domain {
+	struct list_head		list;
+	int				id;
+	struct cpumask			cpu_mask;
+	unsigned long			*rmid_busy_llc;
+	struct mbm_state		*mbm_total;
+	struct mbm_state		*mbm_local;
+	struct mbm_state		*mbm_core;
+	struct delayed_work		mbm_over;
+	struct delayed_work		cqm_limbo;
+	int				mbm_work_cpu;
+	int				cqm_work_cpu;
+	struct pseudo_lock_region	*plr;
+	struct resctrl_staged_config	staged_config[CDP_NUM_TYPES];
+	u32				*mbps_val;
+};
+
+/**
+ * struct resctrl_cache - Cache allocation related data
+ * @cbm_len:		Length of the cache bit mask
+ * @min_cbm_bits:	Minimum number of consecutive bits to be set.
+ *			The value 0 means the architecture can support
+ *			zero CBM.
+ * @shareable_bits:	Bitmask of shareable resource with other
+ *			executing entities
+ * @arch_has_sparse_bitmasks:	True if a bitmask like f00f is valid.
+ * @arch_has_per_cpu_cfg:	True if QOS_CFG register for this cache
+ *				level has CPU scope.
+ * @intpri_wd:		Number of implemented bits in the priority
+ *			partition.
+ */
+struct resctrl_cache {
+	unsigned int	cbm_len;
+	unsigned int	min_cbm_bits;
+	unsigned int	shareable_bits;
+	bool		arch_has_sparse_bitmasks;
+	bool		arch_has_per_cpu_cfg;
+	unsigned int	intpri_wd;
+};
+
+/**
+ * enum membw_throttle_mode - System's memory bandwidth throttling mode
+ * @THREAD_THROTTLE_UNDEFINED:	Not relevant to the system
+ * @THREAD_THROTTLE_MAX:	Memory bandwidth is throttled at the core
+ *				always using smallest bandwidth percentage
+ *				assigned to threads, aka "max throttling"
+ * @THREAD_THROTTLE_PER_THREAD:	Memory bandwidth is throttled at the thread
+ */
+enum membw_throttle_mode {
+	THREAD_THROTTLE_UNDEFINED = 0,
+	THREAD_THROTTLE_MAX,
+	THREAD_THROTTLE_PER_THREAD,
+};
+
+/**
+ * struct resctrl_membw - Memory bandwidth allocation related data
+ * @min_bw:		Minimum memory bandwidth percentage user can request
+ * @max_bw:		Maximum memory bandwidth value, used as the reset value
+ * @bw_gran:		Granularity at which the memory bandwidth is allocated
+ * @delay_linear:	True if memory B/W delay is in linear scale
+ * @arch_needs_linear:	True if we can't configure non-linear resources
+ * @throttle_mode:	Bandwidth throttling mode when threads request
+ *			different memory bandwidths
+ * @mba_sc:		True if MBA software controller(mba_sc) is enabled
+ * @mb_map:		Mapping of memory B/W percentage to memory B/W delay
+ * @intpri_wd:		Number of implemented bits in the priority
+ *			partition.
+ */
+struct resctrl_membw {
+	u32				min_bw;
+	u32				max_bw;
+	u32				bw_gran;
+	u32				delay_linear;
+	bool				arch_needs_linear;
+	enum membw_throttle_mode	throttle_mode;
+	bool				mba_sc;
+	u32				*mb_map;
+	u32				intpri_wd;
+};
+
+/**
+ * enum resctrl_schema_fmt - The format user-space provides for a schema.
+ * @RESCTRL_SCHEMA_BITMAP:	The schema is a bitmap in hex.
+ * @RESCTRL_SCHEMA_RANGE:	The schema is a decimal number.
+ */
+enum resctrl_schema_fmt {
+	RESCTRL_SCHEMA_BITMAP,
+	RESCTRL_SCHEMA_RANGE,
+};
+
+/**
+ * struct rdt_resource - attributes of a resctrl resource
+ * @rid:		The index of the resource
+ * @alloc_capable:	Is allocation available on this machine
+ * @mon_capable:	Is monitor feature available on this machine
+ * @num_rmid:		Number of RMIDs available
+ * @cache_level:	Which cache level defines scope of this resource
+ * @cache:		Cache allocation related data
+ * @membw:		If the component has bandwidth controls, their properties.
+ * @domains:		RCU list of all domains for this resource
+ * @name:		Name to use in "schemata" file.
+ * @data_width:		Character width of data when displaying
+ * @default_ctrl:	Specifies default cache cbm or memory B/W percent.
+ * @format_str:		Per resource format string to show domain value
+ * @evt_list:		List of monitoring events
+ * @mbm_cfg_mask:	Bandwidth sources that can be tracked when bandwidth
+ *			monitoring events can be configured.
+ * @fflags:		flags to choose base and info files
+ * @cdp_capable:	Is the CDP feature available on this resource
+ */
+struct rdt_resource {
+	int			rid;
+	bool			alloc_capable;
+	bool			mon_capable;
+	bool			invisible;
+	bool			is_volatile;
+	int			num_rmid;
+	int			cache_level;
+	struct resctrl_cache	cache;
+	struct resctrl_membw	membw;
+	struct list_head	domains;
+	char			*name;
+	int			data_width;
+	u32			default_ctrl;
+	const char		*format_str;
+	struct list_head	evt_list;
+	unsigned int		mbm_cfg_mask;
+	unsigned long		fflags;
+	bool			cdp_capable;
+	enum resctrl_schema_fmt	schema_fmt;
+};
+
+/*
+ * Get the resource that exists at this level. If the level is not supported
+ * a dummy/not-capable resource can be returned. Levels >= RDT_NUM_RESOURCES
+ * will return NULL.
+ */
+struct rdt_resource *resctrl_arch_get_resource(enum resctrl_res_level l);
+
+/**
+ * struct resctrl_schema - configuration abilities of a resource presented to
+ *			   user-space
+ * @list:	Member of resctrl_schema_all.
+ * @name:	The name to use in the "schemata" file.
+ * @conf_type:	Whether this schema is specific to code/data.
+ * @res:	The resource structure exported by the architecture to describe
+ *		the hardware that is configured by this schema.
+ * @num_closid:	The number of closid that can be used with this schema. When
+ *		features like CDP are enabled, this will be lower than the
+ *		hardware supports for the resource.
+ */
+struct resctrl_schema {
+	struct list_head		list;
+	char				name[16];
+	enum resctrl_conf_type		conf_type;
+	struct rdt_resource		*res;
+	u32				num_closid;
+};
+
+struct resctrl_cpu_sync {
+	u32 closid;
+	u32 rmid;
+};
+
+struct resctrl_mon_config_info {
+	struct rdt_resource	*r;
+	struct rdt_domain	*d;
+	u32			evtid;
+	u32			mon_config;
+
+	int			err;
+};
+
+/**
+ * struct mon_evt - Entry in the event list of a resource
+ * @evtid:		event id
+ * @name:		name of the event
+ * @configurable:	true if the event is configurable
+ * @list:		entry in &rdt_resource->evt_list
+ */
+struct mon_evt {
+	enum resctrl_event_id	evtid;
+	char			*name;
+	bool			configurable;
+	struct list_head	list;
+};
+
+/*
+ * Update and re-load this CPUs defaults. Called via IPI, takes a pointer to
+ * struct resctrl_cpu_sync, or NULL.
+ */
+void resctrl_arch_sync_cpu_defaults(void *info);
+
+/* The number of closid supported by this resource regardless of CDP */
+u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
+
+struct rdt_domain *resctrl_arch_find_domain(struct rdt_resource *r, int id);
+int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
+
+bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
+void resctrl_arch_mon_event_config_write(void *info);
+void resctrl_arch_mon_event_config_read(void *info);
+
+/* For use by arch code that needs to remap resctrl's smaller CDP closid */
+static inline u32 resctrl_get_config_index(u32 closid,
+					   enum resctrl_conf_type type)
+{
+	switch (type) {
+	default:
+	case CDP_NONE:
+		return closid;
+	case CDP_CODE:
+		return (closid * 2) + 1;
+	case CDP_DATA:
+		return (closid * 2);
+	}
+}
+
+/*
+ * Caller must be in a RCU read-side critical section, or hold the
+ * cpuhp read lock to prevent the struct rdt_domain being freed.
+ */
+static inline struct rdt_domain *
+resctrl_get_domain_from_cpu(int cpu, struct rdt_resource *r)
+{
+	struct rdt_domain *d;
+
+	/*
+	 * Walking r->domains, ensure it can't race with cpuhp.
+	 * Because this is called via IPI by rdt_ctrl_update(), assertions
+	 * about locks this thread holds will lead to false positives. Check
+	 * someone is holding the CPUs lock.
+	 */
+	if (IS_ENABLED(CONFIG_HOTPLUG_CPU) && IS_ENABLED(CONFIG_LOCKDEP))
+		lockdep_is_cpus_held();
+
+	list_for_each_entry_rcu(d, &r->domains, list) {
+		/* Find the domain that contains this CPU */
+		if (cpumask_test_cpu(cpu, &d->cpu_mask))
+			return d;
+	}
+
+	return NULL;
+}
+
+/*
+ * Update the ctrl_val and apply this config right now.
+ * Must be called on one of the domain's CPUs.
+ */
+int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_domain *d,
+			    u32 closid, enum resctrl_conf_type t, u32 cfg_val);
+
+u32 resctrl_arch_get_config(struct rdt_resource *r, struct rdt_domain *d,
+			    u32 closid, enum resctrl_conf_type type);
+int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
+void resctrl_online_cpu(unsigned int cpu);
+void resctrl_offline_cpu(unsigned int cpu);
+
+/**
+ * resctrl_arch_rmid_read() - Read the eventid counter corresponding to rmid
+ *			      for this resource and domain.
+ * @r:			resource that the counter should be read from.
+ * @d:			domain that the counter should be read from.
+ * @closid:		closid that matches the rmid. Depending on the
+ *			architecture, the counter may match traffic of both
+ *			@closid and @rmid, or @rmid only.
+ * @rmid:		rmid of the counter to read.
+ * @eventid:		eventid to read, e.g. L3 occupancy.
+ * @val:		result of the counter read in bytes.
+ * @arch_mon_ctx:	An architecture specific value from
+ *			resctrl_arch_mon_ctx_alloc(), for MPAM this identifies
+ *			the hardware monitor allocated for this read request.
+ *
+ * Some architectures need to sleep when first programming some of the counters.
+ * (specifically: arm64's MPAM cache occupancy counters can return 'not ready'
+ * for a short period of time). Call from a non-migrateable process context on
+ * a CPU that belongs to domain @d. e.g. use smp_call_on_cpu() or
+ * schedule_work_on(). This function can be called with interrupts masked,
+ * e.g. using smp_call_function_any(), but may consistently return an error.
+ *
+ * Return:
+ * 0 on success, or -EIO, -EINVAL etc on error.
+ */
+int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
+			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
+			   u64 *val, void *arch_mon_ctx);
+
+/**
+ * resctrl_arch_rmid_read_context_check() - warn about invalid contexts
+ *
+ * When built with CONFIG_DEBUG_ATOMIC_SLEEP generate a warning when
+ * resctrl_arch_rmid_read() is called with preemption disabled.
+ *
+ * The contract with resctrl_arch_rmid_read() is that if interrupts
+ * are unmasked, it can sleep. This allows NOHZ_FULL systems to use an
+ * IPI, (and fail if the call needed to sleep), while most of the time
+ * the work is scheduled, allowing the call to sleep.
+ */
+static inline void resctrl_arch_rmid_read_context_check(void)
+{
+	if (!irqs_disabled())
+		might_sleep();
+}
+
+/**
+ * resctrl_arch_reset_rmid() - Reset any private state associated with rmid
+ *			       and eventid.
+ * @r:		The domain's resource.
+ * @d:		The rmid's domain.
+ * @closid:	closid that matches the rmid. Depending on the architecture, the
+ *		counter may match traffic of both @closid and @rmid, or @rmid only.
+ * @rmid:	The rmid whose counter values should be reset.
+ * @eventid:	The eventid whose counter values should be reset.
+ *
+ * This can be called from any CPU.
+ */
+void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
+			     u32 closid, u32 rmid,
+			     enum resctrl_event_id eventid);
+
+/**
+ * resctrl_arch_reset_rmid_all() - Reset all private state associated with
+ *				   all rmids and eventids.
+ * @r:		The resctrl resource.
+ * @d:		The domain for which all architectural counter state will
+ *		be cleared.
+ *
+ * This can be called from any CPU.
+ */
+void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_domain *d);
+
+extern unsigned int resctrl_rmid_realloc_threshold;
+extern unsigned int resctrl_rmid_realloc_limit;
+
+extern bool resctrl_mounted;
+
+int resctrl_init(void);
+void resctrl_exit(void);
+
+int resctrl_arch_mon_resource_init(void);
+void mbm_config_rftype_init(const char *config);
+
+#endif /* _RESCTRL_MPAM_H */
-- 
2.25.1
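
Sketch 1: the CDP remapping performed by resctrl_get_config_index() in the
new header, seen from a caller's point of view. This is an illustration only;
example_read_hw_config() and hw_ctrl_val are hypothetical names standing in
for whatever per-domain control array an architecture keeps.

	/*
	 * Sketch only: locate the hardware control slot for a (closid, type)
	 * pair. With CDP enabled, data and code configurations for the same
	 * resctrl closid occupy adjacent hardware indexes: data at 2*closid,
	 * code at 2*closid + 1; without CDP the index is the closid itself.
	 */
	static u32 example_read_hw_config(u32 *hw_ctrl_val, u32 closid,
					  enum resctrl_conf_type type)
	{
		u32 idx = resctrl_get_config_index(closid, type);

		return hw_ctrl_val[idx];
	}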
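
Sketch 2: resctrl_get_domain_from_cpu() requires the caller to be in an RCU
read-side critical section or to hold the cpuhp read lock. A minimal sketch of
a compliant caller follows (the function name is hypothetical); it relies only
on declarations from the new header and on cpus_read_lock()/cpus_read_unlock()
from <linux/cpu.h>, which the header already includes.

	/* Sketch only: resolve the domain id for @cpu without racing with cpuhp. */
	static int example_domain_id_of_cpu(struct rdt_resource *r, int cpu)
	{
		struct rdt_domain *d;
		int id = -1;

		cpus_read_lock();	/* keeps r->domains stable, as required */
		d = resctrl_get_domain_from_cpu(cpu, r);
		if (d)
			id = d->id;
		cpus_read_unlock();

		return id;
	}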
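
Sketch 3: the kernel-doc for resctrl_arch_rmid_read() asks callers to run the
read from a non-migratable, sleepable context on a CPU of the target domain,
e.g. via smp_call_on_cpu(). The pattern below illustrates that contract; the
wrapper struct and function names are hypothetical, arch_mon_ctx is assumed to
have been obtained from the architecture beforehand, and <linux/smp.h> and
<linux/cpumask.h> are assumed to be available.

	struct example_rmid_read {
		struct rdt_resource	*r;
		struct rdt_domain	*d;
		u32			closid;
		u32			rmid;
		enum resctrl_event_id	evtid;
		void			*arch_mon_ctx;
		u64			val;
	};

	static int __example_rmid_read(void *arg)
	{
		struct example_rmid_read *rr = arg;

		return resctrl_arch_rmid_read(rr->r, rr->d, rr->closid, rr->rmid,
					      rr->evtid, &rr->val, rr->arch_mon_ctx);
	}

	/* Run the read on a CPU that belongs to the domain, in a context that
	 * may sleep, as the kernel-doc recommends. */
	static int example_rmid_read(struct example_rmid_read *rr)
	{
		int cpu = cpumask_any(&rr->d->cpu_mask);

		return smp_call_on_cpu(cpu, __example_rmid_read, rr, false);
	}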