hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I80YXE
CVE: NA
----------------------------------------
Add UCC (Universal Converged Computing) scheduling support for XPU devices.

This introduces:
- an xpu_group hierarchy (drivers/xpu) that describes XPU devices and
  their compute units (XCUs);
- a vstream abstraction, i.e. virtual submit/complete queues, plus a new
  vstream_manage() syscall (kernel/ucc);
- a ucc scheduler (kernel/ucc_sched, kernel/sched/ucc_sched.c) that
  dispatches vstream tasks to XCUs with priority-based preemption;
- per-task /proc/<pid>/ucc_priority and /proc/<pid>/ucc_step interfaces
  and a kernel.ucc_sched_rcv_timeout sysctl.

A user-space usage sketch is included below.
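Illustrative user-space sketch (not taken from this patch): it assumes the
vstream_args layout and VSTREAM_COMMAND values from include/linux/vstream.h
are visible to user space, and __NR_vstream_manage stands in for whatever
syscall number the target architecture assigns.

	#include <unistd.h>
	#include <sys/syscall.h>

	static long vstream_manage(struct vstream_args *args, int cmd)
	{
		return syscall(__NR_vstream_manage, args, cmd);
	}

	/* Allocate a vstream on dev 0 / ts 0, then kick once SQEs are written. */
	static int alloc_and_kick(int dev_fd, unsigned int new_tail)
	{
		struct vstream_args alloc = {0};
		struct vstream_args kick = {0};

		alloc.va_args.ascend.fd = dev_fd;	/* fd of the opened NPU device */
		alloc.va_args.ascend.devId = 0;
		alloc.va_args.ascend.tsId = 0;
		if (vstream_manage(&alloc, ASCEND_VSTREAM_ALLOC))
			return -1;

		/* The kernel filled in sqId/cqId; report the new SQ tail. */
		kick.vk_args.ascend.id = alloc.va_args.ascend.sqId;
		kick.vk_args.ascend.val = new_tail;
		return (int)vstream_manage(&kick, ASCEND_VSTREAM_KICK);
	}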
Signed-off-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Yang Yanchao <yangyanchao6@huawei.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Guan Jing <guanjing6@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 Kconfig                             |   2 +
 drivers/Kconfig                     |   2 +
 drivers/Makefile                    |   1 +
 drivers/xpu/Kconfig                 |   9 +
 drivers/xpu/Makefile                |   1 +
 drivers/xpu/xpu_group.c             | 175 ++++
 fs/proc/base.c                      | 102 ++++-
 include/linux/sched.h               |   3 +
 include/linux/ucc_common.h          |  21 +
 include/linux/ucc_kfd.h             | 110 +++++
 include/linux/ucc_sched.h           |  36 ++
 include/linux/ucc_sched/ucc_sched.h |  71 +++
 include/linux/ucc_ts.h              | 254 +++++++++++
 include/linux/vstream.h             | 123 ++++++
 include/linux/xpu_group.h           |  66 +++
 include/trace/events/ucc_sched.h    | 120 +++++
 init/init_task.c                    |   4 +
 init/main.c                         |   9 +
 kernel/Makefile                     |   2 +
 kernel/sched/Makefile               |   1 +
 kernel/sched/core.c                 |   5 +
 kernel/sched/ucc_sched.c            | 148 +++++++
 kernel/sysctl.c                     |  17 +-
 kernel/ucc/Kconfig                  |  21 +
 kernel/ucc/Makefile                 |   1 +
 kernel/ucc/ascend_vstream.c         | 654 ++++++++++++++++++++++++++++
 kernel/ucc/ascend_vstream.h         |  13 +
 kernel/ucc/vstream.c                |  62 +++
 kernel/ucc_sched/Makefile           |   1 +
 kernel/ucc_sched/core.c             | 591 +++++++++++++++++++++++++
 kernel/ucc_sched/ucc_sched.h        |  43 ++
 31 files changed, 2666 insertions(+), 2 deletions(-)
 create mode 100644 drivers/xpu/Kconfig
 create mode 100644 drivers/xpu/Makefile
 create mode 100644 drivers/xpu/xpu_group.c
 create mode 100644 include/linux/ucc_common.h
 create mode 100644 include/linux/ucc_kfd.h
 create mode 100644 include/linux/ucc_sched.h
 create mode 100644 include/linux/ucc_sched/ucc_sched.h
 create mode 100644 include/linux/ucc_ts.h
 create mode 100644 include/linux/vstream.h
 create mode 100644 include/linux/xpu_group.h
 create mode 100644 include/trace/events/ucc_sched.h
 create mode 100644 kernel/sched/ucc_sched.c
 create mode 100644 kernel/ucc/Kconfig
 create mode 100644 kernel/ucc/Makefile
 create mode 100644 kernel/ucc/ascend_vstream.c
 create mode 100644 kernel/ucc/ascend_vstream.h
 create mode 100644 kernel/ucc/vstream.c
 create mode 100644 kernel/ucc_sched/Makefile
 create mode 100644 kernel/ucc_sched/core.c
 create mode 100644 kernel/ucc_sched/ucc_sched.h
diff --git a/Kconfig b/Kconfig index 48a80beab685..8e558777fb54 100644 --- a/Kconfig +++ b/Kconfig @@ -30,3 +30,5 @@ source "crypto/Kconfig" source "lib/Kconfig"
source "lib/Kconfig.debug" + +source "kernel/ucc/Kconfig" diff --git a/drivers/Kconfig b/drivers/Kconfig index ab4d43923c4d..bd59e9e525ba 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -219,4 +219,6 @@ source "drivers/siox/Kconfig"
source "drivers/slimbus/Kconfig"
+source "drivers/xpu/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index 578f469f72fb..1130b2d92df1 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -186,3 +186,4 @@ obj-$(CONFIG_MULTIPLEXER) += mux/ obj-$(CONFIG_UNISYS_VISORBUS) += visorbus/ obj-$(CONFIG_SIOX) += siox/ obj-$(CONFIG_GNSS) += gnss/ +obj-$(CONFIG_XPU_SCHEDULE) += xpu/ diff --git a/drivers/xpu/Kconfig b/drivers/xpu/Kconfig new file mode 100644 index 000000000000..c4a391d0039d --- /dev/null +++ b/drivers/xpu/Kconfig @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 + +menuconfig XPU_SCHEDULE + bool "xpu schedule" + default n + help + Support xpu schedule, Say Y here if you want support for use + xpu schedule. + diff --git a/drivers/xpu/Makefile b/drivers/xpu/Makefile new file mode 100644 index 000000000000..9edc6dcdd4d0 --- /dev/null +++ b/drivers/xpu/Makefile @@ -0,0 +1 @@ +obj-y += xpu_group.o diff --git a/drivers/xpu/xpu_group.c b/drivers/xpu/xpu_group.c new file mode 100644 index 000000000000..53a598db0615 --- /dev/null +++ b/drivers/xpu/xpu_group.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/xpu_group.h> +#include <linux/rwsem.h> +#include <linux/slab.h> + +extern int ucc_rt_nr_running(struct xcu *cu); +static DECLARE_RWSEM(xpu_group_rwsem); + +static struct xpu_capability xpu_capability_root; + +struct xpu_group __xpu_root = { + .type = XPU_TYPE_ROOT, + .capability = &xpu_capability_root, + + .next_layer = IDR_INIT(next_layer), +}; + +struct xpu_group *xpu_root = &__xpu_root; +EXPORT_SYMBOL(xpu_root); + +int __xpu_group_attach(struct xpu_group *new_group, + struct xpu_group *previous_group) +{ + int id = new_group->id; + + if (id == -1) + id = idr_alloc(&previous_group->next_layer, new_group, + 0, INT_MAX, GFP_KERNEL); + else + id = idr_alloc(&previous_group->next_layer, new_group, + id, id + 1, GFP_KERNEL); + if (id < 0) + return -EEXIST; + + new_group->id = id; + new_group->previous_layer = previous_group; + + return 0; +} + +int xpu_group_attach(struct xpu_group *new_group, + struct xpu_group *previous_group) +{ + int ret; + + down_write(&xpu_group_rwsem); + ret = __xpu_group_attach(new_group, previous_group); + up_write(&xpu_group_rwsem); + return ret; +} +EXPORT_SYMBOL(xpu_group_attach); + +struct xpu_group *xpu_group_alloc_and_attach(struct xpu_group *previous_group, + int id) +{ + struct xpu_group *new = xpu_group_alloc(); + + if (!new) { + pr_err("alloc xpu_group failed\n"); + return NULL; + } + + new->id = id; + + if (!xpu_group_attach(new, previous_group)) + return NULL; + + return new; +} +EXPORT_SYMBOL(xpu_group_alloc_and_attach); + +int __xpu_group_detach(struct xpu_group *group) +{ + idr_remove(&group->previous_layer->next_layer, group->id); + return 0; +} + +int xpu_group_detach(struct xpu_group *group) +{ + int ret; + + down_write(&xpu_group_rwsem); + ret = __xpu_group_detach(group); + up_write(&xpu_group_rwsem); + return ret; +} +EXPORT_SYMBOL(xpu_group_detach); + +struct xpu_group *__xpu_group_find(struct xpu_group *group, int id) +{ + return idr_find(&group->next_layer, id); +} + +struct xpu_group *xpu_group_find(struct xpu_group *group, int id) +{ + struct xpu_group *p; + + p = xpu_group_alloc(); + + down_read(&xpu_group_rwsem); + p = __xpu_group_find(group, id); + up_read(&xpu_group_rwsem); + + return p; +} +EXPORT_SYMBOL(xpu_group_find); + + +struct xpu_group *xpu_idle_group_find(struct xpu_group *group) +{ + struct xpu_group *entry_group; + int id; + + down_read(&xpu_group_rwsem); + 
idr_for_each_entry(&group->next_layer, entry_group, id) { + if (!entry_group->used) { + up_read(&xpu_group_rwsem); + return entry_group; + } + } + up_read(&xpu_group_rwsem); + + return NULL; +} + +int xpu_run(struct xpu_group *group, void *para1, void *para2) +{ + int ret = 0; + + if (group->opt && group->opt->run) + ret = group->opt->run(group, para1, para2); + + return ret; +} + +int xpu_finish(struct xpu_group *group, void *para1, void *para2) +{ + if (group->opt && group->opt->finish) + return group->opt->finish(group, para1, para2); + + return 0; +} + +int xpu_wait(struct xpu_group *group, void *para1, void *para2, void *para3) +{ + if (group->opt && group->opt->wait) + return group->opt->wait(group, para1, para2, para3); + + return 0; +} + +int xpu_complete(struct xpu_group *group, void *para1, void *para2, void *para3) +{ + if (group->opt && group->opt->complete) + return group->opt->complete(group, para1, para2, para3); + + return 0; +} + +struct xpu_group *xpu_group_alloc(void) +{ + struct xpu_group *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!node) + return NULL; + + node->type = XPU_TYPE_CUSTOM; + idr_init(&node->next_layer); + + return node; +} +EXPORT_SYMBOL(xpu_group_alloc); diff --git a/fs/proc/base.c b/fs/proc/base.c index dc9841826264..516eee1ae952 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -770,7 +770,6 @@ static const struct file_operations proc_single_file_operations = { .release = single_release, };
- struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) { struct task_struct *task = get_proc_task(inode); @@ -1546,6 +1545,99 @@ static const struct file_operations proc_pid_sched_operations = {
#endif
+#ifdef CONFIG_XPU_SCHEDULE +static ssize_t ucc_step_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char numbuf[PROC_NUMBUF]; + ssize_t len; + + task = get_proc_task(file_inode(file)); + if (!task) + return -ESRCH; + + len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->ucc_step); + + put_task_struct(task); + + return simple_read_from_buffer(buf, count, ppos, numbuf, len); +} + +static ssize_t ucc_step_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + int err; + unsigned int ucc_step; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = kstrtouint_from_user(buf, count, 0, &ucc_step); + if (err) + return err; + + p->ucc_step = ucc_step; + put_task_struct(p); + + return count; +} + +static const struct file_operations ucc_step_operations = { + .write = ucc_step_write, + .read = ucc_step_read, +}; + +static ssize_t ucc_priority_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char numbuf[PROC_NUMBUF]; + ssize_t len; + + task = get_proc_task(file_inode(file)); + if (!task) + return -ESRCH; + + len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->ucc_priority); + + put_task_struct(task); + + return simple_read_from_buffer(buf, count, ppos, numbuf, len); +} + +static ssize_t ucc_priority_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *p; + int err; + unsigned int ucc_priority; + + p = get_proc_task(inode); + if (!p) + return -ESRCH; + + err = kstrtouint_from_user(buf, count, 0, &ucc_priority); + if (err) + return err; + + p->ucc_priority = ucc_priority; + put_task_struct(p); + + return count; +} + +static const struct file_operations ucc_priority_operations = { + .write = ucc_priority_write, + .read = ucc_priority_read, +}; + +#endif + #ifdef CONFIG_SCHED_AUTOGROUP /* * Print out autogroup related information: @@ -3151,6 +3243,10 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_ASCEND_SHARE_POOL ONE("sp_group", S_IRUGO, proc_sp_group_state), #endif +#ifdef CONFIG_XPU_SCHEDULE + REG("ucc_priority", 0644, ucc_priority_operations), + REG("ucc_step", 0644, ucc_step_operations), +#endif };
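Usage note (illustrative sketch, not part of the diff): the two entries behave like ordinary per-task text files, so a user-space controller can adjust them with plain file I/O; set_ucc_priority() below is a hypothetical helper, and ucc_step is driven the same way.

	#include <stdio.h>
	#include <sys/types.h>

	/* Write /proc/<pid>/ucc_priority for the given task. */
	static int set_ucc_priority(pid_t pid, unsigned int prio)
	{
		char path[64];
		FILE *f;

		snprintf(path, sizeof(path), "/proc/%d/ucc_priority", (int)pid);
		f = fopen(path, "w");
		if (!f)
			return -1;
		fprintf(f, "%u\n", prio);
		return fclose(f);
	}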
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) @@ -3537,6 +3633,10 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_ASCEND_SHARE_POOL ONE("sp_group", S_IRUGO, proc_sp_group_state), #endif +#ifdef CONFIG_XPU_SCHEDULE + REG("ucc_priority", 0644, ucc_priority_operations), + REG("ucc_step", 0644, ucc_step_operations), +#endif };
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/sched.h b/include/linux/sched.h index 8fd8c5b7cdc6..175659be95f3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1281,6 +1281,9 @@ struct task_struct { #if !defined(__GENKSYMS__) #if defined(CONFIG_QOS_SCHED_SMART_GRID) struct sched_grid_qos *grid_qos; +#elif defined(CONFIG_XPU_SCHEDULE) + u32 ucc_priority; + u32 ucc_step; #else KABI_RESERVE(8) #endif diff --git a/include/linux/ucc_common.h b/include/linux/ucc_common.h new file mode 100644 index 000000000000..3875c2226d24 --- /dev/null +++ b/include/linux/ucc_common.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _UCC_COMMON_H +#define _UCC_COMMON_H + +/* + * UCC Print Function + */ +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#define ucc_err(fmt, ...) printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) + +#define ucc_warn(fmt, ...) printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) + +#define ucc_info(fmt, ...) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) + +#define ucc_dbg(fmt, ...) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) + +#endif diff --git a/include/linux/ucc_kfd.h b/include/linux/ucc_kfd.h new file mode 100644 index 000000000000..07eedc2fd5f2 --- /dev/null +++ b/include/linux/ucc_kfd.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef KFD_PRIV_H_INCLUDED +#define KFD_PRIV_H_INCLUDED + +#include <linux/mmu_notifier.h> +#include <linux/types.h> +#include <linux/kref.h> +#include <linux/mutex.h> +#include <linux/sched.h> +#include <linux/mmu_notifier.h> +#include <linux/idr.h> +#include <linux/dma-fence.h> +#include <linux/workqueue.h> +#include <linux/fs.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> + +struct process_queue_manager; +struct kfd_process; +struct kfd_signal_page; + +struct process_queue_manager { + struct kfd_process *process; + struct list_head queues; + unsigned long *queue_slot_bitmap; +}; + +struct kfd_signal_page { + uint64_t *kernel_address; + uint64_t __user *user_address; + bool need_to_free_pages; +}; + +/* Process data */ +struct kfd_process { + struct hlist_node kfd_processes; + void *mm; + struct kref ref; + struct work_struct release_work; + struct mutex mutex; + struct task_struct *lead_thread; + struct mmu_notifier mmu_notifier; +/* TODO: check if use right branch */ + struct rcu_head rcu; + uint16_t pasid; + struct list_head per_device_data; + struct process_queue_manager pqm; + bool is_32bit_user_mode; + struct mutex event_mutex; + struct idr event_idr; + struct kfd_signal_page *signal_page; + size_t signal_mapped_size; + size_t signal_event_count; + bool signal_event_limit_reached; +/* TODO: check if use right branch */ + struct rb_root bo_interval_tree; + void *kgd_process_info; + struct dma_fence *ef; + struct delayed_work eviction_work; + struct delayed_work restore_work; + unsigned int last_eviction_seqno; + unsigned long last_restore_timestamp; + unsigned long last_evict_timestamp; + bool debug_trap_enabled; + uint32_t trap_debug_wave_launch_mode; + struct file *dbg_ev_file; + uint32_t allocated_debug_watch_point_bitmask; + struct kobject *kobj; + struct kobject *kobj_queues; + struct attribute attr_pasid; + bool has_cwsr; + uint64_t exception_enable_mask; + uint64_t exception_status; +}; + +struct kfd_ioctl_create_queue_args { + __u64 ring_base_address; /* to KFD */ + __u64 write_pointer_address; /* from KFD */ + __u64 read_pointer_address; /* from KFD */ + __u64 doorbell_offset; /* from KFD */ + + __u32 ring_size; /* to KFD 
*/ + __u32 gpu_id; /* to KFD */ + __u32 queue_type; /* to KFD */ + __u32 queue_percentage; /* to KFD */ + __u32 queue_priority; /* to KFD */ + __u32 queue_id; /* from KFD */ + + __u64 eop_buffer_address; /* to KFD */ + __u64 eop_buffer_size; /* to KFD */ + __u64 ctx_save_restore_address; /* to KFD */ + __u32 ctx_save_restore_size; /* to KFD */ + __u32 ctl_stack_size; /* to KFD */ +}; + +struct kfd_ioctl_destroy_queue_args { + __u32 queue_id; /* to KFD */ + __u32 pad; +}; + +struct kfd_ioctl_update_queue_args { + __u64 ring_base_address; /* to KFD */ + + __u32 queue_id; /* to KFD */ + __u32 ring_size; /* to KFD */ + __u32 queue_percentage; /* to KFD */ + __u32 queue_priority; /* to KFD */ +}; +#endif diff --git a/include/linux/ucc_sched.h b/include/linux/ucc_sched.h new file mode 100644 index 000000000000..5b170545f7c2 --- /dev/null +++ b/include/linux/ucc_sched.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LINUX_UCC_SCHED_H__ +#define __LINUX_UCC_SCHED_H__ + +#include <linux/list.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/hash.h> +#include <linux/rculist.h> +#include <linux/idr.h> +#include <linux/xpu_group.h> +#include <linux/hashtable.h> +#include <linux/vstream.h> +#include <linux/slab.h> +#include <linux/sched.h> + +#define VRTSQ_RTSQ_HASH_ORDER 6 + +#ifdef CONFIG_XPU_SCHEDULE +int ucc_process_task(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx, + int *sqenum); +int ucc_free_task(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx); +int ucc_wait_cq(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx, + struct devdrv_report_para *arg, int *sqenum); +struct xpu_group *select_sq(struct vstream_info *vstream_info); +int ucc_sched_register_xcu(int dev_id, int ts_id, int cu_num); +void ucc_set_vstream_state(struct vstream_info *vinfo, int state); +void ucc_dequeue_task(struct vstream_info *vInfo); +int ucc_rt_nr_running(struct xcu *cu); +struct xcu *ucc_get_xcu_by_id(int cu_id); +int ucc_xcu_is_sched(int cu_id); +void ucc_dump_statistics_info(struct ucc_se *se); +#endif + +#endif diff --git a/include/linux/ucc_sched/ucc_sched.h b/include/linux/ucc_sched/ucc_sched.h new file mode 100644 index 000000000000..6edd8930e09e --- /dev/null +++ b/include/linux/ucc_sched/ucc_sched.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) Huawei Technologies Co., Ltd. 2019. All rights reserved. 
+ * Author: Huawei OS Kernel Lab + * Create: Mon Jan 30 14:29:19 2023 + */ + +#ifndef __LINUX_UCC_SCHED_USCHED_H__ +#define __LINUX_UCC_SCHED_USCHED_H__ + +enum ucc_se_state { + SE_PREPARE, + SE_READY, + SE_RUNNING, + SE_BLOCK, + SE_DEAD, +}; + +enum ucc_se_flag { + UCC_TIF_NONE, + UCC_TIF_PREEMPT, + UCC_TIF_BALANCE, +}; + +enum ucc_se_prio { + UCC_PRIO_HIGH, + UCC_PRIO_LOW, +}; + +enum ucc_se_step { + UCC_STEP_SLOW = 1, + UCC_STEP_FAST = 10, +}; + +struct ucc_statistics { + u64 wait_start; + u64 wait_max; + u64 wait_count; + u64 wait_sum; + + u64 preempt_start; + u64 preempt_max; + u64 preempt_count; + u64 preempt_sum; + + u64 kernel_sum; + u64 timeout_count; + + u64 run_start; + u64 run_max; + u64 run_count; + u64 run_sum; +}; + +struct ucc_se { + int on_cu; + struct list_head run_list; + enum ucc_se_state state; + enum ucc_se_flag flag; + enum ucc_se_prio prio; + enum ucc_se_step step; + raw_spinlock_t se_lock; + struct ucc_statistics statistics; + int is_timeout; +}; + +int ucc_sched_init(void); +int ucc_schedule(int cu_id); +int ucc_wake_up(struct ucc_se *se); + +#endif diff --git a/include/linux/ucc_ts.h b/include/linux/ucc_ts.h new file mode 100644 index 000000000000..7280ccca1059 --- /dev/null +++ b/include/linux/ucc_ts.h @@ -0,0 +1,254 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef TS_H +#define TS_H + +#include <linux/file.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/fs.h> + +#define DEVDRV_MAX_SQ_DEPTH (1024) +#define DEVDRV_SQ_SLOT_SIZE (64) + +#define DEVDRV_MAX_SQ_NUM (512 - 1) +#define DEVDRV_MAX_CQ_NUM (352 - 1) + +#define DEVDRV_MAX_TS_NUM (1) + +#define REMAP_ALIGN_SIZE (64 * 1024) +#define REMAP_ALIGN_MASK (~(REMAP_ALIGN_SIZE - 1)) +#define REMAP_ALIGN(x) (((x) + REMAP_ALIGN_SIZE - 1) & \ + REMAP_ALIGN_MASK) + +#define DEVDRV_DB_SPACE_SIZE (1024 * 4096) + +#define SQCQ_RTS_INFO_LENGTH 5 +#define SQCQ_RESV_LENGTH 8 + +#define DEVDRV_CBCQ_MAX_GID 128 + +enum phy_sqcq_type { + NORMAL_SQCQ_TYPE = 0, + CALLBACK_SQCQ_TYPE, + LOGIC_SQCQ_TYPE, + SHM_SQCQ_TYPE, + DFX_SQCQ_TYPE, + TS_SQCQ_TYPE, + KERNEL_SQCQ_TYPE, +}; + +struct notifier_operations { + int (*notifier_call)(struct file *file_op, unsigned long mode); +}; + +#define MAX_DEVICE_COUNT 64 + +struct davinci_intf_stru { + atomic_t count; + struct mutex dmutex; + struct cdev cdev; + struct device *device; + struct list_head process_list; + struct list_head module_list; + unsigned int device_status[MAX_DEVICE_COUNT]; + cpumask_var_t cpumask; +}; + +#define DAVINIC_MODULE_NAME_MAX 256 +struct davinci_intf_private_stru { + char module_name[DAVINIC_MODULE_NAME_MAX]; + unsigned int device_id; + pid_t owner_pid; + int close_flag; + atomic_t work_count; + int release_status; + struct mutex fmutex; + const struct file_operations fops; + struct notifier_operations notifier; + struct davinci_intf_stru *device_cb; + struct file priv_filep; + unsigned int free_type; +}; + +enum sqcq_alloc_status { + SQCQ_INACTIVE = 0, + SQCQ_ACTIVE +}; + +struct devdrv_ts_sq_info { + enum phy_sqcq_type type; + pid_t tgid; + u32 head; + u32 tail; + u32 credit; + u32 index; + int uio_fd; + + u8 *uio_addr; + int uio_size; + + enum sqcq_alloc_status alloc_status; + u64 send_count; + + void *sq_sub; +}; + +struct devdrv_ts_cq_info { + enum phy_sqcq_type type; + pid_t tgid; + u32 vfid; + + u32 head; + u32 tail; + u32 release_head; /* runtime read cq head value */ + u32 index; + u32 phase; + u32 int_flag; + + int uio_fd; + + u8 *uio_addr; + int uio_size; + + enum sqcq_alloc_status alloc_status; + u64 receive_count; + + void 
*cq_sub; + + void (*complete_handle)(struct devdrv_ts_cq_info *cq_info); + + u8 slot_size; +}; + +#define DEVDRV_SQ_INFO_OCCUPY_SIZE \ + (sizeof(struct devdrv_ts_sq_info) * DEVDRV_MAX_SQ_NUM) +#define DEVDRV_CQ_INFO_OCCUPY_SIZE \ + (sizeof(struct devdrv_ts_cq_info) * DEVDRV_MAX_CQ_NUM) + +#define DEVDRV_MAX_INFO_SIZE \ + (DEVDRV_SQ_INFO_OCCUPY_SIZE + DEVDRV_CQ_INFO_OCCUPY_SIZE) +#define DEVDRV_VM_SQ_MEM_OFFSET 0 +#define DEVDRV_VM_SQ_SLOT_SIZE \ + REMAP_ALIGN(DEVDRV_MAX_SQ_DEPTH * DEVDRV_SQ_SLOT_SIZE) +#define DEVDRV_VM_SQ_MEM_SIZE \ + (DEVDRV_VM_SQ_SLOT_SIZE * DEVDRV_MAX_SQ_NUM) + +#define DEVDRV_VM_INFO_MEM_OFFSET \ + (DEVDRV_VM_SQ_MEM_OFFSET + DEVDRV_VM_SQ_MEM_SIZE) +#define DEVDRV_VM_INFO_MEM_SIZE REMAP_ALIGN(DEVDRV_MAX_INFO_SIZE) + +#define DEVDRV_VM_DB_MEM_OFFSET \ + (DEVDRV_VM_INFO_MEM_OFFSET + DEVDRV_VM_INFO_MEM_SIZE) +#define DEVDRV_VM_DB_MEM_SIZE REMAP_ALIGN(DEVDRV_DB_SPACE_SIZE) + +#define DEVDRV_VM_CQ_MEM_OFFSET \ + (DEVDRV_VM_DB_MEM_OFFSET + DEVDRV_VM_DB_MEM_SIZE) + +enum tsdrv_id_type { + TSDRV_STREAM_ID, + TSDRV_NOTIFY_ID, + TSDRV_MODEL_ID, + TSDRV_EVENT_SW_ID, /* should use for event alloc/free/inquiry res_num*/ + TSDRV_EVENT_HW_ID, + TSDRV_IPC_EVENT_ID, + TSDRV_SQ_ID, + TSDRV_CQ_ID, + TSDRV_PCQ_ID, + TSDRV_MAX_ID, +}; + +#define TSDRV_CQ_REUSE 0x00000001 +#define TSDRV_SQ_REUSE 0x00000002 + +struct normal_alloc_sqcq_para { + uint32_t fd; + uint32_t tsId; + uint32_t devId; + uint32_t sqeSize; + uint32_t cqeSize; + uint32_t sqeDepth; + uint32_t cqeDepth; + uint32_t grpId; + uint32_t flag; + uint32_t sqId; + uint32_t cqId; + uint32_t priority; + uint32_t info[SQCQ_RTS_INFO_LENGTH]; + uint32_t res[SQCQ_RESV_LENGTH]; +}; + +struct normal_free_sqcq_para { + uint32_t tsId; + uint32_t flag; + uint32_t sqId; + uint32_t cqId; + uint32_t res[SQCQ_RESV_LENGTH]; +}; + +struct tsdrv_sqcq_data_para { + uint32_t id; + uint32_t val; +}; + +struct devdrv_report_para { + int timeout; + u32 cq_tail; + u32 cq_id; +}; + +struct tsdrv_ts_id_ctx { + u32 id_num; + struct list_head id_list; + spinlock_t id_lock; +}; +struct tsdrv_ts_ctx { + u32 tsid; + atomic_t status; + u32 send_count; + u64 receive_count; + + int32_t cq_tail_updated; + wait_queue_head_t report_wait; + + struct work_struct recycle_work; + + wait_queue_head_t cbcq_wait[DEVDRV_CBCQ_MAX_GID]; + + void *shm_sqcq_ctx; + void *logic_sqcq_ctx; + void *sync_cb_sqcq_ctx; // mini callback + + struct tsdrv_ts_id_ctx id_ctx[TSDRV_MAX_ID]; + + /* only used by vm */ + u32 vcqid; + u32 wait_queue_inited; + u32 cq_report_status; + int32_t cq_tail; + spinlock_t ctx_lock; + + u32 recycle_cbsqcq_num; // min callback +}; + +//Context Delivers +struct tsdrv_ctx { + u32 ctx_index; + atomic_t status; + atomic_t type; + pid_t tgid; + pid_t pid; + int32_t ssid; + u32 thread_bind_irq_num; + u32 mirror_ctx_status; + struct rb_node node; + struct list_head list; + struct vm_area_struct *vma[DEVDRV_MAX_TS_NUM]; + spinlock_t ctx_lock; + struct mutex mutex_lock; + struct tsdrv_ts_ctx ts_ctx[DEVDRV_MAX_TS_NUM]; + + u64 unique_id; /* mark unique processes for vm */ +}; + +#endif diff --git a/include/linux/vstream.h b/include/linux/vstream.h new file mode 100644 index 000000000000..14d799296053 --- /dev/null +++ b/include/linux/vstream.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VSTREAM_H +#define _LINUX_VSTREAM_H + +#include <linux/ucc_kfd.h> +#include <linux/ucc_sched/ucc_sched.h> +#include <linux/ucc_ts.h> + +#define MAX_VSTREAM_SIZE 1024 +#define MAX_VSTREAM_SLOT_SIZE 64 +#define MAX_CQ_SLOT_SIZE 12 + +/* + * XXX_VSTREAM_ALLOC: 
alloc a vstream, buffer for tasks + * XXX_VSTREAM_FREE: free a vstream + * XXX_VSTREAM_KICK: there are tasks to be executed in the vstream + * XXX_VSTREAM_UPDATE: update information for an existing vstream + * XXX_CALLBACK_VSTREAM_WAIT: waiting for callback tasks + * XXX_CALLBACK_VSTREAM_KICK: callback tasks have been executed + * + * NOTE: Callback vstream is only for Ascend now. We do not need + * CALLBACK_VSTREAM_ALLOC because the callback vstream will be + * alloced with vstream on Ascend. + */ +enum VSTREAM_COMMAND { + /* vstream command for Ascend */ + ASCEND_VSTREAM_ALLOC = 0, + ASCEND_VSTREAM_FREE, + ASCEND_VSTREAM_KICK, + ASCEND_CALLBACK_VSTREAM_WAIT, + ASCEND_CALLBACK_VSTREAM_KICK, + ASCEND_VSTREAM_GET_HEAD, + ASCEND_MAX_COMMAND, + + /* vstream command for amdgpu */ + AMDGPU_VSTREAM_ALLOC = ASCEND_MAX_COMMAND + 1, + AMDGPU_VSTREAM_FREE, + AMDGPU_VSTREAM_KICK, + AMDGPU_VSTREAM_UPDATE, + AMDGPU_MAX_COMMAND, +}; + +struct vstream_alloc_args { + union { + /* For Ascend */ + struct normal_alloc_sqcq_para ascend; + /* For amdgpu */ + struct kfd_ioctl_create_queue_args amdgpu; + }; +}; + +struct vstream_free_args { + union { + /* For Ascend */ + struct normal_free_sqcq_para ascend; + /* For amdgpu */ + struct kfd_ioctl_destroy_queue_args amdgpu; + }; +}; + +struct vstream_kick_args { + union { + /* For Ascend */ + struct tsdrv_sqcq_data_para ascend; + /* For amdgpu */ + }; +}; + +struct vstream_args { + union { + struct vstream_alloc_args va_args; + struct vstream_free_args vf_args; + struct vstream_kick_args vk_args; + struct kfd_ioctl_update_queue_args vu_args; + struct tsdrv_sqcq_data_para vh_args; + struct devdrv_report_para cvw_args; + struct tsdrv_sqcq_data_para cvk_args; + }; +}; + +struct vstream_node { + uint32_t id; + uint32_t head; + uint32_t tail; + uint32_t credit; + void *vstreamData; + raw_spinlock_t spin_lock; +}; + +struct vstream_id { + uint32_t vstreamId; + struct list_head list; +}; + +struct vcq_map_table { + uint32_t vcqId; + struct vstream_node *vcqNode; + struct list_head vstreamId_list; +}; + +struct vstream_info { + uint32_t vstreamId; //key + uint32_t vcqId; + uint32_t devId; + uint32_t tsId; + struct ucc_se se; + //TODO::check name + struct vstream_node *vsqNode; + struct vstream_node *vcqNode; + void *privdata; + uint32_t info[SQCQ_RTS_INFO_LENGTH]; + int cu_id; + struct xpu_group *group; + int send_cnt; + struct task_struct *p; +}; + +typedef int vstream_manage_t(struct vstream_args *arg); +int update_vstream_head(struct vstream_info *vstream_info, int num); +struct vstream_info *vstream_get_info(uint32_t id); +bool vstream_have_kernel(struct ucc_se *se); + +#endif /* _LINUX_VSTREAM_H */ diff --git a/include/linux/xpu_group.h b/include/linux/xpu_group.h new file mode 100644 index 000000000000..5e3a96b15f9c --- /dev/null +++ b/include/linux/xpu_group.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __XPU_GROUP_H__ +#define __XPU_GROUP_H__ +#include <linux/idr.h> + +struct xpu_group; +struct xcu; + +enum xpu_type { + XPU_TYPE_ROOT, + XPU_TYPE_TASK_QUEUE, + XPU_TYPE_NPU_310, + XPU_TYPE_CUSTOM, +}; + +enum xpu_capability_type { + TYPE_1, + XPU_CAPABILITY_TYPE_NR, +}; + +struct xpu_capability { + unsigned long capacities[XPU_CAPABILITY_TYPE_NR]; +}; + +struct xpu_operation { + int (*run)(struct xpu_group *group, void *para1, void *para2); + int (*finish)(struct xpu_group *group, void *para1, void *para2); + int (*wait)(struct xpu_group *group, void *para1, void *para2, + void *para3); + int (*complete)(struct xpu_group *group, void *para1, 
void *para2, + void *para3); +}; + +struct xpu_group { + int id; + enum xpu_type type; + struct xpu_capability *capability; + + struct xpu_group *previous_layer; + struct idr next_layer; + + struct xpu_operation *opt; + + int used; + + void *data; +}; + +extern struct xpu_group *xpu_root; + +#ifdef CONFIG_XPU_SCHEDULE +int xpu_group_attach(struct xpu_group *new_group, + struct xpu_group *previous_group); +int xpu_group_detach(struct xpu_group *group); +struct xpu_group *xpu_group_find(struct xpu_group *group, int id); +struct xpu_group *xpu_idle_group_find(struct xpu_group *group); +struct xpu_group *xpu_group_alloc(void); +struct xpu_group *xpu_group_alloc_and_attach(struct xpu_group *previous_group, + int id); +int xpu_run(struct xpu_group *group, void *para1, void *para2); +int xpu_finish(struct xpu_group *group, void *para1, void *para2); +int xpu_wait(struct xpu_group *group, void *para1, void *para2, void *para3); +#endif + +#endif diff --git a/include/trace/events/ucc_sched.h b/include/trace/events/ucc_sched.h new file mode 100644 index 000000000000..104a39b2f41c --- /dev/null +++ b/include/trace/events/ucc_sched.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM ucc_sched + +#if !defined(_TRACE_UCC_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_UCC_SCHED_H + +#include <linux/tracepoint.h> +#include <linux/binfmts.h> + +/* + * XXX the below ucc_sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE + * adding ucc_sched_stat support to SCHED_FIFO/RR would be welcome. + */ +DECLARE_EVENT_CLASS(ucc_sched_stat_template, + + TP_PROTO(struct vstream_info *vinfo, u64 delay), + + TP_ARGS(vinfo, delay), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(int, cu_id) + __field(u32, vstreamId) + __field(u32, prio) + __field(u64, delay) + ), + + TP_fast_assign( + memcpy(__entry->comm, vinfo->p->comm, TASK_COMM_LEN); + __entry->pid = vinfo->p->pid; + __entry->cu_id = vinfo->cu_id; + __entry->vstreamId = vinfo->vstreamId; + __entry->prio = vinfo->p->ucc_priority; + __entry->delay = delay; + ), + + TP_printk("comm=%s pid=%d cu_id=%d vstreamId %u prio %u, delay=%llu [ns]", + __entry->comm, __entry->pid, + __entry->cu_id, __entry->vstreamId, __entry->prio, + (unsigned long long)__entry->delay) +); + +DECLARE_EVENT_CLASS(ucc_sched_stat_template_1, + + TP_PROTO(struct vstream_info *vinfo, u64 delay, int is_timeout), + + TP_ARGS(vinfo, delay, is_timeout), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(int, cu_id) + __field(u32, vstreamId) + __field(u64, delay) + __field(int, is_timeout) + ), + + TP_fast_assign( + memcpy(__entry->comm, vinfo->p->comm, TASK_COMM_LEN); + __entry->pid = vinfo->p->pid; + __entry->cu_id = vinfo->cu_id; + __entry->vstreamId = vinfo->vstreamId; + __entry->delay = delay; + __entry->is_timeout = is_timeout; + ), + + TP_printk("comm=%s pid=%d cu_id=%d vstreamId %u, delay=%llu [ns]:%d", + __entry->comm, __entry->pid, + __entry->cu_id, __entry->vstreamId, + (unsigned long long)__entry->delay, + __entry->is_timeout) +); +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). 
+ */ +DEFINE_EVENT(ucc_sched_stat_template, ucc_sched_stat_wait, + TP_PROTO(struct vstream_info *vinfo, u64 delay), + TP_ARGS(vinfo, delay)); + +DEFINE_EVENT(ucc_sched_stat_template, ucc_sched_stat_preempt, + TP_PROTO(struct vstream_info *vinfo, u64 delay), + TP_ARGS(vinfo, delay)); + +DEFINE_EVENT(ucc_sched_stat_template_1, ucc_sched_stat_run, + TP_PROTO(struct vstream_info *vinfo, u64 delay, int is_timeout), + TP_ARGS(vinfo, delay, is_timeout)); + +TRACE_EVENT(ucc_sched_switch, + + TP_PROTO(int preempt, + struct vstream_info *next), + + TP_ARGS(preempt, next), + + TP_STRUCT__entry( + __field(int, cu_id) + __field(u32, next_vstreamId) + __field(u32, next_prio) + __field(int, preempt) + ), + + TP_fast_assign( + __entry->cu_id = next->cu_id; + __entry->next_vstreamId = next->vstreamId; + __entry->next_prio = next->p->ucc_priority; + __entry->preempt = preempt; + ), + + TP_printk("cu_id=%d next_vstreamId %u next_prio %u preempt[%d]", + __entry->cu_id, + __entry->next_vstreamId, __entry->next_prio, + __entry->preempt) +); +#endif /* _TRACE_UCC_SCHED_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/init/init_task.c b/init/init_task.c index b312a045f4b9..c1a78b4da368 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -188,6 +188,10 @@ struct task_struct init_task .fork_pid = 0, }, #endif +#ifdef CONFIG_XPU_SCHEDULE + .ucc_priority = 1, + .ucc_step = 1, +#endif }; EXPORT_SYMBOL(init_task);
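For orientation, an illustrative driver-side sketch (not taken from this patch): a device driver is expected to populate the hierarchy that select_sq() later walks (xpu_root -> XPU_TYPE_NPU_310 -> device id -> ts id -> per-XCU leaf groups) and to hook its xpu_operation callbacks into the leaves; my_npu_register_xcus() and my_npu_xpu_ops are hypothetical names.

	#include <linux/errno.h>
	#include <linux/xpu_group.h>
	#include <linux/ucc_sched.h>

	static struct xpu_operation my_npu_xpu_ops;	/* run/finish/wait/complete callbacks */

	static int my_npu_register_xcus(int dev_id, int ts_id, int num_xcu)
	{
		struct xpu_group *type, *dev, *ts;
		int i;

		type = xpu_group_find(xpu_root, XPU_TYPE_NPU_310);
		if (!type)
			type = xpu_group_alloc_and_attach(xpu_root, XPU_TYPE_NPU_310);
		if (!type)
			return -ENOMEM;

		dev = xpu_group_alloc_and_attach(type, dev_id);
		ts = dev ? xpu_group_alloc_and_attach(dev, ts_id) : NULL;
		if (!ts)
			return -ENOMEM;

		for (i = 0; i < num_xcu; i++) {
			struct xpu_group *cu = xpu_group_alloc_and_attach(ts, i);

			if (!cu)
				return -ENOMEM;
			cu->opt = &my_npu_xpu_ops;
		}

		/* Let the ucc scheduler start serving these XCUs. */
		return ucc_sched_register_xcu(dev_id, ts_id, num_xcu);
	}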
diff --git a/init/main.c b/init/main.c index 50af60ff0ef6..7ed2e67d7011 100644 --- a/init/main.c +++ b/init/main.c @@ -66,6 +66,7 @@ #include <linux/kthread.h> #include <linux/sched.h> #include <linux/sched/init.h> +#include <linux/ucc_sched/ucc_sched.h> #include <linux/signal.h> #include <linux/idr.h> #include <linux/kgdb.h> @@ -599,6 +600,14 @@ asmlinkage __visible void __init start_kernel(void) * time - but meanwhile we still have a functioning scheduler. */ sched_init(); + +#ifdef CONFIG_XPU_SCHEDULE + /* + * Set up the ucc scheduler, to enable heterogeneous scheduling. + */ + ucc_sched_init(); +#endif + /* * Disable preemption - early bootup scheduling is extremely * fragile until we cpu_idle() for the first time. diff --git a/kernel/Makefile b/kernel/Makefile index d0482bd27ba4..273fe481d303 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -43,6 +43,8 @@ obj-y += irq/ obj-y += rcu/ obj-y += livepatch/ obj-y += dma/ +obj-$(CONFIG_XPU_SCHEDULE) += ucc_sched/ +obj-$(CONFIG_XPU_UCC) += ucc/
obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 0612af002ae5..0f659b2ad251 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -19,6 +19,7 @@ endif obj-y += core.o loadavg.o clock.o cputime.o obj-y += idle.o fair.o rt.o deadline.o obj-y += wait.o wait_bit.o swait.o completion.o +obj-$(CONFIG_XPU_SCHEDULE) += ucc_sched.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 67bda877bfa8..89348097b29a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2316,6 +2316,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) */ p->prio = current->normal_prio;
+#ifdef CONFIG_XPU_SCHEDULE + p->ucc_priority = current->ucc_priority; + p->ucc_step = current->ucc_step; +#endif + /* * Revert to default priority/policy on fork if requested. */ diff --git a/kernel/sched/ucc_sched.c b/kernel/sched/ucc_sched.c new file mode 100644 index 000000000000..646f120c3c34 --- /dev/null +++ b/kernel/sched/ucc_sched.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/ucc_sched.h> +#include <linux/ucc_common.h> + +static DEFINE_MUTEX(revmap_mutex); + +static DEFINE_HASHTABLE(vrtsq_rtsq_revmap, VRTSQ_RTSQ_HASH_ORDER); + +/** + * @group: value for this entry. + * @hash_node : hash node list. + * @ + */ +struct vsqce_idx_revmap_data { + unsigned int vrtsdId; + struct xpu_group *group; + struct hlist_node hash_node; +}; + +struct xpu_group *select_sq(struct vstream_info *vstream_info) +{ + struct vsqce_idx_revmap_data *revmap_data; + + /* find history */ + mutex_lock(&revmap_mutex); + hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node, + (unsigned long)vstream_info->vstreamId) { + if (revmap_data && revmap_data->group) { + mutex_unlock(&revmap_mutex); + return revmap_data->group; + } + } + mutex_unlock(&revmap_mutex); + + revmap_data = kzalloc(sizeof(struct vsqce_idx_revmap_data), GFP_KERNEL); + if (revmap_data == NULL) + return NULL; + /* find XPU group */ + revmap_data->group = xpu_group_find(xpu_root, XPU_TYPE_NPU_310); + if (revmap_data->group == NULL) { + ucc_err("find XPU group is failed.\n"); + return NULL; + } + /* find device group */ + revmap_data->group = xpu_group_find(revmap_data->group, + vstream_info->devId); + if (revmap_data->group == NULL) { + ucc_err("find device group is failed.\n"); + return NULL; + } + /* find tsgroup */ + revmap_data->group = xpu_group_find(revmap_data->group, + vstream_info->tsId); + if (revmap_data->group == NULL) { + ucc_err("find ts group is failed.\n"); + return NULL; + } + + /* select idle xcu */ + revmap_data->group = xpu_idle_group_find(revmap_data->group); + if (revmap_data->group == NULL) { + ucc_err("find rtsq group is failed.\n"); + return NULL; + } + + revmap_data->vrtsdId = vstream_info->vstreamId; + /* set group used : 1 */ + revmap_data->group->used = 1; + + mutex_lock(&revmap_mutex); + hash_add(vrtsq_rtsq_revmap, &revmap_data->hash_node, + (unsigned long)vstream_info->vstreamId); + mutex_unlock(&revmap_mutex); + return revmap_data->group; +} + +int ucc_process_task(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx, + int *sqenum) +{ + struct xpu_group *group = NULL; + + if (vstream_info == NULL) { + ucc_err("vsqcq_info is NULL\n"); + return -1; + } + + group = select_sq(vstream_info); + if (group == NULL) { + ucc_err("find group is failed.\n"); + return -1; + } + /* send sqe */ + *sqenum = xpu_run(group, vstream_info, ctx); + + return 0; +} +EXPORT_SYMBOL(ucc_process_task); + +int ucc_free_task(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx) +{ + struct vsqce_idx_revmap_data *revmap_data; + + ucc_dequeue_task(vstream_info); + + while (!ucc_xcu_is_sched(vstream_info->cu_id)) + schedule_timeout_interruptible(10); + + ucc_dump_statistics_info(&vstream_info->se); + + mutex_lock(&revmap_mutex); + hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node, + (unsigned long)vstream_info->vstreamId) { + if (revmap_data && + revmap_data->vrtsdId == vstream_info->vstreamId && + revmap_data->group) { + xpu_finish(revmap_data->group, vstream_info, ctx); + /* set group unused : 0 */ + revmap_data->group->used = 0; + hash_del(&revmap_data->hash_node); + 
kfree(revmap_data); + revmap_data = NULL; + break; + } + } + mutex_unlock(&revmap_mutex); + + return 0; +} +EXPORT_SYMBOL(ucc_free_task); + +int ucc_wait_cq(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx, + struct devdrv_report_para *arg, int *cqenum) +{ + struct vsqce_idx_revmap_data *revmap_data; + + hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node, + (unsigned long)vstream_info->vstreamId) { + if (revmap_data && + revmap_data->vrtsdId == vstream_info->vstreamId && + revmap_data->group) + *cqenum = xpu_wait(revmap_data->group, vstream_info, + ctx, arg); + } + + return 0; +} +EXPORT_SYMBOL(ucc_wait_cq); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c7064f67f4a5..aeceb9e9c927 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -117,6 +117,10 @@ extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max; extern int sysctl_nr_trim_pages; #endif
+#ifdef CONFIG_XPU_SCHEDULE +extern int sysctl_ucc_sched_rcv_timeout_ms; +#endif + /* Constants used for minimum and maximum */ #ifdef CONFIG_LOCKUP_DETECTOR static int sixty = 60; @@ -139,7 +143,7 @@ static int one_thousand = 1000; #ifdef CONFIG_PRINTK static int ten_thousand = 10000; #endif -#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_QOS_SCHED_SMART_GRID) +#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_QOS_SCHED_SMART_GRID) || defined(CONFIG_XPU_SCHEDULE) static int hundred_thousand = 100000; #endif #ifdef CONFIG_PERF_EVENTS @@ -352,6 +356,17 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, +#ifdef CONFIG_XPU_SCHEDULE + { + .procname = "ucc_sched_rcv_timeout", + .data = &sysctl_ucc_sched_rcv_timeout_ms, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &hundred_thousand, + }, +#endif #ifdef CONFIG_SCHED_DEBUG { .procname = "sched_min_granularity_ns", diff --git a/kernel/ucc/Kconfig b/kernel/ucc/Kconfig new file mode 100644 index 000000000000..279c11f702b1 --- /dev/null +++ b/kernel/ucc/Kconfig @@ -0,0 +1,21 @@ +# +# TODO: add description +# + +config XPU_UCC + bool "ucc" + default n + depends on ARM64 || X86 + help + Say Y here if you want support for using XPU UCC. XPU UCC + is helpfer for XPU schedule. The full name of UCC is + Universal Converged Computing. + + +config XPU_VSTREAM + bool "virtual submit queue and complete queue" + default n + depends on XPU_UCC + help + virtual Submit Queue and Complete Queue support for XPU. + It is used to help XPU schedule. diff --git a/kernel/ucc/Makefile b/kernel/ucc/Makefile new file mode 100644 index 000000000000..0e2735d2aef4 --- /dev/null +++ b/kernel/ucc/Makefile @@ -0,0 +1 @@ +obj-y += ascend_vstream.o vstream.o diff --git a/kernel/ucc/ascend_vstream.c b/kernel/ucc/ascend_vstream.c new file mode 100644 index 000000000000..d248aaff7639 --- /dev/null +++ b/kernel/ucc/ascend_vstream.c @@ -0,0 +1,654 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/syscalls.h> +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/vstream.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/ucc_common.h> +#include <linux/ucc_sched.h> + +DEFINE_MUTEX(vstreamId_Bitmap_mutex); +static DECLARE_BITMAP(vstreamIdBitmap, DEVDRV_MAX_SQ_NUM); + +static DEFINE_MUTEX(vcqId_Bitmap_mutex); +static DECLARE_BITMAP(vcqIdBitmap, DEVDRV_MAX_CQ_NUM); + +static DEFINE_MUTEX(revmap_mutex); + +static struct vstream_info *vstreamContainer[DEVDRV_MAX_SQ_NUM]; +static struct vcq_map_table *vsqcqMapTable[DEVDRV_MAX_CQ_NUM]; + +#define MAX_SQ_SIZE (MAX_VSTREAM_SIZE * MAX_VSTREAM_SLOT_SIZE) +#define MAX_CQ_SIZE (MAX_VSTREAM_SIZE * MAX_CQ_SLOT_SIZE) + +#define SQ_USER_ADDR_OFFSET(id) ((unsigned long)REMAP_ALIGN(MAX_SQ_SIZE) * id) +#define CQ_USER_ADDR_OFFSET(id) ((unsigned long)REMAP_ALIGN(MAX_CQ_SIZE) * id) + +#define SQ_VSTREAM_DATA(id) vstreamContainer[id]->vsqNode->vstreamData +#define CQ_VSTREAM_DATA(id) vstreamContainer[id]->vcqNode->vstreamData + +static struct tsdrv_ctx *get_ctx(int fd) +{ + struct fd f; + struct davinci_intf_private_stru *file_private_data; + struct tsdrv_ctx *ctx = NULL; + + f = fdget(fd); + if (!f.file) + goto out; + + file_private_data = f.file->private_data; + if (!file_private_data) + goto out; + + ctx = file_private_data->priv_filep.private_data; + +out: + fdput(f); + return ctx; +} + +static struct vcq_map_table 
*vstream_get_map_table(uint32_t id) +{ + return vsqcqMapTable[id]; +} + +static void free_vstreamId(uint32_t vstreamId) +{ + mutex_lock(&vstreamId_Bitmap_mutex); + clear_bit(vstreamId, vstreamIdBitmap); + mutex_unlock(&vstreamId_Bitmap_mutex); +} + +static void free_vcqId(uint32_t vcqId, uint32_t flag) +{ + mutex_lock(&vcqId_Bitmap_mutex); + if (!(flag & TSDRV_CQ_REUSE)) + clear_bit(vcqId, vcqIdBitmap); + mutex_unlock(&vcqId_Bitmap_mutex); +} + +static void vstream_free_map_table(uint32_t vcqId, uint32_t vstreamId, + uint32_t flag) +{ + struct vcq_map_table *freeTable = NULL; + struct vstream_id *vstreamIdNode = NULL; + + freeTable = vstream_get_map_table(vcqId); + if (!freeTable) { + ucc_err("No map found for vcq:%d.\n", vcqId); + return; + } + + list_for_each_entry(vstreamIdNode, &freeTable->vstreamId_list, list) { + if (vstreamIdNode->vstreamId == vstreamId) { + list_del(&vstreamIdNode->list); + kfree(vstreamIdNode); + break; + } + } + if (!(flag & TSDRV_CQ_REUSE)) { + kfree(freeTable->vcqNode->vstreamData); + kfree(freeTable->vcqNode); + kfree(freeTable); + } +} + +static void vstream_alloc_ucc_se(struct ucc_se *se) +{ + memset(&se->statistics, 0, sizeof(se->statistics)); + se->on_cu = 0; + se->state = SE_PREPARE; + se->flag = UCC_TIF_NONE; + se->prio = UCC_PRIO_HIGH; + se->step = UCC_STEP_SLOW; + raw_spin_lock_init(&se->se_lock); +} + +static struct vstream_info *vstream_create_info(struct tsdrv_ctx *ctx, + struct normal_alloc_sqcq_para *para) +{ + struct vcq_map_table *mapTable = NULL; + + struct vstream_info *vstream = kzalloc(sizeof(struct vstream_info), + GFP_KERNEL); + if (!vstream) + return NULL; + + (void)memcpy(vstream->info, para->info, + sizeof(uint32_t) * SQCQ_RTS_INFO_LENGTH); + + vstream->privdata = ctx; + vstream->tsId = para->tsId; + vstream->vstreamId = para->sqId; + vstream->vcqId = para->cqId; + + mapTable = vstream_get_map_table(vstream->vcqId); + if (!mapTable || !mapTable->vcqNode) { + ucc_err("No map found for vcqId:%d.\n", vstream->vcqId); + goto free_vstream; + } + vstream->vcqNode = mapTable->vcqNode; + vstream->vsqNode = kmalloc(sizeof(struct vstream_node), GFP_KERNEL); + if (!vstream->vsqNode) { + ucc_err("Failed to alloc memory for vsqNode:%d.\n", + vstream->vstreamId); + goto free_vstream; + } + vstream->vsqNode->vstreamData = kmalloc(MAX_SQ_SIZE, GFP_KERNEL); + if (!vstream->vsqNode->vstreamData) + goto free_vsqNode; + vstream->vsqNode->id = vstream->vstreamId; + vstream->vsqNode->head = 0; + vstream->vsqNode->tail = 0; + vstream->vsqNode->credit = MAX_VSTREAM_SIZE; + raw_spin_lock_init(&vstream->vsqNode->spin_lock); + vstream->send_cnt = 0; + vstream->p = current; + vstream_alloc_ucc_se(&vstream->se); + + return vstream; + +free_vsqNode: + kfree(vstream->vsqNode); + +free_vstream: + kfree(vstream); + return NULL; +} + +struct vstream_info *vstream_get_info(uint32_t id) +{ + return vstreamContainer[id]; +} + +static void vstream_free_info(uint32_t id) +{ + struct vstream_info *freeInfo = vstream_get_info(id); + + ucc_set_vstream_state(freeInfo, SE_DEAD); + + if (freeInfo) { + if (freeInfo->vsqNode) + kfree(freeInfo->vsqNode->vstreamData); + + kfree(freeInfo->vsqNode); + } + + kfree(freeInfo); +} + +static int queue_pop_by_num(struct vstream_node *node, uint32_t pop_num) +{ + if (node->credit + pop_num > MAX_VSTREAM_SIZE) { + ucc_err("Queue usage out-of-bounds"); + return -EACCES; + } + + node->credit += pop_num; + node->head = (node->head + pop_num) % MAX_VSTREAM_SIZE; + return 0; +} + +static int queue_pop_by_head(struct vstream_node *node, uint32_t 
head) +{ + int pop_num = (head - node->head + MAX_VSTREAM_SIZE) % + MAX_VSTREAM_SIZE; + return queue_pop_by_num(node, pop_num); +} + +int update_vstream_head(struct vstream_info *vstream_info, int num) +{ + struct vstream_node *node = vstream_info->vsqNode; + + raw_spin_lock(&node->spin_lock); + if (node->credit + num > MAX_VSTREAM_SIZE) { + raw_spin_unlock(&node->spin_lock); + return -1; + } + + node->credit += num; + node->head = (node->head + num) % MAX_VSTREAM_SIZE; + raw_spin_unlock(&node->spin_lock); + + return 0; +} + +bool vstream_have_kernel(struct ucc_se *se) +{ + struct vstream_info *vinfo; + + vinfo = container_of(se, struct vstream_info, se); + return vinfo->vsqNode->credit != MAX_VSTREAM_SIZE; +} + +static int queue_push_by_num(struct vstream_node *node, uint32_t push_num) +{ + if (node->credit - push_num < 0) + return -EACCES; + + node->credit -= push_num; + node->tail = (node->tail + push_num) % MAX_VSTREAM_SIZE; + return 0; +} + +static int queue_push_by_tail(struct vstream_node *node, uint32_t tail) +{ + int push_num = (tail - node->tail + MAX_VSTREAM_SIZE) % + MAX_VSTREAM_SIZE; + return queue_push_by_num(node, push_num); +} + +static uint32_t vstream_alloc_vstreamId(void) +{ + uint32_t vstreamId = DEVDRV_MAX_SQ_NUM; + + /* alloc vstreamId */ + mutex_lock(&vstreamId_Bitmap_mutex); + vstreamId = find_first_zero_bit(vstreamIdBitmap, DEVDRV_MAX_SQ_NUM); + if (vstreamId == DEVDRV_MAX_SQ_NUM) { + ucc_err("vstreamId exhausted.\n"); + mutex_unlock(&vstreamId_Bitmap_mutex); + return DEVDRV_MAX_SQ_NUM; + } + set_bit(vstreamId, vstreamIdBitmap); + mutex_unlock(&vstreamId_Bitmap_mutex); + + return vstreamId; +} + +static uint32_t vstream_alloc_vcqid(void) +{ + uint32_t vcqId = DEVDRV_MAX_CQ_NUM; + + /* alloc vcqid */ + mutex_lock(&vcqId_Bitmap_mutex); + vcqId = find_first_zero_bit(vcqIdBitmap, DEVDRV_MAX_CQ_NUM); + if (vcqId == DEVDRV_MAX_CQ_NUM) { + ucc_err("vcqId has been used up.\n"); + mutex_unlock(&vcqId_Bitmap_mutex); + return DEVDRV_MAX_CQ_NUM; + } + set_bit(vcqId, vcqIdBitmap); + mutex_unlock(&vcqId_Bitmap_mutex); + + ucc_info("vcqId = %d\n", vcqId); + return vcqId; +} + +int vstream_map_pfnaddr(struct tsdrv_ctx *ctx, + struct normal_alloc_sqcq_para *para) +{ + int err = 0; + unsigned long vsqAddr; + unsigned long vcqAddr; + pgprot_t vm_page_prot; + struct vm_area_struct *vma = ctx->vma[para->tsId]; + + vsqAddr = vma->vm_start + SQ_USER_ADDR_OFFSET(para->sqId); + vm_page_prot = pgprot_device(vma->vm_page_prot); + err = remap_pfn_range(vma, vsqAddr, + virt_to_pfn(SQ_VSTREAM_DATA(para->sqId)), + MAX_SQ_SIZE, vm_page_prot); + if (err) { + ucc_err("remap_pfn_range failed,ret=%d.\n", err); + return -EFAULT; + } + if (!(para->flag & TSDRV_CQ_REUSE)) { + vcqAddr = vma->vm_start + DEVDRV_VM_CQ_MEM_OFFSET + + CQ_USER_ADDR_OFFSET(para->cqId); + err = remap_pfn_range(vma, vcqAddr, + virt_to_pfn(CQ_VSTREAM_DATA(para->sqId)), + MAX_CQ_SIZE, vm_page_prot); + if (err) { + ucc_err("remap_pfn_range failed,ret=%d.\n", err); + return -EFAULT; + } + } + + return err; +} + +void vstream_unmap_pfnaddr(struct tsdrv_ctx *ctx, + struct normal_free_sqcq_para *para) +{ + unsigned long vsqAddr; + unsigned long vcqAddr; + size_t cqSize = PAGE_ALIGN(MAX_CQ_SIZE); + struct vm_area_struct *vma = ctx->vma[para->tsId]; + + vsqAddr = vma->vm_start + SQ_USER_ADDR_OFFSET(para->sqId); + zap_vma_ptes(vma, vsqAddr, MAX_SQ_SIZE); + + if (!(para->flag & TSDRV_CQ_REUSE)) { + vcqAddr = vma->vm_start + DEVDRV_VM_CQ_MEM_OFFSET + + CQ_USER_ADDR_OFFSET(para->cqId); + zap_vma_ptes(vma, vcqAddr, cqSize); + } +} + +static 
int vstream_update_vcqtable(uint32_t vcqId, uint32_t vstreamId, + uint32_t flag) +{ + int err = -ENOSPC; + struct vcq_map_table *vcqTable = NULL; + struct vstream_id *vstreamIdNode = NULL; + + if (!(flag & TSDRV_CQ_REUSE)) { + vcqTable = kmalloc(sizeof(struct vcq_map_table), GFP_KERNEL); + if (!vcqTable) + return -ENOMEM; + + vcqTable->vcqId = vcqId; + vcqTable->vcqNode = kmalloc(sizeof(struct vstream_node), + GFP_KERNEL); + if (!vcqTable->vcqNode) { + err = -ENOMEM; + goto free_vcqTable; + } + + vcqTable->vcqNode->vstreamData = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!vcqTable->vcqNode->vstreamData) { + err = -ENOMEM; + goto free_vcqNode; + } + vcqTable->vcqNode->id = vcqId; + vcqTable->vcqNode->head = 0; + vcqTable->vcqNode->tail = 0; + vcqTable->vcqNode->credit = MAX_VSTREAM_SIZE; + INIT_LIST_HEAD(&vcqTable->vstreamId_list); + vsqcqMapTable[vcqId] = vcqTable; + } else { + vcqTable = vsqcqMapTable[vcqId]; + } + vstreamIdNode = kmalloc(sizeof(struct vstream_id), GFP_KERNEL); + if (!vstreamIdNode) { + err = -ENOMEM; + + if (!(flag & TSDRV_CQ_REUSE)) + goto free_vstreamData; + return err; + } + vstreamIdNode->vstreamId = vstreamId; + list_add(&vstreamIdNode->list, &vcqTable->vstreamId_list); + + return 0; + +free_vstreamData: + kfree(vcqTable->vcqNode->vstreamData); + +free_vcqNode: + kfree(vcqTable->vcqNode); + +free_vcqTable: + kfree(vcqTable); + + return err; +} + +int ascend_vstream_alloc(struct vstream_args *arg) +{ + uint32_t vstreamId; + uint32_t vcqId = DEVDRV_MAX_CQ_NUM; + int err = -EINVAL; + struct vstream_info *vstream = NULL; + struct tsdrv_ctx *ctx = NULL; + struct normal_alloc_sqcq_para *sqcq_alloc_para = &arg->va_args.ascend; + + ctx = get_ctx(sqcq_alloc_para->fd); + if (!ctx) + return err; + + vstreamId = vstream_alloc_vstreamId(); + if (vstreamId == DEVDRV_MAX_SQ_NUM) { + ucc_err("vstreamId alloc failed.\n"); + return err; + } + if (!(sqcq_alloc_para->flag & TSDRV_CQ_REUSE)) + vcqId = vstream_alloc_vcqid(); + else + vcqId = sqcq_alloc_para->cqId; + + if (vcqId >= DEVDRV_MAX_CQ_NUM) { + ucc_err("vcqId alloc failed.\n"); + goto free_vstreamIds; + } + err = vstream_update_vcqtable(vcqId, vstreamId, sqcq_alloc_para->flag); + if (err) { + ucc_err("vcqtable update failed, vcqId:%d, vstreamId:%d, flag:%d.\n", + vcqId, vstreamId, sqcq_alloc_para->flag); + goto free_vcqid; + } + + sqcq_alloc_para->sqId = vstreamId; + sqcq_alloc_para->cqId = vcqId; + vstream = vstream_create_info(ctx, sqcq_alloc_para); + if (!vstream) { + ucc_err("vstream create failed: vcqId:%d, vstreamId:%d.\n", + vcqId, vstreamId); + err = -ENOSPC; + goto free_vcqtable; + } + + vstream->devId = sqcq_alloc_para->devId; + vstreamContainer[vstreamId] = vstream; + + vstream->group = select_sq(vstream); + if (!vstream->group) { + ucc_err("Failed to select sq\n"); + err = -EINVAL; + goto free_vstream_info; + } + + err = vstream_map_pfnaddr(ctx, sqcq_alloc_para); + if (err) { + ucc_err("vstream map failed, ret=%d.\n", err); + goto free_vstream_info; + } + return 0; + +free_vstream_info: + vstream_free_info(vstreamId); + +free_vcqtable: + vstream_free_map_table(vcqId, vstreamId, sqcq_alloc_para->flag); + +free_vcqid: + free_vcqId(vcqId, sqcq_alloc_para->flag); + +free_vstreamIds: + free_vstreamId(vstreamId); + + return err; +} + +int ascend_vstream_free(struct vstream_args *arg) +{ + int err = 0; + struct vstream_info *vstreamInfo = NULL; + struct normal_free_sqcq_para *sqcq_free_para = &arg->vf_args.ascend; + uint32_t vstreamId = sqcq_free_para->sqId; + uint32_t vcqId = sqcq_free_para->cqId; + + if (vstreamId >= 
DEVDRV_MAX_SQ_NUM || vcqId >= DEVDRV_MAX_CQ_NUM) { + ucc_err("vstream index out-of-range, vstreamId=%d, vcqId=%d.\n", + vstreamId, vcqId); + return -EPERM; + } + + vstreamInfo = vstream_get_info(vstreamId); + if (!vstreamInfo) { + ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId); + return -EPERM; + } + err = ucc_free_task(vstreamInfo, vstreamInfo->privdata); + + free_vcqId(vcqId, sqcq_free_para->flag); + vstream_free_map_table(vcqId, vstreamId, sqcq_free_para->flag); + + vstream_unmap_pfnaddr(vstreamInfo->privdata, sqcq_free_para); + + vstream_free_info(vstreamId); + free_vstreamId(vstreamId); + return err; +} + +int ascend_vstream_kick(struct vstream_args *arg) +{ + int err = 0; + struct tsdrv_sqcq_data_para *sqcq_data_para = &arg->vk_args.ascend; + int vstreamId = sqcq_data_para->id; + int tail = sqcq_data_para->val; + struct vstream_info *vstreamInfo = NULL; + int push_num; + + vstreamInfo = vstream_get_info(vstreamId); + vstreamInfo->p = current; + + if (!vstreamInfo) { + ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId); + return -ENOMEM; + } + + push_num = (tail - vstreamInfo->vsqNode->tail + MAX_VSTREAM_SIZE) % + MAX_VSTREAM_SIZE; + + raw_spin_lock(&vstreamInfo->vsqNode->spin_lock); + err = queue_push_by_tail(vstreamInfo->vsqNode, tail); + if (err) { + raw_spin_unlock(&vstreamInfo->vsqNode->spin_lock); + ucc_err("queue_push_by_tail error, ret = %d\n", err); + return err; + } + raw_spin_unlock(&vstreamInfo->vsqNode->spin_lock); + + err = ucc_wake_up(&vstreamInfo->se); + return err; +} + +int ascend_callback_vstream_wait(struct vstream_args *arg) +{ + int err = 0; + int cqeNum = 0; + int cqeSum = 0; + struct vstream_info *vstreamInfo = NULL; + struct vcq_map_table *vcqTable = NULL; + struct vcq_map_table *waitTable = NULL; + struct vstream_id *vstreamIdNode = NULL; + struct devdrv_report_para *report_para = &arg->cvw_args; + uint32_t *sqlist; + uint32_t sqlist_num = 0; + uint32_t vstreamId, vcqId; + + sqlist = kmalloc_array(DEVDRV_MAX_SQ_NUM, sizeof(uint32_t), GFP_KERNEL); + if (!sqlist) + return -ENOMEM; + + vcqId = report_para->cq_id; + if (vcqId >= DEVDRV_MAX_CQ_NUM) { + ucc_err("vcqId out-of-range, vcqId=%d.\n", vcqId); + err = -EPERM; + goto out; + } + + mutex_lock(&vcqId_Bitmap_mutex); + waitTable = vstream_get_map_table(vcqId); + if (!waitTable) { + ucc_err("No map found for vcq:%d.\n", vcqId); + mutex_unlock(&vcqId_Bitmap_mutex); + err = -EPERM; + goto out; + } + + list_for_each_entry(vstreamIdNode, &waitTable->vstreamId_list, list) + sqlist[sqlist_num++] = vstreamIdNode->vstreamId; + mutex_unlock(&vcqId_Bitmap_mutex); + + //get sqInfo from hardware + for (vstreamId = 0; vstreamId < sqlist_num; vstreamId++) { + vstreamInfo = vstream_get_info(sqlist[vstreamId]); + if (!vstreamInfo) + continue; + err |= ucc_wait_cq(vstreamInfo, vstreamInfo->privdata, + report_para, &cqeNum); + cqeSum += cqeNum; + if (cqeNum) + break; + } + + //update cqInfo + mutex_lock(&vcqId_Bitmap_mutex); + vcqTable = vstream_get_map_table(vcqId); + if (!vcqTable) { + ucc_err("No map found for vcq:%d.\n", vcqId); + err = -EPERM; + goto out; + } + + err = queue_push_by_num(vcqTable->vcqNode, cqeSum); + if (err) { + mutex_unlock(&vcqId_Bitmap_mutex); + ucc_err("failed to queue_push_by_num, ret = %d.\n", err); + goto out; + } + report_para->cq_tail = vcqTable->vcqNode->tail; + mutex_unlock(&vcqId_Bitmap_mutex); + +out: + kfree(sqlist); + return err; +} + +int ascend_callback_vstream_kick(struct vstream_args *arg) +{ + u32 vcqId, release_head; + struct vstream_info *vstreamInfo = NULL; + 
int err = 0; + + vcqId = arg->cvk_args.id; + release_head = arg->cvk_args.val; + if (vcqId >= DEVDRV_MAX_CQ_NUM || release_head >= MAX_VSTREAM_SIZE) { + ucc_err("vstream index out-of-range, vcqId=%d, release_head=%d.\n", + vcqId, release_head); + return -EPERM; + } + + mutex_lock(&vcqId_Bitmap_mutex); + vstreamInfo = vstream_get_info(vcqId); + if (!vstreamInfo) { + err = -EPERM; + goto out; + } + + err = queue_pop_by_head(vstreamInfo->vcqNode, release_head); + +out: + mutex_unlock(&vcqId_Bitmap_mutex); + return err; +} + +int ascend_vstream_get_head(struct vstream_args *arg) +{ + u32 vstreamId = arg->vh_args.id; + struct vstream_info *vstreamInfo = NULL; + + if (vstreamId >= DEVDRV_MAX_SQ_NUM) { + ucc_err("vstreamId out-of-range, vstreamId=%d.\n", vstreamId); + return -EINVAL; + } + + vstreamInfo = vstream_get_info(vstreamId); + if (!vstreamInfo) { + ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId); + return -EINVAL; + } + arg->vh_args.val = vstreamInfo->vsqNode->head; + + return 0; +} + diff --git a/kernel/ucc/ascend_vstream.h b/kernel/ucc/ascend_vstream.h new file mode 100644 index 000000000000..0cd200168495 --- /dev/null +++ b/kernel/ucc/ascend_vstream.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _ASCEND_VSTREAM_H +#define _ASCEND_VSTREAM_H + +int ascend_vstream_alloc(struct vstream_args *arg); +int ascend_vstream_free(struct vstream_args *arg); +int ascend_vstream_kick(struct vstream_args *arg); +int ascend_callback_vstream_wait(struct vstream_args *arg); +int ascend_callback_vstream_kick(struct vstream_args *arg); +int ascend_vstream_get_head(struct vstream_args *arg); + +#endif /* _ASCEND_VSTREAM_H */ diff --git a/kernel/ucc/vstream.c b/kernel/ucc/vstream.c new file mode 100644 index 000000000000..d4705f285b89 --- /dev/null +++ b/kernel/ucc/vstream.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/syscalls.h> +#include <linux/vstream.h> + +#include "ascend_vstream.h" + +static int amdgpu_vstream_alloc(struct vstream_args *arg) +{ + return 0; +} +static int amdgpu_vstream_free(struct vstream_args *arg) +{ + return 0; +} +static int amdgpu_vstream_kick(struct vstream_args *arg) +{ + return 0; +} +static int amdgpu_vstream_update(struct vstream_args *arg) +{ + return 0; +} + +/* + * vstream_manage_cmd table + */ +static vstream_manage_t (*vstream_command_table[AMDGPU_MAX_COMMAND + 1]) = { + ascend_vstream_alloc, // ASCEND_VSTREAM_ALLOC + ascend_vstream_free, // ASCEND_VSTREAM_FREE + ascend_vstream_kick, // ASCEND_VSTREAM_KICK + ascend_callback_vstream_wait, // ASCEND_CALLBACK_VSTREAM_WAIT + ascend_callback_vstream_kick, // ASCEND_CALLBACK_VSTREAM_KICK + ascend_vstream_get_head, // ASCEND_VSTREAM_GET_HEAD + NULL, // ASCEND_MAX_COMMAND + amdgpu_vstream_alloc, // AMDGPU_VSTREAM_ALLOC + amdgpu_vstream_free, // AMDGPU_VSTREAM_FREE + amdgpu_vstream_kick, // AMDGPU_VSTREAM_KICK + amdgpu_vstream_update, // AMDGPU_VSTREAM_UPDATE + NULL // AMDGPU_MAX_COMMAND +}; + +SYSCALL_DEFINE2(vstream_manage, struct vstream_args __user *, arg, int, cmd) +{ + int res = 0; + struct vstream_args vstream_arg; + + if (cmd > AMDGPU_MAX_COMMAND) + return -EINVAL; + + if (copy_from_user(&vstream_arg, arg, sizeof(struct vstream_args))) { + pr_err("copy_from_user failed\n"); + return -EFAULT; + } + res = vstream_command_table[cmd](&vstream_arg); + if (copy_to_user(arg, &vstream_arg, sizeof(struct vstream_args))) { + pr_err("copy_to_user failed\n"); + return -EFAULT; + } + + return res; +} diff --git a/kernel/ucc_sched/Makefile b/kernel/ucc_sched/Makefile 
new file mode 100644
index 000000000000..4a41f07d091c
--- /dev/null
+++ b/kernel/ucc_sched/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_XPU_SCHEDULE) += core.o
diff --git a/kernel/ucc_sched/core.c b/kernel/ucc_sched/core.c
new file mode 100644
index 000000000000..4c7f1f59aeb9
--- /dev/null
+++ b/kernel/ucc_sched/core.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Tue Jan 17 22:19:17 2023
+ */
+
+#include <uapi/linux/sched/types.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/ucc_sched.h>
+
+#include "ucc_sched.h"
+#include "../sched/sched.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ucc_sched.h>
+
+#define MAX_XCU_NUM (100)
+#define TS_SQ_TRANS_TASK_THRESHOLD (20)
+
+static struct xcu xcu_manager[MAX_XCU_NUM];
+static int num_active_xcu;
+raw_spinlock_t xcu_mgr_lock;
+int sysctl_ucc_sched_rcv_timeout_ms = 10;
+
+static struct task_struct vstream_idle_task;
+static struct vstream_info vstream_idle = {
+	.vstreamId = UINT_MAX,
+	.p = &vstream_idle_task,
+};
+
+struct sched_args {
+	int cu_id;
+};
+
+static inline int is_xcu_offline(struct xcu *cu)
+{
+	return cu->state == XCU_INACTIVE;
+}
+
+void ucc_set_vstream_state(struct vstream_info *vinfo, int state)
+{
+	vinfo->se.state = state;
+}
+
+static inline int should_se_run(struct ucc_se *se)
+{
+	return se->state != SE_BLOCK && se->state != SE_DEAD;
+}
+
+static inline void update_stats_run_start(struct xcu *cu,
+					  struct ucc_se *se)
+{
+	u64 start;
+
+	if (!schedstat_enabled())
+		return;
+
+	start = ktime_get_boot_ns();
+	__schedstat_set(se->statistics.run_start, start);
+}
+
+static inline void update_stats_run_end(struct xcu *cu,
+					struct ucc_se *se)
+{
+	struct vstream_info *vinfo;
+	u64 delta;
+
+	if (!schedstat_enabled())
+		return;
+
+	delta = ktime_get_boot_ns() - schedstat_val(se->statistics.run_start);
+	vinfo = container_of(se, struct vstream_info, se);
+	trace_ucc_sched_stat_run(vinfo, delta, se->is_timeout);
+
+	__schedstat_set(se->statistics.run_max,
+			max(schedstat_val(se->statistics.run_max), delta));
+	__schedstat_inc(se->statistics.run_count);
+	__schedstat_add(se->statistics.run_sum, delta);
+	__schedstat_set(se->statistics.run_start, 0);
+}
+
+static inline void update_stats_preempt_start(struct xcu *cu,
+					      struct ucc_se *se)
+{
+	u64 wait_start;
+
+	if (!schedstat_enabled())
+		return;
+
+	wait_start = ktime_get_boot_ns();
+	__schedstat_set(se->statistics.preempt_start, wait_start);
+}
+
+static inline void update_stats_wait_start(struct xcu *cu, struct ucc_se *se)
+{
+	u64 wait_start;
+
+	if (!schedstat_enabled())
+		return;
+
+	wait_start = ktime_get_boot_ns();
+	__schedstat_set(se->statistics.wait_start, wait_start);
+}
+
+static inline void update_stats_wait_end(struct xcu *cu, struct ucc_se *se)
+{
+	struct vstream_info *vinfo;
+	u64 delta, preempt_delta;
+
+	if (!schedstat_enabled())
+		return;
+
+	delta = ktime_get_boot_ns() - schedstat_val(se->statistics.wait_start);
+	vinfo = container_of(se, struct vstream_info, se);
+	trace_ucc_sched_stat_wait(vinfo, delta);
+
+	__schedstat_set(se->statistics.wait_max,
+			max(schedstat_val(se->statistics.wait_max), delta));
+	__schedstat_inc(se->statistics.wait_count);
+	__schedstat_add(se->statistics.wait_sum, delta);
+	__schedstat_set(se->statistics.wait_start, 0);
+
+	if (se->statistics.preempt_start) {
+		preempt_delta = ktime_get_boot_ns() -
+			schedstat_val(se->statistics.preempt_start);
+
trace_ucc_sched_stat_preempt(vinfo, preempt_delta); + + __schedstat_set(se->statistics.preempt_max, + max(schedstat_val(se->statistics.preempt_max), + preempt_delta)); + __schedstat_inc(se->statistics.preempt_count); + __schedstat_add(se->statistics.preempt_sum, preempt_delta); + __schedstat_set(se->statistics.preempt_start, 0); + } +} + +void ucc_dump_statistics_info(struct ucc_se *se) +{ + struct vstream_info *vinfo = container_of(se, struct vstream_info, se); + + pr_info("comm %s pid %d vstreamId %d kernel_sum %llu wait_count %llu wait_max %llu[ns] wait_sum %llu[ns] preempt_count %llu preempt_max %llu[ns] preempt_sum %llu[ns]\n", + vinfo->p->comm, + vinfo->p->pid, + vinfo->vstreamId, + vinfo->se.statistics.kernel_sum, + vinfo->se.statistics.wait_count, + vinfo->se.statistics.wait_max, + vinfo->se.statistics.wait_sum, + vinfo->se.statistics.preempt_count, + vinfo->se.statistics.preempt_max, + vinfo->se.statistics.preempt_sum); +} + +static void put_prev_entity(struct xcu *cu, struct ucc_se *prev) +{ + if (!prev) + return; + + if (prev->on_cu) + update_stats_wait_start(cu, prev); + + prev->state = SE_READY; + cu->curr_se->state = SE_RUNNING; +} + +static void set_next_entity(struct xcu *cu, struct ucc_se *se) +{ + if (se->on_cu && se != cu->curr_se) + update_stats_wait_end(cu, se); + + cu->curr_se = se; +} + +static void dequeue_ucc_se(struct ucc_se *se, struct xcu *cu) +{ + raw_spin_lock(&cu->xcu_lock); + if (!se->on_cu) { + raw_spin_unlock(&cu->xcu_lock); + return; + } + + se->on_cu = 0; + + list_del_init(&se->run_list); + + if (list_empty(cu->queue + se->prio)) + __clear_bit(se->prio, cu->bitmap); + cu->rt_nr_running--; + + if (se != cu->curr_se) + update_stats_wait_end(cu, se); + + if (cu->curr_se == se) + cu->curr_se = NULL; + + raw_spin_unlock(&cu->xcu_lock); +} + +static void enqueue_ucc_se(struct ucc_se *se, struct xcu *cu) +{ + struct list_head *queue = cu->queue + se->prio; + + raw_spin_lock(&cu->xcu_lock); + if (se->on_cu) { + raw_spin_unlock(&cu->xcu_lock); + return; + } + se->on_cu = 1; + se->is_timeout = 0; + list_add_tail(&se->run_list, queue); + __set_bit(se->prio, cu->bitmap); + cu->rt_nr_running++; + + update_stats_wait_start(cu, se); + + raw_spin_unlock(&cu->xcu_lock); +} + +static struct xcu *ucc_select_cu(struct ucc_se *se) +{ + struct vstream_info *vstream_info; + int min_nr_running = INT_MAX; + struct xcu *cu; + int select_cu = 0; + int cu_id; + + vstream_info = container_of(se, struct vstream_info, se); + for (cu_id = 0; cu_id < num_active_xcu; cu_id++) { + cu = &xcu_manager[cu_id]; + + if (vstream_info->devId != cu->dev_id || + vstream_info->tsId != cu->ts_id) + continue; + + if (cu->rt_nr_running < min_nr_running) { + min_nr_running = cu->rt_nr_running; + select_cu = cu_id; + } + } + + vstream_info->cu_id = select_cu; + return &xcu_manager[select_cu]; +} + +static int ucc_check_preempt(struct ucc_se *se, struct xcu *cu) +{ + struct vstream_info *vinfo_curr, *vinfo; + struct ucc_se *curr_se; + + curr_se = cu->curr_se; + if (!curr_se) + return 1; + + vinfo = container_of(se, struct vstream_info, se); + vinfo_curr = container_of(curr_se, struct vstream_info, se); + if (vinfo_curr->p->ucc_priority > vinfo->p->ucc_priority) { + update_stats_preempt_start(cu, se); + curr_se->flag = UCC_TIF_PREEMPT; + return 1; + } + + return 0; +} + +static inline void ucc_wakeup_idle_worker(struct xcu *cu) +{ + wake_up_state(cu->worker, TASK_INTERRUPTIBLE); +} + +static inline void ucc_wakeup_running_worker(struct xcu *cu) +{ + wake_up_state(cu->worker, TASK_UNINTERRUPTIBLE); +} + 
+int ucc_schedule(int cu_id)
+{
+	struct xcu *cu;
+
+	cu = &xcu_manager[cu_id];
+	cu->is_wake = 1;
+	ucc_wakeup_running_worker(cu);
+
+	return 0;
+}
+EXPORT_SYMBOL(ucc_schedule);
+
+int ucc_wake_up(struct ucc_se *se)
+{
+	struct xcu *cu;
+
+	raw_spin_lock(&se->se_lock);
+	if (se->on_cu) {
+		raw_spin_unlock(&se->se_lock);
+		return 0;
+	}
+
+	if (se->state == SE_BLOCK)
+		se->state = SE_READY;
+
+	cu = ucc_select_cu(se);
+	if (!cu) {
+		raw_spin_unlock(&se->se_lock);
+		return -1;
+	}
+
+	enqueue_ucc_se(se, cu);
+	if (ucc_check_preempt(se, cu))
+		ucc_wakeup_idle_worker(cu);
+
+	raw_spin_unlock(&se->se_lock);
+
+	return 0;
+}
+
+static struct ucc_se *pick_next_ucc_se(struct xcu *cu)
+{
+	struct ucc_se *se;
+	struct list_head *queue;
+	int idx;
+
+	if (!cu->rt_nr_running)
+		return NULL;
+
+	idx = sched_find_first_bit(cu->bitmap);
+	BUG_ON(idx >= MAX_UCC_PRIO);
+
+	queue = cu->queue + idx;
+	se = list_entry(queue->next, struct ucc_se, run_list);
+
+	return se;
+}
+
+static int ucc_submit_kernel(struct xcu *cu, struct ucc_se *se)
+{
+	struct vstream_info *vstream_info;
+	struct xpu_group *group;
+	struct tsdrv_ctx *ctx;
+	int kernel_num, left;
+
+	vstream_info = container_of(se, struct vstream_info, se);
+	ctx = vstream_info->privdata;
+	left = (vstream_info->vsqNode->tail - vstream_info->vsqNode->head +
+		MAX_VSTREAM_SIZE) % MAX_VSTREAM_SIZE;
+
+	group = vstream_info->group;
+
+	kernel_num = xpu_run(group, vstream_info, ctx);
+	if (kernel_num <= 0)
+		return kernel_num;
+
+	//update vstream info head and tail;
+	update_vstream_head(vstream_info, kernel_num);
+
+	left -= kernel_num;
+
+	return kernel_num;
+}
+
+static inline void ucc_wait_idle(struct xcu *cu)
+{
+	cu->state = XCU_IDLE;
+
+	do {
+		schedule_timeout_interruptible(1);
+	} while (cu->rt_nr_running == 0);
+
+	cu->state = XCU_BUSY;
+}
+
+static inline void ucc_wait_running(struct xcu *cu, struct ucc_se *se)
+{
+	int cnt = 1;
+
+	do {
+		schedule_timeout_uninterruptible(
+			msecs_to_jiffies(sysctl_ucc_sched_rcv_timeout_ms));
+	} while (cu->is_wake == 0 && --cnt > 0);
+
+	if (cnt == 0) {
+		__schedstat_inc(se->statistics.timeout_count);
+		se->is_timeout = 1;
+	}
+}
+
+static inline void clear_se_flag(struct ucc_se *se)
+{
+	if (se)
+		se->flag = UCC_TIF_NONE;
+}
+
+void ucc_dequeue_task(struct vstream_info *vInfo)
+{
+	struct xcu *cu = &xcu_manager[vInfo->cu_id];
+	struct ucc_se *se = &vInfo->se;
+
+	raw_spin_lock(&se->se_lock);
+	dequeue_ucc_se(se, cu);
+	raw_spin_unlock(&se->se_lock);
+}
+
+/*
+ * dynamic padding: select kernels with no QoS conflicts to current ucc_se
+ * to fill cu;
+ */
+static void dynamic_padding(struct xcu *cu, struct ucc_se *se)
+{
+}
+
+static int __ucc_schedule(void *args)
+{
+	struct sched_args *sargs = (struct sched_args *)args;
+	int cu_id = sargs->cu_id;
+	struct xcu *cu = &xcu_manager[cu_id];
+	struct ucc_se *se = NULL, *curr_se = NULL;
+	struct ucc_se *prev_se = NULL;
+	struct vstream_info *vinfo;
+	int send_cnt = 0;
+	int kernel_num, preempt;
+
+	while (!is_xcu_offline(cu)) {
+		raw_spin_lock(&cu->xcu_lock);
+		cu->is_sched = 0;
+		prev_se = cu->curr_se;
+
+		preempt = 0;
+		if (prev_se) {
+			if (prev_se->flag != UCC_TIF_PREEMPT)
+				goto submit_kernel;
+
+			vinfo = container_of(prev_se, struct vstream_info, se);
+			if (send_cnt < vinfo->p->ucc_step)
+				goto submit_kernel;
+
+			preempt = 1;
+		}
+
+		clear_se_flag(prev_se);
+		se = pick_next_ucc_se(cu);
+		if (!se) {
+			cu->is_sched = 1;
+			raw_spin_unlock(&cu->xcu_lock);
+			trace_ucc_sched_switch(0, &vstream_idle);
+			ucc_wait_idle(cu);
+			continue;
+		}
+
+		set_next_entity(cu, se);
+		if (se != prev_se) {
+			put_prev_entity(cu, prev_se);
+			vinfo = container_of(se, struct vstream_info, se);
+			trace_ucc_sched_switch(preempt, vinfo);
+		}
+		send_cnt = 0;
+submit_kernel:
+		curr_se = cu->curr_se;
+		dynamic_padding(cu, curr_se);
+		raw_spin_unlock(&cu->xcu_lock);
+
+		curr_se->is_timeout = 0;
+		kernel_num = ucc_submit_kernel(cu, curr_se);
+		//has no more kernels to submit.
+		if (kernel_num <= 0 && !vstream_have_kernel(curr_se)) {
+			raw_spin_lock(&curr_se->se_lock);
+			curr_se->state = SE_BLOCK;
+			dequeue_ucc_se(curr_se, cu);
+			raw_spin_unlock(&curr_se->se_lock);
+			cu->is_sched = 1;
+			continue;
+		}
+		cu->is_sched = 1;
+
+		vinfo = container_of(curr_se, struct vstream_info, se);
+		if (vinfo->send_cnt > TS_SQ_TRANS_TASK_THRESHOLD) {
+			update_stats_run_start(cu, curr_se);
+			/* kernel has not finished */
+			if (!cu->is_wake)
+				ucc_wait_running(cu, curr_se);
+
+			update_stats_run_end(cu, curr_se);
+			cu->is_wake = 0;
+			vinfo->send_cnt = 0;
+		}
+
+		send_cnt += kernel_num;
+		schedstat_add(curr_se->statistics.kernel_sum, kernel_num);
+	}
+
+	return 0;
+}
+
+static void init_xcu_rq(struct xcu *cu)
+{
+	int i;
+
+	for (i = 0; i < MAX_UCC_PRIO; i++) {
+		INIT_LIST_HEAD(cu->queue + i);
+		__clear_bit(i, cu->bitmap);
+	}
+
+	/* delimiter for bitsearch: */
+	__set_bit(MAX_UCC_PRIO, cu->bitmap);
+	cu->rt_nr_running = 0;
+	raw_spin_lock_init(&cu->xcu_lock);
+}
+
+static int alloc_cu_id(void)
+{
+	int cu_id = -1;
+
+	raw_spin_lock(&xcu_mgr_lock);
+	if (num_active_xcu >= MAX_XCU_NUM) {
+		raw_spin_unlock(&xcu_mgr_lock);
+		return cu_id;
+	}
+
+	cu_id = num_active_xcu;
+	num_active_xcu++;
+	raw_spin_unlock(&xcu_mgr_lock);
+
+	return cu_id;
+}
+
+int ucc_sched_register_xcu(int dev_id, int ts_id, int cu_num)
+{
+	int cu_id;
+	struct xcu *cu;
+	struct sched_args *args;
+	struct sched_param param = { .sched_priority = 1 };
+	char id_buf[16];
+	int i;
+
+	for (i = 0; i < cu_num; i++) {
+		cu_id = alloc_cu_id();
+		if (cu_id < 0) {
+			pr_err("alloc cu id failed\n");
+			return -1;
+		}
+
+		cu = &xcu_manager[cu_id];
+		cu->cu_id = cu_id;
+		cu->state = XCU_IDLE;
+		cu->curr_se = NULL;
+		cu->dev_id = dev_id;
+		cu->ts_id = ts_id;
+		cu->is_wake = 0;
+		init_xcu_rq(cu);
+
+		args = kzalloc(sizeof(struct sched_args), GFP_KERNEL);
+		if (!args)
+			return -1;
+
+		args->cu_id = cu->cu_id;
+		snprintf(id_buf, sizeof(id_buf), "%d:%d:%d",
+			 cu->cu_id, cu->dev_id, cu->ts_id);
+		cu->worker = kthread_create_on_node(__ucc_schedule,
+						    (void *)args, NUMA_NO_NODE,
+						    "u_sched/%s", id_buf);
+		if (IS_ERR(cu->worker)) {
+			pr_err("create u_sched worker failed\n");
+			kfree(args);
+			return -1;
+		}
+		sched_setscheduler_nocheck(cu->worker, SCHED_FIFO, &param);
+		wake_up_process(cu->worker);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(ucc_sched_register_xcu);
+
+int ucc_sched_init(void)
+{
+	raw_spin_lock_init(&xcu_mgr_lock);
+	return 0;
+}
+
+int ucc_rt_nr_running(struct xcu *cu)
+{
+	return cu->rt_nr_running;
+}
+EXPORT_SYMBOL(ucc_rt_nr_running);
+
+struct xcu *ucc_get_xcu_by_id(int cu_id)
+{
+	return &xcu_manager[cu_id];
+}
+EXPORT_SYMBOL(ucc_get_xcu_by_id);
+
+int ucc_xcu_is_sched(int cu_id)
+{
+	return xcu_manager[cu_id].is_sched;
+}
+EXPORT_SYMBOL(ucc_xcu_is_sched);
diff --git a/kernel/ucc_sched/ucc_sched.h b/kernel/ucc_sched/ucc_sched.h
new file mode 100644
index 000000000000..30e2aa10cf2f
--- /dev/null
+++ b/kernel/ucc_sched/ucc_sched.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ * Author: Huawei OS Kernel Lab + * Create: Tue Jan 17 22:27:22 2023 + */ +#ifndef __UCC_SCHED_USCHED_H__ +#define __UCC_SCHED_USCHED_H__ + +#include <linux/sched.h> +#include <linux/spinlock_types.h> +#include <linux/types.h> +#include <linux/vstream.h> + +//For simplicity, we set this parameter to 2. +#define MAX_UCC_PRIO (2) + +enum xcu_state { + XCU_INACTIVE, + XCU_IDLE, + XCU_BUSY, + XCU_SUBMIT, +}; + +/* + * This is the abstraction object of the xpu computing unit. + */ +struct xcu { + int is_sched; + int cu_id; + int dev_id; + int ts_id; + int rt_nr_running; + int is_wake; + struct task_struct *worker; + DECLARE_BITMAP(bitmap, MAX_UCC_PRIO); + struct list_head queue[MAX_UCC_PRIO]; + enum xcu_state state; + struct ucc_se *curr_se; + raw_spinlock_t xcu_lock; +}; + +#endif
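
For reviewers, a minimal sketch (not part of this patch) of how a device driver might hand its compute units to the ucc scheduler through ucc_sched_register_xcu() added above; the module name and the dev_id/ts_id/cu_num values are made-up examples, and it assumes include/linux/ucc_sched.h declares the exported helpers.

// demo_xpu_sched.c - illustrative sketch only; example values throughout
#include <linux/module.h>
#include <linux/err.h>
#include <linux/ucc_sched.h>

static int __init demo_xpu_sched_init(void)
{
	/* dev_id = 0, ts_id = 0, four compute units: spawns four
	 * "u_sched/<cu:dev:ts>" SCHED_FIFO worker threads, one per XCU.
	 */
	return ucc_sched_register_xcu(0, 0, 4);
}
module_init(demo_xpu_sched_init);
MODULE_LICENSE("GPL");

Once registered, ucc_wake_up() on a vstream's ucc_se enqueues it on the least-loaded matching XCU and the worker thread submits its kernels via xpu_run().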