
From: Liu Kai <liukai284@huawei.com>

XSched support ko mode for npu

Liu Kai (2):
  xsched ko: xsched support ko mode
  xsched ko: adapt 910b npu driver for xsched ko

 ...01-Adapt-910b-npu-driver-for-xsched-ko.txt |  547 ++++++++++++++
 drivers/xsched/Makefile                       |   71 ++
 drivers/xsched/hal/syms_lookup.c              |   52 ++
 drivers/xsched/hal/xcu_group.c                |  237 ++++++
 drivers/xsched/hal/xsched_npu_interface.c     |  385 ++++++++++
 drivers/xsched/include/syms_lookup.h          |   20 +
 drivers/xsched/include/vstream.h              |   87 +++
 drivers/xsched/include/xcu_group.h            |  113 +++
 drivers/xsched/include/xsched.h               |  353 +++++++++
 drivers/xsched/include/xsched_ioctl.h         |   60 ++
 drivers/xsched/include/xsched_npu_interface.h |  117 +++
 drivers/xsched/xsched/core.c                  |  686 ++++++++++++++++++
 drivers/xsched/xsched/rt.c                    |  382 ++++++++++
 drivers/xsched/xsched/xsched.c                |  268 +++++++
 drivers/xsched/xsched/xsched_ioctl.c          |  466 ++++++++++++
 15 files changed, 3844 insertions(+)
 create mode 100644 drivers/xsched/0001-Adapt-910b-npu-driver-for-xsched-ko.txt
 create mode 100644 drivers/xsched/Makefile
 create mode 100644 drivers/xsched/hal/syms_lookup.c
 create mode 100644 drivers/xsched/hal/xcu_group.c
 create mode 100644 drivers/xsched/hal/xsched_npu_interface.c
 create mode 100644 drivers/xsched/include/syms_lookup.h
 create mode 100644 drivers/xsched/include/vstream.h
 create mode 100644 drivers/xsched/include/xcu_group.h
 create mode 100644 drivers/xsched/include/xsched.h
 create mode 100644 drivers/xsched/include/xsched_ioctl.h
 create mode 100644 drivers/xsched/include/xsched_npu_interface.h
 create mode 100644 drivers/xsched/xsched/core.c
 create mode 100644 drivers/xsched/xsched/rt.c
 create mode 100644 drivers/xsched/xsched/xsched.c
 create mode 100644 drivers/xsched/xsched/xsched_ioctl.c
-- 
2.33.0

From: Liu Kai <liukai284@huawei.com>

Signed-off-by: Liu Kai <liukai284@huawei.com>
---
 drivers/xsched/Makefile                       |  71 ++
 drivers/xsched/hal/syms_lookup.c              |  52 ++
 drivers/xsched/hal/xcu_group.c                | 237 ++++++
 drivers/xsched/hal/xsched_npu_interface.c     | 385 ++++++++++
 drivers/xsched/include/syms_lookup.h          |  20 +
 drivers/xsched/include/vstream.h              |  87 +++
 drivers/xsched/include/xcu_group.h            | 113 +++
 drivers/xsched/include/xsched.h               | 353 +++++++++
 drivers/xsched/include/xsched_ioctl.h         |  60 ++
 drivers/xsched/include/xsched_npu_interface.h | 117 +++
 drivers/xsched/xsched/core.c                  | 686 ++++++++++++++++++
 drivers/xsched/xsched/rt.c                    | 382 ++++++++++
 drivers/xsched/xsched/xsched.c                | 268 +++++++
 drivers/xsched/xsched/xsched_ioctl.c          | 466 ++++++++++++
 14 files changed, 3297 insertions(+)
 create mode 100644 drivers/xsched/Makefile
 create mode 100644 drivers/xsched/hal/syms_lookup.c
 create mode 100644 drivers/xsched/hal/xcu_group.c
 create mode 100644 drivers/xsched/hal/xsched_npu_interface.c
 create mode 100644 drivers/xsched/include/syms_lookup.h
 create mode 100644 drivers/xsched/include/vstream.h
 create mode 100644 drivers/xsched/include/xcu_group.h
 create mode 100644 drivers/xsched/include/xsched.h
 create mode 100644 drivers/xsched/include/xsched_ioctl.h
 create mode 100644 drivers/xsched/include/xsched_npu_interface.h
 create mode 100644 drivers/xsched/xsched/core.c
 create mode 100644 drivers/xsched/xsched/rt.c
 create mode 100644 drivers/xsched/xsched/xsched.c
 create mode 100644 drivers/xsched/xsched/xsched_ioctl.c

diff --git a/drivers/xsched/Makefile b/drivers/xsched/Makefile
new file mode 100644
index 000000000000..a157d5ab273c
--- /dev/null
+++ b/drivers/xsched/Makefile
@@ -0,0 +1,71 @@
+# compiler
+CC := gcc
+LD := ld
+KERNEL_DIR := /lib/modules/$(shell uname -r)/build
+PWD := $(shell pwd)
+
+# module name
+MODULE_NAME := xsched
+
+# source code
+KO_SRCS := xsched/xsched_ioctl.c xsched/xsched.c xsched/core.c xsched/rt.c
+HAL_SRCS := hal/xcu_group.c hal/xsched_npu_interface.c hal/syms_lookup.c
+
+# header file
+DRIVER_PATH := /usr/local/Ascend
+INCDIR += -I$(PWD)/include/
+INCDIR += -I$(DRIVER_PATH)/driver/kernel/inc/driver/
+INCDIR += -I$(DRIVER_PATH)/driver/kernel/rms/trs_drv/trs_core/
+INCDIR += -I$(DRIVER_PATH)/driver/kernel/rms/trs_drv/inc
+INCDIR += -I$(DRIVER_PATH)/driver/kernel/dev_inc/inc/
+INCDIR += -I$(DRIVER_PATH)/driver/kernel/drv_davinci_intf_host/
+INCDIR += -I$(DRIVER_PATH)/driver/kernel/tsch/
+INCDIR += -I$(DRIVER_PATH)/driver/kernel/dev_inc/inc/dbl/
+INCDIR += -I$(DRIVER_PATH)/driver/kernel/dbl/uda/
+INCDIR += -I$(LIB_PATH)/include/
+
+# kernel module
+obj-m := $(MODULE_NAME).o
+KO_OBJS := $(KO_SRCS:.c=.o)
+HAL_OBJS := $(HAL_SRCS:.c=.o)
+$(MODULE_NAME)-objs := $(KO_OBJS) $(HAL_OBJS)
+
+# compilation flags
+# note: -g below keeps debug symbols in the module; drop it to exclude them
+EXTRA_CFLAGS += $(INCDIR) -Wall -Wno-unused-function -g
+CFLAGS := $(INCDIR) -Wall -g
+
+# default
+all: ko
+
+# compile kernel module
+ko:
+	@echo "Building kernel module $(MODULE_NAME)..."
+	$(MAKE) -C $(KERNEL_DIR) M=$(PWD) EXTRA_CFLAGS="$(EXTRA_CFLAGS)" modules
+	@echo "Kernel module build complete."
+
+# clean
+clean:
+	@echo "Cleaning up..."
+	$(MAKE) -C $(KERNEL_DIR) M=$(PWD) clean
+	rm -rf test/$(TEST_NAME)
+	rm -f *.o *.ko *.mod.c *.mod.o *.symvers *.order .*.cmd *.so
+	rm -f xsched/*.o xsched/*.mod.c xsched/.*.cmd
+	@echo "Clean complete."
+
+# install kernel module
+install: ko
+	@echo "Installing kernel module..."
+	sudo insmod $(MODULE_NAME).ko
+	@echo "Module installed."
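+
+# Typical out-of-tree workflow (a sketch; assumes the Ascend driver
+# headers under /usr/local/Ascend and headers for the running kernel):
+#   make ko            # build xsched.ko
+#   sudo make install  # insmod the freshly built module
+#   sudo make uninstall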
+ +# uninstall kernel module +uninstall: + @echo "Uninstalling kernel module..." + sudo rmmod $(MODULE_NAME) 2>/dev/null || true + @echo "Module uninstalled." + +# recompile and install +reinstall: uninstall clean ko install + +.PHONY: all ko clean install uninstall reinstall diff --git a/drivers/xsched/hal/syms_lookup.c b/drivers/xsched/hal/syms_lookup.c new file mode 100644 index 000000000000..7b49040d5300 --- /dev/null +++ b/drivers/xsched/hal/syms_lookup.c @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/kprobes.h> +#include <linux/errno.h> + +#include "syms_lookup.h" +#include "xsched.h" + +kallsyms_lookup_name_t generic_kallsyms_lookup_name; + +static struct kprobe kp = { + /* lookup kallsyms_lookup_name */ + .symbol_name = "kallsyms_lookup_name", +}; + +/* + * kprobe pre_handler callback + */ +static int __kprobes handler_pre(struct kprobe *p, struct pt_regs *regs) +{ + return 0; +} + +int __init syms_lookup_init(void) +{ + int ret; + + if (generic_kallsyms_lookup_name) + return 0; + + kp.pre_handler = handler_pre; + ret = register_kprobe(&kp); + if (ret < 0) { + XSCHED_ERR("Failed to register kprobe for kallsyms_lookup_name, error: %d\n", ret); + return ret; + } + + generic_kallsyms_lookup_name = (kallsyms_lookup_name_t)kp.addr; + unregister_kprobe(&kp); + + if (!generic_kallsyms_lookup_name) { + XSCHED_ERR("Failed to get valid address for kallsyms_lookup_name\n"); + return -ENOENT; + } + XSCHED_INFO("Found kallsyms_lookup_name at address: %p\n", (void *)generic_kallsyms_lookup_name); + + return 0; +} + +void syms_lookup_exit(void) +{ + generic_kallsyms_lookup_name = NULL; +} diff --git a/drivers/xsched/hal/xcu_group.c b/drivers/xsched/hal/xcu_group.c new file mode 100644 index 000000000000..5eb6ed041d83 --- /dev/null +++ b/drivers/xsched/hal/xcu_group.c @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/rwsem.h> +#include <linux/slab.h> + +#include "xcu_group.h" +#include "xsched.h" + +static DECLARE_RWSEM(xcu_group_rwsem); + +struct xcu_group *xcu_group_init(int id) +{ + struct xcu_group *node = kzalloc(sizeof(*node), GFP_KERNEL); + + if (!node) + return NULL; + + node->id = id; + node->type = XCU_TYPE_NPU; + idr_init(&node->next_layer); + atomic_set(&node->child_count, 0); + return node; +} + +int __xcu_group_attach(struct xcu_group *new_group, + struct xcu_group *previous_group) +{ + int id = new_group->id; + + if (id == -1) + id = idr_alloc(&previous_group->next_layer, new_group, 0, + INT_MAX, GFP_KERNEL); + else + id = idr_alloc(&previous_group->next_layer, new_group, id, + id + 1, GFP_KERNEL); + + if (id < 0) { + XSCHED_ERR("Fail to attach xcu_group: id conflict @ %s\n", + __func__); + return -EEXIST; + } + new_group->id = id; + new_group->previous_layer = previous_group; + atomic_inc(&previous_group->child_count); + + return 0; +} + +int xcu_group_attach(struct xcu_group *new_group, + struct xcu_group *previous_group) +{ + int ret; + + down_write(&xcu_group_rwsem); + ret = __xcu_group_attach(new_group, previous_group); + up_write(&xcu_group_rwsem); + + return ret; +} + +static inline void __xcu_group_detach(struct xcu_group *group) +{ + if (!group || !group->previous_layer) + return; + + if (WARN_ON(!xcu_group_is_empty(group))) + XSCHED_ERR("Leaking group %u with children\n", group->id); + idr_remove(&group->previous_layer->next_layer, group->id); + atomic_dec(&group->previous_layer->child_count); + group->previous_layer = NULL; +} + +void xcu_group_detach(struct xcu_group *group) +{ + down_write(&xcu_group_rwsem); + 
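/* Writer side of xcu_group_rwsem: serialize detach against
+	 * concurrent attach and find walkers of the group tree.
+	 */
+	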
__xcu_group_detach(group); + up_write(&xcu_group_rwsem); +} + +/* Must call xcu_group_detach() before xcu_group_free() */ +void xcu_group_free(struct xcu_group *group) +{ + idr_destroy(&group->next_layer); + if (group != xcu_group_root) + kfree(group); +} + +static struct xcu_group *__xcu_group_find_nolock( + struct xcu_group *group, int id) +{ + return idr_find(&group->next_layer, id); +} + +struct xcu_group *xcu_group_find(struct xcu_group *group, int id) +{ + struct xcu_group *result; + + down_read(&xcu_group_rwsem); + result = __xcu_group_find_nolock(group, id); + up_read(&xcu_group_rwsem); + + return result; +} + +bool xcu_group_is_empty(struct xcu_group *group) +{ + return atomic_read(&group->child_count) == 0; +} + +/* This function runs "run" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object + */ +int xcu_run(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->run) { + XSCHED_ERR("No function [run] called.\n"); + return -EINVAL; + } + + return params->group->opt->run(params); +} + +/* This function runs "wait" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object + */ +int xcu_wait(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->wait) { + XSCHED_ERR("No function [wait] called.\n"); + return -EINVAL; + } + + return params->group->opt->wait(params); +} + +/* This function runs "complete" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + */ +int xcu_complete(struct xcu_op_handler_params *params) +{ + return 0; +} + +/* This function runs "finish" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement deallocation + * and freeing memory for SQ and CQ buffers. + */ +int xcu_finish(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->finish) { + XSCHED_ERR("No function [finish] called.\n"); + return -EINVAL; + } + + return params->group->opt->finish(params); +} + +/* This function runs a "alloc" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement allocation + * and registering memory for SQ and CQ buffers. + */ +int xcu_alloc(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->alloc) { + XSCHED_ERR("No function [alloc] called.\n"); + return -EINVAL; + } + + return params->group->opt->alloc(params); +} + +/* This function runs a "logic_alloc" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement allocation + * and registering memory of logic CQ buffer. + */ +int xcu_logic_alloc(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->logic_alloc) { + XSCHED_ERR("No function [logic_alloc] called.\n"); + return -EINVAL; + } + + return params->group->opt->logic_alloc(params); +} + +/* This function runs a "logic_free" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to implement deallocation + * and unregistering memory of a logic CQ buffer. 
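+ * It is the inverse of xcu_logic_alloc() above.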
+ */ +int xcu_logic_free(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->logic_free) { + XSCHED_ERR("No function [logic_free] called.\n"); + return -EINVAL; + } + + return params->group->opt->logic_free(params); +} + +/* This function runs a "sqe_op" callback for a given xcu_group + * and a given vstream that are passed within + * xcu_op_handler_params object. + * + * This handler provides an interface to set or get sqe info. + */ +int xcu_sqe_op(struct xcu_op_handler_params *params) +{ + if (!params->group->opt || !params->group->opt->sqe_op) { + XSCHED_ERR("No function [sqe_op] called.\n"); + return -EINVAL; + } + + return params->group->opt->sqe_op(params); +} + +static struct xcu_group __xcu_group_root = { + .id = 0, + .type = XCU_TYPE_ROOT, + .next_layer = IDR_INIT(next_layer), + .child_count = ATOMIC_INIT(0), +}; + +struct xcu_group *xcu_group_root = &__xcu_group_root; diff --git a/drivers/xsched/hal/xsched_npu_interface.c b/drivers/xsched/hal/xsched_npu_interface.c new file mode 100644 index 000000000000..050137cd8177 --- /dev/null +++ b/drivers/xsched/hal/xsched_npu_interface.c @@ -0,0 +1,385 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/cdev.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/list.h> +#include <linux/platform_device.h> +#include <linux/vmalloc.h> +#include <linux/file.h> + +#include "xcu_group.h" +#include "xsched_npu_interface.h" + +extern int xsched_xcu_register(struct xcu_group *group, int phys_id); +extern int xsched_xcu_unregister(struct xcu_group *group, int phys_id); +ioctl_trs_sqcq_handler_t ioctl_trs_sqcq_send_ptr; +ioctl_trs_sqcq_handler_t ioctl_trs_sqcq_alloc_ptr; +ioctl_trs_sqcq_handler_t ioctl_trs_sqcq_free_ptr; +ioctl_trs_sqcq_handler_t ioctl_trs_sqcq_recv_ptr; +uda_dev_inst_get_handler_t uda_dev_inst_get_ptr; +uda_dev_inst_put_handler_t uda_dev_inst_put_ptr; +soc_subsys_get_num_handler_t soc_subsys_get_num_ptr; + +/* Gets device driver TS context from a file descriptor of opened device. */ +static void *tsdrv_ctx_find(int fd) +{ + struct davinci_intf_private_stru *file_private_data; + void *ctx = NULL; + struct fd f; + + f = fdget(fd); + if (!f.file) + return NULL; + + file_private_data = f.file->private_data; + if (file_private_data) + ctx = file_private_data->priv_filep.private_data; + + fdput(f); + return ctx; +} + +int trs_xsched_ctx_run(struct xcu_op_handler_params *params) +{ + uint32_t sq_id = *(uint32_t *)params->param_1; + uint32_t tsId = *(uint32_t *)params->param_2; + uint8_t *sqe_addr = params->param_3; + uint32_t sqe_num = *(uint32_t *)params->param_4; + int32_t timeout = *(int32_t *)params->param_5; + int32_t type = *(int32_t *)params->param_6; + struct halTaskSendInfo input = {0}; + struct trs_proc_ctx *ctx = params->param_7; + uint32_t logic_cqId = *(uint32_t *)params->param_8; + + input.tsId = tsId; + input.sqId = sq_id; + input.timeout = timeout; + input.sqe_addr = sqe_addr; + input.sqe_num = sqe_num; + input.type = type; + + XSCHED_DEBUG("%s %d: tsId %u sqId %u timeout %d num %u\n", + __func__, __LINE__, tsId, sq_id, timeout, sqe_num); + + if (!ioctl_trs_sqcq_send_ptr) { + XSCHED_ERR("Invalid ioctl_trs_sqcq_send_ptr %p @ %s\n", ioctl_trs_sqcq_send_ptr, __func__); + return -ENOENT; + } + /* Send SQ tail to a doorbel. 
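+	 * ("Doorbell" meaning the device register write that publishes
+	 * the new SQ tail; the actual submit is done by the NPU driver's
+	 * ioctl_trs_sqcq_send handler resolved at init time.)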
*/ + return ioctl_trs_sqcq_send_ptr(ctx, logic_cqId, (unsigned long)&input); +} + +int trs_xsched_ctx_free(struct xcu_op_handler_params *params) +{ + struct trs_proc_ctx *ctx; + + ctx = tsdrv_ctx_find(params->fd); + if (!ctx) + return -ENOENT; + + if (!ioctl_trs_sqcq_free_ptr) { + XSCHED_ERR("Invalid ioctl_trs_sqcq_free_ptr %p @ %s\n", ioctl_trs_sqcq_free_ptr, __func__); + return -ENOENT; + } + return ioctl_trs_sqcq_free_ptr(ctx, 0, (unsigned long)params->payload); +} + +int trs_xsched_ctx_wait(struct xcu_op_handler_params *params) +{ + uint32_t tsId = *(uint32_t *)params->param_1; + uint32_t cqId = *(uint32_t *)params->param_2; + uint32_t streamId = *(uint32_t *)params->param_3; + struct ts_stars_sqe_header *sqe = params->param_4; + uint8_t *cqe_addr = params->param_5; + struct trs_proc_ctx *ctx = params->param_6; + int32_t timeout = *(uint32_t *)params->param_7; + int32_t cqe_num = 1; + struct halReportRecvInfo input = {0}; + uint32_t task_id = sqe->task_id; + + input.type = DRV_LOGIC_TYPE; + input.tsId = tsId; + input.cqId = cqId; + input.timeout = timeout; + input.cqe_num = cqe_num; + input.cqe_addr = cqe_addr; + input.stream_id = streamId; + input.task_id = task_id; + input.res[0] = 1; /* version 1 for new runtime. */ + + XSCHED_DEBUG("%s %d: tdId %u logic_cqId %u streamid %u task_id %d timeout %d\n", + __func__, __LINE__, tsId, cqId, streamId, task_id, timeout); + + if (!ioctl_trs_sqcq_recv_ptr) { + XSCHED_ERR("Invalid ioctl_trs_sqcq_recv_ptr %p @ %s\n", ioctl_trs_sqcq_recv_ptr, __func__); + return -ENOENT; + } + /* Wait for cq irq and read result. */ + return ioctl_trs_sqcq_recv_ptr(ctx, 0, (unsigned long)&input); +} + +int trs_xsched_ctx_complete(struct xcu_op_handler_params *params) +{ + return 0; +} + +int trs_xsched_ctx_alloc(struct xcu_op_handler_params *params) +{ + struct halSqCqInputInfo *input_info = params->payload; + uint32_t *tgid = (uint32_t *)params->param_1; + uint32_t *sq_id = (uint32_t *)params->param_2; + uint32_t *cq_id = (uint32_t *)params->param_3; + uint32_t *user_stream_id = (uint32_t *)params->param_4; + struct trs_proc_ctx *ctx; + int ret = 0; + + XSCHED_DEBUG("%s %d, input_info %lx, type: %d\n", + __func__, __LINE__, (unsigned long)input_info, input_info->type); + + ctx = tsdrv_ctx_find(params->fd); + if (!ctx) + return -ENOENT; + XSCHED_DEBUG("%s %d, pid %d, task_id %d, size %ld\n", + __func__, __LINE__, ctx->pid, ctx->task_id, sizeof(*ctx)); + + if (!ioctl_trs_sqcq_alloc_ptr) { + XSCHED_ERR("Invalid ioctl_trs_sqcq_alloc_ptr %p @ %s\n", ioctl_trs_sqcq_alloc_ptr, __func__); + return -ENOENT; + } + ret = ioctl_trs_sqcq_alloc_ptr(ctx, 0, (unsigned long)input_info); + if (ret != 0) + return ret; + + *tgid = ctx->pid; + *sq_id = input_info->sqId; + *cq_id = input_info->cqId; + *user_stream_id = input_info->info[0]; + params->param_5 = ctx; + return 0; +} + +int trs_xsched_ctx_logic_alloc(struct xcu_op_handler_params *params) +{ + struct halSqCqInputInfo *input_info = params->payload; + uint32_t *logic_cq_id = (uint32_t *)params->param_1; + struct trs_proc_ctx *ctx; + int ret = 0; + + XSCHED_DEBUG("%s %d, type: %d\n", __func__, __LINE__, input_info->type); + + ctx = tsdrv_ctx_find(params->fd); + if (!ctx) + return -ENOENT; + XSCHED_DEBUG("%s %d, pid %d, task_id %d, size %ld\n", + __func__, __LINE__, ctx->pid, ctx->task_id, sizeof(*ctx)); + + if (!ioctl_trs_sqcq_alloc_ptr) { + XSCHED_ERR("Invalid ioctl_trs_sqcq_alloc_ptr %p @ %s\n", ioctl_trs_sqcq_alloc_ptr, __func__); + return -ENOENT; + } + ret = ioctl_trs_sqcq_alloc_ptr(ctx, 0, (unsigned long)input_info); + if 
(ret != 0) + return ret; + + *logic_cq_id = input_info->cqId; + XSCHED_DEBUG("%s %d, type: %d, cq_id: %u\n", + __func__, __LINE__, input_info->type, *logic_cq_id); + return 0; +} + +int trs_xsched_ctx_sqe_op(struct xcu_op_handler_params *params) +{ + struct ts_stars_sqe_header *sqe = params->param_2; + int op_type = *(int *)(params->param_1); + + switch (op_type) { + case SQE_IS_NOTIFY: + return (sqe->type == 0) && (sqe->wr_cqe == 1); + case SQE_SET_NOTIFY: + if (sqe->type == 0) + sqe->wr_cqe = 1; + break; + default: + break; + } + + return 0; +} + +static struct xcu_operation trs_xsched_ctx_xcu_ops = { + .run = trs_xsched_ctx_run, + .finish = trs_xsched_ctx_free, + .wait = trs_xsched_ctx_wait, + .complete = trs_xsched_ctx_complete, + .alloc = trs_xsched_ctx_alloc, + .logic_alloc = trs_xsched_ctx_logic_alloc, + .sqe_op = trs_xsched_ctx_sqe_op, +}; + +/* + * build xcu_group like + * xcu_root + * / + * 910 + * / \ + * dev0 dev1 + * / \ + * channel0 channel1 + */ +int xsched_xcu_group_init( + uint32_t type, uint32_t dev_id, uint32_t channel_num) +{ + struct xcu_group *type_group, *dev_group, *channel_group; + int channel_id, err = 0; + + XSCHED_DEBUG("dev_id %u channel_num %u\n", dev_id, channel_num); + + type_group = xcu_group_find(xcu_group_root, type); + if (!type_group) { + type_group = xcu_group_init(type); + if (!type_group) { + XSCHED_ERR("Fail to alloc xcu group with NPU\n"); + return -ENOMEM; + } + + err = xcu_group_attach(type_group, xcu_group_root); + if (err) { + XSCHED_ERR("Fail to attach NPU group\n"); + xcu_group_free(type_group); + return err; + } + } + + dev_group = xcu_group_init(dev_id); + if (!dev_group) { + XSCHED_ERR("Fail to alloc device group id=%u\n", dev_id); + return -ENOMEM; + } + dev_group->id = dev_id; + + err = xcu_group_attach(dev_group, type_group); + if (err) { + XSCHED_ERR("Fail to attach device group id=%u\n", dev_id); + xcu_group_free(dev_group); + return err; + } + + for (channel_id = 0; channel_id < channel_num; channel_id++) { + channel_group = xcu_group_init(channel_id); + if (!channel_group) { + XSCHED_ERR("Fail to alloc channel group id=%u\n", channel_id); + err = -ENOMEM; + continue; + } + channel_group->opt = &trs_xsched_ctx_xcu_ops; + + err = xcu_group_attach(channel_group, dev_group); + if (err) { + XSCHED_ERR("Fail to attach channel group id=%u\n", channel_id); + xcu_group_free(channel_group); + continue; + } + + /* one xcu map to a channel group */ + err = xsched_xcu_register(channel_group, dev_id); + if (err) { + XSCHED_ERR("Fail to register channel_id=%u dev_id=%u\n", + channel_id, dev_id); + xcu_group_detach(channel_group); + xcu_group_free(channel_group); + continue; + } + + cond_resched(); + } + + return err; +} + +void xsched_xcu_group_exit( + uint32_t type, uint32_t dev_id, uint32_t channel_num) +{ + struct xcu_group *type_group, *dev_group, *channel_group; + int channel_id; + + type_group = xcu_group_find(xcu_group_root, type); + if (!type_group) + return; + + dev_group = xcu_group_find(type_group, dev_id); + if (!dev_group) + goto check_type_group; + + for (channel_id = 0; channel_id < channel_num; channel_id++) { + channel_group = xcu_group_find(dev_group, channel_id); + if (!channel_group) + continue; + + xsched_xcu_unregister(channel_group, dev_id); + xcu_group_detach(channel_group); + xcu_group_free(channel_group); + + cond_resched(); + } + xcu_group_detach(dev_group); + xcu_group_free(dev_group); + +check_type_group: + if (xcu_group_is_empty(type_group)) { + xcu_group_detach(type_group); + xcu_group_free(type_group); + } +} + +int 
xcu_populate(uint32_t dev_id) +{ + int ret; + uint32_t ts_num; + struct uda_dev_inst *dev_inst = uda_dev_inst_get_ptr(dev_id); + + if (!dev_inst) + return 0; + + /* Get number of TS in a device. */ + ret = soc_subsys_get_num_ptr(dev_id, TS_SUBSYS, &ts_num); + if (ret) { + XSCHED_ERR("Get ts num fail. (ret=%d; dev_id=%u; ts_num=%u)\n", ret, dev_id, ts_num); + goto err_out; + } + + ret = xsched_xcu_group_init(XCU_TYPE_NPU, dev_id, ts_num); + if (ret) { + XSCHED_ERR("Failed to initialize xcu group (dev_id=%u; ts_num=%u)\n", dev_id, ts_num); + goto err_out; + } + XSCHED_INFO("Registered device: dev_id=%d, ts_num=%d\n", dev_id, ts_num); + +err_out: + uda_dev_inst_put_ptr(dev_inst); + return ret; +} + +int xcu_depopulate(uint32_t dev_id) +{ + int ret; + uint32_t ts_num; + struct uda_dev_inst *dev_inst = uda_dev_inst_get_ptr(dev_id); + + if (!dev_inst) + return 0; + + /* Get number of TS in a device. */ + ret = soc_subsys_get_num_ptr(dev_id, TS_SUBSYS, &ts_num); + if (ret) { + XSCHED_ERR("Get ts num fail. (ret=%d; dev_id=%u; ts_num=%u)\n", ret, dev_id, ts_num); + goto err_out; + } + + xsched_xcu_group_exit(XCU_TYPE_NPU, dev_id, ts_num); + XSCHED_INFO("Unregistered device: dev_id=%d, ts_num=%d\n", dev_id, ts_num); + +err_out: + uda_dev_inst_put_ptr(dev_inst); + return ret; +} diff --git a/drivers/xsched/include/syms_lookup.h b/drivers/xsched/include/syms_lookup.h new file mode 100644 index 000000000000..c5da778a53ab --- /dev/null +++ b/drivers/xsched/include/syms_lookup.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SYMS_LOOKUP_H +#define _SYMS_LOOKUP_H + +#include <linux/types.h> + +typedef unsigned long (*kallsyms_lookup_name_t)(const char *name); + +extern kallsyms_lookup_name_t generic_kallsyms_lookup_name; + +/* + * lookup kallsyms_lookup_name() + */ +extern int __init syms_lookup_init(void); + +/* + * free kallsyms_lookup_name() + */ +extern void syms_lookup_exit(void); +#endif /* _SYMS_LOOKUP_H */ diff --git a/drivers/xsched/include/vstream.h b/drivers/xsched/include/vstream.h new file mode 100644 index 000000000000..1fd3a8fccdc6 --- /dev/null +++ b/drivers/xsched/include/vstream.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _VSTREAM_H +#define _VSTREAM_H + +#include <linux/ktime.h> +#include "xsched_ioctl.h" + +#define MAX_VSTREAM_SIZE 2048 +#define XCU_CQE_SIZE_MAX 32 +#define XCU_CQE_REPORT_NUM 4 +#define XCU_CQE_BUF_SIZE (XCU_CQE_REPORT_NUM * XCU_CQE_SIZE_MAX) + +/* Vstream metadata describes each incoming kick + * that gets stored into a list of pending kicks + * inside a vstream to keep track of what is left + * to be processed by a driver. + */ +typedef struct vstream_metadata { + /* A value of SQ tail that has been passed with the + * kick that is described by this exact metadata object. + */ + uint32_t sq_tail; + uint32_t sqe_num; + uint32_t sq_id; + uint8_t sqe[XCU_SQE_SIZE_MAX]; + + /* Report buffer for fake read. */ + int8_t cqe[XCU_CQE_BUF_SIZE]; + uint32_t cqe_num; + int32_t timeout; + + /* A node for metadata list */ + struct list_head node; + + struct vstream_info *parent; + + /* Time of list insertion */ + ktime_t add_time; +} vstream_metadata_t; + +typedef struct vstream_info { + uint32_t user_stream_id; + uint32_t id; + uint32_t vcq_id; + uint32_t logic_vcq_id; + uint32_t dev_id; + uint32_t channel_id; + uint32_t fd; + uint32_t task_type; + int tgid; + int sqcq_type; + + void *drv_ctx; + + int inode_fd; + + /* Pointer to corresponding context. */ + struct xsched_context *ctx; + + /* List node in context's vstream list. 
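+	 *
+	 * Context code walks these nodes with for_each_vstream_in_ctx()
+	 * from xsched.h, roughly:
+	 *
+	 *	struct vstream_info *vs;
+	 *
+	 *	for_each_vstream_in_ctx(vs, ctx)
+	 *		XSCHED_DEBUG("vstream id %u\n", vs->id);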
*/ + struct list_head ctx_node; + + /* Pointer to an CU object on which this + * vstream is currently being processed. + * NULL if vstream is not being processed. + */ + struct xsched_cu *xcu; + + /* List node in an CU list of vstreams that + * are currently being processed by this specific CU. + */ + struct list_head xcu_node; + + /* Private vstream data. */ + void *data; + + spinlock_t stream_lock; + + uint32_t kicks_count; + + /* List of metadata a.k.a. all recorded unprocesed + * kicks for this exact vstream. + */ + struct list_head metadata_list; +} vstream_info_t; + +#endif /* _VSTREAM_H */ diff --git a/drivers/xsched/include/xcu_group.h b/drivers/xsched/include/xcu_group.h new file mode 100644 index 000000000000..8863e29d8ee1 --- /dev/null +++ b/drivers/xsched/include/xcu_group.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _XSCHED_XCU_GROUP_H +#define _XSCHED_XCU_GROUP_H + +#include <linux/idr.h> +#include "xsched_ioctl.h" +#include "uda_pub_def.h" + +#define XSCHED_NR_CUS UDA_UDEV_MAX_NUM + +extern struct xcu_group *xcu_group_root; + +enum xcu_type { + XCU_TYPE_ROOT, + XCU_TYPE_NPU, +}; + +enum xcu_sqe_op_type { + SQE_SET_NOTIFY, + SQE_IS_NOTIFY, +}; + +/** + * @group: value for this entry. + * @hash_node: hash node list. + * @dev_id: device id to bind with ctx. + */ +struct ctx_devid_revmap_data { + unsigned int dev_id; + struct xcu_group *group; + struct hlist_node hash_node; +}; + +struct xcu_op_handler_params { + int fd; + struct xcu_group *group; + void *payload; + union { + struct { + void *param_1; + void *param_2; + void *param_3; + void *param_4; + void *param_5; + void *param_6; + void *param_7; + void *param_8; + }; + }; +}; + +typedef int (*xcu_op_handler_fn_t)(struct xcu_op_handler_params *params); + +struct xcu_operation { + xcu_op_handler_fn_t run; + xcu_op_handler_fn_t finish; + xcu_op_handler_fn_t wait; + xcu_op_handler_fn_t complete; + xcu_op_handler_fn_t alloc; + xcu_op_handler_fn_t logic_alloc; + xcu_op_handler_fn_t logic_free; + xcu_op_handler_fn_t sqe_op; +}; + +struct xcu_group { + /* sq id. */ + uint32_t id; + + /* Type of XCU group. */ + enum xcu_type type; + + /* Amount of left node in XCU group tree */ + atomic_t child_count; + + /* IDR for the next layer of XCU group tree. */ + struct idr next_layer; + + /* Pointer to the previous XCU group in the XCU group tree. */ + struct xcu_group *previous_layer; + + /* Pointer to operation fn pointers object describing + * this XCU group's callbacks. + */ + struct xcu_operation *opt; + + /* Pointer to the XCU related to this XCU group. */ + struct xsched_cu *xcu; + + /* Mask of XCU ids associated with this XCU group + * and this group's children's XCUs. 
+ */ + DECLARE_BITMAP(xcu_mask, XSCHED_NR_CUS); +}; + +/* Code for NPU driver support */ +int xcu_group_attach( + struct xcu_group *new_group, struct xcu_group *previous_group); +void xcu_group_detach(struct xcu_group *group); +struct xcu_group *xcu_group_find(struct xcu_group *group, int id); +struct xcu_group *xcu_group_init(int id); +void xcu_group_free(struct xcu_group *group); +bool xcu_group_is_empty(struct xcu_group *group); + +extern int xcu_run(struct xcu_op_handler_params *params); +extern int xcu_wait(struct xcu_op_handler_params *params); +extern int xcu_complete(struct xcu_op_handler_params *params); +extern int xcu_finish(struct xcu_op_handler_params *params); +extern int xcu_alloc(struct xcu_op_handler_params *params); +extern int xcu_logic_alloc(struct xcu_op_handler_params *params); +extern int xcu_logic_free(struct xcu_op_handler_params *params); +extern int xcu_sqe_op(struct xcu_op_handler_params *params); + +#endif /* _XSCHED_XCU_GROUP_H */ diff --git a/drivers/xsched/include/xsched.h b/drivers/xsched/include/xsched.h new file mode 100644 index 000000000000..8405ee9e5281 --- /dev/null +++ b/drivers/xsched/include/xsched.h @@ -0,0 +1,353 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _XSCHED_H +#define _XSCHED_H + +#include <linux/hrtimer.h> +#include <linux/kref.h> + +#include "vstream.h" +#include "xcu_group.h" + +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#define MAX_VSTREAM_NUM (512) + +#define MODULE_NAME "XSched" +#define XSCHED_ERR(fmt, ...) \ + pr_err(pr_fmt(MODULE_NAME " [ERROR]: " fmt), ##__VA_ARGS__) + +#define XSCHED_WARN(fmt, ...) \ + pr_warn(pr_fmt(MODULE_NAME " [WARNING]: " fmt), ##__VA_ARGS__) + +#define XSCHED_INFO(fmt, ...) \ + pr_info(pr_fmt(MODULE_NAME " [INFO]: " fmt), ##__VA_ARGS__) + +/* + * Debug specific prints for XSched + */ +#define XSCHED_DEBUG(fmt, ...) \ + pr_debug(pr_fmt(MODULE_NAME " [DEBUG]: " fmt), ##__VA_ARGS__) + +#define XCU_HASH_ORDER 6 + +#define RUNTIME_INF ((u64)~0ULL) +#define XSCHED_TIME_INF RUNTIME_INF + +#define XSCHED_KICK_SLICE 2 + +/* + * A default kick slice for RT class XSEs. + */ +#define XSCHED_RT_KICK_SLICE XSCHED_KICK_SLICE + +extern atomic_t pending_task_count; + +extern struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS]; + +enum xcu_sched_type { + XSCHED_TYPE_RT, + XSCHED_TYPE_DFLT = XSCHED_TYPE_RT, + XSCHED_TYPE_NUM, +}; + +enum xse_prio { + XSE_PRIO_HIGH = 0, + XSE_PRIO_LOW = 4, + NR_XSE_PRIO, +}; + +extern struct xsched_class rt_xsched_class; +extern struct xsched_class fair_xsched_class; + +#define xsched_first_class \ + list_first_entry(&(xsched_class_list), struct xsched_class, node) + +#define for_each_xsched_class(class) \ + list_for_each_entry((class), &(xsched_class_list), node) + +#define for_each_xse_prio(prio) \ + for (prio = XSE_PRIO_HIGH; prio < NR_XSE_PRIO; prio++) +#define for_each_vstream_in_ctx(vs, ctx) \ + list_for_each_entry((vs), &((ctx)->vstream_list), ctx_node) + +/* Manages xsched RT-like class linked list based runqueue. + * + * Now RT-like class runqueue structs is identical + * but will most likely grow different in the + * future as the Xsched evolves. + */ +struct xsched_rq_rt { + struct list_head rq[NR_XSE_PRIO]; + unsigned int nr_running; +}; + +/* Base XSched runqueue object structure that contains both mutual and + * individual parameters for different scheduling classes. 
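+ *
+ * Note: only the RT-like runqueue below is populated; the fair class
+ * declared in this header is not registered by xsched_sched_init()
+ * in this patch.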
+ */ +struct xsched_rq { + struct xsched_entity *curr_xse; + const struct xsched_class *class; + + int state; + + /* RT class run queue.*/ + struct xsched_rq_rt rt; +}; + +enum xcu_state { + XCU_INACTIVE, + XCU_IDLE, + XCU_BUSY, + XCU_SUBMIT, +}; + +enum xsched_cu_status { + /* Worker not initialized. */ + XSCHED_XCU_NONE, + + /* Worker is sleeping in idle state. */ + XSCHED_XCU_WAIT_IDLE, + + /* Worker is sleeping in running state. */ + XSCHED_XCU_WAIT_RUNNING, + + /* Worker is active but not processing anything. */ + XSCHED_XCU_ACTIVE, + + NR_XSCHED_XCU_STATUS, +}; + +/* This is the abstraction object of the xcu computing unit. */ +struct xsched_cu { + uint32_t id; + uint32_t state; + + atomic_t pending_kicks; + + struct task_struct *worker; + + /* Storage list for contexts associated with this xcu */ + uint32_t nr_ctx; + struct list_head ctx_list; + struct mutex ctx_list_lock; + + vstream_info_t *vs_array[MAX_VSTREAM_NUM]; + struct mutex vs_array_lock; + + struct xsched_rq xrq; + struct list_head vsm_list; + + struct xcu_group *group; + + struct mutex xcu_lock; + + wait_queue_head_t wq_xcu_idle; +}; + +extern int num_active_xcu; +#define for_each_active_xcu(xcu, id) \ + for ((id) = 0, xcu = xsched_cu_mgr[(id)]; \ + (id) < num_active_xcu && (xcu = xsched_cu_mgr[(id)]); (id)++) + +struct xsched_entity_rt { + struct list_head list_node; + enum xse_prio prio; + + ktime_t timeslice; +}; + +struct xsched_entity { + uint32_t task_type; + + bool on_rq; + + pid_t owner_pid; + pid_t tgid; + + /* Amount of pending kicks currently sitting on this context. */ + atomic_t kicks_pending_ctx_cnt; + + /* Amount of submitted kicks context, used for resched decision. */ + atomic_t submitted_one_kick; + + size_t total_scheduled; + size_t total_submitted; + + /* File descriptor coming from an associated context + * used for identifying a given xsched entity in + * info and error prints. + */ + uint32_t fd; + + /* Xsched class for this xse. */ + const struct xsched_class *class; + + /* RT class entity. */ + struct xsched_entity_rt rt; + + /* Pointer to context object. */ + struct xsched_context *ctx; + + /* Xsched entity execution statistics */ + u64 last_exec_runtime; + + /* Pointer to an XCU object that represents an XCU + * on which this xse is to be processed or is being + * processed currently. + */ + struct xsched_cu *xcu; + + /* General purpose xse lock. */ + spinlock_t xse_lock; +}; + +/* Increments pending kicks counter for an XCU that the given + * xsched entity is attached to and for xsched entity's xsched + * class. + */ +static inline int xsched_inc_pending_kicks_xse(struct xsched_entity *xse) +{ + atomic_inc(&xse->xcu->pending_kicks); + /* Icrement pending kicks for current XSE. */ + atomic_inc(&xse->kicks_pending_ctx_cnt); + return 0; +} + +/* Decrements pending kicks counter for an XCU that the given + * xsched entity is attached to and for XSched entity's sched + * class. + */ +static inline int xsched_dec_pending_kicks_xse(struct xsched_entity *xse) +{ + atomic_dec(&xse->xcu->pending_kicks); + /* Decrementing pending kicks for current XSE. */ + atomic_dec(&xse->kicks_pending_ctx_cnt); + return 0; +} + +/* Checks if there are pending kicks left on a given XCU for all + * xsched classes. 
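+ *
+ * Pairs with xsched_inc/dec_pending_kicks_xse() above: a nonzero
+ * return means at least one enqueued XSE still has unsubmitted kicks.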
+ */ +static inline bool xsched_check_pending_kicks_xcu(struct xsched_cu *xcu) +{ + return atomic_read(&xcu->pending_kicks); +} + +static inline int xse_integrity_check(const struct xsched_entity *xse) +{ + if (!xse) { + XSCHED_ERR("xse is null @ %s\n", __func__); + return -EINVAL; + } + + if (!xse->class) { + XSCHED_ERR("xse->class is null @ %s\n", __func__); + return -EINVAL; + } + + return 0; +} + +struct xsched_context { + uint32_t fd; + uint32_t dev_id; + pid_t tgid; + + struct list_head vstream_list; + struct list_head ctx_node; + + struct xsched_entity xse; + + spinlock_t ctx_lock; + struct mutex ctx_mutex; + struct kref kref; +}; + +/* Returns a pointer to xsched_context object corresponding to a given + * tgid and xcu. + */ +static inline struct xsched_context * +ctx_find_by_tgid_and_xcu(pid_t tgid, struct xsched_cu *xcu) +{ + struct xsched_context *ctx; + struct xsched_context *ret = NULL; + + list_for_each_entry(ctx, &xcu->ctx_list, ctx_node) { + if (ctx->tgid == tgid) { + ret = ctx; + break; + } + } + return ret; +} + +/* Xsched class. */ +struct xsched_class { + const enum xcu_sched_type class_id; + struct list_head node; + + /* Initialize a new xsched entity */ + void (*xse_init)(struct xsched_entity *xse); + + /* Destroy XSE scheduler-specific data */ + void (*xse_deinit)(struct xsched_entity *xse); + + /* Initialize a new runqueue per xcu */ + void (*rq_init)(struct xsched_cu *xcu); + + /* Removes a given XSE from it's runqueue. */ + void (*dequeue_ctx)(struct xsched_entity *xse); + + /* Places a given XSE on a runqueue on a given XCU. */ + void (*enqueue_ctx)(struct xsched_entity *xse, struct xsched_cu *xcu); + + /* Returns a next XSE to be submitted on a given XCU. */ + struct xsched_entity *(*pick_next_ctx)(struct xsched_cu *xcu); + + /* Put a XSE back into rq during preemption. */ + void (*put_prev_ctx)(struct xsched_entity *xse); + + /* Check context preemption. 
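+	 * (wrapped by should_preempt() in core.c; not yet called from
+	 * the submit loop in this patch).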
*/ + bool (*check_preempt)(struct xsched_entity *xse); + + /* Select jobs from XSE to submit on XCU */ + size_t (*select_work)(struct xsched_cu *xcu, struct xsched_entity *xse); +}; + +static inline void xsched_init_vsm(struct vstream_metadata *vsm, + struct vstream_info *vs, vstream_args_t *arg) +{ + vsm->sq_id = arg->sq_id; + vsm->sqe_num = arg->vk_args.sqe_num; + vsm->timeout = arg->vk_args.timeout; + memcpy(vsm->sqe, arg->vk_args.sqe, XCU_SQE_SIZE_MAX); + vsm->parent = vs; + INIT_LIST_HEAD(&vsm->node); +} + +int xsched_xcu_register(struct xcu_group *group, int phys_id); +int xsched_xcu_unregister(struct xcu_group *group, int phys_id); +void xsched_task_free(struct kref *kref); +int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs); +int ctx_bind_to_xcu(vstream_info_t *vstream_info, struct xsched_context *ctx); +int vstream_bind_to_xcu(vstream_info_t *vstream_info); +struct xsched_cu *xcu_find( + uint32_t *type, uint32_t dev_id, uint32_t channel_id); + +/* Vstream metadata proccesing functions.*/ +int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg); +struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs); + +int xsched_sched_init(void); +int xsched_rt_prio_set(pid_t tgid, unsigned int prio); +int xsched_rt_prio_get(pid_t tgid); +void tgid_prio_cleanup(void); + +void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); +void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu); + +bool xsched_try_module_get(void); +void xsched_module_put(void); +#endif /* _XSCHED_H */ diff --git a/drivers/xsched/include/xsched_ioctl.h b/drivers/xsched/include/xsched_ioctl.h new file mode 100644 index 000000000000..3b006330c644 --- /dev/null +++ b/drivers/xsched/include/xsched_ioctl.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _XSCHED_IOCTL_H +#define _XSCHED_IOCTL_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +#define PAYLOAD_SIZE_MAX 512 +#define XCU_SQE_SIZE_MAX 64 + +typedef struct vstream_alloc_args { + int type; + __u32 user_stream_id; +} vstream_alloc_args_t; + +typedef struct vstream_free_args { } vstream_free_args_t; + +typedef struct vstream_kick_args { + __u32 sqe_num; + __s32 timeout; + __s8 sqe[XCU_SQE_SIZE_MAX]; +} vstream_kick_args_t; + +typedef struct vstream_args { + __u32 channel_id; + __u32 fd; + __u32 dev_id; + __u32 task_type; + __u32 sq_id; + __u32 cq_id; + + /* Device related structures. 
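+	 *
+	 * A minimal userspace kick might look like this (hypothetical
+	 * sketch; the real sqe layout comes from the NPU runtime):
+	 *
+	 *	vstream_args_t args = {0};
+	 *
+	 *	args.channel_id = 0;
+	 *	args.sq_id = sq_id;
+	 *	args.vk_args.sqe_num = 1;
+	 *	args.vk_args.timeout = 500;
+	 *	memcpy(args.vk_args.sqe, sqe, XCU_SQE_SIZE_MAX);
+	 *	ioctl(xsched_fd, XSCHED_KICK, &args);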
*/ + union { + vstream_alloc_args_t va_args; + vstream_free_args_t vf_args; + vstream_kick_args_t vk_args; + }; + + __u32 payload_size; + char payload[PAYLOAD_SIZE_MAX]; +} vstream_args_t; + +struct priority_args { + __s32 pid; + __u32 sched_priority; +}; + +#define XSCHED_MAGIC 'x' +#define XSCHED_ALLOC _IOWR(XSCHED_MAGIC, 0, vstream_args_t) +#define XSCHED_FREE _IOW(XSCHED_MAGIC, 1, vstream_args_t) +#define XSCHED_KICK _IOW(XSCHED_MAGIC, 2, vstream_args_t) +#define XSCHED_SET_PRIO _IOW(XSCHED_MAGIC, 3, struct priority_args) +#define XSCHED_GET_PRIO _IOR(XSCHED_MAGIC, 4, struct priority_args) + +/* XSched ioctl handler */ +int xsched_alloc(vstream_args_t *arg); +int xsched_free(vstream_args_t *arg); +int xsched_kick(vstream_args_t *arg); + +#endif /* _XSCHED_IOCTL_H */ diff --git a/drivers/xsched/include/xsched_npu_interface.h b/drivers/xsched/include/xsched_npu_interface.h new file mode 100644 index 000000000000..cfc97ee90119 --- /dev/null +++ b/drivers/xsched/include/xsched_npu_interface.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _XSCHED_NPU_INTERFACE_H +#define _XSCHED_NPU_INTERFACE_H + +#include "trs_pub_def.h" +#include "trs_res_id_def.h" +#include "trs_proc.h" +#include "davinci_api.h" +#include "davinci_interface.h" +#include "davinci_intf_init.h" +#include "task_struct.h" +#include "syms_lookup.h" +#include "soc_res.h" +#include "xsched.h" + +/* NPU IOCTL handlers */ +typedef int (*ioctl_trs_sqcq_handler_t)(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg); +typedef struct uda_dev_inst *(*uda_dev_inst_get_handler_t)(u32 udevid); +typedef void (*uda_dev_inst_put_handler_t)(struct uda_dev_inst *dev_inst); +typedef int (*soc_subsys_get_num_handler_t)(u32 devid, enum soc_sub_type type, u32 *subnum); + +extern ioctl_trs_sqcq_handler_t ioctl_trs_sqcq_send_ptr; +extern ioctl_trs_sqcq_handler_t ioctl_trs_sqcq_alloc_ptr; +extern ioctl_trs_sqcq_handler_t ioctl_trs_sqcq_free_ptr; +extern ioctl_trs_sqcq_handler_t ioctl_trs_sqcq_recv_ptr; + +extern uda_dev_inst_get_handler_t uda_dev_inst_get_ptr; +extern uda_dev_inst_put_handler_t uda_dev_inst_put_ptr; +extern soc_subsys_get_num_handler_t soc_subsys_get_num_ptr; + +static int __init ioctl_trs_sqcq_handler_find(void) +{ + if (!generic_kallsyms_lookup_name) { + XSCHED_ERR("Generic kallsyms_lookup_name is NULL\n"); + return -ENOENT; + } + + if (!ioctl_trs_sqcq_send_ptr) { + ioctl_trs_sqcq_send_ptr = (ioctl_trs_sqcq_handler_t)generic_kallsyms_lookup_name("ioctl_trs_sqcq_send"); + if (!ioctl_trs_sqcq_send_ptr) { + XSCHED_ERR("Failed to find ioctl_trs_sqcq_send symbol\n"); + return -ENOENT; + } + XSCHED_INFO("Found ioctl_trs_sqcq_send at %p\n", ioctl_trs_sqcq_send_ptr); + } + + if (!ioctl_trs_sqcq_alloc_ptr) { + ioctl_trs_sqcq_alloc_ptr = (ioctl_trs_sqcq_handler_t)generic_kallsyms_lookup_name("ioctl_trs_sqcq_alloc"); + if (!ioctl_trs_sqcq_alloc_ptr) { + XSCHED_ERR("Failed to find ioctl_trs_sqcq_alloc symbol\n"); + return -ENOENT; + } + XSCHED_INFO("Found ioctl_trs_sqcq_alloc at %p\n", ioctl_trs_sqcq_alloc_ptr); + } + + if (!ioctl_trs_sqcq_free_ptr) { + ioctl_trs_sqcq_free_ptr = (ioctl_trs_sqcq_handler_t)generic_kallsyms_lookup_name("ioctl_trs_sqcq_free"); + if (!ioctl_trs_sqcq_free_ptr) { + XSCHED_ERR("Failed to find ioctl_trs_sqcq_free symbol\n"); + return -ENOENT; + } + XSCHED_INFO("Found ioctl_trs_sqcq_free at %p\n", ioctl_trs_sqcq_free_ptr); + } + + if (!ioctl_trs_sqcq_recv_ptr) { + ioctl_trs_sqcq_recv_ptr = (ioctl_trs_sqcq_handler_t)generic_kallsyms_lookup_name("ioctl_trs_sqcq_recv"); + if 
(!ioctl_trs_sqcq_recv_ptr) {
+			XSCHED_ERR("Failed to find ioctl_trs_sqcq_recv symbol\n");
+			return -ENOENT;
+		}
+		XSCHED_INFO("Found ioctl_trs_sqcq_recv at %p\n", ioctl_trs_sqcq_recv_ptr);
+	}
+
+	if (!uda_dev_inst_get_ptr) {
+		uda_dev_inst_get_ptr = (uda_dev_inst_get_handler_t)generic_kallsyms_lookup_name("uda_dev_inst_get");
+		if (!uda_dev_inst_get_ptr) {
+			XSCHED_ERR("Failed to find uda_dev_inst_get symbol\n");
+			return -ENOENT;
+		}
+		XSCHED_INFO("Found uda_dev_inst_get at %p\n", uda_dev_inst_get_ptr);
+	}
+
+	if (!uda_dev_inst_put_ptr) {
+		uda_dev_inst_put_ptr = (uda_dev_inst_put_handler_t)generic_kallsyms_lookup_name("uda_dev_inst_put");
+		if (!uda_dev_inst_put_ptr) {
+			XSCHED_ERR("Failed to find uda_dev_inst_put symbol\n");
+			return -ENOENT;
+		}
+		XSCHED_INFO("Found uda_dev_inst_put at %p\n", uda_dev_inst_put_ptr);
+	}
+
+	if (!soc_subsys_get_num_ptr) {
+		soc_subsys_get_num_ptr = (soc_subsys_get_num_handler_t)generic_kallsyms_lookup_name("soc_resmng_subsys_get_num");
+		if (!soc_subsys_get_num_ptr) {
+			XSCHED_ERR("Failed to find soc_resmng_subsys_get_num symbol\n");
+			return -ENOENT;
+		}
+		XSCHED_INFO("Found soc_resmng_subsys_get_num at %p\n", soc_subsys_get_num_ptr);
+	}
+
+	return 0;
+}
+
+static void __exit ioctl_trs_sqcq_handler_free(void)
+{
+	ioctl_trs_sqcq_send_ptr = NULL;
+	ioctl_trs_sqcq_alloc_ptr = NULL;
+	ioctl_trs_sqcq_free_ptr = NULL;
+	ioctl_trs_sqcq_recv_ptr = NULL;
+	uda_dev_inst_get_ptr = NULL;
+	uda_dev_inst_put_ptr = NULL;
+	soc_subsys_get_num_ptr = NULL;
+}
+
+int xcu_populate(uint32_t dev_id);
+int xcu_depopulate(uint32_t dev_id);
+#endif /* _XSCHED_NPU_INTERFACE_H */

diff --git a/drivers/xsched/xsched/core.c b/drivers/xsched/xsched/core.c
new file mode 100644
index 000000000000..40ac19febd4d
--- /dev/null
+++ b/drivers/xsched/xsched/core.c
@@ -0,0 +1,686 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Core kernel scheduler code for XPU device
+ */
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/hashtable.h>
+#include <linux/delay.h>
+
+#include "xcu_group.h"
+#include "xsched.h"
+
+int num_active_xcu;
+static DEFINE_SPINLOCK(xcu_mgr_lock);
+
+struct xsched_cu *xsched_cu_mgr[XSCHED_NR_CUS];
+/* List of scheduling classes available */
+struct list_head xsched_class_list;
+
+static DEFINE_MUTEX(revmap_mutex);
+static DEFINE_HASHTABLE(ctx_revmap, XCU_HASH_ORDER);
+
+atomic_t pending_task_count = ATOMIC_INIT(0);
+
+static void put_prev_ctx(struct xsched_entity *xse)
+{
+	struct xsched_cu *xcu = xse->xcu;
+
+	lockdep_assert_held(&xcu->xcu_lock);
+	xse->class->put_prev_ctx(xse);
+	xse->last_exec_runtime = 0;
+	atomic_set(&xse->submitted_one_kick, 0);
+	XSCHED_DEBUG("Put current xse %d @ %s\n", xse->tgid, __func__);
+}
+
+static size_t select_work_def(struct xsched_cu *xcu, struct xsched_entity *xse)
+{
+	int kick_count, scheduled = 0, not_empty;
+	struct vstream_info *vs;
+	struct xcu_op_handler_params params;
+	struct vstream_metadata *vsm;
+
+	kick_count = atomic_read(&xse->kicks_pending_ctx_cnt);
+	XSCHED_DEBUG("Before decrement XSE kick_count=%d @ %s\n",
+		     kick_count, __func__);
+
+	if (kick_count == 0)
+		return 0;
+
+	do {
+		not_empty = 0;
+		for_each_vstream_in_ctx(vs, xse->ctx) {
+			spin_lock(&vs->stream_lock);
+			vsm = xsched_vsm_fetch_first(vs);
+			spin_unlock(&vs->stream_lock);
+			if (!vsm)
+				continue;
+			list_add_tail(&vsm->node, &xcu->vsm_list);
+			scheduled++;
+			xsched_dec_pending_kicks_xse(xse);
+			not_empty++;
+		}
+	} while ((scheduled < XSCHED_KICK_SLICE) && (not_empty));
+
+	/*
+	 * Iterate over all vstreams in context:
+ * Set wr_cqe bit in last computing task in vsm_list + */ + for_each_vstream_in_ctx(vs, xse->ctx) { + list_for_each_entry_reverse(vsm, &xcu->vsm_list, node) { + if (vsm->parent == vs) { + params.group = vsm->parent->xcu->group; + params.param_1 = &(int){SQE_SET_NOTIFY}; + params.param_2 = &vsm->sqe; + xcu_sqe_op(¶ms); + break; + } + } + } + + kick_count = atomic_read(&xse->kicks_pending_ctx_cnt); + XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n", + kick_count, __func__); + + xse->total_scheduled += scheduled; + return scheduled; +} + +static struct xsched_entity *__raw_pick_next_ctx(struct xsched_cu *xcu) +{ + const struct xsched_class *class; + struct xsched_entity *next = NULL; + size_t scheduled; + + lockdep_assert_held(&xcu->xcu_lock); + for_each_xsched_class(class) { + next = class->pick_next_ctx(xcu); + if (next) { + scheduled = class->select_work ? + class->select_work(xcu, next) : select_work_def(xcu, next); + + XSCHED_DEBUG("xse %d scheduled=%zu total=%zu @ %s\n", + next->tgid, scheduled, next->total_scheduled, __func__); + break; + } + } + + return next; +} + +void enqueue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) +{ + lockdep_assert_held(&xcu->xcu_lock); + + if (xse_integrity_check(xse)) + return; + + if (!xse->on_rq) { + xse->on_rq = true; + xse->class->enqueue_ctx(xse, xcu); + } +} + +void dequeue_ctx(struct xsched_entity *xse, struct xsched_cu *xcu) +{ + lockdep_assert_held(&xcu->xcu_lock); + + if (xse_integrity_check(xse)) + return; + + if (xse->on_rq) { + xse->class->dequeue_ctx(xse); + xse->on_rq = false; + } +} + +static int delete_ctx(struct xsched_context *ctx) +{ + struct xsched_cu *xcu = ctx->xse.xcu; + struct xsched_entity *curr_xse = xcu->xrq.curr_xse; + struct xsched_entity *xse = &ctx->xse; + + if (xse_integrity_check(xse)) { + XSCHED_ERR("Fail to check xse integrity @ %s\n", __func__); + return -EINVAL; + } + + if (!xse->xcu) { + XSCHED_ERR("Try to delete ctx that is not attached to xcu @ %s\n", + __func__); + return -EINVAL; + } + + /* Wait till context has been submitted. */ + while (atomic_read(&xse->kicks_pending_ctx_cnt)) { + XSCHED_DEBUG("Deleting ctx %d, xse->kicks_pending_ctx_cnt=%d @ %s\n", + xse->tgid, atomic_read(&xse->kicks_pending_ctx_cnt), + __func__); + usleep_range(100, 200); + } + + if (atomic_read(&xse->kicks_pending_ctx_cnt)) { + XSCHED_ERR("Deleting ctx %d that has pending kicks left @ %s\n", + xse->tgid, __func__); + return -EINVAL; + } + + mutex_lock(&xcu->xcu_lock); + dequeue_ctx(xse, xcu); + if (curr_xse == xse) + xcu->xrq.curr_xse = NULL; + mutex_unlock(&xcu->xcu_lock); + XSCHED_DEBUG("Deleting ctx %d, pending kicks left=%d @ %s\n", xse->tgid, + atomic_read(&xse->kicks_pending_ctx_cnt), __func__); + + xse->class->xse_deinit(xse); + + return 0; +} + +/* Frees a given vstream and also frees and dequeues it's context + * if a given vstream is the last and only vstream attached to it's + * corresponding context object. 
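+ *
+ * Expected to run through the context refcount, i.e. something like
+ * kref_put(&ctx->kref, xsched_task_free) on the release path in
+ * xsched_ioctl.c (not part of this hunk).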
+ */ +void xsched_task_free(struct kref *kref) +{ + struct xsched_context *ctx; + vstream_info_t *vs, *tmp; + struct xsched_cu *xcu; + + ctx = container_of(kref, struct xsched_context, kref); + xcu = ctx->xse.xcu; + + mutex_lock(&xcu->ctx_list_lock); + /* Wait for XSE to finish submitting */ + delete_ctx(ctx); + list_for_each_entry_safe(vs, tmp, &ctx->vstream_list, ctx_node) { + list_del(&vs->ctx_node); + kfree(vs->data); + kfree(vs); + } + + list_del(&ctx->ctx_node); + --xcu->nr_ctx; + mutex_unlock(&xcu->ctx_list_lock); + + kfree(ctx); + atomic_dec(&pending_task_count); + xsched_module_put(); +} + +int ctx_bind_to_xcu(vstream_info_t *vstream_info, struct xsched_context *ctx) +{ + struct ctx_devid_revmap_data *revmap_data; + struct xsched_cu *xcu_found = NULL; + uint32_t type = XCU_TYPE_NPU; + + /* Find XCU history. */ + hash_for_each_possible(ctx_revmap, revmap_data, hash_node, + (unsigned long)ctx->dev_id) { + if (revmap_data && revmap_data->group) { + /* Bind ctx to group xcu.*/ + ctx->xse.xcu = revmap_data->group->xcu; + return 0; + } + } + + revmap_data = kzalloc(sizeof(struct ctx_devid_revmap_data), GFP_KERNEL); + if (revmap_data == NULL) { + XSCHED_ERR("Revmap_data is NULL @ %s\n", __func__); + return -ENOMEM; + } + + xcu_found = xcu_find(&type, ctx->dev_id, vstream_info->channel_id); + if (!xcu_found) { + kfree(revmap_data); + return -EINVAL; + } + + /* Bind ctx to an XCU from channel group. */ + revmap_data->group = xcu_found->group; + ctx->xse.xcu = xcu_found; + vstream_info->xcu = xcu_found; + revmap_data->dev_id = vstream_info->dev_id; + XSCHED_DEBUG("Ctx bind to xcu %u @ %s\n", xcu_found->id, __func__); + + hash_add(ctx_revmap, &revmap_data->hash_node, + (unsigned long)ctx->dev_id); + + return 0; +} + +int vstream_bind_to_xcu(vstream_info_t *vstream_info) +{ + struct xsched_cu *xcu_found = NULL; + uint32_t type = XCU_TYPE_NPU; + + xcu_found = xcu_find(&type, vstream_info->dev_id, vstream_info->channel_id); + if (!xcu_found) + return -EINVAL; + + /* Bind vstream to a xcu. */ + vstream_info->xcu = xcu_found; + vstream_info->dev_id = xcu_found->id; + XSCHED_DEBUG("XCU bound to a vstream: type=%u, dev_id=%u, chan_id=%u.\n", + type, vstream_info->dev_id, vstream_info->channel_id); + + return 0; +} + +struct xsched_cu *xcu_find( + uint32_t *type, uint32_t dev_id, uint32_t channel_id) +{ + struct xcu_group *group = NULL; + uint32_t local_type = *type; + + /* Find xcu by type. */ + group = xcu_group_find(xcu_group_root, local_type); + if (group == NULL) { + XSCHED_ERR("Fail to find type group.\n"); + return NULL; + } + + /* Find device id group. */ + group = xcu_group_find(group, dev_id); + if (group == NULL) { + XSCHED_ERR("Fail to find device group.\n"); + return NULL; + } + /* Find channel id group. 
*/ + group = xcu_group_find(group, channel_id); + if (group == NULL) { + XSCHED_ERR("Fail to find channel group.\n"); + return NULL; + } + + *type = local_type; + XSCHED_DEBUG("XCU found: type=%u, dev_id=%u, chan_id=%u.\n", + local_type, dev_id, channel_id); + + return group->xcu; +} + +int xsched_xse_set_class(struct xsched_entity *xse) +{ + struct xsched_class *sched = xsched_first_class; + + xse->class = sched; + return 0; +} + +int xsched_ctx_init_xse(struct xsched_context *ctx, struct vstream_info *vs) +{ + int err = 0; + struct xsched_entity *xse = &ctx->xse; + + atomic_set(&xse->kicks_pending_ctx_cnt, 0); + atomic_set(&xse->submitted_one_kick, 0); + + xse->total_scheduled = 0; + xse->total_submitted = 0; + xse->last_exec_runtime = 0; + + xse->fd = ctx->fd; + xse->tgid = ctx->tgid; + + err = ctx_bind_to_xcu(vs, ctx); + if (err) { + XSCHED_ERR( + "Couldn't find valid xcu for vstream %u dev_id %u @ %s\n", + vs->id, vs->dev_id, __func__); + return -EINVAL; + } + + xse->ctx = ctx; + BUG_ON(vs->xcu == NULL); + xse->xcu = vs->xcu; + + err = xsched_xse_set_class(xse); + if (err) { + XSCHED_ERR("Fail to set xse class @ %s\n", __func__); + return err; + } + xse->class->xse_init(xse); + + WRITE_ONCE(xse->on_rq, false); + + spin_lock_init(&xse->xse_lock); + return err; +} + +static void submit_kick(struct vstream_metadata *vsm) +{ + struct vstream_info *vs = vsm->parent; + struct xcu_op_handler_params params; + + params.group = vs->xcu->group; + params.fd = vs->fd; + params.param_1 = &vs->id; + params.param_2 = &vs->channel_id; + params.param_3 = vsm->sqe; + params.param_4 = &vsm->sqe_num; + params.param_5 = &vsm->timeout; + params.param_6 = &vs->sqcq_type; + params.param_7 = vs->drv_ctx; + params.param_8 = &vs->logic_vcq_id; + + /* Send vstream on a device for processing. */ + if (xcu_run(¶ms) != 0) + XSCHED_ERR( + "Fail to send Vstream id %u tasks to a device for processing.\n", + vs->id); + + XSCHED_DEBUG("Vstream id %u submit vsm: sq_tail %u\n", vs->id, vsm->sq_tail); +} + +static void submit_wait(struct vstream_metadata *vsm) +{ + struct vstream_info *vs = vsm->parent; + struct xcu_op_handler_params params; + /* Wait timeout in ms. */ + int32_t timeout = 500; + + params.group = vs->xcu->group; + params.param_1 = &vs->channel_id; + params.param_2 = &vs->logic_vcq_id; + params.param_3 = &vs->user_stream_id; + params.param_4 = &vsm->sqe; + params.param_5 = vsm->cqe; + params.param_6 = vs->drv_ctx; + params.param_7 = &timeout; + + /* Wait for a device to complete processing. 
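+	 * xcu_wait() dispatches to trs_xsched_ctx_wait(), which blocks
+	 * on the logic CQ until a report arrives or the 500 ms timeout
+	 * set above expires.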
*/ + if (xcu_wait(¶ms)) + XSCHED_WARN("Fail to wait Vstream id %u tasks, logic_cq_id %u.\n", + vs->id, vs->logic_vcq_id); + + XSCHED_DEBUG("Vstream id %u wait finish, logic_cq_id %u\n", + vs->id, vs->logic_vcq_id); +} + +static int __xsched_submit(struct xsched_cu *xcu, struct xsched_entity *xse) +{ + struct vstream_metadata *vsm, *tmp; + int submitted = 0; + long submit_exec_time = 0; + ktime_t t_start = 0; + struct xcu_op_handler_params params; + + XSCHED_DEBUG("%s called for xse %d on xcu %u\n", + __func__, xse->tgid, xcu->id); + list_for_each_entry_safe(vsm, tmp, &xcu->vsm_list, node) { + submit_kick(vsm); + XSCHED_DEBUG("Xse %d vsm %u sched_delay: %lld ns\n", + xse->tgid, vsm->sq_id, ktime_to_ns(ktime_sub(ktime_get(), vsm->add_time))); + + params.group = vsm->parent->xcu->group; + params.param_1 = &(int){SQE_IS_NOTIFY}; + params.param_2 = &vsm->sqe; + if (xcu_sqe_op(¶ms)) { + mutex_unlock(&xcu->xcu_lock); + t_start = ktime_get(); + submit_wait(vsm); + submit_exec_time += ktime_to_ns(ktime_sub(ktime_get(), t_start)); + mutex_lock(&xcu->xcu_lock); + } + submitted++; + list_del(&vsm->node); + kfree(vsm); + } + + xse->last_exec_runtime += submit_exec_time; + xse->total_submitted += submitted; + atomic_add(submitted, &xse->submitted_one_kick); + INIT_LIST_HEAD(&xcu->vsm_list); + XSCHED_DEBUG("Xse %d submitted=%d total=%zu, exec_time=%ld @ %s\n", + xse->tgid, submitted, xse->total_submitted, + submit_exec_time, __func__); + + return submitted; +} + +static inline bool should_preempt(struct xsched_entity *xse) +{ + return xse->class->check_preempt(xse); +} + +static int xsched_schedule(void *input_xcu) +{ + struct xsched_cu *xcu = input_xcu; + struct xsched_entity *curr_xse = NULL; + struct xsched_entity *next_xse = NULL; + + while (!kthread_should_stop()) { + mutex_unlock(&xcu->xcu_lock); + wait_event_interruptible(xcu->wq_xcu_idle, + xcu->xrq.rt.nr_running || kthread_should_stop()); + + mutex_lock(&xcu->xcu_lock); + if (kthread_should_stop()) { + mutex_unlock(&xcu->xcu_lock); + break; + } + + if (!xsched_check_pending_kicks_xcu(xcu)) { + XSCHED_WARN("%s: No pending kicks on xcu %u\n", __func__, xcu->id); + continue; + } + + next_xse = __raw_pick_next_ctx(xcu); + if (!next_xse) { + XSCHED_WARN("%s: Couldn't find next xse on xcu %u\n", __func__, xcu->id); + continue; + } + + xcu->xrq.curr_xse = next_xse; + __xsched_submit(xcu, next_xse); + + curr_xse = xcu->xrq.curr_xse; + if (!curr_xse) + continue; + + /* if not deleted yet */ + put_prev_ctx(curr_xse); + if (!atomic_read(&curr_xse->kicks_pending_ctx_cnt)) + dequeue_ctx(curr_xse, xcu); + + xcu->xrq.curr_xse = NULL; + } + + return 0; +} + +/* Initializes all xsched XCU objects. + * Should only be called from xsched_xcu_register function. + */ +static int xsched_xcu_init(struct xsched_cu *xcu, struct xcu_group *group, + int xcu_id) +{ + struct xsched_class *sched; + int err; + + xcu->id = xcu_id; + xcu->state = XSCHED_XCU_NONE; + xcu->group = group; + xcu->nr_ctx = 0; + + atomic_set(&xcu->pending_kicks, 0); + INIT_LIST_HEAD(&xcu->vsm_list); + INIT_LIST_HEAD(&xcu->ctx_list); + init_waitqueue_head(&xcu->wq_xcu_idle); + mutex_init(&xcu->xcu_lock); + mutex_init(&xcu->ctx_list_lock); + mutex_init(&xcu->vs_array_lock); + + /* Initialize current XCU's runqueue. */ + for_each_xsched_class(sched) { + sched->rq_init(xcu); + } + + xcu->xrq.curr_xse = NULL; + + /* This worker should set XCU to XSCHED_XCU_WAIT_IDLE. + * If after initialization XCU still has XSCHED_XCU_NONE + * status then we can assume that there was a problem + * with XCU kthread job. 
+ */ + xcu->worker = kthread_run(xsched_schedule, xcu, "xcu_%u", xcu->id); + if (IS_ERR(xcu->worker)) { + err = PTR_ERR(xcu->worker); + xcu->worker = NULL; + return err; + } + return 0; +} + +/* Increment xcu id */ +static int nr_active_cu_inc(void) +{ + int cur_num = -1; + + spin_lock(&xcu_mgr_lock); + if (num_active_xcu >= XSCHED_NR_CUS) + goto out_unlock; + + cur_num = num_active_xcu; + num_active_xcu++; + +out_unlock: + spin_unlock(&xcu_mgr_lock); + return cur_num; +} + +static int nr_active_cu_dec(void) +{ + int cur_num = -1; + + spin_lock(&xcu_mgr_lock); + if (num_active_xcu < 0) + goto out_unlock; + + cur_num = num_active_xcu; + num_active_xcu--; + +out_unlock: + spin_unlock(&xcu_mgr_lock); + return cur_num; +} + +/* Adds vstream_metadata object to a specified vstream. */ +int xsched_vsm_add_tail(struct vstream_info *vs, vstream_args_t *arg) +{ + struct vstream_metadata *new_vsm; + + new_vsm = kmalloc(sizeof(struct vstream_metadata), GFP_ATOMIC); + if (!new_vsm) + return -ENOMEM; + + if (vs->kicks_count > MAX_VSTREAM_SIZE) { + kfree(new_vsm); + return -EBUSY; + } + + xsched_init_vsm(new_vsm, vs, arg); + list_add_tail(&new_vsm->node, &vs->metadata_list); + new_vsm->add_time = ktime_get(); + vs->kicks_count += 1; + + return 0; +} + +/* Fetch the first vstream metadata from vstream metadata list + * and removes it from that list. Returned vstream metadata pointer + * to be freed after. + */ +struct vstream_metadata *xsched_vsm_fetch_first(struct vstream_info *vs) +{ + struct vstream_metadata *vsm; + + if (list_empty(&vs->metadata_list)) + return NULL; + + vsm = list_first_entry(&vs->metadata_list, struct vstream_metadata, node); + if (!vsm) + return NULL; + + list_del(&vsm->node); + if (vs->kicks_count > 0) + vs->kicks_count -= 1; + + return vsm; +} + +static void xsched_register_sched_class(struct xsched_class *sched) +{ + list_add(&sched->node, &xsched_class_list); +} + +/* + * Initialize and register xcu in xcu_manager array. + */ +int xsched_xcu_register(struct xcu_group *group, int phys_id) +{ + int xcu_cur_num, ret = 0; + struct xsched_cu *xcu; + + xcu_cur_num = nr_active_cu_inc(); + if (xcu_cur_num < 0) { + XSCHED_ERR("Number of present XCU's exceeds %d: %d.\n", + XSCHED_NR_CUS, num_active_xcu); + return -ENOSPC; + }; + + xcu = kzalloc(sizeof(struct xsched_cu), GFP_KERNEL); + if (!xcu) { + XSCHED_ERR("Fail to alloc xcu.\n"); + return -ENOMEM; + }; + + group->xcu = xcu; + xsched_cu_mgr[phys_id] = xcu; + + /* Init xcu's internals. 
*/ + ret = xsched_xcu_init(xcu, group, phys_id); + if (ret != 0) { + group->xcu = NULL; + xsched_cu_mgr[phys_id] = NULL; + kfree(xcu); + } + return ret; +} + +int xsched_xcu_unregister(struct xcu_group *group, int phys_id) +{ + struct xsched_cu *xcu; + + if (!group || !group->xcu) + return -EINVAL; + + if (nr_active_cu_dec() < 0) { + XSCHED_ERR("No active XCU\n"); + return -EPERM; + }; + + xcu = group->xcu; + mutex_lock(&xcu->xcu_lock); + wake_up_interruptible(&xcu->wq_xcu_idle); + mutex_unlock(&xcu->xcu_lock); + + group->xcu = NULL; + xsched_cu_mgr[phys_id] = NULL; + kthread_stop(xcu->worker); + xcu->worker = NULL; + kfree(xcu); + + return 0; +} + +int xsched_sched_init(void) +{ + INIT_LIST_HEAD(&xsched_class_list); + xsched_register_sched_class(&rt_xsched_class); + + return 0; +} diff --git a/drivers/xsched/xsched/rt.c b/drivers/xsched/xsched/rt.c new file mode 100644 index 000000000000..2a0015c3454d --- /dev/null +++ b/drivers/xsched/xsched/rt.c @@ -0,0 +1,382 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Real-Time Scheduling Class for XPU device + * + * Copyright (C) 2025-2026 Huawei Technologies Co., Ltd + * + * Author: Konstantin Meskhidze <konstantin.meskhidze@huawei.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#include <uapi/linux/sched/types.h> +#include <linux/kthread.h> +#include <linux/slab.h> +#include <linux/hashtable.h> +#include <linux/sched.h> + +#include "vstream.h" +#include "xsched.h" + +#define XSCHED_RT_TIMESLICE (10 * NSEC_PER_MSEC) +#define TGID_HASH_BITS 8 + +/* Mapping between tgid and priority */ +struct tgid_prio { + pid_t tgid; + int prio; + struct hlist_node hnode; +}; + +static DEFINE_HASHTABLE(tgid_prio_map, TGID_HASH_BITS); +static DEFINE_SPINLOCK(tgid_prio_lock); + +static int tgid_prio_insert(pid_t tgid, int prio) +{ + struct tgid_prio *new_map; + unsigned int hash_key; + + if (prio > XSE_PRIO_LOW || prio < XSE_PRIO_HIGH) { + XSCHED_ERR("Invalid priority\n"); + return -EINVAL; + } + + new_map = kzalloc(sizeof(struct tgid_prio), GFP_KERNEL); + if (!new_map) { + XSCHED_ERR("Fail to alloc mapping (tgid=%d) @ %s\n", + tgid, __func__); + return -ENOMEM; + } + + new_map->tgid = tgid; + new_map->prio = prio; + + hash_key = hash_32(tgid, TGID_HASH_BITS); + + spin_lock(&tgid_prio_lock); + hash_add_rcu(tgid_prio_map, &new_map->hnode, hash_key); + spin_unlock(&tgid_prio_lock); + + return 0; +} + +static struct tgid_prio *tgid_prio_find(pid_t tgid) +{ + struct tgid_prio *map = NULL; + unsigned int hash_key = hash_32(tgid, TGID_HASH_BITS); + + rcu_read_lock(); + hash_for_each_possible_rcu(tgid_prio_map, map, hnode, hash_key) { + if (map->tgid == tgid) + break; + } + rcu_read_unlock(); + return map; +} + +static void tgid_prio_delete(pid_t tgid) +{ + struct tgid_prio *map; + unsigned int hash_key = hash_32(tgid, TGID_HASH_BITS); + + spin_lock(&tgid_prio_lock); + hash_for_each_possible(tgid_prio_map, map, hnode, hash_key) { + if (map->tgid == tgid) { + hash_del_rcu(&map->hnode); + spin_unlock(&tgid_prio_lock); + kfree(map); + return; + } + } + spin_unlock(&tgid_prio_lock); +} + +void tgid_prio_cleanup(void) +{ + struct tgid_prio *map; + 
struct hlist_node *tmp;
+	int i;
+
+	spin_lock(&tgid_prio_lock);
+	hash_for_each_safe(tgid_prio_map, i, tmp, map, hnode) {
+		hash_del(&map->hnode);
+		kfree(map);
+	}
+	spin_unlock(&tgid_prio_lock);
+}
+
+static inline void
+xse_rt_add(struct xsched_entity *xse, struct xsched_cu *xcu)
+{
+	list_add_tail(&xse->rt.list_node, &xcu->xrq.rt.rq[xse->rt.prio]);
+}
+
+static inline void xse_rt_del(struct xsched_entity *xse)
+{
+	list_del_init(&xse->rt.list_node);
+}
+
+static inline void xse_rt_move_tail(struct xsched_entity *xse)
+{
+	struct xsched_cu *xcu = xse->xcu;
+
+	list_move_tail(&xse->rt.list_node, &xcu->xrq.rt.rq[xse->rt.prio]);
+}
+
+/* Increase RT runqueue total and per prio nr_running stat. */
+static inline void xrq_inc_nr_running(struct xsched_entity *xse,
+				      struct xsched_cu *xcu)
+{
+	xcu->xrq.rt.nr_running++;
+}
+
+/* Decrease RT runqueue total and per prio nr_running stat
+ * and warn if nr_running would drop below zero.
+ */
+static inline void xrq_dec_nr_running(struct xsched_entity *xse)
+{
+	struct xsched_cu *xcu = xse->xcu;
+
+	WARN_ON_ONCE(xcu->xrq.rt.nr_running == 0);
+	xcu->xrq.rt.nr_running--;
+}
+
+static void dequeue_ctx_rt(struct xsched_entity *xse)
+{
+	xse_rt_del(xse);
+	xrq_dec_nr_running(xse);
+}
+
+static void enqueue_ctx_rt(struct xsched_entity *xse, struct xsched_cu *xcu)
+{
+	xse_rt_add(xse, xcu);
+	xrq_inc_nr_running(xse, xcu);
+}
+
+static inline struct xsched_entity *xrq_next_xse(
+	struct xsched_cu *xcu, int prio)
+{
+	return list_first_entry(&xcu->xrq.rt.rq[prio], struct xsched_entity,
+				rt.list_node);
+}
+
+/* Return the next priority for pick_next_ctx taking into
+ * account if there are pending kicks on certain priority.
+ */
+static inline uint32_t get_next_prio_rt(struct xsched_rq *xrq)
+{
+	unsigned int curr_prio;
+
+	for_each_xse_prio(curr_prio) {
+		if (!list_empty(&xrq->rt.rq[curr_prio]))
+			return curr_prio;
+	}
+	return NR_XSE_PRIO;
+}
+
+static struct xsched_entity *pick_next_ctx_rt(struct xsched_cu *xcu)
+{
+	struct xsched_entity *result;
+	int next_prio;
+
+	next_prio = get_next_prio_rt(&xcu->xrq);
+	if (next_prio >= NR_XSE_PRIO) {
+		XSCHED_DEBUG("No pending kicks in RT class @ %s\n", __func__);
+		return NULL;
+	}
+
+	result = xrq_next_xse(xcu, next_prio);
+	if (!result) {
+		/* Bail out here instead of dereferencing NULL below. */
+		XSCHED_ERR("Next XSE not found @ %s\n", __func__);
+		return NULL;
+	}
+
+	XSCHED_DEBUG("Next XSE %d at prio %u @ %s\n", result->tgid, next_prio, __func__);
+	return result;
+}
+
+static void put_prev_ctx_rt(struct xsched_entity *xse)
+{
+	xse->rt.timeslice -= xse->last_exec_runtime;
+	XSCHED_DEBUG(
+		"Update XSE=%d timeslice=%lld, last_exec_time=%llu in RT class @ %s\n",
+		xse->tgid, ktime_to_ns(xse->rt.timeslice),
+		xse->last_exec_runtime, __func__);
+
+	if (xse->rt.timeslice <= 0) {
+		xse->rt.timeslice = XSCHED_RT_TIMESLICE;
+		XSCHED_DEBUG("Refill XSE=%d timeslice=%lld in RT class @ %s\n",
+			     xse->tgid, ktime_to_ns(xse->rt.timeslice), __func__);
+		xse_rt_move_tail(xse);
+	}
+}
+
+static bool check_preempt_ctx_rt(struct xsched_entity *xse)
+{
+	return true;
+}
+
+static size_t select_work_rt(struct xsched_cu *xcu, struct xsched_entity *xse)
+{
+	int kick_count, scheduled = 0;
+	struct vstream_info *vs;
+	struct vstream_metadata *vsm;
+	struct xcu_op_handler_params params;
+
+	kick_count = atomic_read(&xse->kicks_pending_ctx_cnt);
+	XSCHED_DEBUG("Before decrement XSE kick_count=%d @ %s\n",
+		     kick_count, __func__);
+
+	if (kick_count == 0)
+		return 0;
+
+	for_each_vstream_in_ctx(vs, xse->ctx) {
+		spin_lock(&vs->stream_lock);
+		while ((vsm = xsched_vsm_fetch_first(vs))) {
+			list_add_tail(&vsm->node, &xcu->vsm_list);
+			scheduled++;
+			xsched_dec_pending_kicks_xse(xse);
+		}
+		spin_unlock(&vs->stream_lock);
+	}
+
+	/*
+	 * Iterate over all vstreams in context:
+	 * Set wr_cqe bit in last computing task in vsm_list
+	 */
+	for_each_vstream_in_ctx(vs, xse->ctx) {
+		list_for_each_entry_reverse(vsm, &xcu->vsm_list, node) {
+			if (vsm->parent == vs) {
+				params.group = vsm->parent->xcu->group;
+				params.param_1 = &(int){SQE_SET_NOTIFY};
+				params.param_2 = &vsm->sqe;
+				xcu_sqe_op(&params);
+				break;
+			}
+		}
+	}
+
+	kick_count = atomic_read(&xse->kicks_pending_ctx_cnt);
+	XSCHED_DEBUG("After decrement XSE kick_count=%d @ %s\n",
+		     kick_count, __func__);
+
+	xse->total_scheduled += scheduled;
+	return scheduled;
+}
+
+void rq_init_rt(struct xsched_cu *xcu)
+{
+	int prio = 0;
+
+	xcu->xrq.rt.nr_running = 0;
+	for_each_xse_prio(prio) {
+		INIT_LIST_HEAD(&xcu->xrq.rt.rq[prio]);
+	}
+}
+
+void xse_init_rt(struct xsched_entity *xse)
+{
+	struct tgid_prio *map = tgid_prio_find(xse->tgid);
+
+	xse->rt.prio = (map) ? map->prio : XSE_PRIO_LOW;
+	XSCHED_DEBUG("Xse init: set priority=%d.\n", xse->rt.prio);
+	if (!map)
+		tgid_prio_insert(xse->tgid, xse->rt.prio);
+	xse->rt.timeslice = XSCHED_RT_TIMESLICE;
+	INIT_LIST_HEAD(&xse->rt.list_node);
+}
+
+void xse_deinit_rt(struct xsched_entity *xse)
+{
+	struct tgid_prio *map = tgid_prio_find(xse->tgid);
+
+	if (map) {
+		tgid_prio_delete(xse->tgid);
+		XSCHED_DEBUG("Map deleted: tgid=%d\n", xse->tgid);
+	}
+}
+
+struct xsched_class rt_xsched_class = {
+	.class_id = XSCHED_TYPE_RT,
+	.rq_init = rq_init_rt,
+	.xse_init = xse_init_rt,
+	.xse_deinit = xse_deinit_rt,
+	.dequeue_ctx = dequeue_ctx_rt,
+	.enqueue_ctx = enqueue_ctx_rt,
+	.pick_next_ctx = pick_next_ctx_rt,
+	.put_prev_ctx = put_prev_ctx_rt,
+	.check_preempt = check_preempt_ctx_rt
+};
+
+static pid_t convert_to_host_pid(pid_t tgid)
+{
+	struct pid *vpid;
+	pid_t host_pid;
+
+	if (tgid < -1)
+		return -1;
+
+	/* find_vpid() must be called under RCU. */
+	rcu_read_lock();
+	vpid = (tgid > 0) ? find_vpid(tgid) : find_vpid(current->pid);
+	if (!vpid) {
+		rcu_read_unlock();
+		return -1;
+	}
+
+	host_pid = pid_nr_ns(vpid, &init_pid_ns);
+	rcu_read_unlock();
+
+	return host_pid;
+}
+
+int xsched_rt_prio_set(pid_t tgid, unsigned int prio)
+{
+	unsigned int id;
+	int rt_prio, ret;
+	struct xsched_cu *xcu;
+	struct xsched_context *ctx;
+	struct xsched_entity *xse;
+	pid_t host_pid;
+
+	host_pid = convert_to_host_pid(tgid);
+	if (host_pid == -1)
+		return -EINVAL;
+
+	rt_prio = NR_XSE_PRIO - prio;
+	tgid_prio_delete(host_pid);
+	ret = tgid_prio_insert(host_pid, rt_prio);
+	if (ret)
+		return ret;
+
+	for_each_active_xcu(xcu, id) {
+		mutex_lock(&xcu->xcu_lock);
+		mutex_lock(&xcu->ctx_list_lock);
+
+		ctx = ctx_find_by_tgid_and_xcu(host_pid, xcu);
+		if (ctx) {
+			xse = &ctx->xse;
+			/* Store the same translated value that the tgid map
+			 * holds and that xse_init_rt() reads back.
+			 */
+			xse->rt.prio = rt_prio;
+			if (xse->on_rq) {
+				xse_rt_del(xse);
+				xse_rt_add(xse, xcu);
+			}
+		}
+
+		mutex_unlock(&xcu->ctx_list_lock);
+		mutex_unlock(&xcu->xcu_lock);
+	}
+
+	return 0;
+}
+
+int xsched_rt_prio_get(pid_t tgid)
+{
+	struct tgid_prio *map;
+	pid_t host_pid;
+
+	host_pid = convert_to_host_pid(tgid);
+	if (host_pid == -1)
+		return -EINVAL;
+
+	map = tgid_prio_find(host_pid);
+	if (!map)
+		return -EINVAL;
+
+	return NR_XSE_PRIO - map->prio;
+}
diff --git a/drivers/xsched/xsched/xsched.c b/drivers/xsched/xsched/xsched.c
new file mode 100644
index 000000000000..6b1208df2dab
--- /dev/null
+++ b/drivers/xsched/xsched/xsched.c
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/ioctl.h>
+#include <linux/kallsyms.h>
+#include <linux/atomic.h>
+
+#include "xsched_ioctl.h"
+#include "xcu_group.h"
+#include "xsched_npu_interface.h"
+#include "vstream.h"
+#include "xsched.h"
+
+#define DEVICE_NAME "xsched"
+#define CLASS_NAME "xsched_class"
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Huawei");
+MODULE_DESCRIPTION("XSched ko for XPU");
+
+static int major_number;
+static struct class *xsched_class;
+static struct device *xsched_dev;
+
+/* sysfs: /sys/class/xsched_class/xsched/pending_tasks */
+static ssize_t pending_tasks_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "%d\n", atomic_read(&pending_task_count));
+}
+
+static struct device_attribute dev_attr_pending_tasks = {
+	.attr = { .name = "pending_tasks", .mode = 0444 },
+	.show = pending_tasks_show,
+	.store = NULL,
+};
+
+struct ioctl_handler {
+	unsigned int cmd;
+	size_t arg_size;
+	int (*handler)(void *kern_arg);
+};
+
+static int handle_vstream_kick(void *arg)
+{
+	return xsched_kick((vstream_args_t *)arg);
+}
+
+static int handle_vstream_alloc(void *arg)
+{
+	return xsched_alloc((vstream_args_t *)arg);
+}
+
+static int handle_vstream_free(void *arg)
+{
+	return xsched_free((vstream_args_t *)arg);
+}
+
+static int handle_priority_set(void *arg)
+{
+	struct priority_args *args = (struct priority_args *)arg;
+
+	return xsched_rt_prio_set(args->pid, args->sched_priority);
+}
+
+static int handle_priority_get(void *arg)
+{
+	struct priority_args *args = (struct priority_args *)arg;
+	int ret = xsched_rt_prio_get(args->pid);
+
+	if (ret < 0)
+		return ret;
+
+	args->sched_priority = ret;
+	return 0;
+}
+
+static const struct ioctl_handler handlers[] = {
+	{XSCHED_ALLOC, sizeof(vstream_args_t), handle_vstream_alloc},
+	{XSCHED_FREE, sizeof(vstream_args_t), handle_vstream_free},
+	{XSCHED_KICK, sizeof(vstream_args_t), handle_vstream_kick},
+	{XSCHED_SET_PRIO, sizeof(struct priority_args), handle_priority_set},
+	{XSCHED_GET_PRIO, sizeof(struct priority_args), handle_priority_get},
+};
+
+static long xsched_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	const struct ioctl_handler *handler = NULL;
+	void *kern_arg = NULL;
+	void __user *user_arg = (void __user *)arg;
+	size_t arg_size = _IOC_SIZE(cmd);
+	int res, i;
+
+	if (_IOC_TYPE(cmd) != XSCHED_MAGIC) {
+		XSCHED_ERR("Invalid magic: %u (need %u)\n", _IOC_TYPE(cmd), XSCHED_MAGIC);
+		return -ENOTTY;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(handlers); i++) {
+		if (handlers[i].cmd == cmd) {
+			handler = &handlers[i];
+			break;
+		}
+	}
+
+	if (!handler) {
+		XSCHED_ERR("Unknown IOCTL command: 0x%x\n", cmd);
+		return -ENOTTY;
+	}
+
+	if (arg_size != handler->arg_size)
+		return -EINVAL;
+
+	if (!access_ok(user_arg, arg_size)) {
+		XSCHED_ERR("User pointer not accessible: %p\n", user_arg);
+		return -EFAULT;
+	}
+
+	kern_arg = kmalloc(arg_size, GFP_KERNEL);
+	if (!kern_arg) {
+		XSCHED_ERR("Failed to allocate kernel argument buffer\n");
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(kern_arg, user_arg, arg_size)) {
+		XSCHED_ERR("Fail to copy_from_user\n");
+		kfree(kern_arg);
+		return -EFAULT;
+	}
+
+	res = handler->handler(kern_arg);
+
+	if (copy_to_user(user_arg, kern_arg, arg_size)) {
+		XSCHED_ERR("Fail to copy_to_user\n");
+		res = -EFAULT;
+	}
+
+	kfree(kern_arg);
+	return res;
+}
+
+static int xsched_open(struct inode *inode, struct file *file)
+{
+	file->private_data = NULL;
+	return 0;
+}
+
+static int xsched_release(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
+static const struct file_operations xsched_fops = {
+	.owner = THIS_MODULE,
+	.open = xsched_open,
+	.release = xsched_release,
+	.unlocked_ioctl = xsched_ioctl,
+};
+
+bool xsched_try_module_get(void)
+{
+	return try_module_get(xsched_fops.owner);
+}
+
+void xsched_module_put(void)
+{
+	module_put(xsched_fops.owner);
+}
+
+static int __init xsched_init(void)
+{
+	int ret = 0;
+	unsigned int dev_id = 0;
+
+	ret = syms_lookup_init();
+	if (ret < 0) {
+		XSCHED_ERR("Failed to initialize symbol lookup\n");
+		return ret;
+	}
+
+	ret = ioctl_trs_sqcq_handler_find();
+	if (ret < 0) {
+		XSCHED_ERR("Failed to initialize NPU handlers\n");
+		goto err_syms;
+	}
+
+	xsched_sched_init();
+
+	for (dev_id = 0; dev_id < XSCHED_NR_CUS; dev_id++) {
+		ret = xcu_populate(dev_id);
+		if (ret)
+			goto err_xcu;
+	}
+
+	major_number = register_chrdev(0, DEVICE_NAME, &xsched_fops);
+	if (major_number < 0) {
+		XSCHED_ERR("Failed to register a major number\n");
+		ret = major_number;
+		goto err_xcu;
+	}
+
+	xsched_class = class_create(THIS_MODULE, CLASS_NAME);
+	if (IS_ERR(xsched_class)) {
+		XSCHED_ERR("Failed to register device class\n");
+		ret = PTR_ERR(xsched_class);
+		goto err_cdev;
+	}
+
+	xsched_dev = device_create(xsched_class, NULL, MKDEV(major_number, 0), NULL, DEVICE_NAME);
+	if (IS_ERR(xsched_dev)) {
+		XSCHED_ERR("Failed to create the device\n");
+		ret = PTR_ERR(xsched_dev);
+		goto err_class;
+	}
+
+	ret = sysfs_create_file(&xsched_dev->kobj, &dev_attr_pending_tasks.attr);
+	if (ret) {
+		XSCHED_WARN("Failed to create pending_tasks attribute\n");
+		goto err_device;
+	}
+
+	XSCHED_INFO("Device created successfully with major: %d\n", major_number);
+	return 0;
+
+err_device:
+	device_destroy(xsched_class, MKDEV(major_number, 0));
+err_class:
+	class_destroy(xsched_class);
+err_cdev:
+	unregister_chrdev(major_number, DEVICE_NAME);
+err_xcu:
+	/* dev_id is unsigned, so count down with while (dev_id--) rather
+	 * than an always-true dev_id >= 0 test.
+	 */
+	while (dev_id--)
+		xcu_depopulate(dev_id);
+	xcu_group_free(xcu_group_root);
+	xcu_group_root = NULL;
+
ioctl_trs_sqcq_handler_free(); +err_syms: + syms_lookup_exit(); + return ret; +} + +static void __exit xsched_exit(void) +{ + unsigned int dev_id = 0; + + tgid_prio_cleanup(); + sysfs_remove_file(&xsched_dev->kobj, &dev_attr_pending_tasks.attr); + device_destroy(xsched_class, MKDEV(major_number, 0)); + class_destroy(xsched_class); + unregister_chrdev(major_number, DEVICE_NAME); + + for (dev_id = 0; dev_id < XSCHED_NR_CUS; dev_id++) + xcu_depopulate(dev_id); + + xcu_group_free(xcu_group_root); + xcu_group_root = NULL; + + ioctl_trs_sqcq_handler_free(); + syms_lookup_exit(); + XSCHED_INFO("Module unloaded\n"); +} + +module_init(xsched_init); +module_exit(xsched_exit); diff --git a/drivers/xsched/xsched/xsched_ioctl.c b/drivers/xsched/xsched/xsched_ioctl.c new file mode 100644 index 000000000000..c765e2ead420 --- /dev/null +++ b/drivers/xsched/xsched/xsched_ioctl.c @@ -0,0 +1,466 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/anon_inodes.h> +#include <linux/delay.h> +#include <linux/uaccess.h> +#include <linux/kref.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/slab.h> +#include <linux/fcntl.h> + +#include "xsched_ioctl.h" +#include "vstream.h" +#include "xcu_group.h" +#include "xsched.h" + +static int vstream_del(vstream_info_t *vstream, uint32_t vstream_id); +static int vstream_file_release(struct inode *inode, struct file *file); +static const struct file_operations vstreamfd_fops = { + .release = vstream_file_release, +}; + +static inline struct file *vstream_file_get(int vs_fd) +{ + return fget(vs_fd); +} + +static inline void vstream_file_put(struct file *vstream_file) +{ + fput(vstream_file); +} + +static int vstream_file_create(struct vstream_info *vs) +{ + return anon_inode_getfd("[vstreamfd]", &vstreamfd_fops, vs, + O_RDWR | O_CLOEXEC | O_NONBLOCK); +} + +static int vstream_destroy(vstream_info_t *vstream) +{ + int err; + struct xsched_context *ctx = NULL; + struct xsched_entity *xse = NULL; + + err = vstream_del(vstream, vstream->id); + if (err) + return err; + + xse = &vstream->ctx->xse; + ctx = vstream->ctx; + kref_put(&ctx->kref, xsched_task_free); + + return 0; +} + +static int vstream_file_release(struct inode *inode, struct file *file) +{ + vstream_info_t *vstream; + (void) inode; + + if (!file->private_data) + return 0; + + vstream = file->private_data; + return vstream_destroy(vstream); +} + +static void init_xsched_ctx(struct xsched_context *ctx, + const struct vstream_info *vs) +{ + ctx->tgid = vs->tgid; + ctx->fd = vs->fd; + ctx->dev_id = vs->dev_id; + kref_init(&ctx->kref); + + INIT_LIST_HEAD(&ctx->vstream_list); + INIT_LIST_HEAD(&ctx->ctx_node); + + spin_lock_init(&ctx->ctx_lock); + mutex_init(&ctx->ctx_mutex); +} + +/* Allocates a new xsched_context if a new vstream_info is bound + * to a device that no other vstream that is currently present + * is bound to. 
+ */
+static int alloc_ctx_from_vstream(struct vstream_info *vstream_info,
+				  struct xsched_context **ctx)
+{
+	struct xsched_cu *xcu = vstream_info->xcu;
+	int ret = 0;
+
+	if (!xsched_try_module_get())
+		return -ENODEV;
+
+	*ctx = ctx_find_by_tgid_and_xcu(vstream_info->tgid, xcu);
+	if (*ctx)
+		goto out_module_put;
+
+	*ctx = kzalloc(sizeof(struct xsched_context), GFP_KERNEL);
+	if (!*ctx) {
+		XSCHED_ERR("Fail to alloc xsched context (tgid=%d) @ %s\n",
+			   vstream_info->tgid, __func__);
+		ret = -ENOMEM;
+		goto out_module_put;
+	}
+
+	init_xsched_ctx(*ctx, vstream_info);
+
+	ret = xsched_ctx_init_xse(*ctx, vstream_info);
+	if (ret != 0) {
+		XSCHED_ERR("Fail to initialize XSE for context @ %s\n", __func__);
+		kfree(*ctx);
+		goto out_module_put;
+	}
+	list_add(&(*ctx)->ctx_node, &xcu->ctx_list);
+	++xcu->nr_ctx;
+	atomic_inc(&pending_task_count);
+
+	return 0;
+
+out_module_put:
+	xsched_module_put();
+	return ret;
+}
+
+/* Binds a new vstream_info object to a corresponding xsched context. */
+static int vstream_bind_to_ctx(struct vstream_info *vs)
+{
+	struct xsched_context *ctx = NULL;
+	struct xsched_cu *xcu = vs->xcu;
+	int err = 0;
+
+	mutex_lock(&xcu->ctx_list_lock);
+	ctx = ctx_find_by_tgid_and_xcu(vs->tgid, xcu);
+	if (ctx) {
+		XSCHED_DEBUG("Ctx %d found @ %s\n", vs->tgid, __func__);
+		kref_get(&ctx->kref);
+	} else {
+		err = alloc_ctx_from_vstream(vs, &ctx);
+		if (err)
+			goto out_err;
+	}
+
+	vs->ctx = ctx;
+	list_add(&vs->ctx_node, &vs->ctx->vstream_list);
+
+out_err:
+	mutex_unlock(&xcu->ctx_list_lock);
+	return err;
+}
+
+static vstream_info_t *vstream_create(struct vstream_args *arg)
+{
+	struct vstream_info *vstream = NULL;
+
+	vstream = kzalloc(sizeof(vstream_info_t), GFP_KERNEL);
+	if (!vstream) {
+		XSCHED_ERR("Failed to allocate vstream.\n");
+		return NULL;
+	}
+
+	vstream->dev_id = arg->dev_id;
+	vstream->channel_id = arg->channel_id;
+	vstream->kicks_count = 0;
+	vstream->xcu = NULL;
+
+	INIT_LIST_HEAD(&vstream->ctx_node);
+	INIT_LIST_HEAD(&vstream->xcu_node);
+	INIT_LIST_HEAD(&vstream->metadata_list);
+
+	spin_lock_init(&vstream->stream_lock);
+
+	return vstream;
+}
+
+static int vstream_add(vstream_info_t *vstream, uint32_t id)
+{
+	int err = 0;
+	struct xsched_cu *xcu = vstream->xcu;
+
+	if (id >= MAX_VSTREAM_NUM) {
+		XSCHED_ERR("Vstream id=%u out of range @ %s.\n",
+			   id, __func__);
+		return -EINVAL;
+	}
+
+	mutex_lock(&xcu->vs_array_lock);
+	if (xcu->vs_array[id] != NULL) {
+		XSCHED_ERR("Vstream id=%u cell is busy.\n", id);
+		err = -EINVAL;
+		goto out_err;
+	}
+	xcu->vs_array[id] = vstream;
+
+out_err:
+	mutex_unlock(&xcu->vs_array_lock);
+	return err;
+}
+
+static int vstream_del(vstream_info_t *vstream, uint32_t vstream_id)
+{
+	struct xsched_cu *xcu = vstream->xcu;
+
+	if (vstream_id >= MAX_VSTREAM_NUM) {
+		XSCHED_ERR("Vstream id=%u out of range @ %s.\n",
+			   vstream_id, __func__);
+		return -EINVAL;
+	}
+
+	mutex_lock(&xcu->vs_array_lock);
+	xcu->vs_array[vstream_id] = NULL;
+	mutex_unlock(&xcu->vs_array_lock);
+	return 0;
+}
+
+static vstream_info_t *vstream_get(struct xsched_cu *xcu, uint32_t vstream_id)
+{
+	vstream_info_t *vstream = NULL;
+
+	if (vstream_id >= MAX_VSTREAM_NUM) {
+		XSCHED_ERR("Vstream id=%u out of range @ %s.\n",
+			   vstream_id, __func__);
+		return NULL;
+	}
+
+	mutex_lock(&xcu->vs_array_lock);
+	vstream = xcu->vs_array[vstream_id];
+	mutex_unlock(&xcu->vs_array_lock);
+
+	return vstream;
+}
+
+static vstream_info_t *
+vstream_get_by_user_stream_id(struct xsched_cu *xcu, uint32_t user_stream_id)
+{
+	int id;
+	/* A non-static local: a static pointer here would hand back a stale
+	 * vstream from a previous call whenever no id matches.
+	 */
+	vstream_info_t *ret = NULL;
+
+	mutex_lock(&xcu->vs_array_lock);
+	for (id = 0; id < MAX_VSTREAM_NUM; id++) {
+		if (xcu->vs_array[id] != NULL &&
+		    xcu->vs_array[id]->user_stream_id == user_stream_id) {
+			ret = xcu->vs_array[id];
+			break;
+		}
+	}
+	mutex_unlock(&xcu->vs_array_lock);
+	return ret;
+}
+
+static int sqcq_alloc(struct vstream_args *arg)
+{
+	vstream_alloc_args_t *va_args = &arg->va_args;
+	struct xsched_context *ctx = NULL;
+	struct xcu_op_handler_params params;
+	uint32_t logic_cq_id = 0;
+	vstream_info_t *vstream;
+	int ret = 0;
+	uint32_t tgid = 0;
+	uint32_t cq_id = 0;
+	uint32_t sq_id = 0;
+
+	vstream = vstream_create(arg);
+	if (!vstream)
+		return -ENOMEM;
+
+	vstream->fd = arg->fd;
+	vstream->task_type = arg->task_type;
+
+	ret = vstream_bind_to_xcu(vstream);
+	if (ret) {
+		ret = -EINVAL;
+		goto out_err_vstream_free;
+	}
+
+	/* Allocates vstream's SQ and CQ memory on a XCU for processing. */
+	params.group = vstream->xcu->group;
+	params.fd = arg->fd;
+	params.payload = arg->payload;
+	params.param_1 = &tgid;
+	params.param_2 = &sq_id;
+	params.param_3 = &cq_id;
+	params.param_4 = &logic_cq_id;
+	ret = xcu_alloc(&params);
+	if (ret) {
+		XSCHED_ERR("Fail to allocate SQ/CQ memory to a vstream.\n");
+		goto out_err_vstream_free;
+	}
+
+	vstream->drv_ctx = params.param_5;
+	vstream->id = sq_id;
+	vstream->vcq_id = cq_id;
+	vstream->logic_vcq_id = logic_cq_id;
+	vstream->user_stream_id = va_args->user_stream_id;
+	vstream->tgid = tgid;
+	vstream->sqcq_type = va_args->type;
+	ret = vstream_bind_to_ctx(vstream);
+	if (ret)
+		goto out_err_vstream_free;
+
+	ctx = vstream->ctx;
+	ret = vstream_file_create(vstream);
+	if (ret < 0) {
+		XSCHED_ERR("Fail to alloc anon inode for vstream %u @ %s\n",
+			   vstream->id, __func__);
+		goto out_err_vstream_free;
+	}
+	vstream->inode_fd = ret;
+
+	/* Add new vstream to array after allocating inode */
+	ret = vstream_add(vstream, vstream->id);
+	if (ret)
+		goto out_err_vstream_free;
+
+	arg->sq_id = sq_id;
+	arg->cq_id = cq_id;
+
+	return 0;
+
+out_err_vstream_free:
+	kfree(vstream);
+	return ret;
+}
+
+static int logic_cq_alloc(struct vstream_args *arg)
+{
+	int err = 0;
+	struct xcu_op_handler_params params;
+	vstream_info_t *vstream = NULL;
+	vstream_alloc_args_t *logic_cq_alloc_para = &arg->va_args;
+	struct xsched_cu *xcu_found = NULL;
+	uint32_t logic_cq_id = 0;
+	uint32_t type = XCU_TYPE_NPU;
+
+	xcu_found = xcu_find(&type, arg->dev_id, arg->channel_id);
+	if (!xcu_found)
+		return -EINVAL;
+
+	params.group = xcu_found->group;
+	params.fd = arg->fd;
+	params.payload = arg->payload;
+	params.param_1 = &logic_cq_id;
+	err = xcu_logic_alloc(&params);
+	if (err) {
+		XSCHED_ERR("Fail to alloc logic CQ memory to a vstream.\n");
+		return err;
+	}
+
+	vstream = vstream_get_by_user_stream_id(xcu_found,
+						logic_cq_alloc_para->user_stream_id);
+	if (vstream)
+		vstream->logic_vcq_id = logic_cq_id;
+
+	return 0;
+}
+
+int xsched_alloc(struct vstream_args *arg)
+{
+	vstream_alloc_args_t *va_args = &arg->va_args;
+	int ret;
+
+	if (!va_args->type)
+		ret = sqcq_alloc(arg);
+	else
+		ret = logic_cq_alloc(arg);
+
+	return ret;
+}
+
+int xsched_free(struct vstream_args *arg)
+{
+	struct file *vs_file;
+	struct xcu_op_handler_params params;
+	struct xsched_cu *xcu_found;
+	uint32_t vstream_id = arg->sq_id;
+	uint32_t type = XCU_TYPE_NPU;
+	vstream_info_t *vstream = NULL;
+	int err = 0;
+
+	xcu_found = xcu_find(&type, arg->dev_id, arg->channel_id);
+	if (!xcu_found)
+		return -EINVAL;
+
+	vstream = vstream_get(xcu_found, vstream_id);
+	if (!vstream) {
+		XSCHED_ERR("Fail to free NULL vstream, vstream id=%u\n", vstream_id);
+		return -EINVAL;
+	}
+
+	params.group = vstream->xcu->group;
+	params.fd = arg->fd;
+	params.payload = arg->payload;
+
+	vs_file = vstream_file_get(vstream->inode_fd);
+	if (vs_file) {
+		vs_file->private_data = NULL;
+		vstream_file_put(vs_file);
+	}
+
+	/* After vstream_get() succeeds, destroying the vstream cannot fail. */
+	vstream_destroy(vstream);
+	err = xcu_finish(&params);
+	if (err)
+		XSCHED_ERR("Fail to free vstream sqId=%u, cqId=%u.\n",
+			   arg->sq_id, arg->cq_id);
+
+	return err;
+}
+
+int xsched_kick(struct vstream_args *arg)
+{
+	vstream_info_t *vstream;
+	struct xsched_cu *xcu = NULL;
+	struct xsched_entity *xse;
+	int err = 0;
+	uint32_t vstream_id = arg->sq_id;
+	uint32_t type = XCU_TYPE_NPU;
+
+	xcu = xcu_find(&type, arg->dev_id, arg->channel_id);
+	if (!xcu)
+		return -EINVAL;
+
+	/* Get vstream. */
+	vstream = vstream_get(xcu, vstream_id);
+	if (!vstream || !vstream->ctx) {
+		XSCHED_ERR("Vstream NULL or doesn't have a context. "
+			   "vstream_id=%u, dev_id=%u\n", vstream_id, arg->dev_id);
+		return -EINVAL;
+	}
+
+	xse = &vstream->ctx->xse;
+	XSCHED_DEBUG("New kick on xse %d @ %s\n", xse->tgid, __func__);
+
+	do {
+		mutex_lock(&xcu->xcu_lock);
+		spin_lock(&vstream->stream_lock);
+
+		/* Adding kick metadata. */
+		err = xsched_vsm_add_tail(vstream, arg);
+		if (err == -EBUSY) {
+			spin_unlock(&vstream->stream_lock);
+			mutex_unlock(&xcu->xcu_lock);
+
+			/* Retry after a while */
+			usleep_range(100, 200);
+			continue;
+		}
+
+		/* Leave the loop with both locks held; they are dropped below. */
+		if (err)
+			break;
+
+		enqueue_ctx(xse, xcu);
+		/* Increase the total amount of kicks on the CU to which this
+		 * context is attached, based on sched_class.
+		 */
+		xsched_inc_pending_kicks_xse(&vstream->ctx->xse);
+	} while (err == -EBUSY);
+
+	spin_unlock(&vstream->stream_lock);
+	mutex_unlock(&xcu->xcu_lock);
+	if (!err)
+		wake_up_interruptible(&xcu->wq_xcu_idle);
+
+	return err;
+}
-- 
2.33.0
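For reference (not part of this patch), a minimal user-space caller for the
priority ioctls above. It assumes the XSCHED_SET_PRIO/XSCHED_GET_PRIO macros
and struct priority_args (fields pid and sched_priority, as consumed by
handle_priority_set/get) are exported to user space through xsched_ioctl.h,
and that udev exposes the device created above as /dev/xsched:

	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include "xsched_ioctl.h"	/* XSCHED_SET_PRIO, struct priority_args */

	int main(void)
	{
		struct priority_args args = {
			.pid = getpid(),	/* translated by convert_to_host_pid() */
			.sched_priority = 1,
		};
		int fd = open("/dev/xsched", O_RDWR);

		if (fd < 0)
			return 1;
		/* Set this process's XSE priority, then read it back. The
		 * kernel copies the whole struct back on return, so
		 * XSCHED_GET_PRIO reports its result in args.sched_priority.
		 */
		if (ioctl(fd, XSCHED_SET_PRIO, &args) == 0 &&
		    ioctl(fd, XSCHED_GET_PRIO, &args) == 0)
			printf("xsched priority: %u\n", args.sched_priority);
		close(fd);
		return 0;
	}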

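A note on the priority convention used by these ioctls: the value a caller
passes is "bigger means more urgent", while the RT runqueue in rt.c is scanned
from the lowest array index upward (assuming for_each_xse_prio() walks from
XSE_PRIO_HIGH toward XSE_PRIO_LOW), so a smaller stored index wins.
xsched_rt_prio_set() therefore stores the translated value NR_XSE_PRIO - prio
in the tgid map, and xsched_rt_prio_get() applies the same inversion on the
way out, so the pair round-trips. Taking NR_XSE_PRIO = 3 purely as an
illustration:

	set(prio = 2): map->prio = 3 - 2 = 1	/* scanned earlier, more urgent */
	set(prio = 1): map->prio = 3 - 1 = 2	/* scanned later */
	get():         3 - map->prio		/* returns the caller's value */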
From: Liu Kai <liukai284@huawei.com> Signed-off-by: Liu Kai <liukai284@huawei.com> --- ...01-Adapt-910b-npu-driver-for-xsched-ko.txt | 547 ++++++++++++++++++ 1 file changed, 547 insertions(+) create mode 100644 drivers/xsched/0001-Adapt-910b-npu-driver-for-xsched-ko.txt diff --git a/drivers/xsched/0001-Adapt-910b-npu-driver-for-xsched-ko.txt b/drivers/xsched/0001-Adapt-910b-npu-driver-for-xsched-ko.txt new file mode 100644 index 000000000000..dda9a40cdb90 --- /dev/null +++ b/drivers/xsched/0001-Adapt-910b-npu-driver-for-xsched-ko.txt @@ -0,0 +1,547 @@ +From 94ad6b1ebc620c3f60cabaedc386f2867daab433 Mon Sep 17 00:00:00 2001 +From: Liu Kai <liukai284@huawei.com> +Date: Wed, 17 Sep 2025 20:30:22 +0800 +Subject: [PATCH] Adapt 910b npu driver for xsched ko + +hulk inclusion +category: feature +bugzilla: https://gitee.com/openeuler/kernel/issues/IC5EHB + +----------------------------------------- + +Adapt 910b npu driver for xsched ko + +Signed-off-by: Hui Tang <tanghui20@huawei.com> +Signed-off-by: Konstantin Meskhidze <konstantin.meskhidze@huawei.com> +Signed-off-by: Liu Kai <liukai284@huawei.com> +--- + rms/trs_drv/chan/chan_rxtx.c | 2 +- + rms/trs_drv/trs_core/Makefile | 1 + + rms/trs_drv/trs_core/trs_fops.c | 138 +++++++++++++++++++++------- + rms/trs_drv/trs_core/trs_hw_sqcq.c | 3 +- + rms/trs_drv/trs_core/trs_hw_sqcq.h | 3 +- + rms/trs_drv/trs_core/trs_logic_cq.c | 100 ++++++++++++-------- + rms/trs_drv/trs_core/trs_logic_cq.h | 3 +- + rms/trs_drv/trs_core/trs_sqcq_map.c | 4 + + ts_agent/src/ts_agent_update_sqe.c | 6 ++ + 9 files changed, 186 insertions(+), 74 deletions(-) + +diff --git a/rms/trs_drv/chan/chan_rxtx.c b/rms/trs_drv/chan/chan_rxtx.c +index 1fc72da..1e4ef38 100755 +--- a/rms/trs_drv/chan/chan_rxtx.c ++++ b/rms/trs_drv/chan/chan_rxtx.c +@@ -156,7 +156,7 @@ static int trs_chan_fill_sqe(struct trs_chan *chan, u8 *sqe, int timeout, int ad + /* if using bar to r/w sqe, it should use stack value to store sqe to avoid waster time */ + sqe_addr = trs_chan_mem_is_local_mem(&sq->mem_attr) ? 
dst_addr : sqe_tmp; + +- if (addr_domain == CHAN_ADDR_DOMAIN_KERNEL) { ++ if (addr_domain == CHAN_ADDR_DOMAIN_KERNEL || !access_ok(sqe, sq->para.sqe_size)) { + memcpy_s(sqe_addr, sq->para.sqe_size, sqe, sq->para.sqe_size); + } else { + ret_cpy = copy_from_user(sqe_addr, sqe, sq->para.sqe_size); +diff --git a/rms/trs_drv/trs_core/Makefile b/rms/trs_drv/trs_core/Makefile +index e0a6a55..8d27ad9 100755 +--- a/rms/trs_drv/trs_core/Makefile ++++ b/rms/trs_drv/trs_core/Makefile +@@ -41,6 +41,7 @@ endif + + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/rms/trs_drv/inc + EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/rms/trs_drv/trs_core ++EXTRA_CFLAGS += -I$(DRIVER_SRC_BASE_DIR)/tsch/ + + obj-m += ascend_trs_core.o + ascend_trs_core-objs := trs_fops.o trs_ts_inst.o trs_proc.o trs_res_mng.o trs_sqcq_map.o trs_hw_sqcq.o trs_sw_sqcq.o trs_logic_cq.o trs_cb_sqcq.o trs_shm_sqcq.o trs_proc_fs.o +diff --git a/rms/trs_drv/trs_core/trs_fops.c b/rms/trs_drv/trs_core/trs_fops.c +index e5702d2..96ea16f 100755 +--- a/rms/trs_drv/trs_core/trs_fops.c ++++ b/rms/trs_drv/trs_core/trs_fops.c +@@ -33,6 +33,8 @@ + #include "trs_ts_inst.h" + #include "trs_cmd.h" + #include "trs_fops.h" ++#include "trs_logic_cq.h" ++#include "task_struct.h" + + static int (*const trs_res_id_handles[TRS_MAX_CMD])(struct trs_proc_ctx *proc_ctx, + struct trs_core_ts_inst *ts_inst, struct trs_res_id_para *para) = { +@@ -195,8 +197,17 @@ static int (*const trs_sqcq_alloc_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx + [DRV_CTRL_TYPE] = trs_sw_sqcq_alloc + }; + +-static int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++static bool is_xsched_used(void __user *ptr, int size) + { ++ if (access_ok(ptr, size)) ++ return false; ++ ++ return true; ++} ++ ++int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++{ ++ bool xsched_used = is_xsched_used((void __user *)arg, sizeof(struct halSqCqInputInfo)); + struct trs_core_ts_inst *ts_inst = NULL; + struct halSqCqInputInfo para; + struct trs_alloc_para *alloc_para = NULL; +@@ -204,10 +215,14 @@ static int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + struct trs_uio_info uio_info; + int ret; + +- ret = copy_from_user(¶, (struct halSqCqInputInfo __user *)arg, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. (ret=%d)\n", ret); +- return ret; ++ if (xsched_used) { ++ memcpy(¶, (struct halSqCqInputInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, (struct halSqCqInputInfo __user *)arg, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. (ret=%d)\n", ret); ++ return ret; ++ } + } + + alloc_para = get_alloc_para_addr(¶); +@@ -238,10 +253,18 @@ static int ioctl_trs_sqcq_alloc(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + trs_core_inst_put(ts_inst); + + if (ret == 0) { +- ret = copy_to_user((struct halSqCqInputInfo __user *)arg, ¶, sizeof(para)); +- ret |= copy_to_user((struct trs_uio_info __user *)user_uio_info, &uio_info, sizeof(uio_info)); +- if (ret != 0) { +- trs_err("Copy to user failed. (ret=%d)\n", ret); ++ if (xsched_used) { ++ memcpy((struct halSqCqInputInfo *)arg, ¶, sizeof(para)); ++ ret = copy_to_user((struct trs_uio_info __user *)user_uio_info, &uio_info, sizeof(uio_info)); ++ if (ret != 0) { ++ trs_err("Copy to user failed. 
(ret=%d)\n", ret); ++ } ++ } else { ++ ret = copy_to_user((struct halSqCqInputInfo __user *)arg, ¶, sizeof(para)); ++ ret |= copy_to_user((struct trs_uio_info __user *)user_uio_info, &uio_info, sizeof(uio_info)); ++ if (ret != 0) { ++ trs_err("Copy to user failed. (ret=%d)\n", ret); ++ } + } + } + +@@ -257,16 +280,20 @@ static int (*const trs_sqcq_free_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx + [DRV_CTRL_TYPE] = trs_sw_sqcq_free + }; + +-static int ioctl_trs_sqcq_free(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++int ioctl_trs_sqcq_free(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { + struct trs_core_ts_inst *ts_inst = NULL; + struct halSqCqFreeInfo para; + int ret; + +- ret = copy_from_user(¶, (struct halSqCqFreeInfo __user *)arg, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. (ret=%d)\n", ret); +- return ret; ++ if (is_xsched_used((void *)arg, sizeof(struct halSqCqFreeInfo))) { ++ memcpy(¶, (struct halSqCqFreeInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, (struct halSqCqFreeInfo __user *)arg, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. (ret=%d)\n", ret); ++ return ret; ++ } + } + + if ((para.type < 0) || (para.type >= DRV_INVALID_TYPE)) { +@@ -362,17 +389,26 @@ static int (*const trs_sqcq_send_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx + [DRV_CALLBACK_TYPE] = trs_cb_sqcq_send, + }; + +-static int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { + struct trs_core_ts_inst *ts_inst = NULL; ++ struct halTaskSendInfo *kern_para = (struct halTaskSendInfo *)arg; + struct halTaskSendInfo __user *usr_para = (struct halTaskSendInfo __user *)arg; + struct halTaskSendInfo para; ++ struct trs_logic_cq *logic_cq = NULL; ++ struct ts_stars_sqe_header *sqe_header = NULL; ++ uint32_t logic_cqId = cmd; ++ bool xsched_used = is_xsched_used((void *)arg, sizeof(struct halTaskSendInfo)); + int ret; + +- ret = copy_from_user(¶, usr_para, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. (ret=%d)\n", ret); +- return ret; ++ if (xsched_used) { ++ memcpy(¶, (struct halTaskSendInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, usr_para, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. (ret=%d)\n", ret); ++ return ret; ++ } + } + + if ((para.type < 0) || (para.type >= DRV_INVALID_TYPE) || (trs_sqcq_send_handles[para.type] == NULL) || +@@ -387,14 +423,39 @@ static int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + return -EINVAL; + } + ++ if (xsched_used) { ++ logic_cq = &ts_inst->logic_cq_ctx.cq[logic_cqId]; ++ if (logic_cq == NULL) { ++ trs_err("Invalid para. 
(logic_cqId=%u)\n", logic_cqId); ++ return -EINVAL; ++ } ++ ++ sqe_header = (struct ts_stars_sqe_header *)para.sqe_addr; ++ trs_debug("sqe_header->type=%u logic_cqId=%u stream_id=%u task_id=%u\n", ++ sqe_header->type, logic_cqId, sqe_header->rt_stream_id, sqe_header->task_id); ++ ++ if ((sqe_header->type == 0) && (sqe_header->wr_cqe == 1)) { ++ trs_debug("logic_cq->wakeup_num=%u\n", atomic_read(&logic_cq->wakeup_num)); ++ ++ if (atomic_read(&logic_cq->wakeup_num) > 0) { ++ atomic_dec(&logic_cq->wakeup_num); ++ trs_debug("logic_cq->wakeup_num=%u\n", atomic_read(&logic_cq->wakeup_num)); ++ } ++ } ++ } ++ + ret = trs_sqcq_send_handles[para.type](proc_ctx, ts_inst, ¶); + + trs_core_inst_put(ts_inst); + + if ((ret == 0) && (para.type == DRV_NORMAL_TYPE)) { +- ret = put_user(para.pos, &usr_para->pos); +- if (ret != 0) { +- trs_err("Put to user fail. (devid=%u; tsid=%u; sqId=%u)\n", proc_ctx->devid, para.tsId, para.sqId); ++ if (xsched_used) { ++ kern_para->pos = para.pos; ++ } else { ++ ret = put_user(para.pos, &usr_para->pos); ++ if (ret != 0) { ++ trs_err("Put to user fail. (devid=%u; tsid=%u; sqId=%u)\n", proc_ctx->devid, para.tsId, para.sqId); ++ } + } + } + +@@ -402,22 +463,28 @@ static int ioctl_trs_sqcq_send(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + } + + static int (*const trs_sqcq_recv_handles[DRV_INVALID_TYPE])(struct trs_proc_ctx *proc_ctx, +- struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para) = { ++ struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, bool is_xsched) = { + [DRV_NORMAL_TYPE] = trs_hw_sqcq_recv, + [DRV_LOGIC_TYPE] = trs_logic_cq_recv, + }; + +-static int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) ++int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, unsigned long arg) + { + struct trs_core_ts_inst *ts_inst = NULL; +- struct halReportRecvInfo *usr_para = (struct halReportRecvInfo __user *)arg; ++ struct halReportRecvInfo *kern_para = (struct halReportRecvInfo *)arg; ++ struct halReportRecvInfo __user *usr_para = (struct halReportRecvInfo __user *)arg; + struct halReportRecvInfo para; + int ret; ++ bool xsched_used = is_xsched_used((void *)arg, sizeof(struct halReportRecvInfo)); + +- ret = copy_from_user(¶, usr_para, sizeof(para)); +- if (ret != 0) { +- trs_err("Copy from user failed. (ret=%d)\n", ret); +- return ret; ++ if (xsched_used) { ++ memcpy(¶, (struct halReportRecvInfo *)arg, sizeof(para)); ++ } else { ++ ret = copy_from_user(¶, usr_para, sizeof(para)); ++ if (ret != 0) { ++ trs_err("Copy from user failed. (ret=%d)\n", ret); ++ return ret; ++ } + } + + if ((para.type < 0) || (para.type >= DRV_INVALID_TYPE) || (trs_sqcq_recv_handles[para.type] == NULL) || +@@ -432,11 +499,16 @@ static int ioctl_trs_sqcq_recv(struct trs_proc_ctx *proc_ctx, unsigned int cmd, + return -EINVAL; + } + +- ret = trs_sqcq_recv_handles[para.type](proc_ctx, ts_inst, ¶); ++ ret = trs_sqcq_recv_handles[para.type](proc_ctx, ts_inst, ¶, xsched_used); ++ + if (ret == 0) { +- ret = put_user(para.report_cqe_num, &usr_para->report_cqe_num); +- if (ret != 0) { +- trs_err("Put to user fail. (devid=%u; tsid=%u; cqId=%u)\n", proc_ctx->devid, para.tsId, para.cqId); ++ if (xsched_used) { ++ kern_para->report_cqe_num = para.report_cqe_num; ++ } else { ++ ret = put_user(para.report_cqe_num, &usr_para->report_cqe_num); ++ if (ret != 0) { ++ trs_err("Put to user fail. 
(devid=%u; tsid=%u; cqId=%u)\n", proc_ctx->devid, para.tsId, para.cqId); ++ } + } + } else { + u32 ts_status; +diff --git a/rms/trs_drv/trs_core/trs_hw_sqcq.c b/rms/trs_drv/trs_core/trs_hw_sqcq.c +index 825d603..10f3903 100755 +--- a/rms/trs_drv/trs_core/trs_hw_sqcq.c ++++ b/rms/trs_drv/trs_core/trs_hw_sqcq.c +@@ -1160,7 +1160,8 @@ int trs_hw_sqcq_send(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_ + return ret; + } + +-int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para) ++int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, ++ bool is_xsched) + { + struct trs_id_inst *inst = &ts_inst->inst; + struct trs_chan_recv_para recv_para; +diff --git a/rms/trs_drv/trs_core/trs_hw_sqcq.h b/rms/trs_drv/trs_core/trs_hw_sqcq.h +index b32cd64..b6affdf 100755 +--- a/rms/trs_drv/trs_core/trs_hw_sqcq.h ++++ b/rms/trs_drv/trs_core/trs_hw_sqcq.h +@@ -32,7 +32,8 @@ int trs_sqcq_config(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_i + int trs_sqcq_query(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halSqCqQueryInfo *para); + + int trs_hw_sqcq_send(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halTaskSendInfo *para); +-int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para); ++int trs_hw_sqcq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, ++ bool is_xsched); + void trs_proc_diable_sq_status(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, + int res_type, u32 res_id); + void trs_hw_sqcq_recycle(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, int res_type, u32 res_id); +diff --git a/rms/trs_drv/trs_core/trs_logic_cq.c b/rms/trs_drv/trs_core/trs_logic_cq.c +index d35b8d5..d1fbe63 100755 +--- a/rms/trs_drv/trs_core/trs_logic_cq.c ++++ b/rms/trs_drv/trs_core/trs_logic_cq.c +@@ -265,13 +265,15 @@ static bool trs_logic_is_cqe_match(struct trs_logic_cq *logic_cq, void *cqe, u32 + } + + static int trs_logic_cq_recv_para_check(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, +- struct halReportRecvInfo *para) ++ struct halReportRecvInfo *para, bool is_xsched) + { + struct trs_id_inst *inst = &ts_inst->inst; + +- if (!trs_proc_has_res(proc_ctx, ts_inst, TRS_LOGIC_CQ, para->cqId)) { +- trs_err("Not proc owner cq. (devid=%u; tsid=%u; logic_cqid=%u)\n", inst->devid, inst->tsid, para->cqId); +- return -EINVAL; ++ if (!is_xsched) { ++ if (!trs_proc_has_res(proc_ctx, ts_inst, TRS_LOGIC_CQ, para->cqId)) { ++ trs_err("Not proc owner cq. 
(devid=%u; tsid=%u; logic_cqid=%u)\n", inst->devid, inst->tsid, para->cqId); ++ return -EINVAL; ++ } + } + + if (((para->timeout < 0) && (para->timeout != -1)) || (para->cqe_num == 0) || (para->cqe_addr == NULL)) { +@@ -441,7 +443,7 @@ static void trs_logic_cq_eliminate_holes(struct trs_logic_cq *logic_cq, u32 star + } + + static int trs_logic_cq_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_logic_cq *logic_cq, +- struct halReportRecvInfo *para) ++ struct halReportRecvInfo *para, bool is_xsched) + { + u32 start, report_cnt, tail; + u32 rollback = 0; +@@ -463,11 +465,17 @@ static int trs_logic_cq_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_ + } + + trs_logic_cq_copy_trace("Logic Cq Recv Match", ts_inst, logic_cq, start, report_cnt); +- ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), +- (unsigned long)report_cnt * logic_cq->cqe_size); +- if (ret != 0) { +- trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); +- return ret; ++ ++ if (is_xsched) { ++ memcpy((void *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ } else { ++ ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ if (ret != 0) { ++ trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); ++ return ret; ++ } + } + + para->report_cqe_num = report_cnt; +@@ -480,7 +488,7 @@ static int trs_logic_cq_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_ + } + + static int trs_logic_cq_non_match_copy(struct trs_core_ts_inst *ts_inst, struct trs_logic_cq *logic_cq, +- struct halReportRecvInfo *para) ++ struct halReportRecvInfo *para, bool is_xsched) + { + u32 start, report_cnt, tail; + int ret; +@@ -490,11 +498,17 @@ static int trs_logic_cq_non_match_copy(struct trs_core_ts_inst *ts_inst, struct + report_cnt = (tail > start) ? 
tail - start : logic_cq->cq_depth - start; + + trs_logic_cq_copy_trace("Logic Cq Recv NoMatch", ts_inst, logic_cq, start, report_cnt); +- ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), +- (unsigned long)report_cnt * logic_cq->cqe_size); +- if (ret != 0) { +- trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); +- return ret; ++ ++ if (is_xsched) { ++ memcpy((void *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ } else { ++ ret = copy_to_user((void __user *)para->cqe_addr, logic_cq->addr + ((unsigned long)start * logic_cq->cqe_size), ++ (unsigned long)report_cnt * logic_cq->cqe_size); ++ if (ret != 0) { ++ trs_err("copy to user fail, cqid=%u report_cnt=%u\n", logic_cq->cqid, report_cnt); ++ return ret; ++ } + } + + para->report_cqe_num = report_cnt; +@@ -503,7 +517,7 @@ static int trs_logic_cq_non_match_copy(struct trs_core_ts_inst *ts_inst, struct + } + + static int trs_logic_cq_copy_report(struct trs_core_ts_inst *ts_inst, +- struct trs_logic_cq *logic_cq, struct halReportRecvInfo *para) ++ struct trs_logic_cq *logic_cq, struct halReportRecvInfo *para, bool is_xsched) + { + u32 version = para->res[0]; + int full_flag = 0; +@@ -522,9 +536,9 @@ static int trs_logic_cq_copy_report(struct trs_core_ts_inst *ts_inst, + } + + if (version == 1) { +- ret = trs_logic_cq_match_copy(ts_inst, logic_cq, para); // runtime new version ++ ret = trs_logic_cq_match_copy(ts_inst, logic_cq, para, is_xsched); // runtime new version + } else { +- ret = trs_logic_cq_non_match_copy(ts_inst, logic_cq, para); ++ ret = trs_logic_cq_non_match_copy(ts_inst, logic_cq, para, is_xsched); + } + if (ret != 0) { + return ret; +@@ -553,8 +567,8 @@ static int trs_logic_cq_wait_event(struct trs_logic_cq *logic_cq, int timeout) + long ret, tm; + + atomic_inc(&logic_cq->wait_thread_num); +- trs_debug("Wake wait start. (logic_cqid=%u; timeout=%d; wait_thread_num=%d)\n", +- logic_cq->cqid, timeout, atomic_read(&logic_cq->wait_thread_num)); ++ trs_debug("Wake wait start. (logic_cqid=%u; timeout=%d; wait_thread_num=%d, wakeup_num=%d)\n", ++ logic_cq->cqid, timeout, atomic_read(&logic_cq->wait_thread_num), atomic_read(&logic_cq->wakeup_num)); + + tm = (timeout == -1) ? 
MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies((u32)timeout); + (void)prepare_to_wait_exclusive(&logic_cq->wait_queue, &wq_entry, TASK_INTERRUPTIBLE); +@@ -592,12 +606,13 @@ static int trs_logic_cq_wait_event(struct trs_logic_cq *logic_cq, int timeout) + return ret; + } + +-int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para) ++int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, ++ bool is_xsched) + { + struct trs_logic_cq *logic_cq = NULL; + int ret; + +- ret = trs_logic_cq_recv_para_check(proc_ctx, ts_inst, para); ++ ret = trs_logic_cq_recv_para_check(proc_ctx, ts_inst, para, is_xsched); + if (ret != 0) { + return ret; + } +@@ -609,24 +624,35 @@ int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts + return trs_thread_bind_irq_wait(logic_cq, para->timeout); + } + trs_logic_cq_recv_trace("Recv start", ts_inst, para); +- do { +- mutex_lock(&logic_cq->mutex); +- ret = trs_logic_cq_copy_report(ts_inst, logic_cq, para); +- mutex_unlock(&logic_cq->mutex); +- if (ret == 0) { +- logic_cq->stat.recv++; +- trs_logic_cq_recv_trace("Recv finish", ts_inst, para); +- return ret; +- } + +- if (ret == -EAGAIN) { ++ if (is_xsched) { + if (para->timeout == 0) { +- para->report_cqe_num = 0; +- return 0; ++ para->report_cqe_num = 0; ++ return 0; + } + ret = trs_logic_cq_wait_event(logic_cq, para->timeout); +- } +- } while (ret >= 0); ++ trs_debug("Skip reading report for xsched, waiting for cq irq: logic_cqid=%u, timeout=%u, ret=%u)\n", ++ para->cqId, para->timeout, ret); ++ } else { ++ do { ++ mutex_lock(&logic_cq->mutex); ++ ret = trs_logic_cq_copy_report(ts_inst, logic_cq, para, is_xsched); ++ mutex_unlock(&logic_cq->mutex); ++ if (ret == 0) { ++ logic_cq->stat.recv++; ++ trs_logic_cq_recv_trace("Recv finish", ts_inst, para); ++ return ret; ++ } ++ ++ if (ret == -EAGAIN) { ++ if (para->timeout == 0) { ++ para->report_cqe_num = 0; ++ return 0; ++ } ++ ret = trs_logic_cq_wait_event(logic_cq, para->timeout); ++ } ++ } while (ret >= 0); ++ } + + return ret; + } +diff --git a/rms/trs_drv/trs_core/trs_logic_cq.h b/rms/trs_drv/trs_core/trs_logic_cq.h +index a45b110..b776b7f 100755 +--- a/rms/trs_drv/trs_core/trs_logic_cq.h ++++ b/rms/trs_drv/trs_core/trs_logic_cq.h +@@ -90,7 +90,8 @@ struct trs_core_ts_inst; + + int trs_logic_cq_alloc(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halSqCqInputInfo *para); + int trs_logic_cq_free(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halSqCqFreeInfo *para); +-int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para); ++int trs_logic_cq_recv(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst, struct halReportRecvInfo *para, ++ bool is_xsched); + + void trs_logic_set_cqe_version(struct trs_core_ts_inst *ts_inst, u32 logic_cqid, u32 cqe_verion); + int trs_logic_cq_enque(struct trs_core_ts_inst *ts_inst, u32 logic_cq_id, u32 stream_id, u32 task_id, void *cqe); +diff --git a/rms/trs_drv/trs_core/trs_sqcq_map.c b/rms/trs_drv/trs_core/trs_sqcq_map.c +index 8103d65..998ecfb 100755 +--- a/rms/trs_drv/trs_core/trs_sqcq_map.c ++++ b/rms/trs_drv/trs_core/trs_sqcq_map.c +@@ -305,6 +305,10 @@ int trs_sq_remap(struct trs_proc_ctx *proc_ctx, struct trs_core_ts_inst *ts_inst + int sq_reg_type = TRS_MAP_TYPE_REG; + int ret; + ++ ret = 0; ++ goto out; ++ ++ + if ((sq_info->sq_phy_addr == 0) || (sq_info->db_addr == 0) 
|| (uio_info->sq_que_addr == 0)) { + ret = 0; + goto out; +diff --git a/ts_agent/src/ts_agent_update_sqe.c b/ts_agent/src/ts_agent_update_sqe.c +index bb4e3b2..01fe60c 100755 +--- a/ts_agent/src/ts_agent_update_sqe.c ++++ b/ts_agent/src/ts_agent_update_sqe.c +@@ -1146,6 +1146,12 @@ static void cqe_set_drop_flag(ts_stars_cqe_t *cqe) + if (cqe->warn || (cqe->sqe_type == TS_STARS_SQE_TYPE_PCIE_DMA)) { + /* cqe has been processed in ts_agent, no need to send to runtime */ + cqe->drop_flag = 1U; ++ ts_agent_debug("cqe has been processed in ts_agent, no need to send to runtime, drop_flag=%u\n", cqe->drop_flag); ++ ++ /* no drop, xsched needs to proc cqe */ ++ cqe->drop_flag = 0U; ++ ts_agent_debug("send cqe to runtime/xsched anyway, drop_flag=%u\n", cqe->drop_flag); ++ + return; + } + cqe->drop_flag = 0U; +-- +2.33.0 -- 2.33.0
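Worth calling out (not part of the patch text itself): the whole adaptation
hinges on one idiom. is_xsched_used() treats a failing access_ok() as "the
argument block lives in kernel memory, so the caller is the xsched ko driving
these ioctl paths from inside the kernel", and every copy then switches
between memcpy() and copy_{from,to}_user() on that flag. Reduced to a single
helper it looks like the sketch below (the name example_fetch_args is made up
for illustration):

	static int example_fetch_args(void *dst, const void *src, size_t size)
	{
		/*
		 * access_ok() rejects kernel addresses, so a failed check is
		 * read as "in-kernel caller" and the block is copied
		 * directly; otherwise take the normal user-copy path.
		 */
		if (!access_ok((const void __user *)src, size)) {
			memcpy(dst, src, size);
			return 0;
		}
		if (copy_from_user(dst, (const void __user *)src, size))
			return -EFAULT;
		return 0;
	}

The trade-off is that a genuinely bad user pointer is no longer rejected but
memcpy()'d as if it were kernel memory, so this pattern is only safe while
these ioctl entry points stay reachable from trusted callers only.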

FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list have been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/18508 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/YCH...