From: Guan Jing <guanjing6(a)huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I80YXE
CVE: NA
----------------------------------------
Add UCC (Universal Converged Computing) scheduling support for XPU
devices: an xpu_group device hierarchy (drivers/xpu), a ucc scheduler
(kernel/ucc_sched), vstream (virtual SQ/CQ) management behind the new
vstream_manage syscall (kernel/ucc), and per-task ucc_priority/ucc_step
knobs exposed in procfs.
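
For illustration only, the per-task knobs added here can be tuned from
userspace roughly as follows (the numeric values are assumed to follow
enum ucc_se_prio and enum ucc_se_step):

  echo 0 > /proc/<pid>/ucc_priority     # UCC_PRIO_HIGH
  echo 10 > /proc/<pid>/ucc_step        # UCC_STEP_FAST
  echo 5000 > /proc/sys/kernel/ucc_sched_rcv_timeout   # receive timeout, ms

Vstreams themselves are managed through the new vstream_manage() syscall
(see kernel/ucc/vstream.c).
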
Signed-off-by: Chen Hui <judy.chenhui(a)huawei.com>
Signed-off-by: Yang Yanchao <yangyanchao6(a)huawei.com>
Signed-off-by: Hui Tang <tanghui20(a)huawei.com>
Signed-off-by: Guan Jing <guanjing6(a)huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie(a)huawei.com>
---
Kconfig | 2 +
drivers/Kconfig | 2 +
drivers/Makefile | 1 +
drivers/xpu/Kconfig | 9 +
drivers/xpu/Makefile | 1 +
drivers/xpu/xpu_group.c | 175 ++++++++
fs/proc/base.c | 102 ++++-
include/linux/sched.h | 3 +
include/linux/ucc_common.h | 21 +
include/linux/ucc_kfd.h | 110 +++++
include/linux/ucc_sched.h | 36 ++
include/linux/ucc_sched/ucc_sched.h | 71 +++
include/linux/ucc_ts.h | 254 +++++++++++
include/linux/vstream.h | 123 ++++++
include/linux/xpu_group.h | 66 +++
include/trace/events/ucc_sched.h | 120 +++++
init/init_task.c | 4 +
init/main.c | 9 +
kernel/Makefile | 2 +
kernel/sched/Makefile | 1 +
kernel/sched/core.c | 5 +
kernel/sched/ucc_sched.c | 148 +++++++
kernel/sysctl.c | 17 +-
kernel/ucc/Kconfig | 21 +
kernel/ucc/Makefile | 1 +
kernel/ucc/ascend_vstream.c | 654 ++++++++++++++++++++++++++++
kernel/ucc/ascend_vstream.h | 13 +
kernel/ucc/vstream.c | 62 +++
kernel/ucc_sched/Makefile | 1 +
kernel/ucc_sched/core.c | 591 +++++++++++++++++++++++++
kernel/ucc_sched/ucc_sched.h | 43 ++
31 files changed, 2666 insertions(+), 2 deletions(-)
create mode 100644 drivers/xpu/Kconfig
create mode 100644 drivers/xpu/Makefile
create mode 100644 drivers/xpu/xpu_group.c
create mode 100644 include/linux/ucc_common.h
create mode 100644 include/linux/ucc_kfd.h
create mode 100644 include/linux/ucc_sched.h
create mode 100644 include/linux/ucc_sched/ucc_sched.h
create mode 100644 include/linux/ucc_ts.h
create mode 100644 include/linux/vstream.h
create mode 100644 include/linux/xpu_group.h
create mode 100644 include/trace/events/ucc_sched.h
create mode 100644 kernel/sched/ucc_sched.c
create mode 100644 kernel/ucc/Kconfig
create mode 100644 kernel/ucc/Makefile
create mode 100644 kernel/ucc/ascend_vstream.c
create mode 100644 kernel/ucc/ascend_vstream.h
create mode 100644 kernel/ucc/vstream.c
create mode 100644 kernel/ucc_sched/Makefile
create mode 100644 kernel/ucc_sched/core.c
create mode 100644 kernel/ucc_sched/ucc_sched.h
diff --git a/Kconfig b/Kconfig
index 48a80beab685..8e558777fb54 100644
--- a/Kconfig
+++ b/Kconfig
@@ -30,3 +30,5 @@ source "crypto/Kconfig"
source "lib/Kconfig"
source "lib/Kconfig.debug"
+
+source "kernel/ucc/Kconfig"
diff --git a/drivers/Kconfig b/drivers/Kconfig
index ab4d43923c4d..bd59e9e525ba 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -219,4 +219,6 @@ source "drivers/siox/Kconfig"
source "drivers/slimbus/Kconfig"
+source "drivers/xpu/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 578f469f72fb..1130b2d92df1 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -186,3 +186,4 @@ obj-$(CONFIG_MULTIPLEXER) += mux/
obj-$(CONFIG_UNISYS_VISORBUS) += visorbus/
obj-$(CONFIG_SIOX) += siox/
obj-$(CONFIG_GNSS) += gnss/
+obj-$(CONFIG_XPU_SCHEDULE) += xpu/
diff --git a/drivers/xpu/Kconfig b/drivers/xpu/Kconfig
new file mode 100644
index 000000000000..c4a391d0039d
--- /dev/null
+++ b/drivers/xpu/Kconfig
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menuconfig XPU_SCHEDULE
+ bool "xpu schedule"
+ default n
+ help
+ Support XPU scheduling. Say Y here if you want support for
+ scheduling work across heterogeneous XPU compute devices.
+
diff --git a/drivers/xpu/Makefile b/drivers/xpu/Makefile
new file mode 100644
index 000000000000..9edc6dcdd4d0
--- /dev/null
+++ b/drivers/xpu/Makefile
@@ -0,0 +1 @@
+obj-y += xpu_group.o
diff --git a/drivers/xpu/xpu_group.c b/drivers/xpu/xpu_group.c
new file mode 100644
index 000000000000..53a598db0615
--- /dev/null
+++ b/drivers/xpu/xpu_group.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/xpu_group.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+
+extern int ucc_rt_nr_running(struct xcu *cu);
+static DECLARE_RWSEM(xpu_group_rwsem);
+
+static struct xpu_capability xpu_capability_root;
+
+struct xpu_group __xpu_root = {
+ .type = XPU_TYPE_ROOT,
+ .capability = &xpu_capability_root,
+
+ .next_layer = IDR_INIT(next_layer),
+};
+
+struct xpu_group *xpu_root = &__xpu_root;
+EXPORT_SYMBOL(xpu_root);
+
+int __xpu_group_attach(struct xpu_group *new_group,
+ struct xpu_group *previous_group)
+{
+ int id = new_group->id;
+
+ if (id == -1)
+ id = idr_alloc(&previous_group->next_layer, new_group,
+ 0, INT_MAX, GFP_KERNEL);
+ else
+ id = idr_alloc(&previous_group->next_layer, new_group,
+ id, id + 1, GFP_KERNEL);
+ if (id < 0)
+ return -EEXIST;
+
+ new_group->id = id;
+ new_group->previous_layer = previous_group;
+
+ return 0;
+}
+
+int xpu_group_attach(struct xpu_group *new_group,
+ struct xpu_group *previous_group)
+{
+ int ret;
+
+ down_write(&xpu_group_rwsem);
+ ret = __xpu_group_attach(new_group, previous_group);
+ up_write(&xpu_group_rwsem);
+ return ret;
+}
+EXPORT_SYMBOL(xpu_group_attach);
+
+struct xpu_group *xpu_group_alloc_and_attach(struct xpu_group *previous_group,
+ int id)
+{
+ struct xpu_group *new = xpu_group_alloc();
+
+ if (!new) {
+ pr_err("alloc xpu_group failed\n");
+ return NULL;
+ }
+
+ new->id = id;
+
+ if (xpu_group_attach(new, previous_group)) {
+ kfree(new);
+ return NULL;
+ }
+
+ return new;
+}
+EXPORT_SYMBOL(xpu_group_alloc_and_attach);
+
+int __xpu_group_detach(struct xpu_group *group)
+{
+ idr_remove(&group->previous_layer->next_layer, group->id);
+ return 0;
+}
+
+int xpu_group_detach(struct xpu_group *group)
+{
+ int ret;
+
+ down_write(&xpu_group_rwsem);
+ ret = __xpu_group_detach(group);
+ up_write(&xpu_group_rwsem);
+ return ret;
+}
+EXPORT_SYMBOL(xpu_group_detach);
+
+struct xpu_group *__xpu_group_find(struct xpu_group *group, int id)
+{
+ return idr_find(&group->next_layer, id);
+}
+
+struct xpu_group *xpu_group_find(struct xpu_group *group, int id)
+{
+ struct xpu_group *p;
+
+ down_read(&xpu_group_rwsem);
+ p = __xpu_group_find(group, id);
+ up_read(&xpu_group_rwsem);
+
+ return p;
+}
+EXPORT_SYMBOL(xpu_group_find);
+
+
+struct xpu_group *xpu_idle_group_find(struct xpu_group *group)
+{
+ struct xpu_group *entry_group;
+ int id;
+
+ down_read(&xpu_group_rwsem);
+ idr_for_each_entry(&group->next_layer, entry_group, id) {
+ if (!entry_group->used) {
+ up_read(&xpu_group_rwsem);
+ return entry_group;
+ }
+ }
+ up_read(&xpu_group_rwsem);
+
+ return NULL;
+}
+
+int xpu_run(struct xpu_group *group, void *para1, void *para2)
+{
+ int ret = 0;
+
+ if (group->opt && group->opt->run)
+ ret = group->opt->run(group, para1, para2);
+
+ return ret;
+}
+
+int xpu_finish(struct xpu_group *group, void *para1, void *para2)
+{
+ if (group->opt && group->opt->finish)
+ return group->opt->finish(group, para1, para2);
+
+ return 0;
+}
+
+int xpu_wait(struct xpu_group *group, void *para1, void *para2, void *para3)
+{
+ if (group->opt && group->opt->wait)
+ return group->opt->wait(group, para1, para2, para3);
+
+ return 0;
+}
+
+int xpu_complete(struct xpu_group *group, void *para1, void *para2, void *para3)
+{
+ if (group->opt && group->opt->complete)
+ return group->opt->complete(group, para1, para2, para3);
+
+ return 0;
+}
+
+struct xpu_group *xpu_group_alloc(void)
+{
+ struct xpu_group *node = kzalloc(sizeof(*node), GFP_KERNEL);
+
+ if (!node)
+ return NULL;
+
+ node->type = XPU_TYPE_CUSTOM;
+ idr_init(&node->next_layer);
+
+ return node;
+}
+EXPORT_SYMBOL(xpu_group_alloc);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dc9841826264..516eee1ae952 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -770,7 +770,6 @@ static const struct file_operations proc_single_file_operations = {
.release = single_release,
};
-
struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
{
struct task_struct *task = get_proc_task(inode);
@@ -1546,6 +1545,99 @@ static const struct file_operations proc_pid_sched_operations = {
#endif
+#ifdef CONFIG_XPU_SCHEDULE
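+/*
+ * /proc/<pid>/ucc_priority and /proc/<pid>/ucc_step expose the per-task ucc
+ * scheduling knobs added to task_struct; both are read and written as plain
+ * unsigned integers.
+ */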
+static ssize_t ucc_step_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char numbuf[PROC_NUMBUF];
+ ssize_t len;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+
+ len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->ucc_step);
+
+ put_task_struct(task);
+
+ return simple_read_from_buffer(buf, count, ppos, numbuf, len);
+}
+
+static ssize_t ucc_step_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ int err;
+ unsigned int ucc_step;
+
+ err = kstrtouint_from_user(buf, count, 0, &ucc_step);
+ if (err)
+ return err;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ p->ucc_step = ucc_step;
+ put_task_struct(p);
+
+ return count;
+}
+
+static const struct file_operations ucc_step_operations = {
+ .write = ucc_step_write,
+ .read = ucc_step_read,
+};
+
+static ssize_t ucc_priority_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char numbuf[PROC_NUMBUF];
+ ssize_t len;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+
+ len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->ucc_priority);
+
+ put_task_struct(task);
+
+ return simple_read_from_buffer(buf, count, ppos, numbuf, len);
+}
+
+static ssize_t ucc_priority_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ int err;
+ unsigned int ucc_priority;
+
+ err = kstrtouint_from_user(buf, count, 0, &ucc_priority);
+ if (err)
+ return err;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ p->ucc_priority = ucc_priority;
+ put_task_struct(p);
+
+ return count;
+}
+
+static const struct file_operations ucc_priority_operations = {
+ .write = ucc_priority_write,
+ .read = ucc_priority_read,
+};
+
+#endif
+
#ifdef CONFIG_SCHED_AUTOGROUP
/*
* Print out autogroup related information:
@@ -3151,6 +3243,10 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_ASCEND_SHARE_POOL
ONE("sp_group", S_IRUGO, proc_sp_group_state),
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+ REG("ucc_priority", 0644, ucc_priority_operations),
+ REG("ucc_step", 0644, ucc_step_operations),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3537,6 +3633,10 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_ASCEND_SHARE_POOL
ONE("sp_group", S_IRUGO, proc_sp_group_state),
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+ REG("ucc_priority", 0644, ucc_priority_operations),
+ REG("ucc_step", 0644, ucc_step_operations),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8fd8c5b7cdc6..175659be95f3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1281,6 +1281,9 @@ struct task_struct {
#if !defined(__GENKSYMS__)
#if defined(CONFIG_QOS_SCHED_SMART_GRID)
struct sched_grid_qos *grid_qos;
+#elif defined(CONFIG_XPU_SCHEDULE)
+ u32 ucc_priority;
+ u32 ucc_step;
#else
KABI_RESERVE(8)
#endif
diff --git a/include/linux/ucc_common.h b/include/linux/ucc_common.h
new file mode 100644
index 000000000000..3875c2226d24
--- /dev/null
+++ b/include/linux/ucc_common.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _UCC_COMMON_H
+#define _UCC_COMMON_H
+
+/*
+ * UCC Print Function
+ */
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define ucc_err(fmt, ...) printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+
+#define ucc_warn(fmt, ...) printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+
+#define ucc_info(fmt, ...) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+
+#define ucc_dbg(fmt, ...) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+
+#endif
diff --git a/include/linux/ucc_kfd.h b/include/linux/ucc_kfd.h
new file mode 100644
index 000000000000..07eedc2fd5f2
--- /dev/null
+++ b/include/linux/ucc_kfd.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef KFD_PRIV_H_INCLUDED
+#define KFD_PRIV_H_INCLUDED
+
+#include <linux/mmu_notifier.h>
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/mmu_notifier.h>
+#include <linux/idr.h>
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+struct process_queue_manager;
+struct kfd_process;
+struct kfd_signal_page;
+
+struct process_queue_manager {
+ struct kfd_process *process;
+ struct list_head queues;
+ unsigned long *queue_slot_bitmap;
+};
+
+struct kfd_signal_page {
+ uint64_t *kernel_address;
+ uint64_t __user *user_address;
+ bool need_to_free_pages;
+};
+
+/* Process data */
+struct kfd_process {
+ struct hlist_node kfd_processes;
+ void *mm;
+ struct kref ref;
+ struct work_struct release_work;
+ struct mutex mutex;
+ struct task_struct *lead_thread;
+ struct mmu_notifier mmu_notifier;
+/* TODO: check if use right branch */
+ struct rcu_head rcu;
+ uint16_t pasid;
+ struct list_head per_device_data;
+ struct process_queue_manager pqm;
+ bool is_32bit_user_mode;
+ struct mutex event_mutex;
+ struct idr event_idr;
+ struct kfd_signal_page *signal_page;
+ size_t signal_mapped_size;
+ size_t signal_event_count;
+ bool signal_event_limit_reached;
+/* TODO: check if use right branch */
+ struct rb_root bo_interval_tree;
+ void *kgd_process_info;
+ struct dma_fence *ef;
+ struct delayed_work eviction_work;
+ struct delayed_work restore_work;
+ unsigned int last_eviction_seqno;
+ unsigned long last_restore_timestamp;
+ unsigned long last_evict_timestamp;
+ bool debug_trap_enabled;
+ uint32_t trap_debug_wave_launch_mode;
+ struct file *dbg_ev_file;
+ uint32_t allocated_debug_watch_point_bitmask;
+ struct kobject *kobj;
+ struct kobject *kobj_queues;
+ struct attribute attr_pasid;
+ bool has_cwsr;
+ uint64_t exception_enable_mask;
+ uint64_t exception_status;
+};
+
+struct kfd_ioctl_create_queue_args {
+ __u64 ring_base_address; /* to KFD */
+ __u64 write_pointer_address; /* from KFD */
+ __u64 read_pointer_address; /* from KFD */
+ __u64 doorbell_offset; /* from KFD */
+
+ __u32 ring_size; /* to KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 queue_type; /* to KFD */
+ __u32 queue_percentage; /* to KFD */
+ __u32 queue_priority; /* to KFD */
+ __u32 queue_id; /* from KFD */
+
+ __u64 eop_buffer_address; /* to KFD */
+ __u64 eop_buffer_size; /* to KFD */
+ __u64 ctx_save_restore_address; /* to KFD */
+ __u32 ctx_save_restore_size; /* to KFD */
+ __u32 ctl_stack_size; /* to KFD */
+};
+
+struct kfd_ioctl_destroy_queue_args {
+ __u32 queue_id; /* to KFD */
+ __u32 pad;
+};
+
+struct kfd_ioctl_update_queue_args {
+ __u64 ring_base_address; /* to KFD */
+
+ __u32 queue_id; /* to KFD */
+ __u32 ring_size; /* to KFD */
+ __u32 queue_percentage; /* to KFD */
+ __u32 queue_priority; /* to KFD */
+};
+#endif
diff --git a/include/linux/ucc_sched.h b/include/linux/ucc_sched.h
new file mode 100644
index 000000000000..5b170545f7c2
--- /dev/null
+++ b/include/linux/ucc_sched.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LINUX_UCC_SCHED_H__
+#define __LINUX_UCC_SCHED_H__
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/hash.h>
+#include <linux/rculist.h>
+#include <linux/idr.h>
+#include <linux/xpu_group.h>
+#include <linux/hashtable.h>
+#include <linux/vstream.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#define VRTSQ_RTSQ_HASH_ORDER 6
+
+#ifdef CONFIG_XPU_SCHEDULE
+int ucc_process_task(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx,
+ int *sqenum);
+int ucc_free_task(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx);
+int ucc_wait_cq(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx,
+ struct devdrv_report_para *arg, int *sqenum);
+struct xpu_group *select_sq(struct vstream_info *vstream_info);
+int ucc_sched_register_xcu(int dev_id, int ts_id, int cu_num);
+void ucc_set_vstream_state(struct vstream_info *vinfo, int state);
+void ucc_dequeue_task(struct vstream_info *vInfo);
+int ucc_rt_nr_running(struct xcu *cu);
+struct xcu *ucc_get_xcu_by_id(int cu_id);
+int ucc_xcu_is_sched(int cu_id);
+void ucc_dump_statistics_info(struct ucc_se *se);
+#endif
+
+#endif
diff --git a/include/linux/ucc_sched/ucc_sched.h b/include/linux/ucc_sched/ucc_sched.h
new file mode 100644
index 000000000000..6edd8930e09e
--- /dev/null
+++ b/include/linux/ucc_sched/ucc_sched.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2019. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Mon Jan 30 14:29:19 2023
+ */
+
+#ifndef __LINUX_UCC_SCHED_USCHED_H__
+#define __LINUX_UCC_SCHED_USCHED_H__
+
+enum ucc_se_state {
+ SE_PREPARE,
+ SE_READY,
+ SE_RUNNING,
+ SE_BLOCK,
+ SE_DEAD,
+};
+
+enum ucc_se_flag {
+ UCC_TIF_NONE,
+ UCC_TIF_PREEMPT,
+ UCC_TIF_BALANCE,
+};
+
+enum ucc_se_prio {
+ UCC_PRIO_HIGH,
+ UCC_PRIO_LOW,
+};
+
+enum ucc_se_step {
+ UCC_STEP_SLOW = 1,
+ UCC_STEP_FAST = 10,
+};
+
+struct ucc_statistics {
+ u64 wait_start;
+ u64 wait_max;
+ u64 wait_count;
+ u64 wait_sum;
+
+ u64 preempt_start;
+ u64 preempt_max;
+ u64 preempt_count;
+ u64 preempt_sum;
+
+ u64 kernel_sum;
+ u64 timeout_count;
+
+ u64 run_start;
+ u64 run_max;
+ u64 run_count;
+ u64 run_sum;
+};
+
+struct ucc_se {
+ int on_cu;
+ struct list_head run_list;
+ enum ucc_se_state state;
+ enum ucc_se_flag flag;
+ enum ucc_se_prio prio;
+ enum ucc_se_step step;
+ raw_spinlock_t se_lock;
+ struct ucc_statistics statistics;
+ int is_timeout;
+};
+
+int ucc_sched_init(void);
+int ucc_schedule(int cu_id);
+int ucc_wake_up(struct ucc_se *se);
+
+#endif
diff --git a/include/linux/ucc_ts.h b/include/linux/ucc_ts.h
new file mode 100644
index 000000000000..7280ccca1059
--- /dev/null
+++ b/include/linux/ucc_ts.h
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef TS_H
+#define TS_H
+
+#include <linux/file.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+
+#define DEVDRV_MAX_SQ_DEPTH (1024)
+#define DEVDRV_SQ_SLOT_SIZE (64)
+
+#define DEVDRV_MAX_SQ_NUM (512 - 1)
+#define DEVDRV_MAX_CQ_NUM (352 - 1)
+
+#define DEVDRV_MAX_TS_NUM (1)
+
+#define REMAP_ALIGN_SIZE (64 * 1024)
+#define REMAP_ALIGN_MASK (~(REMAP_ALIGN_SIZE - 1))
+#define REMAP_ALIGN(x) (((x) + REMAP_ALIGN_SIZE - 1) & \
+ REMAP_ALIGN_MASK)
+
+#define DEVDRV_DB_SPACE_SIZE (1024 * 4096)
+
+#define SQCQ_RTS_INFO_LENGTH 5
+#define SQCQ_RESV_LENGTH 8
+
+#define DEVDRV_CBCQ_MAX_GID 128
+
+enum phy_sqcq_type {
+ NORMAL_SQCQ_TYPE = 0,
+ CALLBACK_SQCQ_TYPE,
+ LOGIC_SQCQ_TYPE,
+ SHM_SQCQ_TYPE,
+ DFX_SQCQ_TYPE,
+ TS_SQCQ_TYPE,
+ KERNEL_SQCQ_TYPE,
+};
+
+struct notifier_operations {
+ int (*notifier_call)(struct file *file_op, unsigned long mode);
+};
+
+#define MAX_DEVICE_COUNT 64
+
+struct davinci_intf_stru {
+ atomic_t count;
+ struct mutex dmutex;
+ struct cdev cdev;
+ struct device *device;
+ struct list_head process_list;
+ struct list_head module_list;
+ unsigned int device_status[MAX_DEVICE_COUNT];
+ cpumask_var_t cpumask;
+};
+
+#define DAVINIC_MODULE_NAME_MAX 256
+struct davinci_intf_private_stru {
+ char module_name[DAVINIC_MODULE_NAME_MAX];
+ unsigned int device_id;
+ pid_t owner_pid;
+ int close_flag;
+ atomic_t work_count;
+ int release_status;
+ struct mutex fmutex;
+ const struct file_operations fops;
+ struct notifier_operations notifier;
+ struct davinci_intf_stru *device_cb;
+ struct file priv_filep;
+ unsigned int free_type;
+};
+
+enum sqcq_alloc_status {
+ SQCQ_INACTIVE = 0,
+ SQCQ_ACTIVE
+};
+
+struct devdrv_ts_sq_info {
+ enum phy_sqcq_type type;
+ pid_t tgid;
+ u32 head;
+ u32 tail;
+ u32 credit;
+ u32 index;
+ int uio_fd;
+
+ u8 *uio_addr;
+ int uio_size;
+
+ enum sqcq_alloc_status alloc_status;
+ u64 send_count;
+
+ void *sq_sub;
+};
+
+struct devdrv_ts_cq_info {
+ enum phy_sqcq_type type;
+ pid_t tgid;
+ u32 vfid;
+
+ u32 head;
+ u32 tail;
+ u32 release_head; /* runtime read cq head value */
+ u32 index;
+ u32 phase;
+ u32 int_flag;
+
+ int uio_fd;
+
+ u8 *uio_addr;
+ int uio_size;
+
+ enum sqcq_alloc_status alloc_status;
+ u64 receive_count;
+
+ void *cq_sub;
+
+ void (*complete_handle)(struct devdrv_ts_cq_info *cq_info);
+
+ u8 slot_size;
+};
+
+#define DEVDRV_SQ_INFO_OCCUPY_SIZE \
+ (sizeof(struct devdrv_ts_sq_info) * DEVDRV_MAX_SQ_NUM)
+#define DEVDRV_CQ_INFO_OCCUPY_SIZE \
+ (sizeof(struct devdrv_ts_cq_info) * DEVDRV_MAX_CQ_NUM)
+
+#define DEVDRV_MAX_INFO_SIZE \
+ (DEVDRV_SQ_INFO_OCCUPY_SIZE + DEVDRV_CQ_INFO_OCCUPY_SIZE)
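+
+/*
+ * The macros below describe the per-context vma layout used for vstreams:
+ * [SQ slots][SQ/CQ info][doorbell space][CQ slots], each region
+ * REMAP_ALIGN()ed; see vstream_map_pfnaddr() in kernel/ucc/ascend_vstream.c.
+ */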
+#define DEVDRV_VM_SQ_MEM_OFFSET 0
+#define DEVDRV_VM_SQ_SLOT_SIZE \
+ REMAP_ALIGN(DEVDRV_MAX_SQ_DEPTH * DEVDRV_SQ_SLOT_SIZE)
+#define DEVDRV_VM_SQ_MEM_SIZE \
+ (DEVDRV_VM_SQ_SLOT_SIZE * DEVDRV_MAX_SQ_NUM)
+
+#define DEVDRV_VM_INFO_MEM_OFFSET \
+ (DEVDRV_VM_SQ_MEM_OFFSET + DEVDRV_VM_SQ_MEM_SIZE)
+#define DEVDRV_VM_INFO_MEM_SIZE REMAP_ALIGN(DEVDRV_MAX_INFO_SIZE)
+
+#define DEVDRV_VM_DB_MEM_OFFSET \
+ (DEVDRV_VM_INFO_MEM_OFFSET + DEVDRV_VM_INFO_MEM_SIZE)
+#define DEVDRV_VM_DB_MEM_SIZE REMAP_ALIGN(DEVDRV_DB_SPACE_SIZE)
+
+#define DEVDRV_VM_CQ_MEM_OFFSET \
+ (DEVDRV_VM_DB_MEM_OFFSET + DEVDRV_VM_DB_MEM_SIZE)
+
+enum tsdrv_id_type {
+ TSDRV_STREAM_ID,
+ TSDRV_NOTIFY_ID,
+ TSDRV_MODEL_ID,
+ TSDRV_EVENT_SW_ID, /* used for event alloc/free/query of res_num */
+ TSDRV_EVENT_HW_ID,
+ TSDRV_IPC_EVENT_ID,
+ TSDRV_SQ_ID,
+ TSDRV_CQ_ID,
+ TSDRV_PCQ_ID,
+ TSDRV_MAX_ID,
+};
+
+#define TSDRV_CQ_REUSE 0x00000001
+#define TSDRV_SQ_REUSE 0x00000002
+
+struct normal_alloc_sqcq_para {
+ uint32_t fd;
+ uint32_t tsId;
+ uint32_t devId;
+ uint32_t sqeSize;
+ uint32_t cqeSize;
+ uint32_t sqeDepth;
+ uint32_t cqeDepth;
+ uint32_t grpId;
+ uint32_t flag;
+ uint32_t sqId;
+ uint32_t cqId;
+ uint32_t priority;
+ uint32_t info[SQCQ_RTS_INFO_LENGTH];
+ uint32_t res[SQCQ_RESV_LENGTH];
+};
+
+struct normal_free_sqcq_para {
+ uint32_t tsId;
+ uint32_t flag;
+ uint32_t sqId;
+ uint32_t cqId;
+ uint32_t res[SQCQ_RESV_LENGTH];
+};
+
+struct tsdrv_sqcq_data_para {
+ uint32_t id;
+ uint32_t val;
+};
+
+struct devdrv_report_para {
+ int timeout;
+ u32 cq_tail;
+ u32 cq_id;
+};
+
+struct tsdrv_ts_id_ctx {
+ u32 id_num;
+ struct list_head id_list;
+ spinlock_t id_lock;
+};
+struct tsdrv_ts_ctx {
+ u32 tsid;
+ atomic_t status;
+ u32 send_count;
+ u64 receive_count;
+
+ int32_t cq_tail_updated;
+ wait_queue_head_t report_wait;
+
+ struct work_struct recycle_work;
+
+ wait_queue_head_t cbcq_wait[DEVDRV_CBCQ_MAX_GID];
+
+ void *shm_sqcq_ctx;
+ void *logic_sqcq_ctx;
+ void *sync_cb_sqcq_ctx; // mini callback
+
+ struct tsdrv_ts_id_ctx id_ctx[TSDRV_MAX_ID];
+
+ /* only used by vm */
+ u32 vcqid;
+ u32 wait_queue_inited;
+ u32 cq_report_status;
+ int32_t cq_tail;
+ spinlock_t ctx_lock;
+
+ u32 recycle_cbsqcq_num; // min callback
+};
+
+//Context Delivers
+struct tsdrv_ctx {
+ u32 ctx_index;
+ atomic_t status;
+ atomic_t type;
+ pid_t tgid;
+ pid_t pid;
+ int32_t ssid;
+ u32 thread_bind_irq_num;
+ u32 mirror_ctx_status;
+ struct rb_node node;
+ struct list_head list;
+ struct vm_area_struct *vma[DEVDRV_MAX_TS_NUM];
+ spinlock_t ctx_lock;
+ struct mutex mutex_lock;
+ struct tsdrv_ts_ctx ts_ctx[DEVDRV_MAX_TS_NUM];
+
+ u64 unique_id; /* mark unique processes for vm */
+};
+
+#endif
diff --git a/include/linux/vstream.h b/include/linux/vstream.h
new file mode 100644
index 000000000000..14d799296053
--- /dev/null
+++ b/include/linux/vstream.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VSTREAM_H
+#define _LINUX_VSTREAM_H
+
+#include <linux/ucc_kfd.h>
+#include <linux/ucc_sched/ucc_sched.h>
+#include <linux/ucc_ts.h>
+
+#define MAX_VSTREAM_SIZE 1024
+#define MAX_VSTREAM_SLOT_SIZE 64
+#define MAX_CQ_SLOT_SIZE 12
+
+/*
+ * XXX_VSTREAM_ALLOC: alloc a vstream, buffer for tasks
+ * XXX_VSTREAM_FREE: free a vstream
+ * XXX_VSTREAM_KICK: there are tasks to be executed in the vstream
+ * XXX_VSTREAM_UPDATE: update information for an existing vstream
+ * XXX_CALLBACK_VSTREAM_WAIT: waiting for callback tasks
+ * XXX_CALLBACK_VSTREAM_KICK: callback tasks have been executed
+ *
+ * NOTE: Callback vstream is only for Ascend now. We do not need
+ * CALLBACK_VSTREAM_ALLOC because the callback vstream will be
+ * alloced with vstream on Ascend.
+ */
+enum VSTREAM_COMMAND {
+ /* vstream command for Ascend */
+ ASCEND_VSTREAM_ALLOC = 0,
+ ASCEND_VSTREAM_FREE,
+ ASCEND_VSTREAM_KICK,
+ ASCEND_CALLBACK_VSTREAM_WAIT,
+ ASCEND_CALLBACK_VSTREAM_KICK,
+ ASCEND_VSTREAM_GET_HEAD,
+ ASCEND_MAX_COMMAND,
+
+ /* vstream command for amdgpu */
+ AMDGPU_VSTREAM_ALLOC = ASCEND_MAX_COMMAND + 1,
+ AMDGPU_VSTREAM_FREE,
+ AMDGPU_VSTREAM_KICK,
+ AMDGPU_VSTREAM_UPDATE,
+ AMDGPU_MAX_COMMAND,
+};
+
+struct vstream_alloc_args {
+ union {
+ /* For Ascend */
+ struct normal_alloc_sqcq_para ascend;
+ /* For amdgpu */
+ struct kfd_ioctl_create_queue_args amdgpu;
+ };
+};
+
+struct vstream_free_args {
+ union {
+ /* For Ascend */
+ struct normal_free_sqcq_para ascend;
+ /* For amdgpu */
+ struct kfd_ioctl_destroy_queue_args amdgpu;
+ };
+};
+
+struct vstream_kick_args {
+ union {
+ /* For Ascend */
+ struct tsdrv_sqcq_data_para ascend;
+ /* For amdgpu */
+ };
+};
+
+struct vstream_args {
+ union {
+ struct vstream_alloc_args va_args;
+ struct vstream_free_args vf_args;
+ struct vstream_kick_args vk_args;
+ struct kfd_ioctl_update_queue_args vu_args;
+ struct tsdrv_sqcq_data_para vh_args;
+ struct devdrv_report_para cvw_args;
+ struct tsdrv_sqcq_data_para cvk_args;
+ };
+};
+
+struct vstream_node {
+ uint32_t id;
+ uint32_t head;
+ uint32_t tail;
+ uint32_t credit;
+ void *vstreamData;
+ raw_spinlock_t spin_lock;
+};
+
+struct vstream_id {
+ uint32_t vstreamId;
+ struct list_head list;
+};
+
+struct vcq_map_table {
+ uint32_t vcqId;
+ struct vstream_node *vcqNode;
+ struct list_head vstreamId_list;
+};
+
+struct vstream_info {
+ uint32_t vstreamId; //key
+ uint32_t vcqId;
+ uint32_t devId;
+ uint32_t tsId;
+ struct ucc_se se;
+ //TODO::check name
+ struct vstream_node *vsqNode;
+ struct vstream_node *vcqNode;
+ void *privdata;
+ uint32_t info[SQCQ_RTS_INFO_LENGTH];
+ int cu_id;
+ struct xpu_group *group;
+ int send_cnt;
+ struct task_struct *p;
+};
+
+typedef int vstream_manage_t(struct vstream_args *arg);
+int update_vstream_head(struct vstream_info *vstream_info, int num);
+struct vstream_info *vstream_get_info(uint32_t id);
+bool vstream_have_kernel(struct ucc_se *se);
+
+#endif /* _LINUX_VSTREAM_H */
diff --git a/include/linux/xpu_group.h b/include/linux/xpu_group.h
new file mode 100644
index 000000000000..5e3a96b15f9c
--- /dev/null
+++ b/include/linux/xpu_group.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __XPU_GROUP_H__
+#define __XPU_GROUP_H__
+#include <linux/idr.h>
+
+struct xpu_group;
+struct xcu;
+
+enum xpu_type {
+ XPU_TYPE_ROOT,
+ XPU_TYPE_TASK_QUEUE,
+ XPU_TYPE_NPU_310,
+ XPU_TYPE_CUSTOM,
+};
+
+enum xpu_capability_type {
+ TYPE_1,
+ XPU_CAPABILITY_TYPE_NR,
+};
+
+struct xpu_capability {
+ unsigned long capacities[XPU_CAPABILITY_TYPE_NR];
+};
+
+struct xpu_operation {
+ int (*run)(struct xpu_group *group, void *para1, void *para2);
+ int (*finish)(struct xpu_group *group, void *para1, void *para2);
+ int (*wait)(struct xpu_group *group, void *para1, void *para2,
+ void *para3);
+ int (*complete)(struct xpu_group *group, void *para1, void *para2,
+ void *para3);
+};
+
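+/*
+ * xpu_groups form a layered tree rooted at xpu_root; each layer's children
+ * live in the parent's next_layer idr, keyed by id. The ucc scheduler walks
+ * root -> device type (e.g. XPU_TYPE_NPU_310) -> device -> ts -> idle xcu
+ * when picking a queue (see select_sq()).
+ */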
+struct xpu_group {
+ int id;
+ enum xpu_type type;
+ struct xpu_capability *capability;
+
+ struct xpu_group *previous_layer;
+ struct idr next_layer;
+
+ struct xpu_operation *opt;
+
+ int used;
+
+ void *data;
+};
+
+extern struct xpu_group *xpu_root;
+
+#ifdef CONFIG_XPU_SCHEDULE
+int xpu_group_attach(struct xpu_group *new_group,
+ struct xpu_group *previous_group);
+int xpu_group_detach(struct xpu_group *group);
+struct xpu_group *xpu_group_find(struct xpu_group *group, int id);
+struct xpu_group *xpu_idle_group_find(struct xpu_group *group);
+struct xpu_group *xpu_group_alloc(void);
+struct xpu_group *xpu_group_alloc_and_attach(struct xpu_group *previous_group,
+ int id);
+int xpu_run(struct xpu_group *group, void *para1, void *para2);
+int xpu_finish(struct xpu_group *group, void *para1, void *para2);
+int xpu_wait(struct xpu_group *group, void *para1, void *para2, void *para3);
+#endif
+
+#endif
diff --git a/include/trace/events/ucc_sched.h b/include/trace/events/ucc_sched.h
new file mode 100644
index 000000000000..104a39b2f41c
--- /dev/null
+++ b/include/trace/events/ucc_sched.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ucc_sched
+
+#if !defined(_TRACE_UCC_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_UCC_SCHED_H
+
+#include <linux/tracepoint.h>
+#include <linux/binfmts.h>
+
+/*
+ * XXX the ucc_sched_stat tracepoints below only apply to SCHED_OTHER/BATCH/IDLE;
+ * adding ucc_sched_stat support to SCHED_FIFO/RR would be welcome.
+ */
+DECLARE_EVENT_CLASS(ucc_sched_stat_template,
+
+ TP_PROTO(struct vstream_info *vinfo, u64 delay),
+
+ TP_ARGS(vinfo, delay),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(int, cu_id)
+ __field(u32, vstreamId)
+ __field(u32, prio)
+ __field(u64, delay)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, vinfo->p->comm, TASK_COMM_LEN);
+ __entry->pid = vinfo->p->pid;
+ __entry->cu_id = vinfo->cu_id;
+ __entry->vstreamId = vinfo->vstreamId;
+ __entry->prio = vinfo->p->ucc_priority;
+ __entry->delay = delay;
+ ),
+
+ TP_printk("comm=%s pid=%d cu_id=%d vstreamId %u prio %u, delay=%llu [ns]",
+ __entry->comm, __entry->pid,
+ __entry->cu_id, __entry->vstreamId, __entry->prio,
+ (unsigned long long)__entry->delay)
+);
+
+DECLARE_EVENT_CLASS(ucc_sched_stat_template_1,
+
+ TP_PROTO(struct vstream_info *vinfo, u64 delay, int is_timeout),
+
+ TP_ARGS(vinfo, delay, is_timeout),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(int, cu_id)
+ __field(u32, vstreamId)
+ __field(u64, delay)
+ __field(int, is_timeout)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, vinfo->p->comm, TASK_COMM_LEN);
+ __entry->pid = vinfo->p->pid;
+ __entry->cu_id = vinfo->cu_id;
+ __entry->vstreamId = vinfo->vstreamId;
+ __entry->delay = delay;
+ __entry->is_timeout = is_timeout;
+ ),
+
+ TP_printk("comm=%s pid=%d cu_id=%d vstreamId %u, delay=%llu [ns]:%d",
+ __entry->comm, __entry->pid,
+ __entry->cu_id, __entry->vstreamId,
+ (unsigned long long)__entry->delay,
+ __entry->is_timeout)
+);
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+DEFINE_EVENT(ucc_sched_stat_template, ucc_sched_stat_wait,
+ TP_PROTO(struct vstream_info *vinfo, u64 delay),
+ TP_ARGS(vinfo, delay));
+
+DEFINE_EVENT(ucc_sched_stat_template, ucc_sched_stat_preempt,
+ TP_PROTO(struct vstream_info *vinfo, u64 delay),
+ TP_ARGS(vinfo, delay));
+
+DEFINE_EVENT(ucc_sched_stat_template_1, ucc_sched_stat_run,
+ TP_PROTO(struct vstream_info *vinfo, u64 delay, int is_timeout),
+ TP_ARGS(vinfo, delay, is_timeout));
+
+TRACE_EVENT(ucc_sched_switch,
+
+ TP_PROTO(int preempt,
+ struct vstream_info *next),
+
+ TP_ARGS(preempt, next),
+
+ TP_STRUCT__entry(
+ __field(int, cu_id)
+ __field(u32, next_vstreamId)
+ __field(u32, next_prio)
+ __field(int, preempt)
+ ),
+
+ TP_fast_assign(
+ __entry->cu_id = next->cu_id;
+ __entry->next_vstreamId = next->vstreamId;
+ __entry->next_prio = next->p->ucc_priority;
+ __entry->preempt = preempt;
+ ),
+
+ TP_printk("cu_id=%d next_vstreamId %u next_prio %u preempt[%d]",
+ __entry->cu_id,
+ __entry->next_vstreamId, __entry->next_prio,
+ __entry->preempt)
+);
+#endif /* _TRACE_UCC_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/init/init_task.c b/init/init_task.c
index b312a045f4b9..c1a78b4da368 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -188,6 +188,10 @@ struct task_struct init_task
.fork_pid = 0,
},
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+ .ucc_priority = 1,
+ .ucc_step = 1,
+#endif
};
EXPORT_SYMBOL(init_task);
diff --git a/init/main.c b/init/main.c
index 50af60ff0ef6..7ed2e67d7011 100644
--- a/init/main.c
+++ b/init/main.c
@@ -66,6 +66,7 @@
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/sched/init.h>
+#include <linux/ucc_sched/ucc_sched.h>
#include <linux/signal.h>
#include <linux/idr.h>
#include <linux/kgdb.h>
@@ -599,6 +600,14 @@ asmlinkage __visible void __init start_kernel(void)
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init();
+
+#ifdef CONFIG_XPU_SCHEDULE
+ /*
+ * Set up the ucc scheduler, to enable heterogeneous scheduling.
+ */
+ ucc_sched_init();
+#endif
+
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
diff --git a/kernel/Makefile b/kernel/Makefile
index d0482bd27ba4..273fe481d303 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,6 +43,8 @@ obj-y += irq/
obj-y += rcu/
obj-y += livepatch/
obj-y += dma/
+obj-$(CONFIG_XPU_SCHEDULE) += ucc_sched/
+obj-$(CONFIG_XPU_UCC) += ucc/
obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
obj-$(CONFIG_FREEZER) += freezer.o
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 0612af002ae5..0f659b2ad251 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -19,6 +19,7 @@ endif
obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle.o fair.o rt.o deadline.o
obj-y += wait.o wait_bit.o swait.o completion.o
+obj-$(CONFIG_XPU_SCHEDULE) += ucc_sched.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67bda877bfa8..89348097b29a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2316,6 +2316,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
*/
p->prio = current->normal_prio;
+#ifdef CONFIG_XPU_SCHEDULE
+ p->ucc_priority = current->ucc_priority;
+ p->ucc_step = current->ucc_step;
+#endif
+
/*
* Revert to default priority/policy on fork if requested.
*/
diff --git a/kernel/sched/ucc_sched.c b/kernel/sched/ucc_sched.c
new file mode 100644
index 000000000000..646f120c3c34
--- /dev/null
+++ b/kernel/sched/ucc_sched.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ucc_sched.h>
+#include <linux/ucc_common.h>
+
+static DEFINE_MUTEX(revmap_mutex);
+
+static DEFINE_HASHTABLE(vrtsq_rtsq_revmap, VRTSQ_RTSQ_HASH_ORDER);
+
+/**
+ * struct vsqce_idx_revmap_data - vstream to xpu_group reverse-map entry
+ * @vrtsdId: vstream id used as the hash key.
+ * @group: xpu_group selected for this vstream.
+ * @hash_node: hash node list.
+ */
+struct vsqce_idx_revmap_data {
+ unsigned int vrtsdId;
+ struct xpu_group *group;
+ struct hlist_node hash_node;
+};
+
+struct xpu_group *select_sq(struct vstream_info *vstream_info)
+{
+ struct vsqce_idx_revmap_data *revmap_data;
+
+ /* find history */
+ mutex_lock(&revmap_mutex);
+ hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node,
+ (unsigned long)vstream_info->vstreamId) {
+ if (revmap_data && revmap_data->group) {
+ mutex_unlock(&revmap_mutex);
+ return revmap_data->group;
+ }
+ }
+ mutex_unlock(&revmap_mutex);
+
+ revmap_data = kzalloc(sizeof(struct vsqce_idx_revmap_data), GFP_KERNEL);
+ if (revmap_data == NULL)
+ return NULL;
+ /* find XPU group */
+ revmap_data->group = xpu_group_find(xpu_root, XPU_TYPE_NPU_310);
+ if (revmap_data->group == NULL) {
+ ucc_err("failed to find XPU group.\n");
+ goto free_revmap;
+ }
+ /* find device group */
+ revmap_data->group = xpu_group_find(revmap_data->group,
+ vstream_info->devId);
+ if (revmap_data->group == NULL) {
+ ucc_err("failed to find device group.\n");
+ goto free_revmap;
+ }
+ /* find ts group */
+ revmap_data->group = xpu_group_find(revmap_data->group,
+ vstream_info->tsId);
+ if (revmap_data->group == NULL) {
+ ucc_err("failed to find ts group.\n");
+ goto free_revmap;
+ }
+
+ /* select an idle xcu */
+ revmap_data->group = xpu_idle_group_find(revmap_data->group);
+ if (revmap_data->group == NULL) {
+ ucc_err("failed to find an idle rtsq group.\n");
+ goto free_revmap;
+ }
+
+ revmap_data->vrtsdId = vstream_info->vstreamId;
+ /* set group used : 1 */
+ revmap_data->group->used = 1;
+
+ mutex_lock(&revmap_mutex);
+ hash_add(vrtsq_rtsq_revmap, &revmap_data->hash_node,
+ (unsigned long)vstream_info->vstreamId);
+ mutex_unlock(&revmap_mutex);
+ return revmap_data->group;
+
+free_revmap:
+ kfree(revmap_data);
+ return NULL;
+}
+
+int ucc_process_task(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx,
+ int *sqenum)
+{
+ struct xpu_group *group = NULL;
+
+ if (vstream_info == NULL) {
+ ucc_err("vstream_info is NULL\n");
+ return -1;
+ }
+
+ group = select_sq(vstream_info);
+ if (group == NULL) {
+ ucc_err("failed to find group.\n");
+ return -1;
+ }
+ /* send sqe */
+ *sqenum = xpu_run(group, vstream_info, ctx);
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_process_task);
+
+int ucc_free_task(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx)
+{
+ struct vsqce_idx_revmap_data *revmap_data;
+
+ ucc_dequeue_task(vstream_info);
+
+ while (!ucc_xcu_is_sched(vstream_info->cu_id))
+ schedule_timeout_interruptible(10);
+
+ ucc_dump_statistics_info(&vstream_info->se);
+
+ mutex_lock(&revmap_mutex);
+ hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node,
+ (unsigned long)vstream_info->vstreamId) {
+ if (revmap_data &&
+ revmap_data->vrtsdId == vstream_info->vstreamId &&
+ revmap_data->group) {
+ xpu_finish(revmap_data->group, vstream_info, ctx);
+ /* set group unused : 0 */
+ revmap_data->group->used = 0;
+ hash_del(&revmap_data->hash_node);
+ kfree(revmap_data);
+ revmap_data = NULL;
+ break;
+ }
+ }
+ mutex_unlock(&revmap_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_free_task);
+
+int ucc_wait_cq(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx,
+ struct devdrv_report_para *arg, int *cqenum)
+{
+ struct vsqce_idx_revmap_data *revmap_data;
+
+ hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node,
+ (unsigned long)vstream_info->vstreamId) {
+ if (revmap_data &&
+ revmap_data->vrtsdId == vstream_info->vstreamId &&
+ revmap_data->group)
+ *cqenum = xpu_wait(revmap_data->group, vstream_info,
+ ctx, arg);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_wait_cq);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c7064f67f4a5..aeceb9e9c927 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -117,6 +117,10 @@ extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
extern int sysctl_nr_trim_pages;
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+extern int sysctl_ucc_sched_rcv_timeout_ms;
+#endif
+
/* Constants used for minimum and maximum */
#ifdef CONFIG_LOCKUP_DETECTOR
static int sixty = 60;
@@ -139,7 +143,7 @@ static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
-#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_QOS_SCHED_SMART_GRID)
+#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_QOS_SCHED_SMART_GRID) || defined(CONFIG_XPU_SCHEDULE)
static int hundred_thousand = 100000;
#endif
#ifdef CONFIG_PERF_EVENTS
@@ -352,6 +356,17 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_XPU_SCHEDULE
+ {
+ .procname = "ucc_sched_rcv_timeout",
+ .data = &sysctl_ucc_sched_rcv_timeout_ms,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &hundred_thousand,
+ },
+#endif
#ifdef CONFIG_SCHED_DEBUG
{
.procname = "sched_min_granularity_ns",
diff --git a/kernel/ucc/Kconfig b/kernel/ucc/Kconfig
new file mode 100644
index 000000000000..279c11f702b1
--- /dev/null
+++ b/kernel/ucc/Kconfig
@@ -0,0 +1,21 @@
+#
+# TODO: add description
+#
+
+config XPU_UCC
+ bool "ucc"
+ default n
+ depends on ARM64 || X86
+ help
+ Say Y here if you want support for XPU UCC. UCC, short for
+ Universal Converged Computing, is a helper for XPU
+ scheduling.
+
+
+config XPU_VSTREAM
+ bool "virtual submit queue and complete queue"
+ default n
+ depends on XPU_UCC
+ help
+ Virtual submit queue (SQ) and complete queue (CQ) support
+ for XPU. It is used to help XPU scheduling.
diff --git a/kernel/ucc/Makefile b/kernel/ucc/Makefile
new file mode 100644
index 000000000000..0e2735d2aef4
--- /dev/null
+++ b/kernel/ucc/Makefile
@@ -0,0 +1 @@
+obj-y += ascend_vstream.o vstream.o
diff --git a/kernel/ucc/ascend_vstream.c b/kernel/ucc/ascend_vstream.c
new file mode 100644
index 000000000000..d248aaff7639
--- /dev/null
+++ b/kernel/ucc/ascend_vstream.c
@@ -0,0 +1,654 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/vstream.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/ucc_common.h>
+#include <linux/ucc_sched.h>
+
+DEFINE_MUTEX(vstreamId_Bitmap_mutex);
+static DECLARE_BITMAP(vstreamIdBitmap, DEVDRV_MAX_SQ_NUM);
+
+static DEFINE_MUTEX(vcqId_Bitmap_mutex);
+static DECLARE_BITMAP(vcqIdBitmap, DEVDRV_MAX_CQ_NUM);
+
+static DEFINE_MUTEX(revmap_mutex);
+
+static struct vstream_info *vstreamContainer[DEVDRV_MAX_SQ_NUM];
+static struct vcq_map_table *vsqcqMapTable[DEVDRV_MAX_CQ_NUM];
+
+#define MAX_SQ_SIZE (MAX_VSTREAM_SIZE * MAX_VSTREAM_SLOT_SIZE)
+#define MAX_CQ_SIZE (MAX_VSTREAM_SIZE * MAX_CQ_SLOT_SIZE)
+
+#define SQ_USER_ADDR_OFFSET(id) ((unsigned long)REMAP_ALIGN(MAX_SQ_SIZE) * id)
+#define CQ_USER_ADDR_OFFSET(id) ((unsigned long)REMAP_ALIGN(MAX_CQ_SIZE) * id)
+
+#define SQ_VSTREAM_DATA(id) vstreamContainer[id]->vsqNode->vstreamData
+#define CQ_VSTREAM_DATA(id) vstreamContainer[id]->vcqNode->vstreamData
+
+static struct tsdrv_ctx *get_ctx(int fd)
+{
+ struct fd f;
+ struct davinci_intf_private_stru *file_private_data;
+ struct tsdrv_ctx *ctx = NULL;
+
+ f = fdget(fd);
+ if (!f.file)
+ goto out;
+
+ file_private_data = f.file->private_data;
+ if (!file_private_data)
+ goto out;
+
+ ctx = file_private_data->priv_filep.private_data;
+
+out:
+ fdput(f);
+ return ctx;
+}
+
+static struct vcq_map_table *vstream_get_map_table(uint32_t id)
+{
+ return vsqcqMapTable[id];
+}
+
+static void free_vstreamId(uint32_t vstreamId)
+{
+ mutex_lock(&vstreamId_Bitmap_mutex);
+ clear_bit(vstreamId, vstreamIdBitmap);
+ mutex_unlock(&vstreamId_Bitmap_mutex);
+}
+
+static void free_vcqId(uint32_t vcqId, uint32_t flag)
+{
+ mutex_lock(&vcqId_Bitmap_mutex);
+ if (!(flag & TSDRV_CQ_REUSE))
+ clear_bit(vcqId, vcqIdBitmap);
+ mutex_unlock(&vcqId_Bitmap_mutex);
+}
+
+static void vstream_free_map_table(uint32_t vcqId, uint32_t vstreamId,
+ uint32_t flag)
+{
+ struct vcq_map_table *freeTable = NULL;
+ struct vstream_id *vstreamIdNode = NULL;
+
+ freeTable = vstream_get_map_table(vcqId);
+ if (!freeTable) {
+ ucc_err("No map found for vcq:%d.\n", vcqId);
+ return;
+ }
+
+ list_for_each_entry(vstreamIdNode, &freeTable->vstreamId_list, list) {
+ if (vstreamIdNode->vstreamId == vstreamId) {
+ list_del(&vstreamIdNode->list);
+ kfree(vstreamIdNode);
+ break;
+ }
+ }
+ if (!(flag & TSDRV_CQ_REUSE)) {
+ kfree(freeTable->vcqNode->vstreamData);
+ kfree(freeTable->vcqNode);
+ kfree(freeTable);
+ }
+}
+
+static void vstream_alloc_ucc_se(struct ucc_se *se)
+{
+ memset(&se->statistics, 0, sizeof(se->statistics));
+ se->on_cu = 0;
+ se->state = SE_PREPARE;
+ se->flag = UCC_TIF_NONE;
+ se->prio = UCC_PRIO_HIGH;
+ se->step = UCC_STEP_SLOW;
+ raw_spin_lock_init(&se->se_lock);
+}
+
+static struct vstream_info *vstream_create_info(struct tsdrv_ctx *ctx,
+ struct normal_alloc_sqcq_para *para)
+{
+ struct vcq_map_table *mapTable = NULL;
+
+ struct vstream_info *vstream = kzalloc(sizeof(struct vstream_info),
+ GFP_KERNEL);
+ if (!vstream)
+ return NULL;
+
+ (void)memcpy(vstream->info, para->info,
+ sizeof(uint32_t) * SQCQ_RTS_INFO_LENGTH);
+
+ vstream->privdata = ctx;
+ vstream->tsId = para->tsId;
+ vstream->vstreamId = para->sqId;
+ vstream->vcqId = para->cqId;
+
+ mapTable = vstream_get_map_table(vstream->vcqId);
+ if (!mapTable || !mapTable->vcqNode) {
+ ucc_err("No map found for vcqId:%d.\n", vstream->vcqId);
+ goto free_vstream;
+ }
+ vstream->vcqNode = mapTable->vcqNode;
+ vstream->vsqNode = kmalloc(sizeof(struct vstream_node), GFP_KERNEL);
+ if (!vstream->vsqNode) {
+ ucc_err("Failed to alloc memory for vsqNode:%d.\n",
+ vstream->vstreamId);
+ goto free_vstream;
+ }
+ vstream->vsqNode->vstreamData = kmalloc(MAX_SQ_SIZE, GFP_KERNEL);
+ if (!vstream->vsqNode->vstreamData)
+ goto free_vsqNode;
+ vstream->vsqNode->id = vstream->vstreamId;
+ vstream->vsqNode->head = 0;
+ vstream->vsqNode->tail = 0;
+ vstream->vsqNode->credit = MAX_VSTREAM_SIZE;
+ raw_spin_lock_init(&vstream->vsqNode->spin_lock);
+ vstream->send_cnt = 0;
+ vstream->p = current;
+ vstream_alloc_ucc_se(&vstream->se);
+
+ return vstream;
+
+free_vsqNode:
+ kfree(vstream->vsqNode);
+
+free_vstream:
+ kfree(vstream);
+ return NULL;
+}
+
+struct vstream_info *vstream_get_info(uint32_t id)
+{
+ return vstreamContainer[id];
+}
+
+static void vstream_free_info(uint32_t id)
+{
+ struct vstream_info *freeInfo = vstream_get_info(id);
+
+ if (freeInfo) {
+ ucc_set_vstream_state(freeInfo, SE_DEAD);
+
+ if (freeInfo->vsqNode)
+ kfree(freeInfo->vsqNode->vstreamData);
+
+ kfree(freeInfo->vsqNode);
+ }
+
+ kfree(freeInfo);
+}
+
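+/*
+ * vsq/vcq nodes are ring buffers of MAX_VSTREAM_SIZE slots: @credit counts
+ * free slots, so pushing entries consumes credit and popping releases it.
+ */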
+static int queue_pop_by_num(struct vstream_node *node, uint32_t pop_num)
+{
+ if (node->credit + pop_num > MAX_VSTREAM_SIZE) {
+ ucc_err("Queue usage out-of-bounds");
+ return -EACCES;
+ }
+
+ node->credit += pop_num;
+ node->head = (node->head + pop_num) % MAX_VSTREAM_SIZE;
+ return 0;
+}
+
+static int queue_pop_by_head(struct vstream_node *node, uint32_t head)
+{
+ int pop_num = (head - node->head + MAX_VSTREAM_SIZE) %
+ MAX_VSTREAM_SIZE;
+ return queue_pop_by_num(node, pop_num);
+}
+
+int update_vstream_head(struct vstream_info *vstream_info, int num)
+{
+ struct vstream_node *node = vstream_info->vsqNode;
+
+ raw_spin_lock(&node->spin_lock);
+ if (node->credit + num > MAX_VSTREAM_SIZE) {
+ raw_spin_unlock(&node->spin_lock);
+ return -1;
+ }
+
+ node->credit += num;
+ node->head = (node->head + num) % MAX_VSTREAM_SIZE;
+ raw_spin_unlock(&node->spin_lock);
+
+ return 0;
+}
+
+bool vstream_have_kernel(struct ucc_se *se)
+{
+ struct vstream_info *vinfo;
+
+ vinfo = container_of(se, struct vstream_info, se);
+ return vinfo->vsqNode->credit != MAX_VSTREAM_SIZE;
+}
+
+static int queue_push_by_num(struct vstream_node *node, uint32_t push_num)
+{
+ if (node->credit < push_num)
+ return -EACCES;
+
+ node->credit -= push_num;
+ node->tail = (node->tail + push_num) % MAX_VSTREAM_SIZE;
+ return 0;
+}
+
+static int queue_push_by_tail(struct vstream_node *node, uint32_t tail)
+{
+ int push_num = (tail - node->tail + MAX_VSTREAM_SIZE) %
+ MAX_VSTREAM_SIZE;
+ return queue_push_by_num(node, push_num);
+}
+
+static uint32_t vstream_alloc_vstreamId(void)
+{
+ uint32_t vstreamId = DEVDRV_MAX_SQ_NUM;
+
+ /* alloc vstreamId */
+ mutex_lock(&vstreamId_Bitmap_mutex);
+ vstreamId = find_first_zero_bit(vstreamIdBitmap, DEVDRV_MAX_SQ_NUM);
+ if (vstreamId == DEVDRV_MAX_SQ_NUM) {
+ ucc_err("vstreamId exhausted.\n");
+ mutex_unlock(&vstreamId_Bitmap_mutex);
+ return DEVDRV_MAX_SQ_NUM;
+ }
+ set_bit(vstreamId, vstreamIdBitmap);
+ mutex_unlock(&vstreamId_Bitmap_mutex);
+
+ return vstreamId;
+}
+
+static uint32_t vstream_alloc_vcqid(void)
+{
+ uint32_t vcqId = DEVDRV_MAX_CQ_NUM;
+
+ /* alloc vcqid */
+ mutex_lock(&vcqId_Bitmap_mutex);
+ vcqId = find_first_zero_bit(vcqIdBitmap, DEVDRV_MAX_CQ_NUM);
+ if (vcqId == DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vcqId has been used up.\n");
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ return DEVDRV_MAX_CQ_NUM;
+ }
+ set_bit(vcqId, vcqIdBitmap);
+ mutex_unlock(&vcqId_Bitmap_mutex);
+
+ ucc_info("vcqId = %d\n", vcqId);
+ return vcqId;
+}
+
+int vstream_map_pfnaddr(struct tsdrv_ctx *ctx,
+ struct normal_alloc_sqcq_para *para)
+{
+ int err = 0;
+ unsigned long vsqAddr;
+ unsigned long vcqAddr;
+ pgprot_t vm_page_prot;
+ struct vm_area_struct *vma = ctx->vma[para->tsId];
+
+ vsqAddr = vma->vm_start + SQ_USER_ADDR_OFFSET(para->sqId);
+ vm_page_prot = pgprot_device(vma->vm_page_prot);
+ err = remap_pfn_range(vma, vsqAddr,
+ virt_to_pfn(SQ_VSTREAM_DATA(para->sqId)),
+ MAX_SQ_SIZE, vm_page_prot);
+ if (err) {
+ ucc_err("remap_pfn_range failed,ret=%d.\n", err);
+ return -EFAULT;
+ }
+ if (!(para->flag & TSDRV_CQ_REUSE)) {
+ vcqAddr = vma->vm_start + DEVDRV_VM_CQ_MEM_OFFSET +
+ CQ_USER_ADDR_OFFSET(para->cqId);
+ err = remap_pfn_range(vma, vcqAddr,
+ virt_to_pfn(CQ_VSTREAM_DATA(para->sqId)),
+ MAX_CQ_SIZE, vm_page_prot);
+ if (err) {
+ ucc_err("remap_pfn_range failed,ret=%d.\n", err);
+ return -EFAULT;
+ }
+ }
+
+ return err;
+}
+
+void vstream_unmap_pfnaddr(struct tsdrv_ctx *ctx,
+ struct normal_free_sqcq_para *para)
+{
+ unsigned long vsqAddr;
+ unsigned long vcqAddr;
+ size_t cqSize = PAGE_ALIGN(MAX_CQ_SIZE);
+ struct vm_area_struct *vma = ctx->vma[para->tsId];
+
+ vsqAddr = vma->vm_start + SQ_USER_ADDR_OFFSET(para->sqId);
+ zap_vma_ptes(vma, vsqAddr, MAX_SQ_SIZE);
+
+ if (!(para->flag & TSDRV_CQ_REUSE)) {
+ vcqAddr = vma->vm_start + DEVDRV_VM_CQ_MEM_OFFSET +
+ CQ_USER_ADDR_OFFSET(para->cqId);
+ zap_vma_ptes(vma, vcqAddr, cqSize);
+ }
+}
+
+static int vstream_update_vcqtable(uint32_t vcqId, uint32_t vstreamId,
+ uint32_t flag)
+{
+ int err = -ENOSPC;
+ struct vcq_map_table *vcqTable = NULL;
+ struct vstream_id *vstreamIdNode = NULL;
+
+ if (!(flag & TSDRV_CQ_REUSE)) {
+ vcqTable = kmalloc(sizeof(struct vcq_map_table), GFP_KERNEL);
+ if (!vcqTable)
+ return -ENOMEM;
+
+ vcqTable->vcqId = vcqId;
+ vcqTable->vcqNode = kmalloc(sizeof(struct vstream_node),
+ GFP_KERNEL);
+ if (!vcqTable->vcqNode) {
+ err = -ENOMEM;
+ goto free_vcqTable;
+ }
+
+ vcqTable->vcqNode->vstreamData = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcqTable->vcqNode->vstreamData) {
+ err = -ENOMEM;
+ goto free_vcqNode;
+ }
+ vcqTable->vcqNode->id = vcqId;
+ vcqTable->vcqNode->head = 0;
+ vcqTable->vcqNode->tail = 0;
+ vcqTable->vcqNode->credit = MAX_VSTREAM_SIZE;
+ INIT_LIST_HEAD(&vcqTable->vstreamId_list);
+ vsqcqMapTable[vcqId] = vcqTable;
+ } else {
+ vcqTable = vsqcqMapTable[vcqId];
+ }
+ vstreamIdNode = kmalloc(sizeof(struct vstream_id), GFP_KERNEL);
+ if (!vstreamIdNode) {
+ err = -ENOMEM;
+
+ if (!(flag & TSDRV_CQ_REUSE))
+ goto free_vstreamData;
+ return err;
+ }
+ vstreamIdNode->vstreamId = vstreamId;
+ list_add(&vstreamIdNode->list, &vcqTable->vstreamId_list);
+
+ return 0;
+
+free_vstreamData:
+ kfree(vcqTable->vcqNode->vstreamData);
+
+free_vcqNode:
+ kfree(vcqTable->vcqNode);
+
+free_vcqTable:
+ kfree(vcqTable);
+
+ return err;
+}
+
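+/*
+ * Allocate a vstream: reserve vsq/vcq ids, set up the vcq map table entry,
+ * create the vstream_info and its ring buffer, bind it to an xcu via
+ * select_sq(), then map the SQ/CQ memory into the caller's vma.
+ */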
+int ascend_vstream_alloc(struct vstream_args *arg)
+{
+ uint32_t vstreamId;
+ uint32_t vcqId = DEVDRV_MAX_CQ_NUM;
+ int err = -EINVAL;
+ struct vstream_info *vstream = NULL;
+ struct tsdrv_ctx *ctx = NULL;
+ struct normal_alloc_sqcq_para *sqcq_alloc_para = &arg->va_args.ascend;
+
+ ctx = get_ctx(sqcq_alloc_para->fd);
+ if (!ctx)
+ return err;
+
+ vstreamId = vstream_alloc_vstreamId();
+ if (vstreamId == DEVDRV_MAX_SQ_NUM) {
+ ucc_err("vstreamId alloc failed.\n");
+ return err;
+ }
+ if (!(sqcq_alloc_para->flag & TSDRV_CQ_REUSE))
+ vcqId = vstream_alloc_vcqid();
+ else
+ vcqId = sqcq_alloc_para->cqId;
+
+ if (vcqId >= DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vcqId alloc failed.\n");
+ goto free_vstreamIds;
+ }
+ err = vstream_update_vcqtable(vcqId, vstreamId, sqcq_alloc_para->flag);
+ if (err) {
+ ucc_err("vcqtable update failed, vcqId:%d, vstreamId:%d, flag:%d.\n",
+ vcqId, vstreamId, sqcq_alloc_para->flag);
+ goto free_vcqid;
+ }
+
+ sqcq_alloc_para->sqId = vstreamId;
+ sqcq_alloc_para->cqId = vcqId;
+ vstream = vstream_create_info(ctx, sqcq_alloc_para);
+ if (!vstream) {
+ ucc_err("vstream create failed: vcqId:%d, vstreamId:%d.\n",
+ vcqId, vstreamId);
+ err = -ENOSPC;
+ goto free_vcqtable;
+ }
+
+ vstream->devId = sqcq_alloc_para->devId;
+ vstreamContainer[vstreamId] = vstream;
+
+ vstream->group = select_sq(vstream);
+ if (!vstream->group) {
+ ucc_err("Failed to select sq\n");
+ err = -EINVAL;
+ goto free_vstream_info;
+ }
+
+ err = vstream_map_pfnaddr(ctx, sqcq_alloc_para);
+ if (err) {
+ ucc_err("vstream map failed, ret=%d.\n", err);
+ goto free_vstream_info;
+ }
+ return 0;
+
+free_vstream_info:
+ vstream_free_info(vstreamId);
+
+free_vcqtable:
+ vstream_free_map_table(vcqId, vstreamId, sqcq_alloc_para->flag);
+
+free_vcqid:
+ free_vcqId(vcqId, sqcq_alloc_para->flag);
+
+free_vstreamIds:
+ free_vstreamId(vstreamId);
+
+ return err;
+}
+
+int ascend_vstream_free(struct vstream_args *arg)
+{
+ int err = 0;
+ struct vstream_info *vstreamInfo = NULL;
+ struct normal_free_sqcq_para *sqcq_free_para = &arg->vf_args.ascend;
+ uint32_t vstreamId = sqcq_free_para->sqId;
+ uint32_t vcqId = sqcq_free_para->cqId;
+
+ if (vstreamId >= DEVDRV_MAX_SQ_NUM || vcqId >= DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vstream index out-of-range, vstreamId=%d, vcqId=%d.\n",
+ vstreamId, vcqId);
+ return -EPERM;
+ }
+
+ vstreamInfo = vstream_get_info(vstreamId);
+ if (!vstreamInfo) {
+ ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId);
+ return -EPERM;
+ }
+ err = ucc_free_task(vstreamInfo, vstreamInfo->privdata);
+
+ free_vcqId(vcqId, sqcq_free_para->flag);
+ vstream_free_map_table(vcqId, vstreamId, sqcq_free_para->flag);
+
+ vstream_unmap_pfnaddr(vstreamInfo->privdata, sqcq_free_para);
+
+ vstream_free_info(vstreamId);
+ free_vstreamId(vstreamId);
+ return err;
+}
+
+int ascend_vstream_kick(struct vstream_args *arg)
+{
+ int err = 0;
+ struct tsdrv_sqcq_data_para *sqcq_data_para = &arg->vk_args.ascend;
+ int vstreamId = sqcq_data_para->id;
+ int tail = sqcq_data_para->val;
+ struct vstream_info *vstreamInfo = NULL;
+ int push_num;
+
+ vstreamInfo = vstream_get_info(vstreamId);
+ if (!vstreamInfo) {
+ ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId);
+ return -ENOMEM;
+ }
+
+ vstreamInfo->p = current;
+
+ push_num = (tail - vstreamInfo->vsqNode->tail + MAX_VSTREAM_SIZE) %
+ MAX_VSTREAM_SIZE;
+
+ raw_spin_lock(&vstreamInfo->vsqNode->spin_lock);
+ err = queue_push_by_tail(vstreamInfo->vsqNode, tail);
+ if (err) {
+ raw_spin_unlock(&vstreamInfo->vsqNode->spin_lock);
+ ucc_err("queue_push_by_tail error, ret = %d\n", err);
+ return err;
+ }
+ raw_spin_unlock(&vstreamInfo->vsqNode->spin_lock);
+
+ err = ucc_wake_up(&vstreamInfo->se);
+ return err;
+}
+
+int ascend_callback_vstream_wait(struct vstream_args *arg)
+{
+ int err = 0;
+ int cqeNum = 0;
+ int cqeSum = 0;
+ struct vstream_info *vstreamInfo = NULL;
+ struct vcq_map_table *vcqTable = NULL;
+ struct vcq_map_table *waitTable = NULL;
+ struct vstream_id *vstreamIdNode = NULL;
+ struct devdrv_report_para *report_para = &arg->cvw_args;
+ uint32_t *sqlist;
+ uint32_t sqlist_num = 0;
+ uint32_t vstreamId, vcqId;
+
+ sqlist = kmalloc_array(DEVDRV_MAX_SQ_NUM, sizeof(uint32_t), GFP_KERNEL);
+ if (!sqlist)
+ return -ENOMEM;
+
+ vcqId = report_para->cq_id;
+ if (vcqId >= DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vcqId out-of-range, vcqId=%d.\n", vcqId);
+ err = -EPERM;
+ goto out;
+ }
+
+ mutex_lock(&vcqId_Bitmap_mutex);
+ waitTable = vstream_get_map_table(vcqId);
+ if (!waitTable) {
+ ucc_err("No map found for vcq:%d.\n", vcqId);
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ err = -EPERM;
+ goto out;
+ }
+
+ list_for_each_entry(vstreamIdNode, &waitTable->vstreamId_list, list)
+ sqlist[sqlist_num++] = vstreamIdNode->vstreamId;
+ mutex_unlock(&vcqId_Bitmap_mutex);
+
+ //get sqInfo from hardware
+ for (vstreamId = 0; vstreamId < sqlist_num; vstreamId++) {
+ vstreamInfo = vstream_get_info(sqlist[vstreamId]);
+ if (!vstreamInfo)
+ continue;
+ err |= ucc_wait_cq(vstreamInfo, vstreamInfo->privdata,
+ report_para, &cqeNum);
+ cqeSum += cqeNum;
+ if (cqeNum)
+ break;
+ }
+
+ //update cqInfo
+ mutex_lock(&vcqId_Bitmap_mutex);
+ vcqTable = vstream_get_map_table(vcqId);
+ if (!vcqTable) {
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ ucc_err("No map found for vcq:%d.\n", vcqId);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = queue_push_by_num(vcqTable->vcqNode, cqeSum);
+ if (err) {
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ ucc_err("failed to queue_push_by_num, ret = %d.\n", err);
+ goto out;
+ }
+ report_para->cq_tail = vcqTable->vcqNode->tail;
+ mutex_unlock(&vcqId_Bitmap_mutex);
+
+out:
+ kfree(sqlist);
+ return err;
+}
+
+int ascend_callback_vstream_kick(struct vstream_args *arg)
+{
+ u32 vcqId, release_head;
+ struct vstream_info *vstreamInfo = NULL;
+ int err = 0;
+
+ vcqId = arg->cvk_args.id;
+ release_head = arg->cvk_args.val;
+ if (vcqId >= DEVDRV_MAX_CQ_NUM || release_head >= MAX_VSTREAM_SIZE) {
+ ucc_err("vstream index out-of-range, vcqId=%d, release_head=%d.\n",
+ vcqId, release_head);
+ return -EPERM;
+ }
+
+ mutex_lock(&vcqId_Bitmap_mutex);
+ vstreamInfo = vstream_get_info(vcqId);
+ if (!vstreamInfo) {
+ err = -EPERM;
+ goto out;
+ }
+
+ err = queue_pop_by_head(vstreamInfo->vcqNode, release_head);
+
+out:
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ return err;
+}
+
+int ascend_vstream_get_head(struct vstream_args *arg)
+{
+ u32 vstreamId = arg->vh_args.id;
+ struct vstream_info *vstreamInfo = NULL;
+
+ if (vstreamId >= DEVDRV_MAX_SQ_NUM) {
+ ucc_err("vstreamId out-of-range, vstreamId=%d.\n", vstreamId);
+ return -EINVAL;
+ }
+
+ vstreamInfo = vstream_get_info(vstreamId);
+ if (!vstreamInfo) {
+ ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId);
+ return -EINVAL;
+ }
+ arg->vh_args.val = vstreamInfo->vsqNode->head;
+
+ return 0;
+}
+
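A note on the index arithmetic used above: ascend_vstream_kick() and the submit path both compute pending work as (tail - head + MAX_VSTREAM_SIZE) % MAX_VSTREAM_SIZE, the usual wrap-safe distance between two ring-buffer indexes. A minimal standalone sketch of that arithmetic (illustrative only; the MAX_VSTREAM_SIZE value here is assumed, not taken from this patch):

#include <stdio.h>

#define MAX_VSTREAM_SIZE 1024	/* assumed ring size, for illustration only */

/* Wrap-safe number of entries sitting between head and tail. */
static unsigned int ring_depth(unsigned int head, unsigned int tail)
{
	return (tail - head + MAX_VSTREAM_SIZE) % MAX_VSTREAM_SIZE;
}

int main(void)
{
	printf("%u\n", ring_depth(10, 14));	/* no wrap: 4 entries */
	printf("%u\n", ring_depth(1020, 2));	/* wrapped: 6 entries */
	return 0;
}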
diff --git a/kernel/ucc/ascend_vstream.h b/kernel/ucc/ascend_vstream.h
new file mode 100644
index 000000000000..0cd200168495
--- /dev/null
+++ b/kernel/ucc/ascend_vstream.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _ASCEND_VSTREAM_H
+#define _ASCEND_VSTREAM_H
+
+int ascend_vstream_alloc(struct vstream_args *arg);
+int ascend_vstream_free(struct vstream_args *arg);
+int ascend_vstream_kick(struct vstream_args *arg);
+int ascend_callback_vstream_wait(struct vstream_args *arg);
+int ascend_callback_vstream_kick(struct vstream_args *arg);
+int ascend_vstream_get_head(struct vstream_args *arg);
+
+#endif /* _ASCEND_VSTREAM_H */
diff --git a/kernel/ucc/vstream.c b/kernel/ucc/vstream.c
new file mode 100644
index 000000000000..d4705f285b89
--- /dev/null
+++ b/kernel/ucc/vstream.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/syscalls.h>
+#include <linux/vstream.h>
+
+#include "ascend_vstream.h"
+
+static int amdgpu_vstream_alloc(struct vstream_args *arg)
+{
+ return 0;
+}
+static int amdgpu_vstream_free(struct vstream_args *arg)
+{
+ return 0;
+}
+static int amdgpu_vstream_kick(struct vstream_args *arg)
+{
+ return 0;
+}
+static int amdgpu_vstream_update(struct vstream_args *arg)
+{
+ return 0;
+}
+
+/*
+ * vstream_manage_cmd table
+ */
+static vstream_manage_t (*vstream_command_table[AMDGPU_MAX_COMMAND + 1]) = {
+ ascend_vstream_alloc, // ASCEND_VSTREAM_ALLOC
+ ascend_vstream_free, // ASCEND_VSTREAM_FREE
+ ascend_vstream_kick, // ASCEND_VSTREAM_KICK
+ ascend_callback_vstream_wait, // ASCEND_CALLBACK_VSTREAM_WAIT
+ ascend_callback_vstream_kick, // ASCEND_CALLBACK_VSTREAM_KICK
+ ascend_vstream_get_head, // ASCEND_VSTREAM_GET_HEAD
+ NULL, // ASCEND_MAX_COMMAND
+ amdgpu_vstream_alloc, // AMDGPU_VSTREAM_ALLOC
+ amdgpu_vstream_free, // AMDGPU_VSTREAM_FREE
+ amdgpu_vstream_kick, // AMDGPU_VSTREAM_KICK
+ amdgpu_vstream_update, // AMDGPU_VSTREAM_UPDATE
+ NULL // AMDGPU_MAX_COMMAND
+};
+
+SYSCALL_DEFINE2(vstream_manage, struct vstream_args __user *, arg, int, cmd)
+{
+ int res = 0;
+ struct vstream_args vstream_arg;
+
+ if (cmd < 0 || cmd > AMDGPU_MAX_COMMAND || !vstream_command_table[cmd])
+ return -EINVAL;
+
+ if (copy_from_user(&vstream_arg, arg, sizeof(struct vstream_args))) {
+ pr_err("copy_from_user failed\n");
+ return -EFAULT;
+ }
+ res = vstream_command_table[cmd](&vstream_arg);
+ if (copy_to_user(arg, &vstream_arg, sizeof(struct vstream_args))) {
+ pr_err("copy_to_user failed\n");
+ return -EFAULT;
+ }
+
+ return res;
+}
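For orientation, the dispatcher above is thin: user space passes a struct vstream_args pointer plus a command index, the kernel copies the struct in, calls the matching handler from vstream_command_table, and copies the possibly-updated struct back. A hedged user-space sketch of a call follows; the syscall number and the numeric command value are placeholders, not values defined by this patch:

#define _GNU_SOURCE
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Placeholders -- the real values come from the kernel headers in this series. */
#define __NR_vstream_manage      1000	/* assumed syscall number */
#define ASCEND_VSTREAM_GET_HEAD  5	/* assumed index into vstream_command_table */

int main(void)
{
	/* Stand-in buffer for struct vstream_args (real layout: <linux/vstream.h>). */
	unsigned char args[128] = { 0 };
	long ret;

	/* The vstream id would be filled into the vh_args part of the struct here. */
	ret = syscall(__NR_vstream_manage, args, ASCEND_VSTREAM_GET_HEAD);
	if (ret < 0)
		fprintf(stderr, "vstream_manage: %s\n", strerror(errno));
	return 0;
}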
diff --git a/kernel/ucc_sched/Makefile b/kernel/ucc_sched/Makefile
new file mode 100644
index 000000000000..4a41f07d091c
--- /dev/null
+++ b/kernel/ucc_sched/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_XPU_SCHEDULE) += core.o
diff --git a/kernel/ucc_sched/core.c b/kernel/ucc_sched/core.c
new file mode 100644
index 000000000000..4c7f1f59aeb9
--- /dev/null
+++ b/kernel/ucc_sched/core.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Tue Jan 17 22:19:17 2023
+ */
+
+#include <uapi/linux/sched/types.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/ucc_sched.h>
+
+#include "ucc_sched.h"
+#include "../sched/sched.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ucc_sched.h>
+
+#define MAX_XCU_NUM (100)
+#define TS_SQ_TRANS_TASK_THRESHOLD (20)
+
+static struct xcu xcu_manager[MAX_XCU_NUM];
+static int num_active_xcu;
+raw_spinlock_t xcu_mgr_lock;
+int sysctl_ucc_sched_rcv_timeout_ms = 10;
+
+static struct task_struct vstream_idle_task;
+static struct vstream_info vstream_idle = {
+ .vstreamId = UINT_MAX,
+ .p = &vstream_idle_task,
+};
+
+struct sched_args {
+ int cu_id;
+};
+
+static inline int is_xcu_offline(struct xcu *cu)
+{
+ return cu->state == XCU_INACTIVE;
+}
+
+void ucc_set_vstream_state(struct vstream_info *vinfo, int state)
+{
+ vinfo->se.state = state;
+}
+
+static inline int should_se_run(struct ucc_se *se)
+{
+ return se->state != SE_BLOCK && se->state != SE_DEAD;
+}
+
+static inline void update_stats_run_start(struct xcu *cu,
+ struct ucc_se *se)
+{
+ u64 start;
+
+ if (!schedstat_enabled())
+ return;
+
+ start = ktime_get_boot_ns();
+ __schedstat_set(se->statistics.run_start, start);
+}
+
+static inline void update_stats_run_end(struct xcu *cu,
+ struct ucc_se *se)
+{
+
+ struct vstream_info *vinfo;
+ u64 delta;
+
+ if (!schedstat_enabled())
+ return;
+
+ delta = ktime_get_boot_ns() - schedstat_val(se->statistics.run_start);
+ vinfo = container_of(se, struct vstream_info, se);
+ trace_ucc_sched_stat_run(vinfo, delta, se->is_timeout);
+
+ __schedstat_set(se->statistics.run_max,
+ max(schedstat_val(se->statistics.run_max), delta));
+ __schedstat_inc(se->statistics.run_count);
+ __schedstat_add(se->statistics.run_sum, delta);
+ __schedstat_set(se->statistics.run_start, 0);
+}
+
+static inline void update_stats_preempt_start(struct xcu *cu,
+ struct ucc_se *se)
+{
+ u64 wait_start;
+
+ if (!schedstat_enabled())
+ return;
+
+ wait_start = ktime_get_boot_ns();
+ __schedstat_set(se->statistics.preempt_start, wait_start);
+}
+
+static inline void update_stats_wait_start(struct xcu *cu, struct ucc_se *se)
+{
+ u64 wait_start;
+
+ if (!schedstat_enabled())
+ return;
+
+ wait_start = ktime_get_boot_ns();
+ __schedstat_set(se->statistics.wait_start, wait_start);
+}
+
+
+static inline void update_stats_wait_end(struct xcu *cu, struct ucc_se *se)
+{
+ struct vstream_info *vinfo;
+ u64 delta, preempt_delta;
+
+ if (!schedstat_enabled())
+ return;
+
+ delta = ktime_get_boot_ns() - schedstat_val(se->statistics.wait_start);
+ vinfo = container_of(se, struct vstream_info, se);
+ trace_ucc_sched_stat_wait(vinfo, delta);
+
+ __schedstat_set(se->statistics.wait_max,
+ max(schedstat_val(se->statistics.wait_max), delta));
+ __schedstat_inc(se->statistics.wait_count);
+ __schedstat_add(se->statistics.wait_sum, delta);
+ __schedstat_set(se->statistics.wait_start, 0);
+
+ if (se->statistics.preempt_start) {
+ preempt_delta = ktime_get_boot_ns() -
+ schedstat_val(se->statistics.preempt_start);
+ trace_ucc_sched_stat_preempt(vinfo, preempt_delta);
+
+ __schedstat_set(se->statistics.preempt_max,
+ max(schedstat_val(se->statistics.preempt_max),
+ preempt_delta));
+ __schedstat_inc(se->statistics.preempt_count);
+ __schedstat_add(se->statistics.preempt_sum, preempt_delta);
+ __schedstat_set(se->statistics.preempt_start, 0);
+ }
+}
+
+void ucc_dump_statistics_info(struct ucc_se *se)
+{
+ struct vstream_info *vinfo = container_of(se, struct vstream_info, se);
+
+ pr_info("comm %s pid %d vstreamId %d kernel_sum %llu wait_count %llu wait_max %llu[ns] wait_sum %llu[ns] preempt_count %llu preempt_max %llu[ns] preempt_sum %llu[ns]\n",
+ vinfo->p->comm,
+ vinfo->p->pid,
+ vinfo->vstreamId,
+ vinfo->se.statistics.kernel_sum,
+ vinfo->se.statistics.wait_count,
+ vinfo->se.statistics.wait_max,
+ vinfo->se.statistics.wait_sum,
+ vinfo->se.statistics.preempt_count,
+ vinfo->se.statistics.preempt_max,
+ vinfo->se.statistics.preempt_sum);
+}
+
+static void put_prev_entity(struct xcu *cu, struct ucc_se *prev)
+{
+ if (!prev)
+ return;
+
+ if (prev->on_cu)
+ update_stats_wait_start(cu, prev);
+
+ prev->state = SE_READY;
+ cu->curr_se->state = SE_RUNNING;
+}
+
+static void set_next_entity(struct xcu *cu, struct ucc_se *se)
+{
+ if (se->on_cu && se != cu->curr_se)
+ update_stats_wait_end(cu, se);
+
+ cu->curr_se = se;
+}
+
+static void dequeue_ucc_se(struct ucc_se *se, struct xcu *cu)
+{
+ raw_spin_lock(&cu->xcu_lock);
+ if (!se->on_cu) {
+ raw_spin_unlock(&cu->xcu_lock);
+ return;
+ }
+
+ se->on_cu = 0;
+
+ list_del_init(&se->run_list);
+
+ if (list_empty(cu->queue + se->prio))
+ __clear_bit(se->prio, cu->bitmap);
+ cu->rt_nr_running--;
+
+ if (se != cu->curr_se)
+ update_stats_wait_end(cu, se);
+
+ if (cu->curr_se == se)
+ cu->curr_se = NULL;
+
+ raw_spin_unlock(&cu->xcu_lock);
+}
+
+static void enqueue_ucc_se(struct ucc_se *se, struct xcu *cu)
+{
+ struct list_head *queue = cu->queue + se->prio;
+
+ raw_spin_lock(&cu->xcu_lock);
+ if (se->on_cu) {
+ raw_spin_unlock(&cu->xcu_lock);
+ return;
+ }
+ se->on_cu = 1;
+ se->is_timeout = 0;
+ list_add_tail(&se->run_list, queue);
+ __set_bit(se->prio, cu->bitmap);
+ cu->rt_nr_running++;
+
+ update_stats_wait_start(cu, se);
+
+ raw_spin_unlock(&cu->xcu_lock);
+}
+
+static struct xcu *ucc_select_cu(struct ucc_se *se)
+{
+ struct vstream_info *vstream_info;
+ int min_nr_running = INT_MAX;
+ struct xcu *cu;
+ int select_cu = 0;
+ int cu_id;
+
+ vstream_info = container_of(se, struct vstream_info, se);
+ for (cu_id = 0; cu_id < num_active_xcu; cu_id++) {
+ cu = &xcu_manager[cu_id];
+
+ if (vstream_info->devId != cu->dev_id ||
+ vstream_info->tsId != cu->ts_id)
+ continue;
+
+ if (cu->rt_nr_running < min_nr_running) {
+ min_nr_running = cu->rt_nr_running;
+ select_cu = cu_id;
+ }
+ }
+
+ vstream_info->cu_id = select_cu;
+ return &xcu_manager[select_cu];
+}
+
+static int ucc_check_preempt(struct ucc_se *se, struct xcu *cu)
+{
+ struct vstream_info *vinfo_curr, *vinfo;
+ struct ucc_se *curr_se;
+
+ curr_se = cu->curr_se;
+ if (!curr_se)
+ return 1;
+
+ vinfo = container_of(se, struct vstream_info, se);
+ vinfo_curr = container_of(curr_se, struct vstream_info, se);
+ if (vinfo_curr->p->ucc_priority > vinfo->p->ucc_priority) {
+ update_stats_preempt_start(cu, se);
+ curr_se->flag = UCC_TIF_PREEMPT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline void ucc_wakeup_idle_worker(struct xcu *cu)
+{
+ wake_up_state(cu->worker, TASK_INTERRUPTIBLE);
+}
+
+static inline void ucc_wakeup_running_worker(struct xcu *cu)
+{
+ wake_up_state(cu->worker, TASK_UNINTERRUPTIBLE);
+}
+
+int ucc_schedule(int cu_id)
+{
+ struct xcu *cu;
+
+ cu = &xcu_manager[cu_id];
+ cu->is_wake = 1;
+ ucc_wakeup_running_worker(cu);
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_schedule);
+
+int ucc_wake_up(struct ucc_se *se)
+{
+ struct xcu *cu;
+
+ raw_spin_lock(&se->se_lock);
+ if (se->on_cu) {
+ raw_spin_unlock(&se->se_lock);
+ return 0;
+ }
+
+ if (se->state == SE_BLOCK)
+ se->state = SE_READY;
+
+ cu = ucc_select_cu(se);
+ if (!cu) {
+ raw_spin_unlock(&se->se_lock);
+ return -1;
+ }
+
+ enqueue_ucc_se(se, cu);
+ if (ucc_check_preempt(se, cu))
+ ucc_wakeup_idle_worker(cu);
+
+ raw_spin_unlock(&se->se_lock);
+
+ return 0;
+}
+
+static struct ucc_se *pick_next_ucc_se(struct xcu *cu)
+{
+ struct ucc_se *se;
+ struct list_head *queue;
+ int idx;
+
+ if (!cu->rt_nr_running)
+ return NULL;
+
+ idx = sched_find_first_bit(cu->bitmap);
+ BUG_ON(idx >= MAX_UCC_PRIO);
+
+ queue = cu->queue + idx;
+ se = list_entry(queue->next, struct ucc_se, run_list);
+
+ return se;
+}
+
+static int ucc_submit_kernel(struct xcu *cu, struct ucc_se *se)
+{
+ struct vstream_info *vstream_info;
+ struct xpu_group *group;
+ struct tsdrv_ctx *ctx;
+ int kernel_num, left;
+
+ vstream_info = container_of(se, struct vstream_info, se);
+ ctx = vstream_info->privdata;
+ left = (vstream_info->vsqNode->tail - vstream_info->vsqNode->head +
+ MAX_VSTREAM_SIZE) % MAX_VSTREAM_SIZE;
+
+ group = vstream_info->group;
+
+ kernel_num = xpu_run(group, vstream_info, ctx);
+ if (kernel_num <= 0)
+ return kernel_num;
+
+ //update vstream info head and tail;
+ update_vstream_head(vstream_info, kernel_num);
+
+ left -= kernel_num;
+
+ return kernel_num;
+}
+
+static inline void ucc_wait_idle(struct xcu *cu)
+{
+ cu->state = XCU_IDLE;
+
+ do {
+ schedule_timeout_interruptible(1);
+ } while (cu->rt_nr_running == 0);
+
+ cu->state = XCU_BUSY;
+}
+
+static inline void ucc_wait_running(struct xcu *cu, struct ucc_se *se)
+{
+ int cnt = 1;
+
+ do {
+ schedule_timeout_uninterruptible(
+ msecs_to_jiffies(sysctl_ucc_sched_rcv_timeout_ms));
+ } while (cu->is_wake == 0 && --cnt > 0);
+
+ if (cnt == 0) {
+ __schedstat_inc(se->statistics.timeout_count);
+ se->is_timeout = 1;
+ }
+}
+
+static inline void clear_se_flag(struct ucc_se *se)
+{
+ if (se)
+ se->flag = UCC_TIF_NONE;
+}
+
+void ucc_dequeue_task(struct vstream_info *vInfo)
+{
+ struct xcu *cu = &xcu_manager[vInfo->cu_id];
+ struct ucc_se *se = &vInfo->se;
+
+ raw_spin_lock(&se->se_lock);
+ dequeue_ucc_se(se, cu);
+ raw_spin_unlock(&se->se_lock);
+}
+
+/*
+ * dynamic padding: select kernels with no QoS conflicts with the current
+ * ucc_se to fill the cu.
+ */
+static void dynamic_padding(struct xcu *cu, struct ucc_se *se)
+{
+}
+
+static int __ucc_schedule(void *args)
+{
+ struct sched_args *sargs = (struct sched_args *)args;
+ int cu_id = sargs->cu_id;
+ struct xcu *cu = &xcu_manager[cu_id];
+ struct ucc_se *se = NULL, *curr_se = NULL;
+ struct ucc_se *prev_se = NULL;
+ struct vstream_info *vinfo;
+ int send_cnt = 0;
+ int kernel_num, preempt;
+
+ while (!is_xcu_offline(cu)) {
+ raw_spin_lock(&cu->xcu_lock);
+ cu->is_sched = 0;
+ prev_se = cu->curr_se;
+
+ preempt = 0;
+ if (prev_se) {
+ if (prev_se->flag != UCC_TIF_PREEMPT)
+ goto submit_kernel;
+
+ vinfo = container_of(prev_se, struct vstream_info, se);
+ if (send_cnt < vinfo->p->ucc_step)
+ goto submit_kernel;
+
+ preempt = 1;
+ }
+
+ clear_se_flag(prev_se);
+ se = pick_next_ucc_se(cu);
+ if (!se) {
+ cu->is_sched = 1;
+ raw_spin_unlock(&cu->xcu_lock);
+ trace_ucc_sched_switch(0, &vstream_idle);
+ ucc_wait_idle(cu);
+ continue;
+ }
+
+ set_next_entity(cu, se);
+ if (se != prev_se) {
+ put_prev_entity(cu, prev_se);
+ vinfo = container_of(se, struct vstream_info, se);
+ trace_ucc_sched_switch(preempt, vinfo);
+ }
+ send_cnt = 0;
+submit_kernel:
+ curr_se = cu->curr_se;
+ dynamic_padding(cu, curr_se);
+ raw_spin_unlock(&cu->xcu_lock);
+
+ curr_se->is_timeout = 0;
+ kernel_num = ucc_submit_kernel(cu, curr_se);
+ //has no more kernels to submit.
+ if (kernel_num <= 0 && !vstream_have_kernel(curr_se)) {
+ raw_spin_lock(&curr_se->se_lock);
+ curr_se->state = SE_BLOCK;
+ dequeue_ucc_se(curr_se, cu);
+ raw_spin_unlock(&curr_se->se_lock);
+ cu->is_sched = 1;
+ continue;
+ }
+ cu->is_sched = 1;
+
+ vinfo = container_of(curr_se, struct vstream_info, se);
+ if (vinfo->send_cnt > TS_SQ_TRANS_TASK_THRESHOLD) {
+ update_stats_run_start(cu, curr_se);
+ /* kernel has not finished yet */
+ if (!cu->is_wake)
+ ucc_wait_running(cu, curr_se);
+
+ update_stats_run_end(cu, curr_se);
+ cu->is_wake = 0;
+ vinfo->send_cnt = 0;
+ }
+
+ send_cnt += kernel_num;
+ schedstat_add(curr_se->statistics.kernel_sum, kernel_num);
+ }
+
+ return 0;
+}
+
+static void init_xcu_rq(struct xcu *cu)
+{
+ int i;
+
+ for (i = 0; i < MAX_UCC_PRIO; i++) {
+ INIT_LIST_HEAD(cu->queue + i);
+ __clear_bit(i, cu->bitmap);
+ }
+
+ /* delimiter for bitsearch: */
+ __set_bit(MAX_UCC_PRIO, cu->bitmap);
+ cu->rt_nr_running = 0;
+ raw_spin_lock_init(&cu->xcu_lock);
+}
+
+static int alloc_cu_id(void)
+{
+ int cu_id = -1;
+
+ raw_spin_lock(&xcu_mgr_lock);
+ if (num_active_xcu >= MAX_XCU_NUM) {
+ raw_spin_unlock(&xcu_mgr_lock);
+ return cu_id;
+ }
+
+ cu_id = num_active_xcu;
+ num_active_xcu++;
+ raw_spin_unlock(&xcu_mgr_lock);
+
+ return cu_id;
+}
+
+int ucc_sched_register_xcu(int dev_id, int ts_id, int cu_num)
+{
+ int cu_id;
+ struct xcu *cu;
+ struct sched_args *args;
+ struct sched_param param = { .sched_priority = 1 };
+ char id_buf[16];
+ int i;
+
+ for (i = 0; i < cu_num; i++) {
+ cu_id = alloc_cu_id();
+ if (cu_id < 0) {
+ pr_err("alloc cu id failed\n");
+ return -1;
+ }
+
+ cu = &xcu_manager[cu_id];
+ cu->cu_id = cu_id;
+ cu->state = XCU_IDLE;
+ cu->curr_se = NULL;
+ cu->dev_id = dev_id;
+ cu->ts_id = ts_id;
+ cu->is_wake = 0;
+ init_xcu_rq(cu);
+
+ args = kzalloc(sizeof(struct sched_args), GFP_KERNEL);
+ if (!args)
+ return -1;
+
+ args->cu_id = cu->cu_id;
+ snprintf(id_buf, sizeof(id_buf), "%d:%d:%d",
+ cu->cu_id, cu->dev_id, cu->ts_id);
+ cu->worker = kthread_create_on_node(__ucc_schedule,
+ (void *)args, NUMA_NO_NODE,
+ "u_sched/%s", id_buf);
+ sched_setscheduler_nocheck(cu->worker, SCHED_FIFO, &param);
+ wake_up_process(cu->worker);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_sched_register_xcu);
+
+int ucc_sched_init(void)
+{
+ raw_spin_lock_init(&xcu_mgr_lock);
+ return 0;
+}
+
+int ucc_rt_nr_running(struct xcu *cu)
+{
+ return cu->rt_nr_running;
+}
+EXPORT_SYMBOL(ucc_rt_nr_running);
+
+struct xcu *ucc_get_xcu_by_id(int cu_id)
+{
+ return &xcu_manager[cu_id];
+}
+EXPORT_SYMBOL(ucc_get_xcu_by_id);
+
+int ucc_xcu_is_sched(int cu_id)
+{
+ return xcu_manager[cu_id].is_sched;
+}
+EXPORT_SYMBOL(ucc_xcu_is_sched);
diff --git a/kernel/ucc_sched/ucc_sched.h b/kernel/ucc_sched/ucc_sched.h
new file mode 100644
index 000000000000..30e2aa10cf2f
--- /dev/null
+++ b/kernel/ucc_sched/ucc_sched.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Tue Jan 17 22:27:22 2023
+ */
+#ifndef __UCC_SCHED_USCHED_H__
+#define __UCC_SCHED_USCHED_H__
+
+#include <linux/sched.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/vstream.h>
+
+//For simplicity, we set this parameter to 2.
+#define MAX_UCC_PRIO (2)
+
+enum xcu_state {
+ XCU_INACTIVE,
+ XCU_IDLE,
+ XCU_BUSY,
+ XCU_SUBMIT,
+};
+
+/*
+ * This is the abstraction object of the xpu computing unit.
+ */
+struct xcu {
+ int is_sched;
+ int cu_id;
+ int dev_id;
+ int ts_id;
+ int rt_nr_running;
+ int is_wake;
+ struct task_struct *worker;
+ DECLARE_BITMAP(bitmap, MAX_UCC_PRIO);
+ struct list_head queue[MAX_UCC_PRIO];
+ enum xcu_state state;
+ struct ucc_se *curr_se;
+ raw_spinlock_t xcu_lock;
+};
+
+#endif
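The queue/bitmap pair in struct xcu is the classic O(1) priority-runqueue layout: enqueue_ucc_se() sets the bit for a priority level when its list becomes non-empty, dequeue_ucc_se() clears it when the list drains, and pick_next_ucc_se() takes the first set bit. A standalone sketch of that pattern (illustrative only, not kernel code):

#include <stdio.h>

#define NPRIO 2	/* mirrors MAX_UCC_PRIO: index 0 is the higher priority */

struct item {
	int id;
	struct item *next;
};

struct runqueue {
	unsigned long bitmap;		/* bit p set <=> queue[p] non-empty */
	struct item *queue[NPRIO];	/* one FIFO list per priority level */
};

static void enqueue(struct runqueue *rq, struct item *it, int prio)
{
	struct item **pp = &rq->queue[prio];

	it->next = NULL;
	while (*pp)			/* append at the tail of the list */
		pp = &(*pp)->next;
	*pp = it;
	rq->bitmap |= 1UL << prio;	/* mark this priority level runnable */
}

static struct item *pick_next(struct runqueue *rq)
{
	int prio;

	if (!rq->bitmap)		/* nothing runnable */
		return NULL;
	prio = __builtin_ctzl(rq->bitmap);	/* lowest set bit = best prio */
	return rq->queue[prio];
}

int main(void)
{
	struct runqueue rq = { 0 };
	struct item low = { .id = 1 }, high = { .id = 2 };

	enqueue(&rq, &low, 1);
	enqueue(&rq, &high, 0);
	printf("picked id %d\n", pick_next(&rq)->id);	/* prints 2 */
	return 0;
}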
--
2.34.1
2
1

[PATCH openEuler-22.03-LTS-SP1] netfilter: nf_tables: skip immediate deactivate in _PREPARE_ERROR
by Guo Mengqi 13 Sep '23
13 Sep '23
From: Pablo Neira Ayuso <pablo(a)netfilter.org>
mainline inclusion
from mainline-v6.5-rc4
commit 0a771f7b266b02d262900c75f1e175c7fe76fec2
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I7YIXO
CVE: CVE-2022-40982
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
---------------------------
On error when building the rule, the immediate expression unbinds the
chain, hence objects can be deactivated by the transaction records.
Otherwise, it is possible to trigger the following warning:
WARNING: CPU: 3 PID: 915 at net/netfilter/nf_tables_api.c:2013 nf_tables_chain_destroy+0x1f7/0x210 [nf_tables]
CPU: 3 PID: 915 Comm: chain-bind-err- Not tainted 6.1.39 #1
RIP: 0010:nf_tables_chain_destroy+0x1f7/0x210 [nf_tables]
Fixes: 4bedf9eee016 ("netfilter: nf_tables: fix chain binding transaction logic")
Reported-by: Kevin Rich <kevinrich1337(a)gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
Signed-off-by: Florian Westphal <fw(a)strlen.de>
conflict:
net/netfilter/nft_immediate.c
Signed-off-by: Lu Wei <luwei32(a)huawei.com>
---
net/netfilter/nft_immediate.c | 27 ++++++++++++++++++---------
1 file changed, 18 insertions(+), 9 deletions(-)
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 6b0efab4fad0..6bf1c852e8ea 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -125,15 +125,27 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
}
+static void nft_immediate_chain_deactivate(const struct nft_ctx *ctx,
+ struct nft_chain *chain,
+ enum nft_trans_phase phase)
+{
+ struct nft_ctx chain_ctx;
+ struct nft_rule *rule;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+}
+
static void nft_immediate_deactivate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
const struct nft_data *data = &priv->data;
- struct nft_ctx chain_ctx;
struct nft_chain *chain;
- struct nft_rule *rule;
if (priv->dreg == NFT_REG_VERDICT) {
switch (data->verdict.code) {
@@ -143,20 +155,17 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
if (!nft_chain_binding(chain))
break;
- chain_ctx = *ctx;
- chain_ctx.chain = chain;
-
- list_for_each_entry(rule, &chain->rules, list)
- nft_rule_expr_deactivate(&chain_ctx, rule, phase);
-
switch (phase) {
case NFT_TRANS_PREPARE_ERROR:
nf_tables_unbind_chain(ctx, chain);
- fallthrough;
+ nft_deactivate_next(ctx->net, chain);
+ break;
case NFT_TRANS_PREPARE:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
nft_deactivate_next(ctx->net, chain);
break;
default:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
nft_chain_del(chain);
chain->bound = false;
chain->table->use--;
--
2.17.1
2
1

13 Sep '23
Wang Wensheng (5):
mm/mmap: Don't merge vma from sharepool
mm/sharepool: Use mmap_write_[un]lock helper
mm/sharepool: Return -ENOMEM when allocate hugepage failed
mm/sharepool: Protect the va reserved for sharepool
mm/sharepool: Mmap for the current process at first
include/linux/share_pool.h | 31 +++++++++++++--------
mm/mmap.c | 17 +++++++++---
mm/mremap.c | 4 +++
mm/share_pool.c | 56 ++++++++++++++++++++++++--------------
4 files changed, 73 insertions(+), 35 deletions(-)
--
2.17.1
2
6

[PATCH openEuler-1.0-LTS v2] io_uring: ensure IOPOLL locks around deferred work
by Zhihao Cheng 13 Sep '23
13 Sep '23
From: Jens Axboe <axboe(a)kernel.dk>
stable inclusion
from stable-v5.10.188
commit 810e401b34c4c4c244d8b93b9947ea5b3d4d49f8
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7KXLN
CVE: CVE-2023-21400
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
No direct upstream commit exists for this issue. It was fixed in
5.18 as part of a larger rework of the completion side.
io_commit_cqring() writes the CQ ring tail to make it visible, but it
also kicks off any deferred work we have. A ring setup with IOPOLL
does not need any locking around the CQ ring updates, as we're always
under the ctx uring_lock. But if we have deferred work that needs
processing, then io_queue_deferred() assumes that the completion_lock
is held, as it is for !IOPOLL.
Add a lockdep assertion to check and document this fact, and have
io_iopoll_complete() check if we have deferred work and run that
separately with the appropriate lock grabbed.
Cc: stable(a)vger.kernel.org # 5.10, 5.15
Reported-by: dghost david <daviduniverse18(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Lin Yujun <linyujun809(a)huawei.com>
Conflicts:
fs/io_uring.c
Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
---
v1->v2: Add completion_lock for whole io_commit_cqring in iopoll
completion
fs/io_uring.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ce60df5e4d91..88eca93c55b7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -1310,6 +1310,8 @@ static void io_kill_timeouts(struct io_ring_ctx *ctx)
static void __io_queue_deferred(struct io_ring_ctx *ctx)
{
+ lockdep_assert_held(&ctx->completion_lock);
+
do {
struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
struct io_defer_entry, list);
@@ -2154,6 +2156,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
struct req_batch rb;
struct io_kiocb *req;
LIST_HEAD(again);
+ unsigned long flags;
/* order with ->result store in io_complete_rw_iopoll() */
smp_rmb();
@@ -2181,7 +2184,10 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
io_req_free_batch(&rb, req);
}
+ spin_lock_irqsave(&ctx->completion_lock, flags);
io_commit_cqring(ctx);
+ spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
if (ctx->flags & IORING_SETUP_SQPOLL)
io_cqring_ev_posted(ctx);
io_req_free_batch_finish(ctx, &rb);
--
2.31.1
2
1

[PATCH openEuler-22.03-LTS-SP1] net/sched: sch_hfsc: Ensure inner classes have fsc curve
by Zhengchao Shao 13 Sep '23
13 Sep '23
From: Budimir Markovic <markovicbudimir(a)gmail.com>
mainline inclusion
from mainline-v6.5-rc7
commit b3d26c5702c7d6c45456326e56d2ccf3f103e60f
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I7Z7CD
CVE: CVE-2023-4623
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
HFSC assumes that inner classes have an fsc curve, but it is currently
possible for classes without an fsc curve to become parents. This leads
to bugs including a use-after-free.
Don't allow non-root classes without HFSC_FSC to become parents.
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-by: Budimir Markovic <markovicbudimir(a)gmail.com>
Signed-off-by: Budimir Markovic <markovicbudimir(a)gmail.com>
Acked-by: Jamal Hadi Salim <jhs(a)mojatatu.com>
Link: https://lore.kernel.org/r/20230824084905.422-1-markovicbudimir@gmail.com
Signed-off-by: Jakub Kicinski <kuba(a)kernel.org>
Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com>
---
net/sched/sch_hfsc.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index cdc43a06aa9b..6076294a632c 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1012,6 +1012,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (parent == NULL)
return -ENOENT;
}
+ if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
+ NL_SET_ERR_MSG(extack, "Invalid parent - parent class must have FSC");
+ return -EINVAL;
+ }
if (classid == 0 || TC_H_MAJ(classid ^ sch->handle) != 0)
return -EINVAL;
--
2.34.1
2
1

[PATCH openEuler-22.03-LTS-SP1] netfilter: nf_tables: skip bound chain on rule flush
by Zhengchao Shao 13 Sep '23
13 Sep '23
From: Pablo Neira Ayuso <pablo(a)netfilter.org>
stable inclusion
from stable-v5.10.188
commit 30e5460d69e631c0e84db37dba2d8f98648778d4
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I7YIXI
CVE: CVE-2023-3777
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
[ Upstream commit 6eaf41e87a223ae6f8e7a28d6e78384ad7e407f8 ]
Skip bound chain when flushing table rules, the rule that owns this
chain releases these objects.
Otherwise, the following warning is triggered:
WARNING: CPU: 2 PID: 1217 at net/netfilter/nf_tables_api.c:2013 nf_tables_chain_destroy+0x1f7/0x210 [nf_tables]
CPU: 2 PID: 1217 Comm: chain-flush Not tainted 6.1.39 #1
RIP: 0010:nf_tables_chain_destroy+0x1f7/0x210 [nf_tables]
Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
Reported-by: Kevin Rich <kevinrich1337(a)gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo(a)netfilter.org>
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Signed-off-by: Sasha Levin <sashal(a)kernel.org>
Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com>
---
net/netfilter/nf_tables_api.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3b2275b151a2..bbe6e7023683 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3516,6 +3516,8 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
+ if (nft_chain_is_bound(chain))
+ continue;
ctx.chain = chain;
err = nft_delrule_by_chain(&ctx);
--
2.34.1
2
1

12 Sep '23
From: Shuchang Li <lishuchang(a)hust.edu.cn>
stable inclusion
from stable-v5.15.111
commit fd8c83d8375b9dac1949f2753485a5c055ebfad0
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7ZCDZ
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/dri…
--------------------------------
[ Upstream commit 91a0c0c1413239d0548b5aac4c82f38f6d53a91e ]
When if_type equals zero and pci_resource_start(pdev, PCI_64BIT_BAR4)
returns false, drbl_regs_memmap_p is not remapped. This passes a NULL
pointer to iounmap(), which can trigger a WARN() on certain arches.
When if_type equals six and pci_resource_start(pdev, PCI_64BIT_BAR4)
returns true, drbl_regs_memmap_p may have been remapped and
ctrl_regs_memmap_p is not remapped. This is a resource leak and passes a
NULL pointer to iounmap().
To fix these issues, we need to add null checks before iounmap(), and
change some goto labels.
Fixes: 1351e69fc6db ("scsi: lpfc: Add push-to-adapter support to sli4")
Signed-off-by: Shuchang Li <lishuchang(a)hust.edu.cn>
Link: https://lore.kernel.org/r/20230404072133.1022-1-lishuchang@hust.edu.cn
Reviewed-by: Justin Tee <justin.tee(a)broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen(a)oracle.com>
Signed-off-by: Yong Hu <yong.hu(a)windriver.com>
---
drivers/scsi/lpfc/lpfc_init.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 17200b453cbb..1bb3c96a04bd 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -10477,7 +10477,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
goto out_iounmap_all;
} else {
error = -ENOMEM;
- goto out_iounmap_all;
+ goto out_iounmap_ctrl;
}
}
@@ -10495,7 +10495,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
dev_err(&pdev->dev,
"ioremap failed for SLI4 HBA dpp registers.\n");
error = -ENOMEM;
- goto out_iounmap_ctrl;
+ goto out_iounmap_all;
}
phba->pci_bar4_memmap_p = phba->sli4_hba.dpp_regs_memmap_p;
}
@@ -10520,9 +10520,11 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
return 0;
out_iounmap_all:
- iounmap(phba->sli4_hba.drbl_regs_memmap_p);
+ if (phba->sli4_hba.drbl_regs_memmap_p)
+ iounmap(phba->sli4_hba.drbl_regs_memmap_p);
out_iounmap_ctrl:
- iounmap(phba->sli4_hba.ctrl_regs_memmap_p);
+ if (phba->sli4_hba.ctrl_regs_memmap_p)
+ iounmap(phba->sli4_hba.ctrl_regs_memmap_p);
out_iounmap_conf:
iounmap(phba->sli4_hba.conf_regs_memmap_p);
--
2.33.0
2
1

[PATCH openEuler-22.03-LTS-SP1] af_unix: Fix null-ptr-deref in unix_stream_sendpage().
by Liu Jian 12 Sep '23
12 Sep '23
From: Kuniyuki Iwashima <kuniyu(a)amazon.com>
stable inclusion
from stable-v5.10.192
commit c080cee930303124624fe64fc504f66c815ee6b9
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I7Z7C9
CVE: CVE-2023-4622
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
---------------------------
Bing-Jhong Billy Jheng reported null-ptr-deref in unix_stream_sendpage()
with detailed analysis and a nice repro.
unix_stream_sendpage() tries to add data to the last skb in the peer's
recv queue without locking the queue.
If the peer's FD is passed to another socket and the socket's FD is
passed to the peer, there is a loop between them. If we close both
sockets without receiving FD, the sockets will be cleaned up by garbage
collection.
The garbage collection iterates such sockets and unlinks skb with
FD from the socket's receive queue under the queue's lock.
So, there is a race where unix_stream_sendpage() could access an skb
locklessly that is being released by garbage collection, resulting in
use-after-free.
To avoid the issue, unix_stream_sendpage() must lock the peer's recv
queue.
Note the issue does not exist in 6.5+ thanks to the recent sendpage()
refactoring.
This patch is originally written by Linus Torvalds.
BUG: unable to handle page fault for address: ffff988004dd6870
PF: supervisor read access in kernel mode
PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
PREEMPT SMP PTI
CPU: 4 PID: 297 Comm: garbage_uaf Not tainted 6.1.46 #1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
RIP: 0010:kmem_cache_alloc_node+0xa2/0x1e0
Code: c0 0f 84 32 01 00 00 41 83 fd ff 74 10 48 8b 00 48 c1 e8 3a 41 39 c5 0f 85 1c 01 00 00 41 8b 44 24 28 49 8b 3c 24 48 8d 4a 40 <49> 8b 1c 06 4c 89 f0 65 48 0f c7 0f 0f 94 c0 84 c0 74 a1 41 8b 44
RSP: 0018:ffffc9000079fac0 EFLAGS: 00000246
RAX: 0000000000000070 RBX: 0000000000000005 RCX: 000000000001a284
RDX: 000000000001a244 RSI: 0000000000400cc0 RDI: 000000000002eee0
RBP: 0000000000400cc0 R08: 0000000000400cc0 R09: 0000000000000003
R10: 0000000000000001 R11: 0000000000000000 R12: ffff888003970f00
R13: 00000000ffffffff R14: ffff988004dd6800 R15: 00000000000000e8
FS: 00007f174d6f3600(0000) GS:ffff88807db00000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff988004dd6870 CR3: 00000000092be000 CR4: 00000000007506e0
PKRU: 55555554
Call Trace:
<TASK>
? __die_body.cold+0x1a/0x1f
? page_fault_oops+0xa9/0x1e0
? fixup_exception+0x1d/0x310
? exc_page_fault+0xa8/0x150
? asm_exc_page_fault+0x22/0x30
? kmem_cache_alloc_node+0xa2/0x1e0
? __alloc_skb+0x16c/0x1e0
__alloc_skb+0x16c/0x1e0
alloc_skb_with_frags+0x48/0x1e0
sock_alloc_send_pskb+0x234/0x270
unix_stream_sendmsg+0x1f5/0x690
sock_sendmsg+0x5d/0x60
____sys_sendmsg+0x210/0x260
___sys_sendmsg+0x83/0xd0
? kmem_cache_alloc+0xc6/0x1c0
? avc_disable+0x20/0x20
? percpu_counter_add_batch+0x53/0xc0
? alloc_empty_file+0x5d/0xb0
? alloc_file+0x91/0x170
? alloc_file_pseudo+0x94/0x100
? __fget_light+0x9f/0x120
__sys_sendmsg+0x54/0xa0
do_syscall_64+0x3b/0x90
entry_SYSCALL_64_after_hwframe+0x69/0xd3
RIP: 0033:0x7f174d639a7d
Code: 28 89 54 24 1c 48 89 74 24 10 89 7c 24 08 e8 8a c1 f4 ff 8b 54 24 1c 48 8b 74 24 10 41 89 c0 8b 7c 24 08 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 33 44 89 c7 48 89 44 24 08 e8 de c1 f4 ff 48
RSP: 002b:00007ffcb563ea50 EFLAGS: 00000293 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f174d639a7d
RDX: 0000000000000000 RSI: 00007ffcb563eab0 RDI: 0000000000000007
RBP: 00007ffcb563eb10 R08: 0000000000000000 R09: 00000000ffffffff
R10: 00000000004040a0 R11: 0000000000000293 R12: 00007ffcb563ec28
R13: 0000000000401398 R14: 0000000000403e00 R15: 00007f174d72c000
</TASK>
Fixes: 869e7c62486e ("net: af_unix: implement stream sendpage support")
Reported-by: Bing-Jhong Billy Jheng <billy(a)starlabs.sg>
Reviewed-by: Bing-Jhong Billy Jheng <billy(a)starlabs.sg>
Co-developed-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Kuniyuki Iwashima <kuniyu(a)amazon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Liu Jian <liujian56(a)huawei.com>
---
net/unix/af_unix.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a80f23be06e3..02a6aea24e0e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2118,6 +2118,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
if (false) {
alloc_skb:
+ spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->iolock);
newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
@@ -2157,6 +2158,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
init_scm = false;
}
+ spin_lock(&other->sk_receive_queue.lock);
skb = skb_peek_tail(&other->sk_receive_queue);
if (tail && tail == skb) {
skb = newskb;
@@ -2187,14 +2189,11 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
refcount_add(size, &sk->sk_wmem_alloc);
if (newskb) {
- err = unix_scm_to_skb(&scm, skb, false);
- if (err)
- goto err_state_unlock;
- spin_lock(&other->sk_receive_queue.lock);
+ unix_scm_to_skb(&scm, skb, false);
__skb_queue_tail(&other->sk_receive_queue, newskb);
- spin_unlock(&other->sk_receive_queue.lock);
}
+ spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->iolock);
--
2.34.1
2
1

12 Sep '23
From: zhoushuiqing <zhoushuiqing2(a)huawei.com>
v2:
-remove unused variable in ima_main.c
v3:
-modify patch header information
v4:
-add the CONFIG_IMA_DIGEST_LIST macro to isolate the code
v5:
-use other macros to isolate PGP-related code
v6:
-remove unnecessary macro and fix potential leak
David Howells (4):
PGPLIB: PGP definitions (RFC 4880)
PGPLIB: Basic packet parser
KEYS: Provide PGP key description autogeneration
KEYS: Provide a function to load keys from a PGP keyring blob
Mimi Zohar (1):
initramfs: add file metadata
Roberto Sassu (34):
initramfs: read metadata from special file METADATA!!!
gen_init_cpio: add support for file metadata
init: Add kernel option to force usage of tmpfs for rootfs
ima: Add enforce-evm and log-evm modes to strictly check EVM status
ima: Allow choice of file hash algorithm for measurement and audit
ima: Generalize ima_read_policy()
ima: Generalize ima_write_policy() and raise uploaded data size limit
ima: Generalize policy file operations
ima: Use ima_show_htable_value to show violations and hash table data
ima: Add parser of compact digest list
ima: Prevent usage of digest lists not measured or appraised
ima: Introduce new securityfs files
ima: Introduce new hook DIGEST_LIST_CHECK
ima: Load all digest lists from a directory at boot time
ima: Add support for measurement with digest lists
ima: Add support for appraisal with digest lists
evm: Add support for digest lists of metadata
ima: Add meta_immutable appraisal type
ima: Introduce exec_tcb policy
ima: Introduce appraise_exec_tcb policy
ima: Introduce appraise_exec_immutable policy
ima: Add Documentation/security/IMA-digest-lists.txt
mpi: introduce mpi_key_length()
rsa: add parser of raw format
KEYS: PGP data parser
KEYS: Introduce load_pgp_public_keyring()
certs: Introduce search_trusted_key()
ima: Search key in the built-in keyrings
ima: Allow direct upload of digest lists to securityfs
ima: Add parser keyword to the policy
evm: Extend evm= with x509. allow_metadata_writes and complete values
ima: Execute parser to upload digest lists not recognizable by the
kernel
evm: Propagate choice of HMAC algorithm in evm_crypto.c
config: add digest list options for arm64 and x86
Zhang Tianxing (5):
ima: fix a memory leak in ima_del_digest_data_entry
ima: Add max size for IMA digest database
ima: don't allow control characters in policy path
ima: fix CONFIG_IMA_DIGEST_DB_MEGABYTES in openeuler_defconfig
ima: fix db size overflow and Kconfig issues
Zheng Zengkai (1):
Revert "evm: Refuse EVM_ALLOW_METADATA_WRITES only if an HMAC key is
loaded"
shenxiangwei (1):
ima: bugfix for digest lists importing
Documentation/ABI/testing/evm | 4 +-
.../admin-guide/kernel-parameters.txt | 49 +-
Documentation/security/IMA-digest-lists.txt | 259 ++++++++++
arch/arm64/configs/openeuler_defconfig | 11 +
arch/x86/configs/openeuler_defconfig | 29 +-
certs/Kconfig | 7 +
certs/Makefile | 7 +
certs/system_certificates.S | 18 +
certs/system_keyring.c | 46 ++
crypto/asymmetric_keys/Kconfig | 25 +
crypto/asymmetric_keys/Makefile | 10 +
crypto/asymmetric_keys/pgp_library.c | 284 +++++++++++
crypto/asymmetric_keys/pgp_parser.h | 27 +
crypto/asymmetric_keys/pgp_preload.c | 123 +++++
crypto/asymmetric_keys/pgp_public_key.c | 387 +++++++++++++++
crypto/rsa.c | 16 +
crypto/rsa_helper.c | 76 +++
include/crypto/internal/rsa.h | 10 +
include/linux/initramfs.h | 21 +
include/linux/kernel_read_file.h | 13 +
include/linux/mpi.h | 4 +
include/linux/pgp.h | 223 +++++++++
include/linux/pgplib.h | 48 ++
include/linux/verification.h | 7 +
init/do_mounts.c | 19 +
init/initramfs.c | 161 ++++++
lib/mpi/mpicoder.c | 37 ++
security/integrity/digsig_asymmetric.c | 13 +
security/integrity/evm/Kconfig | 32 ++
security/integrity/evm/evm.h | 3 +
security/integrity/evm/evm_crypto.c | 46 +-
security/integrity/evm/evm_main.c | 141 +++++-
security/integrity/evm/evm_secfs.c | 4 +
security/integrity/iint.c | 4 +
security/integrity/ima/Kconfig | 49 ++
security/integrity/ima/Makefile | 1 +
security/integrity/ima/ima.h | 56 +++
security/integrity/ima/ima_api.c | 55 ++-
security/integrity/ima/ima_appraise.c | 137 +++++
security/integrity/ima/ima_digest_list.c | 466 ++++++++++++++++++
security/integrity/ima/ima_digest_list.h | 54 ++
security/integrity/ima/ima_efi.c | 3 +
security/integrity/ima/ima_fs.c | 320 ++++++++++++
security/integrity/ima/ima_init.c | 4 +
security/integrity/ima/ima_main.c | 116 ++++-
security/integrity/ima/ima_policy.c | 177 ++++++-
security/integrity/integrity.h | 42 ++
usr/Kconfig | 8 +
usr/Makefile | 4 +-
usr/gen_init_cpio.c | 158 ++++++
usr/gen_initramfs.sh | 7 +-
51 files changed, 3798 insertions(+), 23 deletions(-)
create mode 100644 Documentation/security/IMA-digest-lists.txt
create mode 100644 crypto/asymmetric_keys/pgp_library.c
create mode 100644 crypto/asymmetric_keys/pgp_parser.h
create mode 100644 crypto/asymmetric_keys/pgp_preload.c
create mode 100644 crypto/asymmetric_keys/pgp_public_key.c
create mode 100644 include/linux/initramfs.h
create mode 100644 include/linux/pgp.h
create mode 100644 include/linux/pgplib.h
create mode 100644 security/integrity/ima/ima_digest_list.c
create mode 100644 security/integrity/ima/ima_digest_list.h
--
2.33.0
2
47

12 Sep '23
From: zhoushuiqing <zhoushuiqing2(a)huawei.com>
v2:
-remove unused variable in ima_main.c
v3:
-modify patch header information
v4:
-add the CONFIG_IMA_DIGEST_LIST macro to isolate the code
v5:
-use other macros to isolate PGP-related code
v6:
-remove unnecessary macro controls and fix potential memory leaks in ima_digest_list.c
David Howells (4):
PGPLIB: PGP definitions (RFC 4880)
PGPLIB: Basic packet parser
KEYS: Provide PGP key description autogeneration
KEYS: Provide a function to load keys from a PGP keyring blob
Mimi Zohar (1):
initramfs: add file metadata
Roberto Sassu (34):
initramfs: read metadata from special file METADATA!!!
gen_init_cpio: add support for file metadata
init: Add kernel option to force usage of tmpfs for rootfs
ima: Add enforce-evm and log-evm modes to strictly check EVM status
ima: Allow choice of file hash algorithm for measurement and audit
ima: Generalize ima_read_policy()
ima: Generalize ima_write_policy() and raise uploaded data size limit
ima: Generalize policy file operations
ima: Use ima_show_htable_value to show violations and hash table data
ima: Add parser of compact digest list
ima: Prevent usage of digest lists not measured or appraised
ima: Introduce new securityfs files
ima: Introduce new hook DIGEST_LIST_CHECK
ima: Load all digest lists from a directory at boot time
ima: Add support for measurement with digest lists
ima: Add support for appraisal with digest lists
evm: Add support for digest lists of metadata
ima: Add meta_immutable appraisal type
ima: Introduce exec_tcb policy
ima: Introduce appraise_exec_tcb policy
ima: Introduce appraise_exec_immutable policy
ima: Add Documentation/security/IMA-digest-lists.txt
mpi: introduce mpi_key_length()
rsa: add parser of raw format
KEYS: PGP data parser
KEYS: Introduce load_pgp_public_keyring()
certs: Introduce search_trusted_key()
ima: Search key in the built-in keyrings
ima: Allow direct upload of digest lists to securityfs
ima: Add parser keyword to the policy
evm: Extend evm= with x509. allow_metadata_writes and complete values
ima: Execute parser to upload digest lists not recognizable by the
kernel
evm: Propagate choice of HMAC algorithm in evm_crypto.c
config: add digest list options for arm64 and x86
Zhang Tianxing (5):
ima: fix a memory leak in ima_del_digest_data_entry
ima: Add max size for IMA digest database
ima: don't allow control characters in policy path
ima: fix CONFIG_IMA_DIGEST_DB_MEGABYTES in openeuler_defconfig
ima: fix db size overflow and Kconfig issues
Zheng Zengkai (1):
Revert "evm: Refuse EVM_ALLOW_METADATA_WRITES only if an HMAC key is
loaded"
shenxiangwei (1):
ima: bugfix for digest lists importing
Documentation/ABI/testing/evm | 4 +-
.../admin-guide/kernel-parameters.txt | 49 +-
Documentation/security/IMA-digest-lists.txt | 259 ++++++++++
arch/arm64/configs/openeuler_defconfig | 11 +
arch/x86/configs/openeuler_defconfig | 29 +-
certs/Kconfig | 7 +
certs/Makefile | 7 +
certs/system_certificates.S | 18 +
certs/system_keyring.c | 46 ++
crypto/asymmetric_keys/Kconfig | 25 +
crypto/asymmetric_keys/Makefile | 10 +
crypto/asymmetric_keys/pgp_library.c | 284 +++++++++++
crypto/asymmetric_keys/pgp_parser.h | 27 +
crypto/asymmetric_keys/pgp_preload.c | 123 +++++
crypto/asymmetric_keys/pgp_public_key.c | 387 +++++++++++++++
crypto/rsa.c | 16 +
crypto/rsa_helper.c | 76 +++
include/crypto/internal/rsa.h | 10 +
include/linux/initramfs.h | 21 +
include/linux/kernel_read_file.h | 13 +
include/linux/mpi.h | 4 +
include/linux/pgp.h | 223 +++++++++
include/linux/pgplib.h | 48 ++
include/linux/verification.h | 7 +
init/do_mounts.c | 19 +
init/initramfs.c | 161 ++++++
lib/mpi/mpicoder.c | 37 ++
security/integrity/digsig_asymmetric.c | 13 +
security/integrity/evm/Kconfig | 32 ++
security/integrity/evm/evm.h | 3 +
security/integrity/evm/evm_crypto.c | 46 +-
security/integrity/evm/evm_main.c | 141 +++++-
security/integrity/evm/evm_secfs.c | 4 +
security/integrity/iint.c | 4 +
security/integrity/ima/Kconfig | 49 ++
security/integrity/ima/Makefile | 1 +
security/integrity/ima/ima.h | 56 +++
security/integrity/ima/ima_api.c | 55 ++-
security/integrity/ima/ima_appraise.c | 137 +++++
security/integrity/ima/ima_digest_list.c | 466 ++++++++++++++++++
security/integrity/ima/ima_digest_list.h | 54 ++
security/integrity/ima/ima_efi.c | 3 +
security/integrity/ima/ima_fs.c | 320 ++++++++++++
security/integrity/ima/ima_init.c | 4 +
security/integrity/ima/ima_main.c | 116 ++++-
security/integrity/ima/ima_policy.c | 177 ++++++-
security/integrity/integrity.h | 42 ++
usr/Kconfig | 8 +
usr/Makefile | 4 +-
usr/gen_init_cpio.c | 158 ++++++
usr/gen_initramfs.sh | 7 +-
51 files changed, 3798 insertions(+), 23 deletions(-)
create mode 100644 Documentation/security/IMA-digest-lists.txt
create mode 100644 crypto/asymmetric_keys/pgp_library.c
create mode 100644 crypto/asymmetric_keys/pgp_parser.h
create mode 100644 crypto/asymmetric_keys/pgp_preload.c
create mode 100644 crypto/asymmetric_keys/pgp_public_key.c
create mode 100644 include/linux/initramfs.h
create mode 100644 include/linux/pgp.h
create mode 100644 include/linux/pgplib.h
create mode 100644 security/integrity/ima/ima_digest_list.c
create mode 100644 security/integrity/ima/ima_digest_list.h
--
2.33.0
1
46