Kernel mailing list archive, September 2023

add drm support for Inspur BMC
by Hongchen Zhang 13 Sep '23
LoongArch inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I80YFC
------------------------------------------
Add DRM support for the Inspur BMC display controller.
Signed-off-by: Hongchen Zhang <zhanghongchen(a)loongson.cn>
---
arch/loongarch/configs/loongson3_defconfig | 4 +-
drivers/gpu/drm/Kconfig | 2 +
drivers/gpu/drm/Makefile | 1 +
drivers/gpu/drm/inspur/Kconfig | 11 +
drivers/gpu/drm/inspur/Makefile | 5 +
drivers/gpu/drm/inspur/inspur_cursor.c | 58 +++
drivers/gpu/drm/inspur/inspur_drm_de.c | 513 +++++++++++++++++++++
drivers/gpu/drm/inspur/inspur_drm_drv.c | 456 ++++++++++++++++++
drivers/gpu/drm/inspur/inspur_drm_drv.h | 116 +++++
drivers/gpu/drm/inspur/inspur_drm_regs.h | 223 +++++++++
drivers/gpu/drm/inspur/inspur_drm_vdac.c | 117 +++++
drivers/gpu/drm/inspur/inspur_ttm.c | 36 ++
12 files changed, 1539 insertions(+), 3 deletions(-)
create mode 100644 drivers/gpu/drm/inspur/Kconfig
create mode 100644 drivers/gpu/drm/inspur/Makefile
create mode 100644 drivers/gpu/drm/inspur/inspur_cursor.c
create mode 100644 drivers/gpu/drm/inspur/inspur_drm_de.c
create mode 100644 drivers/gpu/drm/inspur/inspur_drm_drv.c
create mode 100644 drivers/gpu/drm/inspur/inspur_drm_drv.h
create mode 100644 drivers/gpu/drm/inspur/inspur_drm_regs.h
create mode 100644 drivers/gpu/drm/inspur/inspur_drm_vdac.c
create mode 100644 drivers/gpu/drm/inspur/inspur_ttm.c
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 6e0adea947f5..ec53e95bf30d 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -386,7 +386,6 @@ CONFIG_IP6_NF_SECURITY=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_DECNET_NF_GRABULATOR=m
CONFIG_NF_TABLES_BRIDGE=m
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
@@ -458,8 +457,6 @@ CONFIG_NET_DSA_TAG_SJA1105=m
CONFIG_NET_DSA_TAG_TRAILER=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_VLAN_8021Q_MVRP=y
-CONFIG_DECNET=m
-CONFIG_DECNET_ROUTER=y
CONFIG_LLC2=m
CONFIG_ATALK=m
CONFIG_DEV_APPLETALK=m
@@ -1504,6 +1501,7 @@ CONFIG_DRM_NOUVEAU=m
CONFIG_DRM_VKMS=m
CONFIG_DRM_UDL=m
CONFIG_DRM_AST=y
+CONFIG_DRM_INSPUR=m
CONFIG_DRM_MGAG200=m
CONFIG_DRM_QXL=m
CONFIG_DRM_BOCHS=m
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index b37e6660dd4e..f6dcb60be551 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -315,6 +315,8 @@ source "drivers/gpu/drm/ast/Kconfig"
source "drivers/gpu/drm/loongson/Kconfig"
+source "drivers/gpu/drm/inspur/Kconfig"
+
source "drivers/gpu/drm/mgag200/Kconfig"
source "drivers/gpu/drm/armada/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index e9dd6847c9fa..e806bda8650a 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -125,3 +125,4 @@ obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
obj-$(CONFIG_DRM_MCDE) += mcde/
obj-$(CONFIG_DRM_TIDSS) += tidss/
obj-y += xlnx/
+obj-$(CONFIG_DRM_INSPUR) += inspur/
diff --git a/drivers/gpu/drm/inspur/Kconfig b/drivers/gpu/drm/inspur/Kconfig
new file mode 100644
index 000000000000..7c9ab5ad77ab
--- /dev/null
+++ b/drivers/gpu/drm/inspur/Kconfig
@@ -0,0 +1,11 @@
+config DRM_INSPUR
+ tristate "DRM Support for Inspur BMC"
+ depends on DRM && PCI && MMU
+ select DRM_KMS_HELPER
+ select DRM_VRAM_HELPER
+
+ help
+ Choose this option if you have an Inspur SoC chipset.
+ If M is selected, the module will be called inspur-drm.
+ If you use GNOME 3, set "WaylandEnable=false" in
+ /etc/gdm3/custom.conf and reboot.
diff --git a/drivers/gpu/drm/inspur/Makefile b/drivers/gpu/drm/inspur/Makefile
new file mode 100644
index 000000000000..31a5bfe79214
--- /dev/null
+++ b/drivers/gpu/drm/inspur/Makefile
@@ -0,0 +1,5 @@
+
+inspur-drm-y := inspur_drm_drv.o inspur_drm_de.o \
+ inspur_drm_vdac.o inspur_ttm.o inspur_cursor.o
+
+obj-$(CONFIG_DRM_INSPUR) += inspur-drm.o
diff --git a/drivers/gpu/drm/inspur/inspur_cursor.c b/drivers/gpu/drm/inspur/inspur_cursor.c
new file mode 100644
index 000000000000..e84136cbf4f7
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_cursor.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/pci.h>
+#include "inspur_drm_drv.h"
+#include "inspur_drm_regs.h"
+
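+/*
+ * Convert a 64x64 ARGB8888 cursor image into the hardware's 2-bit
+ * monochrome cursor format, packing four pixels per output byte:
+ * pixels with alpha below 0xe0 become 0 (transparent); opaque pixels
+ * become 1 or 2 depending on whether the low colour byte is below 0x80,
+ * selecting one of the two hardware cursor colours.
+ */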
+void colorcur2monocur(void *data, void *out)
+{
+ unsigned int *col = (unsigned int *)data;
+ unsigned char *mono = (unsigned char *)out;
+ unsigned char pixel = 0;
+ char bit_values;
+ int i;
+
+ for (i = 0; i < 64 * 64; i++) {
+ if (*col >> 24 < 0xe0) {
+ bit_values = 0;
+ } else {
+ int val = *col & 0xff;
+
+ if (val < 0x80)
+ bit_values = 1;
+ else
+ bit_values = 2;
+ }
+ col++;
+ /* Copy bits into cursor byte */
+ switch (i & 3) {
+ case 0:
+ pixel = bit_values;
+ break;
+
+ case 1:
+ pixel |= bit_values << 2;
+ break;
+
+ case 2:
+ pixel |= bit_values << 4;
+ break;
+
+ case 3:
+ pixel |= bit_values << 6;
+ *mono = pixel;
+ mono++;
+ pixel = 0;
+ break;
+ }
+ }
+}
+
+#define HW_FLAG_OFFSET 0x01ffff00
+#define HW_FLAG_ENABLE 0x1bd40750
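+/*
+ * Read the KVM hardware-cursor flag from the reserved area at the end
+ * of VRAM. Note that this currently always returns 0 (the flag value is
+ * only logged), so callers never enable the hardware-cursor paths.
+ */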
+unsigned char getKVMHWCursorSetting(struct inspur_drm_private *priv)
+{
+ unsigned int value = *(unsigned int *)(priv->fb_map + HW_FLAG_OFFSET);
+
+ DRM_DEBUG_KMS("HW_FLAG = %x\n", value);
+ return 0;
+}
diff --git a/drivers/gpu/drm/inspur/inspur_drm_de.c b/drivers/gpu/drm/inspur/inspur_drm_de.c
new file mode 100644
index 000000000000..de31bb79129b
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_drm_de.c
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_fourcc.h>
+
+#include "inspur_drm_drv.h"
+#include "inspur_drm_regs.h"
+
+struct inspur_display_pll_config {
+ unsigned long hdisplay;
+ unsigned long vdisplay;
+ u32 pll1_config_value;
+ u32 pll2_config_value;
+};
+
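+/*
+ * Fixed mode-to-PLL lookup table: only the resolutions listed here are
+ * accepted by inspur_crtc_mode_valid(), and get_pll_config() falls back
+ * to the 25 MHz (640x480) entry for anything else.
+ */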
+static const struct inspur_display_pll_config inspur_pll_table[] = {
+ {640, 480, CRT_PLL1_NS_25MHZ, CRT_PLL2_NS_25MHZ},
+ {800, 600, CRT_PLL1_NS_40MHZ, CRT_PLL2_NS_40MHZ},
+ {1024, 768, CRT_PLL1_NS_65MHZ, CRT_PLL2_NS_65MHZ},
+ {1280, 800, CRT_PLL1_NS_83MHZ, CRT_PLL2_NS_83MHZ},
+ {1280, 1024, CRT_PLL1_NS_108MHZ, CRT_PLL2_NS_108MHZ},
+ {1440, 900, CRT_PLL1_NS_106MHZ, CRT_PLL2_NS_106MHZ},
+ {1680, 1050, CRT_PLL1_NS_146MHZ, CRT_PLL2_NS_146MHZ},
+ {1920, 1080, CRT_PLL1_NS_148MHZ, CRT_PLL2_NS_148MHZ},
+ {1920, 1200, CRT_PLL1_NS_193MHZ, CRT_PLL2_NS_193MHZ},
+};
+
+#define PADDING(align, data) (((data) + (align) - 1) & (~((align) - 1)))
+
+static int inspur_plane_atomic_check(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ struct drm_framebuffer *fb = state->fb;
+ struct drm_crtc *crtc = state->crtc;
+ struct drm_crtc_state *crtc_state;
+ u32 src_w = state->src_w >> 16;
+ u32 src_h = state->src_h >> 16;
+
+ if (!crtc || !fb)
+ return 0;
+
+ crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+
+ if (src_w != state->crtc_w || src_h != state->crtc_h) {
+ DRM_DEBUG_ATOMIC("scale not support\n");
+ return -EINVAL;
+ }
+
+ if (state->crtc_x < 0 || state->crtc_y < 0) {
+ DRM_DEBUG_ATOMIC("crtc_x/y of drm_plane state is invalid\n");
+ return -EINVAL;
+ }
+
+ if (!crtc_state->enable)
+ return 0;
+
+ if (state->crtc_x + state->crtc_w >
+ crtc_state->adjusted_mode.hdisplay ||
+ state->crtc_y + state->crtc_h >
+ crtc_state->adjusted_mode.vdisplay) {
+ DRM_DEBUG_ATOMIC("visible portion of plane is invalid\n");
+ return -EINVAL;
+ }
+
+ if (state->fb->pitches[0] % 128 != 0) {
+ DRM_DEBUG_ATOMIC("wrong stride with 128-byte aligned\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void inspur_plane_atomic_update(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+{
+ struct drm_plane_state *state = plane->state;
+ u32 reg;
+ int ret;
+ s64 gpu_addr = 0;
+ unsigned int line_l;
+ struct inspur_drm_private *priv = plane->dev->dev_private;
+ struct drm_gem_vram_object *gbo;
+
+ if (!state->fb)
+ return;
+
+ gbo = drm_gem_vram_of_gem(state->fb->obj[0]);
+
+ ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
+ if (ret) {
+ DRM_ERROR("failed to pin bo: %d", ret);
+ return;
+ }
+ gpu_addr = drm_gem_vram_offset(gbo);
+ if (gpu_addr < 0) {
+ drm_gem_vram_unpin(gbo);
+ return;
+ }
+
+ writel(gpu_addr, priv->mmio + INSPUR_CRT_FB_ADDRESS);
+
+ reg = state->fb->width * (state->fb->format->cpp[0]);
+
+ line_l = state->fb->pitches[0];
+ writel(INSPUR_FIELD(INSPUR_CRT_FB_WIDTH_WIDTH, reg) |
+ INSPUR_FIELD(INSPUR_CRT_FB_WIDTH_OFFS, line_l),
+ priv->mmio + INSPUR_CRT_FB_WIDTH);
+
+ /* Set pixel format: the DISP_CTL format field is cpp * 8 / 16, i.e. 1 for 16 bpp, 2 for 32 bpp */
+ reg = readl(priv->mmio + INSPUR_CRT_DISP_CTL);
+ reg &= ~INSPUR_CRT_DISP_CTL_FORMAT_MASK;
+ reg |= INSPUR_FIELD(INSPUR_CRT_DISP_CTL_FORMAT,
+ state->fb->format->cpp[0] * 8 / 16);
+ writel(reg, priv->mmio + INSPUR_CRT_DISP_CTL);
+}
+
+static const u32 channel_formats1[] = {
+ DRM_FORMAT_RGB565, DRM_FORMAT_BGR565, DRM_FORMAT_RGB888,
+ DRM_FORMAT_BGR888, DRM_FORMAT_XRGB8888, DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_RGBA8888, DRM_FORMAT_BGRA8888, DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_ABGR8888
+};
+
+static struct drm_plane_funcs inspur_plane_funcs = {
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ .destroy = drm_plane_cleanup,
+ .reset = drm_atomic_helper_plane_reset,
+ .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+};
+
+static const struct drm_plane_helper_funcs inspur_plane_helper_funcs = {
+ .atomic_check = inspur_plane_atomic_check,
+ .atomic_update = inspur_plane_atomic_update,
+};
+
+static struct drm_plane *inspur_plane_init(struct inspur_drm_private *priv)
+{
+ struct drm_device *dev = priv->dev;
+ struct drm_plane *plane;
+ int ret = 0;
+
+ plane = devm_kzalloc(dev->dev, sizeof(*plane), GFP_KERNEL);
+ if (!plane) {
+ DRM_ERROR("failed to alloc memory when init plane\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ ret = drm_universal_plane_init(dev, plane, 1, &inspur_plane_funcs,
+ channel_formats1,
+ ARRAY_SIZE(channel_formats1),
+ NULL,
+ DRM_PLANE_TYPE_PRIMARY,
+ NULL);
+ if (ret) {
+ DRM_ERROR("failed to init plane: %d\n", ret);
+ return ERR_PTR(ret);
+ }
+
+ drm_plane_helper_add(plane, &inspur_plane_helper_funcs);
+ return plane;
+}
+
+static void inspur_crtc_dpms(struct drm_crtc *crtc, int dpms)
+{
+ struct inspur_drm_private *priv = crtc->dev->dev_private;
+ unsigned int reg;
+
+ reg = readl(priv->mmio + INSPUR_CRT_DISP_CTL);
+ reg &= ~INSPUR_CRT_DISP_CTL_DPMS_MASK;
+ reg |= INSPUR_FIELD(INSPUR_CRT_DISP_CTL_DPMS, dpms);
+ reg &= ~INSPUR_CRT_DISP_CTL_TIMING_MASK;
+ if (dpms == INSPUR_CRT_DPMS_ON)
+ reg |= INSPUR_CRT_DISP_CTL_TIMING(1);
+ writel(reg, priv->mmio + INSPUR_CRT_DISP_CTL);
+}
+
+
+static void inspur_crtc_atomic_enable(struct drm_crtc *crtc,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
+ struct drm_atomic_state *state)
+#else
+ struct drm_crtc_state *old_state)
+#endif
+{
+ unsigned int reg;
+ struct inspur_drm_private *priv = crtc->dev->dev_private;
+
+ inspur_set_power_mode(priv, INSPUR_PW_MODE_CTL_MODE_MODE0);
+
+ /* Enable display power gate & LOCALMEM power gate*/
+ reg = readl(priv->mmio + INSPUR_CURRENT_GATE);
+ reg &= ~INSPUR_CURR_GATE_LOCALMEM_MASK;
+ reg &= ~INSPUR_CURR_GATE_DISPLAY_MASK;
+ reg |= INSPUR_CURR_GATE_LOCALMEM(1);
+ reg |= INSPUR_CURR_GATE_DISPLAY(1);
+ inspur_set_current_gate(priv, reg);
+ inspur_crtc_dpms(crtc, INSPUR_CRT_DPMS_ON);
+}
+
+static void inspur_crtc_atomic_disable(struct drm_crtc *crtc,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
+ struct drm_atomic_state *state)
+#else
+ struct drm_crtc_state *old_state)
+#endif
+{
+ unsigned int reg;
+ struct inspur_drm_private *priv = crtc->dev->dev_private;
+
+ inspur_crtc_dpms(crtc, INSPUR_CRT_DPMS_OFF);
+
+ inspur_set_power_mode(priv, INSPUR_PW_MODE_CTL_MODE_SLEEP);
+
+ /* Enable display power gate & LOCALMEM power gate*/
+ reg = readl(priv->mmio + INSPUR_CURRENT_GATE);
+ reg &= ~INSPUR_CURR_GATE_LOCALMEM_MASK;
+ reg &= ~INSPUR_CURR_GATE_DISPLAY_MASK;
+ reg |= INSPUR_CURR_GATE_LOCALMEM(0);
+ reg |= INSPUR_CURR_GATE_DISPLAY(0);
+ inspur_set_current_gate(priv, reg);
+}
+
+static enum drm_mode_status
+inspur_crtc_mode_valid(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode)
+{
+ int i = 0;
+ int vrefresh = drm_mode_vrefresh(mode);
+
+ if (vrefresh < 59 || vrefresh > 61)
+ return MODE_NOCLOCK;
+
+ for (i = 0; i < ARRAY_SIZE(inspur_pll_table); i++) {
+ if (inspur_pll_table[i].hdisplay == mode->hdisplay &&
+ inspur_pll_table[i].vdisplay == mode->vdisplay)
+ return MODE_OK;
+ }
+
+ return MODE_BAD;
+}
+
+static void set_vclock_inspur(struct drm_device *dev, unsigned long pll)
+{
+ u32 val;
+ struct inspur_drm_private *priv = dev->dev_private;
+
+ val = readl(priv->mmio + CRT_PLL1_NS);
+ val &= ~(CRT_PLL1_NS_OUTER_BYPASS(1));
+ writel(val, priv->mmio + CRT_PLL1_NS);
+
+ val = CRT_PLL1_NS_INTER_BYPASS(1) | CRT_PLL1_NS_POWERON(1);
+ writel(val, priv->mmio + CRT_PLL1_NS);
+
+ writel(pll, priv->mmio + CRT_PLL1_NS);
+
+ usleep_range(1000, 2000);
+
+ val = pll & ~(CRT_PLL1_NS_POWERON(1));
+ writel(val, priv->mmio + CRT_PLL1_NS);
+
+ usleep_range(1000, 2000);
+
+ val &= ~(CRT_PLL1_NS_INTER_BYPASS(1));
+ writel(val, priv->mmio + CRT_PLL1_NS);
+
+ usleep_range(1000, 2000);
+
+ val |= CRT_PLL1_NS_OUTER_BYPASS(1);
+ writel(val, priv->mmio + CRT_PLL1_NS);
+}
+
+static void get_pll_config(unsigned long x, unsigned long y,
+ u32 *pll1, u32 *pll2)
+{
+ int i;
+ int count = ARRAY_SIZE(inspur_pll_table);
+
+ for (i = 0; i < count; i++) {
+ if (inspur_pll_table[i].hdisplay == x &&
+ inspur_pll_table[i].vdisplay == y) {
+ *pll1 = inspur_pll_table[i].pll1_config_value;
+ *pll2 = inspur_pll_table[i].pll2_config_value;
+ return;
+ }
+ }
+
+ /* if found none, we use default value */
+ *pll1 = CRT_PLL1_NS_25MHZ;
+ *pll2 = CRT_PLL2_NS_25MHZ;
+}
+
+/*
+ * This function takes care of the extra registers and bit fields
+ * required to set up a mode on the board.
+ * Explanation of the Display Control register: the FPGA only supports
+ * 7 predefined pixel clocks, and the clock select is in bits 4:0 of
+ * register 0x802a8.
+ */
+static unsigned int display_ctrl_adjust(struct drm_device *dev,
+ struct drm_display_mode *mode,
+ unsigned int ctrl)
+{
+ unsigned long x, y;
+ u32 pll1; /* bit[31:0] of PLL */
+ u32 pll2; /* bit[63:32] of PLL */
+ struct inspur_drm_private *priv = dev->dev_private;
+
+ x = mode->hdisplay;
+ y = mode->vdisplay;
+
+ get_pll_config(x, y, &pll1, &pll2);
+ writel(pll2, priv->mmio + CRT_PLL2_NS);
+ set_vclock_inspur(dev, pll1);
+
+ /*
+ * The inspur chip has to set up the top-left and bottom-right
+ * registers as well.
+ * Note that a normal chip only uses those two registers for
+ * auto-centering mode.
+ */
+ writel(INSPUR_FIELD(INSPUR_CRT_AUTO_CENTERING_TL_TOP, 0) |
+ INSPUR_FIELD(INSPUR_CRT_AUTO_CENTERING_TL_LEFT, 0),
+ priv->mmio + INSPUR_CRT_AUTO_CENTERING_TL);
+
+ writel(INSPUR_FIELD(INSPUR_CRT_AUTO_CENTERING_BR_BOTTOM, y - 1) |
+ INSPUR_FIELD(INSPUR_CRT_AUTO_CENTERING_BR_RIGHT, x - 1),
+ priv->mmio + INSPUR_CRT_AUTO_CENTERING_BR);
+
+ /*
+ * Assume common fields in ctrl have been properly set before
+ * calling this function.
+ * This function only sets the extra fields in ctrl.
+ */
+
+ /* Set bit 25 of display controller: Select CRT or VGA clock */
+ ctrl &= ~INSPUR_CRT_DISP_CTL_CRTSELECT_MASK;
+ ctrl &= ~INSPUR_CRT_DISP_CTL_CLOCK_PHASE_MASK;
+
+ ctrl |= INSPUR_CRT_DISP_CTL_CRTSELECT(INSPUR_CRTSELECT_CRT);
+
+ /* clock_phase_polarity is 0 */
+ ctrl |= INSPUR_CRT_DISP_CTL_CLOCK_PHASE(0);
+
+ writel(ctrl, priv->mmio + INSPUR_CRT_DISP_CTL);
+
+ return ctrl;
+}
+
+static void inspur_crtc_mode_set_nofb(struct drm_crtc *crtc)
+{
+ unsigned int val;
+ struct drm_display_mode *mode = &crtc->state->mode;
+ struct drm_device *dev = crtc->dev;
+ struct inspur_drm_private *priv = dev->dev_private;
+ int width = mode->hsync_end - mode->hsync_start;
+ int height = mode->vsync_end - mode->vsync_start;
+
+ //writel(format_pll_reg(), priv->mmio + INSPUR_CRT_PLL_CTRL);
+ writel(INSPUR_FIELD(INSPUR_CRT_HORZ_TOTAL_TOTAL, mode->htotal - 1) |
+ INSPUR_FIELD(INSPUR_CRT_HORZ_TOTAL_DISP_END, mode->hdisplay - 1),
+ priv->mmio + INSPUR_CRT_HORZ_TOTAL);
+
+ writel(INSPUR_FIELD(INSPUR_CRT_HORZ_SYNC_WIDTH, width) |
+ INSPUR_FIELD(INSPUR_CRT_HORZ_SYNC_START, mode->hsync_start - 1),
+ priv->mmio + INSPUR_CRT_HORZ_SYNC);
+
+ writel(INSPUR_FIELD(INSPUR_CRT_VERT_TOTAL_TOTAL, mode->vtotal - 1) |
+ INSPUR_FIELD(INSPUR_CRT_VERT_TOTAL_DISP_END, mode->vdisplay - 1),
+ priv->mmio + INSPUR_CRT_VERT_TOTAL);
+
+ writel(INSPUR_FIELD(INSPUR_CRT_VERT_SYNC_HEIGHT, height) |
+ INSPUR_FIELD(INSPUR_CRT_VERT_SYNC_START, mode->vsync_start - 1),
+ priv->mmio + INSPUR_CRT_VERT_SYNC);
+
+ val = INSPUR_FIELD(INSPUR_CRT_DISP_CTL_VSYNC_PHASE, 0);
+ val |= INSPUR_FIELD(INSPUR_CRT_DISP_CTL_HSYNC_PHASE, 0);
+ val |= INSPUR_CRT_DISP_CTL_TIMING(1);
+ val |= INSPUR_CRT_DISP_CTL_PLANE(1);
+
+ display_ctrl_adjust(dev, mode, val);
+}
+
+static void inspur_crtc_atomic_begin(struct drm_crtc *crtc,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
+ struct drm_atomic_state *state)
+#else
+ struct drm_crtc_state *old_state)
+#endif
+{
+ unsigned int reg;
+ struct drm_device *dev = crtc->dev;
+ struct inspur_drm_private *priv = dev->dev_private;
+
+ inspur_set_power_mode(priv, INSPUR_PW_MODE_CTL_MODE_MODE0);
+
+ /* Enable display power gate & LOCALMEM power gate*/
+ reg = readl(priv->mmio + INSPUR_CURRENT_GATE);
+ reg &= ~INSPUR_CURR_GATE_DISPLAY_MASK;
+ reg &= ~INSPUR_CURR_GATE_LOCALMEM_MASK;
+ reg |= INSPUR_CURR_GATE_DISPLAY(1);
+ reg |= INSPUR_CURR_GATE_LOCALMEM(1);
+ inspur_set_current_gate(priv, reg);
+
+ /* We can add more initialization as needed. */
+}
+
+static void inspur_crtc_atomic_flush(struct drm_crtc *crtc,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
+ struct drm_atomic_state *state)
+#else
+ struct drm_crtc_state *old_state)
+#endif
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ if (crtc->state->event)
+ drm_crtc_send_vblank_event(crtc, crtc->state->event);
+ crtc->state->event = NULL;
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+}
+
+static int inspur_crtc_enable_vblank(struct drm_crtc *crtc)
+{
+ struct inspur_drm_private *priv = crtc->dev->dev_private;
+
+ writel(INSPUR_RAW_INTERRUPT_EN_VBLANK(1),
+ priv->mmio + INSPUR_RAW_INTERRUPT_EN);
+
+ return 0;
+}
+
+static void inspur_crtc_disable_vblank(struct drm_crtc *crtc)
+{
+ struct inspur_drm_private *priv = crtc->dev->dev_private;
+
+ writel(INSPUR_RAW_INTERRUPT_EN_VBLANK(0),
+ priv->mmio + INSPUR_RAW_INTERRUPT_EN);
+}
+
+static const struct drm_crtc_funcs inspur_crtc_funcs = {
+ .page_flip = drm_atomic_helper_page_flip,
+ .set_config = drm_atomic_helper_set_config,
+ .destroy = drm_crtc_cleanup,
+ .reset = drm_atomic_helper_crtc_reset,
+ .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+ .enable_vblank = inspur_crtc_enable_vblank,
+ .disable_vblank = inspur_crtc_disable_vblank,
+
+};
+
+static const struct drm_crtc_helper_funcs inspur_crtc_helper_funcs = {
+ .mode_set_nofb = inspur_crtc_mode_set_nofb,
+ .atomic_begin = inspur_crtc_atomic_begin,
+ .atomic_flush = inspur_crtc_atomic_flush,
+ .atomic_enable = inspur_crtc_atomic_enable,
+ .atomic_disable = inspur_crtc_atomic_disable,
+ .mode_valid = inspur_crtc_mode_valid,
+};
+
+int inspur_de_init(struct inspur_drm_private *priv)
+{
+ struct drm_device *dev = priv->dev;
+ struct drm_crtc *crtc;
+ struct drm_plane *plane;
+ int ret;
+
+ plane = inspur_plane_init(priv);
+ if (IS_ERR(plane)) {
+ DRM_ERROR("failed to create plane: %ld\n", PTR_ERR(plane));
+ return PTR_ERR(plane);
+ }
+
+ crtc = devm_kzalloc(dev->dev, sizeof(*crtc), GFP_KERNEL);
+ if (!crtc) {
+ DRM_ERROR("failed to alloc memory when init crtc\n");
+ return -ENOMEM;
+ }
+
+ ret = drm_crtc_init_with_planes(dev, crtc, plane,
+ NULL, &inspur_crtc_funcs, NULL);
+ if (ret) {
+ DRM_ERROR("failed to init crtc: %d\n", ret);
+ return ret;
+ }
+
+ ret = drm_mode_crtc_set_gamma_size(crtc, 256);
+ if (ret) {
+ DRM_ERROR("failed to set gamma size: %d\n", ret);
+ return ret;
+ }
+ drm_crtc_helper_add(crtc, &inspur_crtc_helper_funcs);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/inspur/inspur_drm_drv.c b/drivers/gpu/drm/inspur/inspur_drm_drv.c
new file mode 100644
index 000000000000..d7026e1df167
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_drm_drv.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * Author:
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/console.h>
+#include <linux/module.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_probe_helper.h>
+
+#include "inspur_drm_drv.h"
+#include "inspur_drm_regs.h"
+
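+/*
+ * The last 2 MiB of the BAR0 framebuffer is kept out of the DRM VRAM
+ * pool; judging by the name, it is reserved for the BMC's KVM (remote
+ * console) data.
+ */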
+#define MEM_SIZE_RESERVE4KVM 0x200000
+
+
+DEFINE_DRM_GEM_FOPS(inspur_fops);
+irqreturn_t inspur_drm_interrupt(int irq, void *arg)
+{
+ struct drm_device *dev = (struct drm_device *)arg;
+ struct inspur_drm_private *priv =
+ (struct inspur_drm_private *)dev->dev_private;
+ u32 status;
+
+ status = readl(priv->mmio + INSPUR_RAW_INTERRUPT);
+
+ if (status & INSPUR_RAW_INTERRUPT_VBLANK(1)) {
+ writel(INSPUR_RAW_INTERRUPT_VBLANK(1),
+ priv->mmio + INSPUR_RAW_INTERRUPT);
+ drm_handle_vblank(dev, 0);
+ }
+
+ return IRQ_HANDLED;
+}
+
+
+
+static struct drm_driver inspur_driver = {
+ .driver_features = DRIVER_GEM | DRIVER_MODESET |
+ DRIVER_ATOMIC | DRIVER_HAVE_IRQ,
+
+ .fops = &inspur_fops,
+ .name = "inspur",
+ .date = "20230425",
+ .desc = "inspur drm driver",
+ .major = 2,
+ .minor = 2,
+ //.gem_free_object_unlocked = inspur_gem_free_object,
+ .dumb_create = inspur_dumb_create,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0)
+ .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#else
+ .dumb_map_offset = drm_gem_vram_driver_dumb_mmap_offset,
+#endif
+};
+
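+/*
+ * Kick out any firmware/platform framebuffer bound to our PCI BARs.
+ * Note that the apertures_struct filled in below is never passed to the
+ * helpers; all three variants derive the ranges from the pdev directly.
+ */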
+static void inspur_remove_framebuffers(struct pci_dev *pdev)
+{
+ struct apertures_struct *ap;
+
+ ap = alloc_apertures(1);
+ if (!ap)
+ return;
+
+ ap->ranges[0].base = pci_resource_start(pdev, 0);
+ ap->ranges[0].size = pci_resource_len(pdev, 0);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0)
+ drm_aperture_remove_conflicting_pci_framebuffers(pdev, &inspur_driver);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0)
+ drm_aperture_remove_conflicting_pci_framebuffers(pdev, "inspurdrmfb");
+#else
+ drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "inspurdrmfb");
+#endif
+
+ kfree(ap);
+}
+
+static int __maybe_unused inspur_pm_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct inspur_drm_private *priv = drm_dev->dev_private;
+
+ drm_kms_helper_poll_disable(drm_dev);
+ priv->suspend_state = drm_atomic_helper_suspend(drm_dev);
+ if (IS_ERR(priv->suspend_state)) {
+ DRM_ERROR("drm_atomic_helper_suspend failed: %ld\n",
+ PTR_ERR(priv->suspend_state));
+ drm_kms_helper_poll_enable(drm_dev);
+ return PTR_ERR(priv->suspend_state);
+ }
+
+ return 0;
+}
+
+static int __maybe_unused inspur_pm_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct inspur_drm_private *priv = drm_dev->dev_private;
+
+ drm_atomic_helper_resume(drm_dev, priv->suspend_state);
+ drm_kms_helper_poll_enable(drm_dev);
+
+ return 0;
+}
+
+static const struct dev_pm_ops inspur_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(inspur_pm_suspend,
+ inspur_pm_resume)
+};
+
+static int inspur_kms_init(struct inspur_drm_private *priv)
+{
+ int ret;
+
+ drm_mode_config_init(priv->dev);
+ priv->mode_config_initialized = true;
+
+ priv->dev->mode_config.min_width = 0;
+ priv->dev->mode_config.min_height = 0;
+ priv->dev->mode_config.max_width = 1920;
+ priv->dev->mode_config.max_height = 1200;
+
+ priv->dev->mode_config.fb_base = priv->fb_base;
+ priv->dev->mode_config.preferred_depth = 32;
+ priv->dev->mode_config.prefer_shadow = 1;
+
+ if (getKVMHWCursorSetting(priv)) {
+ priv->dev->mode_config.cursor_width = 64;
+ priv->dev->mode_config.cursor_height = 64;
+ }
+
+ priv->dev->mode_config.funcs = (void *)&inspur_mode_funcs;
+
+ ret = inspur_de_init(priv);
+ if (ret) {
+ DRM_ERROR("failed to init de: %d\n", ret);
+ return ret;
+ }
+
+ ret = inspur_vdac_init(priv);
+ if (ret) {
+ DRM_ERROR("failed to init vdac: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void inspur_kms_fini(struct inspur_drm_private *priv)
+{
+ if (priv->mode_config_initialized) {
+ drm_mode_config_cleanup(priv->dev);
+ priv->mode_config_initialized = false;
+ }
+}
+
+/*
+ * It can operate in one of three modes: 0, 1 or Sleep.
+ */
+void inspur_set_power_mode(struct inspur_drm_private *priv,
+ unsigned int power_mode)
+{
+ unsigned int control_value = 0;
+ void __iomem *mmio = priv->mmio;
+ unsigned int input = 1;
+
+ if (power_mode > INSPUR_PW_MODE_CTL_MODE_SLEEP)
+ return;
+
+ if (power_mode == INSPUR_PW_MODE_CTL_MODE_SLEEP)
+ input = 0;
+
+ control_value = readl(mmio + INSPUR_POWER_MODE_CTRL);
+ control_value &= ~(INSPUR_PW_MODE_CTL_MODE_MASK |
+ INSPUR_PW_MODE_CTL_OSC_INPUT_MASK);
+ control_value |= INSPUR_FIELD(INSPUR_PW_MODE_CTL_MODE, power_mode);
+ control_value |= INSPUR_FIELD(INSPUR_PW_MODE_CTL_OSC_INPUT, input);
+ writel(control_value, mmio + INSPUR_POWER_MODE_CTRL);
+}
+
+void inspur_set_current_gate(struct inspur_drm_private *priv, unsigned int gate)
+{
+ unsigned int gate_reg;
+ unsigned int mode;
+ void __iomem *mmio = priv->mmio;
+
+ /* Get current power mode. */
+ mode = (readl(mmio + INSPUR_POWER_MODE_CTRL) &
+ INSPUR_PW_MODE_CTL_MODE_MASK) >> INSPUR_PW_MODE_CTL_MODE_SHIFT;
+
+ switch (mode) {
+ case INSPUR_PW_MODE_CTL_MODE_MODE0:
+ gate_reg = INSPUR_MODE0_GATE;
+ break;
+
+ case INSPUR_PW_MODE_CTL_MODE_MODE1:
+ gate_reg = INSPUR_MODE1_GATE;
+ break;
+
+ default:
+ gate_reg = INSPUR_MODE0_GATE;
+ break;
+ }
+ writel(gate, mmio + gate_reg);
+}
+
+static void inspur_hw_config(struct inspur_drm_private *priv)
+{
+ unsigned int reg;
+
+ /* On hardware reset, power mode 0 is default. */
+ inspur_set_power_mode(priv, INSPUR_PW_MODE_CTL_MODE_MODE0);
+
+ /* Enable display power gate & LOCALMEM power gate*/
+ reg = readl(priv->mmio + INSPUR_CURRENT_GATE);
+ reg &= ~INSPUR_CURR_GATE_DISPLAY_MASK;
+ reg &= ~INSPUR_CURR_GATE_LOCALMEM_MASK;
+ reg |= INSPUR_CURR_GATE_DISPLAY(1);
+ reg |= INSPUR_CURR_GATE_LOCALMEM(1);
+
+ inspur_set_current_gate(priv, reg);
+
+ /*
+ * Reset the memory controller. If the memory controller
+ * is not reset in the chip, the system might hang when software
+ * accesses the memory. The memory should be reset after
+ * changing the MXCLK.
+ */
+ reg = readl(priv->mmio + INSPUR_MISC_CTRL);
+ reg &= ~INSPUR_MSCCTL_LOCALMEM_RESET_MASK;
+ reg |= INSPUR_MSCCTL_LOCALMEM_RESET(0);
+ writel(reg, priv->mmio + INSPUR_MISC_CTRL);
+
+ reg &= ~INSPUR_MSCCTL_LOCALMEM_RESET_MASK;
+ reg |= INSPUR_MSCCTL_LOCALMEM_RESET(1);
+
+ writel(reg, priv->mmio + INSPUR_MISC_CTRL);
+}
+
+static int inspur_hw_map(struct inspur_drm_private *priv)
+{
+ struct drm_device *dev = priv->dev;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ resource_size_t addr, size, ioaddr, iosize;
+
+ ioaddr = pci_resource_start(pdev, 1);
+ iosize = pci_resource_len(pdev, 1);
+ priv->mmio = devm_ioremap(dev->dev, ioaddr, iosize);
+ if (!priv->mmio) {
+ DRM_ERROR("Cannot map mmio region\n");
+ return -ENOMEM;
+ }
+
+ addr = pci_resource_start(pdev, 0);
+ size = pci_resource_len(pdev, 0);
+ priv->fb_map = devm_ioremap(dev->dev, addr, size);
+ if (!priv->fb_map) {
+ DRM_ERROR("Cannot map framebuffer\n");
+ return -ENOMEM;
+ }
+ priv->fb_base = addr;
+ priv->fb_size = size - MEM_SIZE_RESERVE4KVM;
+
+ return 0;
+}
+
+static void inspur_hw_unmap(struct inspur_drm_private *priv)
+{
+ struct drm_device *dev = priv->dev;
+
+ if (priv->mmio) {
+ devm_iounmap(dev->dev, priv->mmio);
+ priv->mmio = NULL;
+ }
+ if (priv->fb_map) {
+ devm_iounmap(dev->dev, priv->fb_map);
+ priv->fb_map = NULL;
+ }
+}
+
+static int inspur_hw_init(struct inspur_drm_private *priv)
+{
+ int ret;
+
+ ret = inspur_hw_map(priv);
+ if (ret)
+ return ret;
+
+ inspur_hw_config(priv);
+
+ return 0;
+}
+
+void inspur_unload(struct drm_device *dev)
+{
+ struct inspur_drm_private *priv = dev->dev_private;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ drm_atomic_helper_shutdown(dev);
+
+ free_irq(pdev->irq, dev);
+
+ inspur_kms_fini(priv);
+ inspur_hw_unmap(priv);
+ pci_disable_msi(to_pci_dev(dev->dev));
+ dev->dev_private = NULL;
+}
+
+int inspur_load(struct drm_device *dev, unsigned long flags)
+{
+ struct inspur_drm_private *priv;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ int ret;
+
+ priv = devm_kzalloc(dev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ DRM_ERROR("no memory to allocate for inspur_drm_private\n");
+ return -ENOMEM;
+ }
+ dev->dev_private = priv;
+ priv->dev = dev;
+
+ ret = inspur_hw_init(priv);
+ if (ret)
+ goto err;
+
+ ret = drmm_vram_helper_init(dev, pci_resource_start(pdev, 0), priv->fb_size);
+ if (ret) {
+ drm_err(dev, "Error initializing VRAM MM; %d\n", ret);
+ goto err;
+ }
+ ret = inspur_kms_init(priv);
+ if (ret)
+ goto err;
+
+
+ /* reset all the states of crtc/plane/encoder/connector */
+ drm_mode_config_reset(dev);
+
+ if (getKVMHWCursorSetting(priv)) {
+#if 0
+ inspur_bo_create(dev, PAGE_ALIGN(1024), 0, 0, &priv->cursor.cursor_1);
+ inspur_bo_create(dev, PAGE_ALIGN(1024), 0, 0, &priv->cursor.cursor_2);
+ if (!priv->cursor.cursor_1 || !priv->cursor.cursor_2) {
+ priv->cursor.cursor_1 = NULL;
+ priv->cursor.cursor_2 = NULL;
+ DRM_ERROR("Could not allocate space for cursors. Not doing hardware cursors.\n");
+ }
+#endif
+ }
+
+ return 0;
+
+err:
+ inspur_unload(dev);
+ DRM_ERROR("failed to initialize drm driver: %d\n", ret);
+ return ret;
+}
+
+static int inspur_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int ret = 0;
+ struct inspur_drm_private *priv;
+ struct drm_device *dev;
+
+ inspur_remove_framebuffers(pdev);
+
+ dev = drm_dev_alloc(&inspur_driver, &pdev->dev);
+ if (IS_ERR(dev)) {
+ DRM_ERROR("failed to allocate drm_device\n");
+ return PTR_ERR(dev);
+ }
+
+ pci_set_drvdata(pdev, dev);
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ drm_err(dev, "failed to enable pci device: %d\n", ret);
+ return ret;
+ }
+ ret = inspur_load(dev, ent->driver_data);
+ if (ret)
+ goto err_return;
+
+ ret = drm_dev_register(dev, ent->driver_data);
+ if (ret)
+ goto err_inspur_driver_unload;
+
+ drm_fbdev_generic_setup(dev, dev->mode_config.preferred_depth);
+
+ return 0;
+err_inspur_driver_unload:
+ inspur_unload(dev);
+err_return:
+ return ret;
+}
+
+static void inspur_pci_remove(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+
+ drm_put_dev(dev);
+ pci_disable_device(pdev);
+}
+
+static void inspur_pci_shutdown(struct pci_dev *pdev)
+{
+ inspur_pci_remove(pdev);
+}
+
+static struct pci_device_id inspur_pci_table[] = {
+ {0x1bd4, 0x0750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0,}
+};
+
+static struct pci_driver inspur_pci_driver = {
+ .name = "inspur-drm",
+ .id_table = inspur_pci_table,
+ .probe = inspur_pci_probe,
+ .remove = inspur_pci_remove,
+ .shutdown = inspur_pci_shutdown,
+ .driver.pm = &inspur_pm_ops,
+};
+
+static int __init inspur_init(void)
+{
+ return pci_register_driver(&inspur_pci_driver);
+}
+
+static void __exit inspur_exit(void)
+{
+ return pci_unregister_driver(&inspur_pci_driver);
+}
+
+module_init(inspur_init);
+module_exit(inspur_exit);
+
+MODULE_DEVICE_TABLE(pci, inspur_pci_table);
+MODULE_AUTHOR("");
+MODULE_DESCRIPTION("DRM Driver for INSPUR");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/inspur/inspur_drm_drv.h b/drivers/gpu/drm/inspur/inspur_drm_drv.h
new file mode 100644
index 000000000000..b1a20f1b7df2
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_drm_drv.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef INSPUR_DRM_DRV_H
+#define INSPUR_DRM_DRV_H
+
+#include <linux/version.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_vram_helper.h>
+#include <linux/pci.h>
+#include <drm/drm_vblank.h>
+#include <drm/drm_drv.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0)
+#include <drm/drm_aperture.h>
+#endif
+
+#include <linux/delay.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+struct drm_device;
+struct drm_gem_object;
+
+#define inspur_framebuffer drm_framebuffer
+#define BPP16_RED 0x0000f800
+#define BPP16_GREEN 0x000007e0
+#define BPP16_BLUE 0x0000001f
+#define BPP16_WHITE 0x0000ffff
+#define BPP16_GRAY 0x00008410
+#define BPP16_YELLOW 0x0000ffe0
+#define BPP16_CYAN 0x000007ff
+#define BPP16_PINK 0x0000f81f
+#define BPP16_BLACK 0x00000000
+struct inspur_fbdev {
+ struct drm_fb_helper helper;
+ struct inspur_framebuffer *fb;
+ int size;
+};
+
+struct inspur_cursor {
+ struct drm_gem_vram_object *gbo[2];
+ unsigned int next_index;
+};
+
+struct inspur_drm_private {
+ /* hw */
+ void __iomem *mmio;
+ void __iomem *fb_map;
+ unsigned long fb_base;
+ unsigned long fb_size;
+
+ /* drm */
+ struct drm_device *dev;
+ bool mode_config_initialized;
+ struct drm_atomic_state *suspend_state;
+
+ /* fbdev */
+ struct inspur_fbdev *fbdev;
+
+ /* hw cursor */
+ struct inspur_cursor cursor;
+};
+
+#define to_inspur_framebuffer(x) container_of(x, struct inspur_framebuffer, fb)
+
+
+void inspur_set_power_mode(struct inspur_drm_private *priv,
+ unsigned int power_mode);
+void inspur_set_current_gate(struct inspur_drm_private *priv,
+ unsigned int gate);
+int inspur_load(struct drm_device *dev, unsigned long flags);
+void inspur_unload(struct drm_device *dev);
+
+int inspur_de_init(struct inspur_drm_private *priv);
+int inspur_vdac_init(struct inspur_drm_private *priv);
+int inspur_fbdev_init(struct inspur_drm_private *priv);
+void inspur_fbdev_fini(struct inspur_drm_private *priv);
+
+int inspur_gem_create(struct drm_device *dev, u32 size, bool iskernel, struct drm_gem_object **obj);
+struct inspur_framebuffer *
+inspur_framebuffer_init(struct drm_device *dev,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj);
+
+int inspur_mm_init(struct inspur_drm_private *inspur);
+void inspur_mm_fini(struct inspur_drm_private *inspur);
+int inspur_dumb_create(struct drm_file *file, struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+
+extern const struct drm_mode_config_funcs inspur_mode_funcs;
+
+/* inspur_drm_cursor.c */
+int inspur_cursor_init(struct inspur_drm_private *priv);
+void inspur_cursor_fini(struct inspur_drm_private *priv);
+int inspur_crtc_cursor_set(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle, uint32_t width,
+ uint32_t height);
+int inspur_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
+unsigned char getKVMHWCursorSetting(struct inspur_drm_private *priv);
+void colorcur2monocur(void *data, void *out);
+
+
+#endif
diff --git a/drivers/gpu/drm/inspur/inspur_drm_regs.h b/drivers/gpu/drm/inspur/inspur_drm_regs.h
new file mode 100644
index 000000000000..a28dfd1285d7
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_drm_regs.h
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef INSPUR_DRM_HW_H
+#define INSPUR_DRM_HW_H
+
+/* register definition */
+#define INSPUR_MISC_CTRL 0x4
+
+#define INSPUR_MSCCTL_LOCALMEM_RESET(x) ((x) << 6)
+#define INSPUR_MSCCTL_LOCALMEM_RESET_MASK 0x40
+
+#define INSPUR_CURRENT_GATE 0x000040
+#define INSPUR_CURR_GATE_DISPLAY(x) ((x) << 2)
+#define INSPUR_CURR_GATE_DISPLAY_MASK 0x4
+
+#define INSPUR_CURR_GATE_LOCALMEM(x) ((x) << 1)
+#define INSPUR_CURR_GATE_LOCALMEM_MASK 0x2
+
+#define INSPUR_MODE0_GATE 0x000044
+#define INSPUR_MODE1_GATE 0x000048
+#define INSPUR_POWER_MODE_CTRL 0x00004C
+
+#define INSPUR_PW_MODE_CTL_OSC_INPUT(x) ((x) << 3)
+#define INSPUR_PW_MODE_CTL_OSC_INPUT_MASK 0x8
+
+#define INSPUR_PW_MODE_CTL_MODE(x) ((x) << 0)
+#define INSPUR_PW_MODE_CTL_MODE_MASK 0x03
+#define INSPUR_PW_MODE_CTL_MODE_SHIFT 0
+
+#define INSPUR_PW_MODE_CTL_MODE_MODE0 0
+#define INSPUR_PW_MODE_CTL_MODE_MODE1 1
+#define INSPUR_PW_MODE_CTL_MODE_SLEEP 2
+
+//#define INSPUR_CRT_PLL_CTRL 0x000060
+
+#define INSPUR_PLL_CTRL_BYPASS(x) ((x) << 18)
+#define INSPUR_PLL_CTRL_BYPASS_MASK 0x40000
+
+#define INSPUR_PLL_CTRL_POWER(x) ((x) << 17)
+#define INSPUR_PLL_CTRL_POWER_MASK 0x20000
+
+#define INSPUR_PLL_CTRL_INPUT(x) ((x) << 16)
+#define INSPUR_PLL_CTRL_INPUT_MASK 0x10000
+
+#define INSPUR_PLL_CTRL_POD(x) ((x) << 14)
+#define INSPUR_PLL_CTRL_POD_MASK 0xC000
+
+#define INSPUR_PLL_CTRL_OD(x) ((x) << 12)
+#define INSPUR_PLL_CTRL_OD_MASK 0x3000
+
+#define INSPUR_PLL_CTRL_N(x) ((x) << 8)
+#define INSPUR_PLL_CTRL_N_MASK 0xF00
+
+#define INSPUR_PLL_CTRL_M(x) ((x) << 0)
+#define INSPUR_PLL_CTRL_M_MASK 0xFF
+
+#define INSPUR_CRT_DISP_CTL 0x80200
+
+
+#define INSPUR_CRT_DISP_CTL_DPMS(x) ((x) << 30)
+#define INSPUR_CRT_DISP_CTL_DPMS_MASK 0xc0000000
+
+#define INSPUR_CRT_DPMS_ON 0
+#define INSPUR_CRT_DPMS_OFF 3
+
+
+#define INSPUR_CRT_DISP_CTL_CRTSELECT(x) ((x) << 25)
+#define INSPUR_CRT_DISP_CTL_CRTSELECT_MASK 0x2000000
+
+#define INSPUR_CRTSELECT_CRT 1
+
+#define INSPUR_CRT_DISP_CTL_CLOCK_PHASE(x) ((x) << 14)
+#define INSPUR_CRT_DISP_CTL_CLOCK_PHASE_MASK 0x4000
+
+#define INSPUR_CRT_DISP_CTL_VSYNC_PHASE(x) ((x) << 13)
+#define INSPUR_CRT_DISP_CTL_VSYNC_PHASE_MASK 0x2000
+
+#define INSPUR_CRT_DISP_CTL_HSYNC_PHASE(x) ((x) << 12)
+#define INSPUR_CRT_DISP_CTL_HSYNC_PHASE_MASK 0x1000
+
+#define INSPUR_CRT_DISP_CTL_TIMING(x) ((x) << 8)
+#define INSPUR_CRT_DISP_CTL_TIMING_MASK 0x100
+
+#define INSPUR_CRT_DISP_CTL_PLANE(x) ((x) << 2)
+#define INSPUR_CRT_DISP_CTL_PLANE_MASK 4
+
+#define INSPUR_CRT_DISP_CTL_FORMAT(x) ((x) << 0)
+#define INSPUR_CRT_DISP_CTL_FORMAT_MASK 0x03
+
+#define INSPUR_CRT_FB_ADDRESS 0x080204
+
+#define INSPUR_CRT_FB_WIDTH 0x080208
+#define INSPUR_CRT_FB_WIDTH_WIDTH(x) ((x) << 16)
+#define INSPUR_CRT_FB_WIDTH_WIDTH_MASK 0x3FFF0000
+#define INSPUR_CRT_FB_WIDTH_OFFS(x) ((x) << 0)
+#define INSPUR_CRT_FB_WIDTH_OFFS_MASK 0x3FFF
+
+#define INSPUR_CRT_HORZ_TOTAL 0x08020C
+#define INSPUR_CRT_HORZ_TOTAL_TOTAL(x) ((x) << 16)
+#define INSPUR_CRT_HORZ_TOTAL_TOTAL_MASK 0xFFF0000
+
+#define INSPUR_CRT_HORZ_TOTAL_DISP_END(x) ((x) << 0)
+#define INSPUR_CRT_HORZ_TOTAL_DISP_END_MASK 0xFFF
+
+#define INSPUR_CRT_HORZ_SYNC 0x080210
+#define INSPUR_CRT_HORZ_SYNC_WIDTH(x) ((x) << 16)
+#define INSPUR_CRT_HORZ_SYNC_WIDTH_MASK 0xFF0000
+
+#define INSPUR_CRT_HORZ_SYNC_START(x) ((x) << 0)
+#define INSPUR_CRT_HORZ_SYNC_START_MASK 0xFFF
+
+#define INSPUR_CRT_VERT_TOTAL 0x080214
+#define INSPUR_CRT_VERT_TOTAL_TOTAL(x) ((x) << 16)
+#define INSPUR_CRT_VERT_TOTAL_TOTAL_MASK 0x7FFF0000
+
+#define INSPUR_CRT_VERT_TOTAL_DISP_END(x) ((x) << 0)
+#define INSPUR_CRT_VERT_TOTAL_DISP_END_MASK 0x7FF
+
+#define INSPUR_CRT_VERT_SYNC 0x080218
+#define INSPUR_CRT_VERT_SYNC_HEIGHT(x) ((x) << 16)
+#define INSPUR_CRT_VERT_SYNC_HEIGHT_MASK 0x3F0000
+
+#define INSPUR_CRT_VERT_SYNC_START(x) ((x) << 0)
+#define INSPUR_CRT_VERT_SYNC_START_MASK 0x7FF
+
+/* Hardware Cursor */
+#define INSPUR_HWC_ADDRESS 0x080230
+#define INSPUR_HWC_ADDRESS_ENABLE(x) ((x) << 31)
+#define INSPUR_HWC_ADDRESS_ENABLE_MASK 0x80000000
+#define INSPUR_HWC_ADDRESS_ADDRESS(x) ((x) << 0)
+#define INSPUR_HWC_ADDRESS_ADDRESS_MASK 0xFFFFFFF
+
+#define INSPUR_HWC_LOCATION 0x080234
+#define INSPUR_HWC_LOCATION_TOP(x) ((x) << 27)
+#define INSPUR_HWC_LOCATION_TOP_MASK 0x8000000
+#define INSPUR_HWC_LOCATION_Y(x) ((x) << 16)
+#define INSPUR_HWC_LOCATION_Y_MASK 0x7FF0000
+#define INSPUR_HWC_LOCATION_LEFT(x) ((x) << 11)
+#define INSPUR_HWC_LOCATION_LEFT_MASK 0x800
+#define INSPUR_HWC_LOCATION_X(x) ((x) << 0)
+#define INSPUR_HWC_LOCATION_X_MASK 0x7FF
+
+#define INSPUR_HWC_COLOR_12 0x080238
+#define INSPUR_HWC_COLOR_12_2_RGB(x) ((x) << 16)
+#define INSPUR_HWC_COLOR_12_2_RGB_MASK 0xFFFF0000
+#define INSPUR_HWC_COLOR_12_1_RGB(x) ((x) << 0)
+#define INSPUR_HWC_COLOR_12_1_RGB_MASK 0xFFFF
+
+#define INSPUR_HWC_COLOR_3 0x08023C
+#define INSPUR_HWC_COLOR_3_RGB(x) ((x) << 0)
+#define INSPUR_HWC_COLOR_3_RGB_MASK 0xFFFF
+
+/* Auto Centering */
+#define INSPUR_CRT_AUTO_CENTERING_TL 0x080280
+#define INSPUR_CRT_AUTO_CENTERING_TL_TOP(x) ((x) << 16)
+#define INSPUR_CRT_AUTO_CENTERING_TL_TOP_MASK 0x7FF0000
+
+#define INSPUR_CRT_AUTO_CENTERING_TL_LEFT(x) ((x) << 0)
+#define INSPUR_CRT_AUTO_CENTERING_TL_LEFT_MASK 0x7FF
+
+#define INSPUR_CRT_AUTO_CENTERING_BR 0x080284
+#define INSPUR_CRT_AUTO_CENTERING_BR_BOTTOM(x) ((x) << 16)
+#define INSPUR_CRT_AUTO_CENTERING_BR_BOTTOM_MASK 0x7FF0000
+
+#define INSPUR_CRT_AUTO_CENTERING_BR_RIGHT(x) ((x) << 0)
+#define INSPUR_CRT_AUTO_CENTERING_BR_RIGHT_MASK 0x7FF
+
+/* register to control panel output */
+#define INSPUR_DISPLAY_CONTROL_HISILE 0x80288
+#define INSPUR_DISPLAY_CONTROL_FPVDDEN(x) ((x) << 0)
+#define INSPUR_DISPLAY_CONTROL_PANELDATE(x) ((x) << 1)
+#define INSPUR_DISPLAY_CONTROL_FPEN(x) ((x) << 2)
+#define INSPUR_DISPLAY_CONTROL_VBIASEN(x) ((x) << 3)
+
+#define INSPUR_RAW_INTERRUPT 0x80290
+#define INSPUR_RAW_INTERRUPT_VBLANK(x) ((x) << 2)
+#define INSPUR_RAW_INTERRUPT_VBLANK_MASK 0x4
+
+#define INSPUR_RAW_INTERRUPT_EN 0x80298
+#define INSPUR_RAW_INTERRUPT_EN_VBLANK(x) ((x) << 2)
+#define INSPUR_RAW_INTERRUPT_EN_VBLANK_MASK 0x4
+
+/* register and values for PLL control */
+#define CRT_PLL1_NS 0x802a8
+#define CRT_PLL1_NS_OUTER_BYPASS(x) ((x) << 30)
+#define CRT_PLL1_NS_INTER_BYPASS(x) ((x) << 29)
+#define CRT_PLL1_NS_POWERON(x) ((x) << 24)
+
+#define CRT_PLL1_NS_25MHZ 0x00006691 //640x480
+#define CRT_PLL1_NS_40MHZ 0x00004580 //800x600
+#define CRT_PLL1_NS_65MHZ 0x00002568 //1024x768
+#define CRT_PLL1_NS_83MHZ 0x000027bb //1280x800
+#define CRT_PLL1_NS_106MHZ 0x000027ef //1440x900
+#define CRT_PLL1_NS_108MHZ 0x000027f2 //1280x1024
+#define CRT_PLL1_NS_146MHZ 0x00001575 //1680x1050
+#define CRT_PLL1_NS_148MHZ 0x0000145f //1920x1080
+#define CRT_PLL1_NS_193MHZ 0x000018f7 //1920x1200
+
+#define CRT_PLL2_NS 0x802ac
+#define CRT_PLL2_NS_25MHZ 0x0
+#define CRT_PLL2_NS_40MHZ 0x0
+#define CRT_PLL2_NS_65MHZ 0x0
+#define CRT_PLL2_NS_83MHZ 0x0
+#define CRT_PLL2_NS_106MHZ 0x0
+#define CRT_PLL2_NS_108MHZ 0x0
+#define CRT_PLL2_NS_146MHZ 0x0
+#define CRT_PLL2_NS_148MHZ 0x0
+#define CRT_PLL2_NS_193MHZ 0x0
+
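+/*
+ * Place a value into a register field and clamp it to the field's mask,
+ * e.g. INSPUR_FIELD(INSPUR_CRT_DISP_CTL_FORMAT, 2) yields (2 << 0) & 0x03.
+ */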
+#define INSPUR_FIELD(field, value) (field(value) & field##_MASK)
+#endif
diff --git a/drivers/gpu/drm/inspur/inspur_drm_vdac.c b/drivers/gpu/drm/inspur/inspur_drm_vdac.c
new file mode 100644
index 000000000000..20e22ef02546
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_drm_vdac.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_probe_helper.h>
+
+#include "inspur_drm_drv.h"
+#include "inspur_drm_regs.h"
+
+static int inspur_connector_get_modes(struct drm_connector *connector)
+{
+ int count;
+
+ count = drm_add_modes_noedid(connector,
+ connector->dev->mode_config.max_width,
+ connector->dev->mode_config.max_height);
+ drm_set_preferred_mode(connector, 1024, 768);
+ return count;
+}
+
+static int inspur_connector_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ return MODE_OK;
+}
+
+static const struct drm_connector_helper_funcs
+ inspur_connector_helper_funcs = {
+ .get_modes = inspur_connector_get_modes,
+ .mode_valid = inspur_connector_mode_valid,
+};
+
+static const struct drm_connector_funcs inspur_connector_funcs = {
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = drm_connector_cleanup,
+ .reset = drm_atomic_helper_connector_reset,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static void inspur_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adj_mode)
+{
+ u32 reg;
+ struct drm_device *dev = encoder->dev;
+ struct inspur_drm_private *priv = dev->dev_private;
+
+ reg = readl(priv->mmio + INSPUR_DISPLAY_CONTROL_HISILE);
+ reg |= INSPUR_DISPLAY_CONTROL_FPVDDEN(1);
+ reg |= INSPUR_DISPLAY_CONTROL_PANELDATE(1);
+ reg |= INSPUR_DISPLAY_CONTROL_FPEN(1);
+ reg |= INSPUR_DISPLAY_CONTROL_VBIASEN(1);
+ writel(reg, priv->mmio + INSPUR_DISPLAY_CONTROL_HISILE);
+}
+
+static const struct drm_encoder_helper_funcs inspur_encoder_helper_funcs = {
+ .mode_set = inspur_encoder_mode_set,
+};
+
+static const struct drm_encoder_funcs inspur_encoder_funcs = {
+ .destroy = drm_encoder_cleanup,
+};
+
+int inspur_vdac_init(struct inspur_drm_private *priv)
+{
+ struct drm_device *dev = priv->dev;
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+ int ret;
+
+ encoder = devm_kzalloc(dev->dev, sizeof(*encoder), GFP_KERNEL);
+ if (!encoder) {
+ DRM_ERROR("failed to alloc memory when init encoder\n");
+ return -ENOMEM;
+ }
+
+ encoder->possible_crtcs = 0x1;
+ ret = drm_encoder_init(dev, encoder, &inspur_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ if (ret) {
+ DRM_ERROR("failed to init encoder: %d\n", ret);
+ return ret;
+ }
+
+ drm_encoder_helper_add(encoder, &inspur_encoder_helper_funcs);
+
+ connector = devm_kzalloc(dev->dev, sizeof(*connector), GFP_KERNEL);
+ if (!connector) {
+ DRM_ERROR("failed to alloc memory when init connector\n");
+ return -ENOMEM;
+ }
+
+ ret = drm_connector_init(dev, connector,
+ &inspur_connector_funcs,
+ DRM_MODE_CONNECTOR_VGA);
+ if (ret) {
+ DRM_ERROR("failed to init connector: %d\n", ret);
+ return ret;
+ }
+ drm_connector_helper_add(connector, &inspur_connector_helper_funcs);
+
+ drm_connector_register(connector);
+ drm_connector_attach_encoder(connector, encoder);
+ return 0;
+}
diff --git a/drivers/gpu/drm/inspur/inspur_ttm.c b/drivers/gpu/drm/inspur/inspur_ttm.c
new file mode 100644
index 000000000000..5757120597e9
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_ttm.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <drm/drm_atomic_helper.h>
+
+#include "inspur_drm_drv.h"
+
+
+int inspur_dumb_create(struct drm_file *file, struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+
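+ /*
+ * Dumb buffers come from the VRAM helper with a 128-byte pitch
+ * alignment, matching the stride check in inspur_plane_atomic_check().
+ */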
+ return drm_gem_vram_fill_create_dumb(file, dev, 0, 128, args);
+}
+
+
+
+
+
+const struct drm_mode_config_funcs inspur_mode_funcs = {
+ .atomic_check = drm_atomic_helper_check,
+ .atomic_commit = drm_atomic_helper_commit,
+ .fb_create = drm_gem_fb_create,
+ .mode_valid = drm_vram_helper_mode_valid,
+};
--
2.33.0

[PATCH OLK-5.10] LoongArch: Fix module relocation error with binutils 2.41
by Hongchen Zhang 13 Sep '23
From: Huacai Chen <chenhuacai(a)loongson.cn>
stable inclusion
from stable-v6.5-rc4
commit 03c53eb90c0c61885b2175adf8675fb56df7f8db
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I80YEI
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
---------------------------
Binutils 2.41 enables linker relaxation by default, but the kernel
module loader doesn't support that, so just disable it. Otherwise we
get errors such as this when loading modules:
"Unknown relocation type 102"
As an alternative, we could add linker relaxation support to the kernel
module loader. But that would be a relatively large amount of complexity
that may or may not bring a similar gain, and we don't really want to
include this linker pass in the kernel.
Reviewed-by: WANG Xuerui <git(a)xen0n.name>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/Makefile | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 345dc10576d4..a0f194da592b 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -55,6 +55,8 @@ LDFLAGS_vmlinux += -G0 -static -n -nostdlib
ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
cflags-y += -mexplicit-relocs
KBUILD_CFLAGS_KERNEL += -mdirect-extern-access
+KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
+KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
else
cflags-y += $(call cc-option,-mno-explicit-relocs)
KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel
--
2.33.0

LoongArch: Fix the write_fcsr() macro
From: Qi Hu <huqi(a)loongson.cn>
linux-next inclusion
from next-20230616
commit 346dc929623cef70ff7832a4fa0ffd1b696e312a
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I80YEI
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/…
---------------------------
The "write_fcsr()" macro uses wrong the positions for val and dest in
asm. Fix it!
Reported-by: Miao HAO <haomiao19(a)mails.ucas.ac.cn>
Signed-off-by: Qi Hu <huqi(a)loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/include/asm/loongarch.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 33a8fa446ba9..0b8c1bde008f 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -1521,7 +1521,7 @@ __BUILD_CSR_OP(tlbidx)
#define write_fcsr(dest, val) \
do { \
__asm__ __volatile__( \
- " movgr2fcsr %0, "__stringify(dest)" \n" \
+ " movgr2fcsr "__stringify(dest)", %0 \n" \
: : "r" (val)); \
} while (0)
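For illustration, movgr2fcsr moves a general-purpose register into a
floating-point control/status register, so the FCSR name must come
first. A rough sketch of the corrected expansion, assuming a
hypothetical destination that stringifies to $fcsr0 (the exact spelling
comes from __stringify(dest)):
__asm__ __volatile__(
	"	movgr2fcsr	$fcsr0, %0	\n"	/* destination FCSR first, then the GPR holding val */
	: : "r" (val));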
--
2.33.0

[PATCH openEuler-1.0-LTS] netfilter: nftables: exthdr: fix 4-byte stack OOB write
by Zhengchao Shao 13 Sep '23
From: Florian Westphal <fw(a)strlen.de>
mainline inclusion
from mainline-v6.6-rc1
commit fd94d9dadee58e09b49075240fe83423eb1dcd36
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I80I0G
CVE: CVE-2023-4881
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
If priv->len is a multiple of 4, then dst[len / 4] can write past
the destination array, which leads to stack corruption.
This construct is necessary to clean the remainder of the register
in case ->len is NOT a multiple of the register size, so make it
conditional just like nft_payload.c does.
The bug was added in the 4.1 cycle and then copied/inherited when
tcp/sctp and ip option support was added.
Bug reported by Zero Day Initiative project (ZDI-CAN-21950,
ZDI-CAN-21951, ZDI-CAN-21961).
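To make the overflow concrete, here is a minimal sketch, not from the
patch itself: it assumes a hypothetical four-register destination with
NFT_REG32_SIZE == 4, and src stands in for the packet bytes being copied.
	u32 dest[4];            /* 16-byte destination area */
	unsigned int len = 16;  /* priv->len, a multiple of the register size */

	/* old code: unconditional padding writes dest[4], 4 bytes past the array */
	dest[len / NFT_REG32_SIZE] = 0;

	/* fixed code: pad only when len leaves a partially-filled register */
	if (len % NFT_REG32_SIZE)
		dest[len / NFT_REG32_SIZE] = 0;
	memcpy(dest, src, len);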
Fixes: 49499c3e6e18 ("netfilter: nf_tables: switch registers to 32 bit addressing")
Fixes: 935b7f643018 ("netfilter: nft_exthdr: add TCP option matching")
Fixes: 133dc203d77d ("netfilter: nft_exthdr: Support SCTP chunks")
Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options")
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Conflicts:
net/netfilter/nft_exthdr.c
Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com>
---
net/netfilter/nft_exthdr.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 93fee4106019..07dd5a723d79 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -36,6 +36,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
return opt[offset + 1];
}
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
+ if (len % NFT_REG32_SIZE)
+ dest[len / NFT_REG32_SIZE] = 0;
+
+ return skb_copy_bits(skb, offset, dest, len);
+}
+
static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -57,8 +65,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -114,7 +121,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
*dest = 1;
} else {
- dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
memcpy(dest, opt + offset, priv->len);
}
--
2.34.1

[PATCH OLK-5.10] netfilter: nftables: exthdr: fix 4-byte stack OOB write
by Zhengchao Shao 13 Sep '23
From: Florian Westphal <fw(a)strlen.de>
mainline inclusion
from mainline-v6.6-rc1
commit fd94d9dadee58e09b49075240fe83423eb1dcd36
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I80I0G
CVE: CVE-2023-4881
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
If priv->len is a multiple of 4, then dst[len / 4] can write past
the destination array, which leads to stack corruption.
This construct is necessary to clean the remainder of the register
in case ->len is NOT a multiple of the register size, so make it
conditional just like nft_payload.c does.
The bug was added in the 4.1 cycle and then copied/inherited when
tcp/sctp and ip option support was added.
Bug reported by Zero Day Initiative project (ZDI-CAN-21950,
ZDI-CAN-21951, ZDI-CAN-21961).
Fixes: 49499c3e6e18 ("netfilter: nf_tables: switch registers to 32 bit addressing")
Fixes: 935b7f643018 ("netfilter: nft_exthdr: add TCP option matching")
Fixes: 133dc203d77d ("netfilter: nft_exthdr: Support SCTP chunks")
Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options")
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Conflicts:
net/netfilter/nft_exthdr.c
Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com>
---
net/netfilter/nft_exthdr.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 670dd146fb2b..ca268293cfa1 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -33,6 +33,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
return opt[offset + 1];
}
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
+ if (len % NFT_REG32_SIZE)
+ dest[len / NFT_REG32_SIZE] = 0;
+
+ return skb_copy_bits(skb, offset, dest, len);
+}
+
static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -54,8 +62,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -151,8 +158,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -208,7 +214,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
*dest = 1;
} else {
- dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
memcpy(dest, opt + offset, priv->len);
}
--
2.34.1

[PATCH openEuler-22.03-LTS-SP1] io_uring: ensure IOPOLL locks around deferred work
by Zhihao Cheng 13 Sep '23
From: Jens Axboe <axboe(a)kernel.dk>
stable inclusion
from stable-v5.10.188
commit 810e401b34c4c4c244d8b93b9947ea5b3d4d49f8
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7KXLN
CVE: CVE-2023-21400
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
No direct upstream commit exists for this issue. It was fixed in
5.18 as part of a larger rework of the completion side.
io_commit_cqring() writes the CQ ring tail to make it visible, but it
also kicks off any deferred work we have. A ring setup with IOPOLL
does not need any locking around the CQ ring updates, as we're always
under the ctx uring_lock. But if we have deferred work that needs
processing, then io_queue_deferred() assumes that the completion_lock
is held, as it is for !IOPOLL.
Add a lockdep assertion to check and document this fact, and have
io_iopoll_complete() check if we have deferred work and run that
separately with the appropriate lock grabbed.
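In outline (a sketch of the resulting flow, mirroring the hunks below
rather than defining them), the IOPOLL completion path becomes:

	/* flush deferred work under completion_lock, then publish the
	 * CQ ring tail; the tail update itself needs no extra locking
	 * on the IOPOLL path because ctx->uring_lock is already held.
	 */
	if (io_commit_needs_flush(ctx)) {
		spin_lock(&ctx->completion_lock);
		__io_commit_cqring_flush(ctx);
		spin_unlock(&ctx->completion_lock);
	}
	__io_commit_cqring(ctx);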
Cc: stable(a)vger.kernel.org # 5.10, 5.15
Reported-by: dghost david <daviduniverse18(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Lin Yujun <linyujun809(a)huawei.com>
Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
---
io_uring/io_uring.c | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3d35f5d13666..781af0b05d8c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1521,6 +1521,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
static void io_queue_deferred(struct io_ring_ctx *ctx)
{
+ lockdep_assert_held(&ctx->completion_lock);
+
while (!list_empty(&ctx->defer_list)) {
struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
struct io_defer_entry, list);
@@ -1572,14 +1574,24 @@ static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
io_queue_deferred(ctx);
}
-static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+static inline bool io_commit_needs_flush(struct io_ring_ctx *ctx)
+{
+ return ctx->off_timeout_used || ctx->drain_active;
+}
+
+static inline void __io_commit_cqring(struct io_ring_ctx *ctx)
{
- if (unlikely(ctx->off_timeout_used || ctx->drain_active))
- __io_commit_cqring_flush(ctx);
/* order cqe stores with ring update */
smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
}
+static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+ if (unlikely(io_commit_needs_flush(ctx)))
+ __io_commit_cqring_flush(ctx);
+ __io_commit_cqring(ctx);
+}
+
static inline bool io_sqring_full(struct io_ring_ctx *ctx)
{
struct io_rings *r = ctx->rings;
@@ -2509,7 +2521,12 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
io_req_free_batch(&rb, req, &ctx->submit_state);
}
- io_commit_cqring(ctx);
+ if (io_commit_needs_flush(ctx)) {
+ spin_lock(&ctx->completion_lock);
+ __io_commit_cqring_flush(ctx);
+ spin_unlock(&ctx->completion_lock);
+ }
+ __io_commit_cqring(ctx);
io_cqring_ev_posted_iopoll(ctx);
io_req_free_batch_finish(ctx, &rb);
}
--
2.31.1
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I80YXE
CVE: NA
----------------------------------------
Add UCC support for XPU.
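A minimal usage sketch of the xpu_group API introduced below (illustrative
only; the ids and the work/ctx arguments are hypothetical placeholders):

	/* build a two-level device/ts hierarchy under xpu_root and
	 * dispatch work through the group's ->opt->run() callback.
	 */
	struct xpu_group *dev_grp, *ts_grp;

	dev_grp = xpu_group_alloc_and_attach(xpu_root, 0);	/* device 0 */
	ts_grp = dev_grp ? xpu_group_alloc_and_attach(dev_grp, 0) : NULL;
	if (ts_grp)
		xpu_run(ts_grp, work, ctx);	/* calls group->opt->run() */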
Signed-off-by: Chen Hui <judy.chenhui(a)huawei.com>
Signed-off-by: Yang Yanchao <yangyanchao6(a)huawei.com>
Signed-off-by: Hui Tang <tanghui20(a)huawei.com>
Signed-off-by: Guan Jing <guanjing6(a)huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie(a)huawei.com>
---
Kconfig | 2 +
drivers/Kconfig | 2 +
drivers/Makefile | 1 +
drivers/xpu/Kconfig | 9 +
drivers/xpu/Makefile | 1 +
drivers/xpu/xpu_group.c | 175 ++++++++
fs/proc/base.c | 102 ++++-
include/linux/sched.h | 3 +
include/linux/ucc_common.h | 21 +
include/linux/ucc_kfd.h | 110 +++++
include/linux/ucc_sched.h | 36 ++
include/linux/ucc_sched/ucc_sched.h | 71 +++
include/linux/ucc_ts.h | 254 +++++++++++
include/linux/vstream.h | 123 ++++++
include/linux/xpu_group.h | 66 +++
include/trace/events/ucc_sched.h | 120 +++++
init/init_task.c | 4 +
init/main.c | 9 +
kernel/Makefile | 2 +
kernel/sched/Makefile | 1 +
kernel/sched/core.c | 5 +
kernel/sched/ucc_sched.c | 148 +++++++
kernel/sysctl.c | 17 +-
kernel/ucc/Kconfig | 21 +
kernel/ucc/Makefile | 1 +
kernel/ucc/ascend_vstream.c | 654 ++++++++++++++++++++++++++++
kernel/ucc/ascend_vstream.h | 13 +
kernel/ucc/vstream.c | 62 +++
kernel/ucc_sched/Makefile | 1 +
kernel/ucc_sched/core.c | 591 +++++++++++++++++++++++++
kernel/ucc_sched/ucc_sched.h | 43 ++
31 files changed, 2666 insertions(+), 2 deletions(-)
create mode 100644 drivers/xpu/Kconfig
create mode 100644 drivers/xpu/Makefile
create mode 100644 drivers/xpu/xpu_group.c
create mode 100644 include/linux/ucc_common.h
create mode 100644 include/linux/ucc_kfd.h
create mode 100644 include/linux/ucc_sched.h
create mode 100644 include/linux/ucc_sched/ucc_sched.h
create mode 100644 include/linux/ucc_ts.h
create mode 100644 include/linux/vstream.h
create mode 100644 include/linux/xpu_group.h
create mode 100644 include/trace/events/ucc_sched.h
create mode 100644 kernel/sched/ucc_sched.c
create mode 100644 kernel/ucc/Kconfig
create mode 100644 kernel/ucc/Makefile
create mode 100644 kernel/ucc/ascend_vstream.c
create mode 100644 kernel/ucc/ascend_vstream.h
create mode 100644 kernel/ucc/vstream.c
create mode 100644 kernel/ucc_sched/Makefile
create mode 100644 kernel/ucc_sched/core.c
create mode 100644 kernel/ucc_sched/ucc_sched.h
diff --git a/Kconfig b/Kconfig
index 48a80beab685..8e558777fb54 100644
--- a/Kconfig
+++ b/Kconfig
@@ -30,3 +30,5 @@ source "crypto/Kconfig"
source "lib/Kconfig"
source "lib/Kconfig.debug"
+
+source "kernel/ucc/Kconfig"
diff --git a/drivers/Kconfig b/drivers/Kconfig
index ab4d43923c4d..bd59e9e525ba 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -219,4 +219,6 @@ source "drivers/siox/Kconfig"
source "drivers/slimbus/Kconfig"
+source "drivers/xpu/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 578f469f72fb..1130b2d92df1 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -186,3 +186,4 @@ obj-$(CONFIG_MULTIPLEXER) += mux/
obj-$(CONFIG_UNISYS_VISORBUS) += visorbus/
obj-$(CONFIG_SIOX) += siox/
obj-$(CONFIG_GNSS) += gnss/
+obj-$(CONFIG_XPU_SCHEDULE) += xpu/
diff --git a/drivers/xpu/Kconfig b/drivers/xpu/Kconfig
new file mode 100644
index 000000000000..c4a391d0039d
--- /dev/null
+++ b/drivers/xpu/Kconfig
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menuconfig XPU_SCHEDULE
+ bool "xpu schedule"
+ default n
+ help
+	  Support XPU scheduling. Say Y here if you want support for
+	  XPU scheduling.
+
diff --git a/drivers/xpu/Makefile b/drivers/xpu/Makefile
new file mode 100644
index 000000000000..9edc6dcdd4d0
--- /dev/null
+++ b/drivers/xpu/Makefile
@@ -0,0 +1 @@
+obj-y += xpu_group.o
diff --git a/drivers/xpu/xpu_group.c b/drivers/xpu/xpu_group.c
new file mode 100644
index 000000000000..53a598db0615
--- /dev/null
+++ b/drivers/xpu/xpu_group.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/xpu_group.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+
+extern int ucc_rt_nr_running(struct xcu *cu);
+static DECLARE_RWSEM(xpu_group_rwsem);
+
+static struct xpu_capability xpu_capability_root;
+
+struct xpu_group __xpu_root = {
+ .type = XPU_TYPE_ROOT,
+ .capability = &xpu_capability_root,
+
+ .next_layer = IDR_INIT(next_layer),
+};
+
+struct xpu_group *xpu_root = &__xpu_root;
+EXPORT_SYMBOL(xpu_root);
+
+int __xpu_group_attach(struct xpu_group *new_group,
+ struct xpu_group *previous_group)
+{
+ int id = new_group->id;
+
+ if (id == -1)
+ id = idr_alloc(&previous_group->next_layer, new_group,
+ 0, INT_MAX, GFP_KERNEL);
+ else
+ id = idr_alloc(&previous_group->next_layer, new_group,
+ id, id + 1, GFP_KERNEL);
+ if (id < 0)
+ return -EEXIST;
+
+ new_group->id = id;
+ new_group->previous_layer = previous_group;
+
+ return 0;
+}
+
+int xpu_group_attach(struct xpu_group *new_group,
+ struct xpu_group *previous_group)
+{
+ int ret;
+
+ down_write(&xpu_group_rwsem);
+ ret = __xpu_group_attach(new_group, previous_group);
+ up_write(&xpu_group_rwsem);
+ return ret;
+}
+EXPORT_SYMBOL(xpu_group_attach);
+
+struct xpu_group *xpu_group_alloc_and_attach(struct xpu_group *previous_group,
+ int id)
+{
+ struct xpu_group *new = xpu_group_alloc();
+
+ if (!new) {
+ pr_err("alloc xpu_group failed\n");
+ return NULL;
+ }
+
+ new->id = id;
+
+	if (xpu_group_attach(new, previous_group)) {
+		kfree(new);
+		return NULL;
+	}
+
+ return new;
+}
+EXPORT_SYMBOL(xpu_group_alloc_and_attach);
+
+int __xpu_group_detach(struct xpu_group *group)
+{
+ idr_remove(&group->previous_layer->next_layer, group->id);
+ return 0;
+}
+
+int xpu_group_detach(struct xpu_group *group)
+{
+ int ret;
+
+ down_write(&xpu_group_rwsem);
+ ret = __xpu_group_detach(group);
+ up_write(&xpu_group_rwsem);
+ return ret;
+}
+EXPORT_SYMBOL(xpu_group_detach);
+
+struct xpu_group *__xpu_group_find(struct xpu_group *group, int id)
+{
+ return idr_find(&group->next_layer, id);
+}
+
+struct xpu_group *xpu_group_find(struct xpu_group *group, int id)
+{
+ struct xpu_group *p;
+
+ down_read(&xpu_group_rwsem);
+ p = __xpu_group_find(group, id);
+ up_read(&xpu_group_rwsem);
+
+ return p;
+}
+EXPORT_SYMBOL(xpu_group_find);
+
+
+struct xpu_group *xpu_idle_group_find(struct xpu_group *group)
+{
+ struct xpu_group *entry_group;
+ int id;
+
+ down_read(&xpu_group_rwsem);
+ idr_for_each_entry(&group->next_layer, entry_group, id) {
+ if (!entry_group->used) {
+ up_read(&xpu_group_rwsem);
+ return entry_group;
+ }
+ }
+ up_read(&xpu_group_rwsem);
+
+ return NULL;
+}
+
+int xpu_run(struct xpu_group *group, void *para1, void *para2)
+{
+ int ret = 0;
+
+ if (group->opt && group->opt->run)
+ ret = group->opt->run(group, para1, para2);
+
+ return ret;
+}
+
+int xpu_finish(struct xpu_group *group, void *para1, void *para2)
+{
+ if (group->opt && group->opt->finish)
+ return group->opt->finish(group, para1, para2);
+
+ return 0;
+}
+
+int xpu_wait(struct xpu_group *group, void *para1, void *para2, void *para3)
+{
+ if (group->opt && group->opt->wait)
+ return group->opt->wait(group, para1, para2, para3);
+
+ return 0;
+}
+
+int xpu_complete(struct xpu_group *group, void *para1, void *para2, void *para3)
+{
+ if (group->opt && group->opt->complete)
+ return group->opt->complete(group, para1, para2, para3);
+
+ return 0;
+}
+
+struct xpu_group *xpu_group_alloc(void)
+{
+ struct xpu_group *node = kzalloc(sizeof(*node), GFP_KERNEL);
+
+ if (!node)
+ return NULL;
+
+ node->type = XPU_TYPE_CUSTOM;
+ idr_init(&node->next_layer);
+
+ return node;
+}
+EXPORT_SYMBOL(xpu_group_alloc);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dc9841826264..516eee1ae952 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -770,7 +770,6 @@ static const struct file_operations proc_single_file_operations = {
.release = single_release,
};
-
struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
{
struct task_struct *task = get_proc_task(inode);
@@ -1546,6 +1545,99 @@ static const struct file_operations proc_pid_sched_operations = {
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+static ssize_t ucc_step_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char numbuf[PROC_NUMBUF];
+ ssize_t len;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+
+ len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->ucc_step);
+
+ put_task_struct(task);
+
+ return simple_read_from_buffer(buf, count, ppos, numbuf, len);
+}
+
+static ssize_t ucc_step_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ int err;
+ unsigned int ucc_step;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+	err = kstrtouint_from_user(buf, count, 0, &ucc_step);
+	if (err) {
+		put_task_struct(p);
+		return err;
+	}
+
+ p->ucc_step = ucc_step;
+ put_task_struct(p);
+
+ return count;
+}
+
+static const struct file_operations ucc_step_operations = {
+ .write = ucc_step_write,
+ .read = ucc_step_read,
+};
+
+static ssize_t ucc_priority_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char numbuf[PROC_NUMBUF];
+ ssize_t len;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+
+ len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->ucc_priority);
+
+ put_task_struct(task);
+
+ return simple_read_from_buffer(buf, count, ppos, numbuf, len);
+}
+
+static ssize_t ucc_priority_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ int err;
+ unsigned int ucc_priority;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+	err = kstrtouint_from_user(buf, count, 0, &ucc_priority);
+	if (err) {
+		put_task_struct(p);
+		return err;
+	}
+
+ p->ucc_priority = ucc_priority;
+ put_task_struct(p);
+
+ return count;
+}
+
+static const struct file_operations ucc_priority_operations = {
+ .write = ucc_priority_write,
+ .read = ucc_priority_read,
+};
+
+#endif
+
#ifdef CONFIG_SCHED_AUTOGROUP
/*
* Print out autogroup related information:
@@ -3151,6 +3243,10 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_ASCEND_SHARE_POOL
ONE("sp_group", S_IRUGO, proc_sp_group_state),
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+ REG("ucc_priority", 0644, ucc_priority_operations),
+ REG("ucc_step", 0644, ucc_step_operations),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3537,6 +3633,10 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_ASCEND_SHARE_POOL
ONE("sp_group", S_IRUGO, proc_sp_group_state),
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+ REG("ucc_priority", 0644, ucc_priority_operations),
+ REG("ucc_step", 0644, ucc_step_operations),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8fd8c5b7cdc6..175659be95f3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1281,6 +1281,9 @@ struct task_struct {
#if !defined(__GENKSYMS__)
#if defined(CONFIG_QOS_SCHED_SMART_GRID)
struct sched_grid_qos *grid_qos;
+#elif defined(CONFIG_XPU_SCHEDULE)
+ u32 ucc_priority;
+ u32 ucc_step;
#else
KABI_RESERVE(8)
#endif
diff --git a/include/linux/ucc_common.h b/include/linux/ucc_common.h
new file mode 100644
index 000000000000..3875c2226d24
--- /dev/null
+++ b/include/linux/ucc_common.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _UCC_COMMON_H
+#define _UCC_COMMON_H
+
+/*
+ * UCC Print Function
+ */
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define ucc_err(fmt, ...) printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+
+#define ucc_warn(fmt, ...) printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+
+#define ucc_info(fmt, ...) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+
+#define ucc_dbg(fmt, ...) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+
+#endif
diff --git a/include/linux/ucc_kfd.h b/include/linux/ucc_kfd.h
new file mode 100644
index 000000000000..07eedc2fd5f2
--- /dev/null
+++ b/include/linux/ucc_kfd.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef KFD_PRIV_H_INCLUDED
+#define KFD_PRIV_H_INCLUDED
+
+#include <linux/mmu_notifier.h>
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/mmu_notifier.h>
+#include <linux/idr.h>
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+struct process_queue_manager;
+struct kfd_process;
+struct kfd_signal_page;
+
+struct process_queue_manager {
+ struct kfd_process *process;
+ struct list_head queues;
+ unsigned long *queue_slot_bitmap;
+};
+
+struct kfd_signal_page {
+ uint64_t *kernel_address;
+ uint64_t __user *user_address;
+ bool need_to_free_pages;
+};
+
+/* Process data */
+struct kfd_process {
+ struct hlist_node kfd_processes;
+ void *mm;
+ struct kref ref;
+ struct work_struct release_work;
+ struct mutex mutex;
+ struct task_struct *lead_thread;
+ struct mmu_notifier mmu_notifier;
+/* TODO: check if use right branch */
+ struct rcu_head rcu;
+ uint16_t pasid;
+ struct list_head per_device_data;
+ struct process_queue_manager pqm;
+ bool is_32bit_user_mode;
+ struct mutex event_mutex;
+ struct idr event_idr;
+ struct kfd_signal_page *signal_page;
+ size_t signal_mapped_size;
+ size_t signal_event_count;
+ bool signal_event_limit_reached;
+/* TODO: check if use right branch */
+ struct rb_root bo_interval_tree;
+ void *kgd_process_info;
+ struct dma_fence *ef;
+ struct delayed_work eviction_work;
+ struct delayed_work restore_work;
+ unsigned int last_eviction_seqno;
+ unsigned long last_restore_timestamp;
+ unsigned long last_evict_timestamp;
+ bool debug_trap_enabled;
+ uint32_t trap_debug_wave_launch_mode;
+ struct file *dbg_ev_file;
+ uint32_t allocated_debug_watch_point_bitmask;
+ struct kobject *kobj;
+ struct kobject *kobj_queues;
+ struct attribute attr_pasid;
+ bool has_cwsr;
+ uint64_t exception_enable_mask;
+ uint64_t exception_status;
+};
+
+struct kfd_ioctl_create_queue_args {
+ __u64 ring_base_address; /* to KFD */
+ __u64 write_pointer_address; /* from KFD */
+ __u64 read_pointer_address; /* from KFD */
+ __u64 doorbell_offset; /* from KFD */
+
+ __u32 ring_size; /* to KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 queue_type; /* to KFD */
+ __u32 queue_percentage; /* to KFD */
+ __u32 queue_priority; /* to KFD */
+ __u32 queue_id; /* from KFD */
+
+ __u64 eop_buffer_address; /* to KFD */
+ __u64 eop_buffer_size; /* to KFD */
+ __u64 ctx_save_restore_address; /* to KFD */
+ __u32 ctx_save_restore_size; /* to KFD */
+ __u32 ctl_stack_size; /* to KFD */
+};
+
+struct kfd_ioctl_destroy_queue_args {
+ __u32 queue_id; /* to KFD */
+ __u32 pad;
+};
+
+struct kfd_ioctl_update_queue_args {
+ __u64 ring_base_address; /* to KFD */
+
+ __u32 queue_id; /* to KFD */
+ __u32 ring_size; /* to KFD */
+ __u32 queue_percentage; /* to KFD */
+ __u32 queue_priority; /* to KFD */
+};
+#endif
diff --git a/include/linux/ucc_sched.h b/include/linux/ucc_sched.h
new file mode 100644
index 000000000000..5b170545f7c2
--- /dev/null
+++ b/include/linux/ucc_sched.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LINUX_UCC_SCHED_H__
+#define __LINUX_UCC_SCHED_H__
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/hash.h>
+#include <linux/rculist.h>
+#include <linux/idr.h>
+#include <linux/xpu_group.h>
+#include <linux/hashtable.h>
+#include <linux/vstream.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#define VRTSQ_RTSQ_HASH_ORDER 6
+
+#ifdef CONFIG_XPU_SCHEDULE
+int ucc_process_task(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx,
+ int *sqenum);
+int ucc_free_task(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx);
+int ucc_wait_cq(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx,
+ struct devdrv_report_para *arg, int *sqenum);
+struct xpu_group *select_sq(struct vstream_info *vstream_info);
+int ucc_sched_register_xcu(int dev_id, int ts_id, int cu_num);
+void ucc_set_vstream_state(struct vstream_info *vinfo, int state);
+void ucc_dequeue_task(struct vstream_info *vInfo);
+int ucc_rt_nr_running(struct xcu *cu);
+struct xcu *ucc_get_xcu_by_id(int cu_id);
+int ucc_xcu_is_sched(int cu_id);
+void ucc_dump_statistics_info(struct ucc_se *se);
+#endif
+
+#endif
diff --git a/include/linux/ucc_sched/ucc_sched.h b/include/linux/ucc_sched/ucc_sched.h
new file mode 100644
index 000000000000..6edd8930e09e
--- /dev/null
+++ b/include/linux/ucc_sched/ucc_sched.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2019. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Mon Jan 30 14:29:19 2023
+ */
+
+#ifndef __LINUX_UCC_SCHED_USCHED_H__
+#define __LINUX_UCC_SCHED_USCHED_H__
+
+enum ucc_se_state {
+ SE_PREPARE,
+ SE_READY,
+ SE_RUNNING,
+ SE_BLOCK,
+ SE_DEAD,
+};
+
+enum ucc_se_flag {
+ UCC_TIF_NONE,
+ UCC_TIF_PREEMPT,
+ UCC_TIF_BALANCE,
+};
+
+enum ucc_se_prio {
+ UCC_PRIO_HIGH,
+ UCC_PRIO_LOW,
+};
+
+enum ucc_se_step {
+ UCC_STEP_SLOW = 1,
+ UCC_STEP_FAST = 10,
+};
+
+struct ucc_statistics {
+ u64 wait_start;
+ u64 wait_max;
+ u64 wait_count;
+ u64 wait_sum;
+
+ u64 preempt_start;
+ u64 preempt_max;
+ u64 preempt_count;
+ u64 preempt_sum;
+
+ u64 kernel_sum;
+ u64 timeout_count;
+
+ u64 run_start;
+ u64 run_max;
+ u64 run_count;
+ u64 run_sum;
+};
+
+struct ucc_se {
+ int on_cu;
+ struct list_head run_list;
+ enum ucc_se_state state;
+ enum ucc_se_flag flag;
+ enum ucc_se_prio prio;
+ enum ucc_se_step step;
+ raw_spinlock_t se_lock;
+ struct ucc_statistics statistics;
+ int is_timeout;
+};
+
+int ucc_sched_init(void);
+int ucc_schedule(int cu_id);
+int ucc_wake_up(struct ucc_se *se);
+
+#endif
diff --git a/include/linux/ucc_ts.h b/include/linux/ucc_ts.h
new file mode 100644
index 000000000000..7280ccca1059
--- /dev/null
+++ b/include/linux/ucc_ts.h
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef TS_H
+#define TS_H
+
+#include <linux/file.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+
+#define DEVDRV_MAX_SQ_DEPTH (1024)
+#define DEVDRV_SQ_SLOT_SIZE (64)
+
+#define DEVDRV_MAX_SQ_NUM (512 - 1)
+#define DEVDRV_MAX_CQ_NUM (352 - 1)
+
+#define DEVDRV_MAX_TS_NUM (1)
+
+#define REMAP_ALIGN_SIZE (64 * 1024)
+#define REMAP_ALIGN_MASK (~(REMAP_ALIGN_SIZE - 1))
+#define REMAP_ALIGN(x) (((x) + REMAP_ALIGN_SIZE - 1) & \
+ REMAP_ALIGN_MASK)
+
+#define DEVDRV_DB_SPACE_SIZE (1024 * 4096)
+
+#define SQCQ_RTS_INFO_LENGTH 5
+#define SQCQ_RESV_LENGTH 8
+
+#define DEVDRV_CBCQ_MAX_GID 128
+
+enum phy_sqcq_type {
+ NORMAL_SQCQ_TYPE = 0,
+ CALLBACK_SQCQ_TYPE,
+ LOGIC_SQCQ_TYPE,
+ SHM_SQCQ_TYPE,
+ DFX_SQCQ_TYPE,
+ TS_SQCQ_TYPE,
+ KERNEL_SQCQ_TYPE,
+};
+
+struct notifier_operations {
+ int (*notifier_call)(struct file *file_op, unsigned long mode);
+};
+
+#define MAX_DEVICE_COUNT 64
+
+struct davinci_intf_stru {
+ atomic_t count;
+ struct mutex dmutex;
+ struct cdev cdev;
+ struct device *device;
+ struct list_head process_list;
+ struct list_head module_list;
+ unsigned int device_status[MAX_DEVICE_COUNT];
+ cpumask_var_t cpumask;
+};
+
+#define DAVINIC_MODULE_NAME_MAX 256
+struct davinci_intf_private_stru {
+ char module_name[DAVINIC_MODULE_NAME_MAX];
+ unsigned int device_id;
+ pid_t owner_pid;
+ int close_flag;
+ atomic_t work_count;
+ int release_status;
+ struct mutex fmutex;
+ const struct file_operations fops;
+ struct notifier_operations notifier;
+ struct davinci_intf_stru *device_cb;
+ struct file priv_filep;
+ unsigned int free_type;
+};
+
+enum sqcq_alloc_status {
+ SQCQ_INACTIVE = 0,
+ SQCQ_ACTIVE
+};
+
+struct devdrv_ts_sq_info {
+ enum phy_sqcq_type type;
+ pid_t tgid;
+ u32 head;
+ u32 tail;
+ u32 credit;
+ u32 index;
+ int uio_fd;
+
+ u8 *uio_addr;
+ int uio_size;
+
+ enum sqcq_alloc_status alloc_status;
+ u64 send_count;
+
+ void *sq_sub;
+};
+
+struct devdrv_ts_cq_info {
+ enum phy_sqcq_type type;
+ pid_t tgid;
+ u32 vfid;
+
+ u32 head;
+ u32 tail;
+ u32 release_head; /* runtime read cq head value */
+ u32 index;
+ u32 phase;
+ u32 int_flag;
+
+ int uio_fd;
+
+ u8 *uio_addr;
+ int uio_size;
+
+ enum sqcq_alloc_status alloc_status;
+ u64 receive_count;
+
+ void *cq_sub;
+
+ void (*complete_handle)(struct devdrv_ts_cq_info *cq_info);
+
+ u8 slot_size;
+};
+
+#define DEVDRV_SQ_INFO_OCCUPY_SIZE \
+ (sizeof(struct devdrv_ts_sq_info) * DEVDRV_MAX_SQ_NUM)
+#define DEVDRV_CQ_INFO_OCCUPY_SIZE \
+ (sizeof(struct devdrv_ts_cq_info) * DEVDRV_MAX_CQ_NUM)
+
+#define DEVDRV_MAX_INFO_SIZE \
+ (DEVDRV_SQ_INFO_OCCUPY_SIZE + DEVDRV_CQ_INFO_OCCUPY_SIZE)
+#define DEVDRV_VM_SQ_MEM_OFFSET 0
+#define DEVDRV_VM_SQ_SLOT_SIZE \
+ REMAP_ALIGN(DEVDRV_MAX_SQ_DEPTH * DEVDRV_SQ_SLOT_SIZE)
+#define DEVDRV_VM_SQ_MEM_SIZE \
+ (DEVDRV_VM_SQ_SLOT_SIZE * DEVDRV_MAX_SQ_NUM)
+
+#define DEVDRV_VM_INFO_MEM_OFFSET \
+ (DEVDRV_VM_SQ_MEM_OFFSET + DEVDRV_VM_SQ_MEM_SIZE)
+#define DEVDRV_VM_INFO_MEM_SIZE REMAP_ALIGN(DEVDRV_MAX_INFO_SIZE)
+
+#define DEVDRV_VM_DB_MEM_OFFSET \
+ (DEVDRV_VM_INFO_MEM_OFFSET + DEVDRV_VM_INFO_MEM_SIZE)
+#define DEVDRV_VM_DB_MEM_SIZE REMAP_ALIGN(DEVDRV_DB_SPACE_SIZE)
+
+#define DEVDRV_VM_CQ_MEM_OFFSET \
+ (DEVDRV_VM_DB_MEM_OFFSET + DEVDRV_VM_DB_MEM_SIZE)
+
+enum tsdrv_id_type {
+ TSDRV_STREAM_ID,
+ TSDRV_NOTIFY_ID,
+ TSDRV_MODEL_ID,
+ TSDRV_EVENT_SW_ID, /* should use for event alloc/free/inquiry res_num*/
+ TSDRV_EVENT_HW_ID,
+ TSDRV_IPC_EVENT_ID,
+ TSDRV_SQ_ID,
+ TSDRV_CQ_ID,
+ TSDRV_PCQ_ID,
+ TSDRV_MAX_ID,
+};
+
+#define TSDRV_CQ_REUSE 0x00000001
+#define TSDRV_SQ_REUSE 0x00000002
+
+struct normal_alloc_sqcq_para {
+ uint32_t fd;
+ uint32_t tsId;
+ uint32_t devId;
+ uint32_t sqeSize;
+ uint32_t cqeSize;
+ uint32_t sqeDepth;
+ uint32_t cqeDepth;
+ uint32_t grpId;
+ uint32_t flag;
+ uint32_t sqId;
+ uint32_t cqId;
+ uint32_t priority;
+ uint32_t info[SQCQ_RTS_INFO_LENGTH];
+ uint32_t res[SQCQ_RESV_LENGTH];
+};
+
+struct normal_free_sqcq_para {
+ uint32_t tsId;
+ uint32_t flag;
+ uint32_t sqId;
+ uint32_t cqId;
+ uint32_t res[SQCQ_RESV_LENGTH];
+};
+
+struct tsdrv_sqcq_data_para {
+ uint32_t id;
+ uint32_t val;
+};
+
+struct devdrv_report_para {
+ int timeout;
+ u32 cq_tail;
+ u32 cq_id;
+};
+
+struct tsdrv_ts_id_ctx {
+ u32 id_num;
+ struct list_head id_list;
+ spinlock_t id_lock;
+};
+struct tsdrv_ts_ctx {
+ u32 tsid;
+ atomic_t status;
+ u32 send_count;
+ u64 receive_count;
+
+ int32_t cq_tail_updated;
+ wait_queue_head_t report_wait;
+
+ struct work_struct recycle_work;
+
+ wait_queue_head_t cbcq_wait[DEVDRV_CBCQ_MAX_GID];
+
+ void *shm_sqcq_ctx;
+ void *logic_sqcq_ctx;
+ void *sync_cb_sqcq_ctx; // mini callback
+
+ struct tsdrv_ts_id_ctx id_ctx[TSDRV_MAX_ID];
+
+ /* only used by vm */
+ u32 vcqid;
+ u32 wait_queue_inited;
+ u32 cq_report_status;
+ int32_t cq_tail;
+ spinlock_t ctx_lock;
+
+	u32 recycle_cbsqcq_num; // mini callback
+};
+
+//Context Delivers
+struct tsdrv_ctx {
+ u32 ctx_index;
+ atomic_t status;
+ atomic_t type;
+ pid_t tgid;
+ pid_t pid;
+ int32_t ssid;
+ u32 thread_bind_irq_num;
+ u32 mirror_ctx_status;
+ struct rb_node node;
+ struct list_head list;
+ struct vm_area_struct *vma[DEVDRV_MAX_TS_NUM];
+ spinlock_t ctx_lock;
+ struct mutex mutex_lock;
+ struct tsdrv_ts_ctx ts_ctx[DEVDRV_MAX_TS_NUM];
+
+ u64 unique_id; /* mark unique processes for vm */
+};
+
+#endif
diff --git a/include/linux/vstream.h b/include/linux/vstream.h
new file mode 100644
index 000000000000..14d799296053
--- /dev/null
+++ b/include/linux/vstream.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VSTREAM_H
+#define _LINUX_VSTREAM_H
+
+#include <linux/ucc_kfd.h>
+#include <linux/ucc_sched/ucc_sched.h>
+#include <linux/ucc_ts.h>
+
+#define MAX_VSTREAM_SIZE 1024
+#define MAX_VSTREAM_SLOT_SIZE 64
+#define MAX_CQ_SLOT_SIZE 12
+
+/*
+ * XXX_VSTREAM_ALLOC: alloc a vstream, buffer for tasks
+ * XXX_VSTREAM_FREE: free a vstream
+ * XXX_VSTREAM_KICK: there are tasks to be executed in the vstream
+ * XXX_VSTREAM_UPDATE: update information for an existing vstream
+ * XXX_CALLBACK_VSTREAM_WAIT: waiting for callback tasks
+ * XXX_CALLBACK_VSTREAM_KICK: callback tasks have been executed
+ *
+ * NOTE: Callback vstream is only for Ascend now. We do not need
+ * CALLBACK_VSTREAM_ALLOC because the callback vstream will be
+ * allocated together with the vstream on Ascend.
+ */
+enum VSTREAM_COMMAND {
+ /* vstream command for Ascend */
+ ASCEND_VSTREAM_ALLOC = 0,
+ ASCEND_VSTREAM_FREE,
+ ASCEND_VSTREAM_KICK,
+ ASCEND_CALLBACK_VSTREAM_WAIT,
+ ASCEND_CALLBACK_VSTREAM_KICK,
+ ASCEND_VSTREAM_GET_HEAD,
+ ASCEND_MAX_COMMAND,
+
+ /* vstream command for amdgpu */
+ AMDGPU_VSTREAM_ALLOC = ASCEND_MAX_COMMAND + 1,
+ AMDGPU_VSTREAM_FREE,
+ AMDGPU_VSTREAM_KICK,
+ AMDGPU_VSTREAM_UPDATE,
+ AMDGPU_MAX_COMMAND,
+};
+
+struct vstream_alloc_args {
+ union {
+ /* For Ascend */
+ struct normal_alloc_sqcq_para ascend;
+ /* For amdgpu */
+ struct kfd_ioctl_create_queue_args amdgpu;
+ };
+};
+
+struct vstream_free_args {
+ union {
+ /* For Ascend */
+ struct normal_free_sqcq_para ascend;
+ /* For amdgpu */
+ struct kfd_ioctl_destroy_queue_args amdgpu;
+ };
+};
+
+struct vstream_kick_args {
+ union {
+ /* For Ascend */
+ struct tsdrv_sqcq_data_para ascend;
+ /* For amdgpu */
+ };
+};
+
+struct vstream_args {
+ union {
+ struct vstream_alloc_args va_args;
+ struct vstream_free_args vf_args;
+ struct vstream_kick_args vk_args;
+ struct kfd_ioctl_update_queue_args vu_args;
+ struct tsdrv_sqcq_data_para vh_args;
+ struct devdrv_report_para cvw_args;
+ struct tsdrv_sqcq_data_para cvk_args;
+ };
+};
+
+struct vstream_node {
+ uint32_t id;
+ uint32_t head;
+ uint32_t tail;
+ uint32_t credit;
+ void *vstreamData;
+ raw_spinlock_t spin_lock;
+};
+
+struct vstream_id {
+ uint32_t vstreamId;
+ struct list_head list;
+};
+
+struct vcq_map_table {
+ uint32_t vcqId;
+ struct vstream_node *vcqNode;
+ struct list_head vstreamId_list;
+};
+
+struct vstream_info {
+ uint32_t vstreamId; //key
+ uint32_t vcqId;
+ uint32_t devId;
+ uint32_t tsId;
+ struct ucc_se se;
+ //TODO::check name
+ struct vstream_node *vsqNode;
+ struct vstream_node *vcqNode;
+ void *privdata;
+ uint32_t info[SQCQ_RTS_INFO_LENGTH];
+ int cu_id;
+ struct xpu_group *group;
+ int send_cnt;
+ struct task_struct *p;
+};
+
+typedef int vstream_manage_t(struct vstream_args *arg);
+int update_vstream_head(struct vstream_info *vstream_info, int num);
+struct vstream_info *vstream_get_info(uint32_t id);
+bool vstream_have_kernel(struct ucc_se *se);
+
+#endif /* _LINUX_VSTREAM_H */
diff --git a/include/linux/xpu_group.h b/include/linux/xpu_group.h
new file mode 100644
index 000000000000..5e3a96b15f9c
--- /dev/null
+++ b/include/linux/xpu_group.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __XPU_GROUP_H__
+#define __XPU_GROUP_H__
+#include <linux/idr.h>
+
+struct xpu_group;
+struct xcu;
+
+enum xpu_type {
+ XPU_TYPE_ROOT,
+ XPU_TYPE_TASK_QUEUE,
+ XPU_TYPE_NPU_310,
+ XPU_TYPE_CUSTOM,
+};
+
+enum xpu_capability_type {
+ TYPE_1,
+ XPU_CAPABILITY_TYPE_NR,
+};
+
+struct xpu_capability {
+ unsigned long capacities[XPU_CAPABILITY_TYPE_NR];
+};
+
+struct xpu_operation {
+ int (*run)(struct xpu_group *group, void *para1, void *para2);
+ int (*finish)(struct xpu_group *group, void *para1, void *para2);
+ int (*wait)(struct xpu_group *group, void *para1, void *para2,
+ void *para3);
+ int (*complete)(struct xpu_group *group, void *para1, void *para2,
+ void *para3);
+};
+
+struct xpu_group {
+ int id;
+ enum xpu_type type;
+ struct xpu_capability *capability;
+
+ struct xpu_group *previous_layer;
+ struct idr next_layer;
+
+ struct xpu_operation *opt;
+
+ int used;
+
+ void *data;
+};
+
+extern struct xpu_group *xpu_root;
+
+#ifdef CONFIG_XPU_SCHEDULE
+int xpu_group_attach(struct xpu_group *new_group,
+ struct xpu_group *previous_group);
+int xpu_group_detach(struct xpu_group *group);
+struct xpu_group *xpu_group_find(struct xpu_group *group, int id);
+struct xpu_group *xpu_idle_group_find(struct xpu_group *group);
+struct xpu_group *xpu_group_alloc(void);
+struct xpu_group *xpu_group_alloc_and_attach(struct xpu_group *previous_group,
+ int id);
+int xpu_run(struct xpu_group *group, void *para1, void *para2);
+int xpu_finish(struct xpu_group *group, void *para1, void *para2);
+int xpu_wait(struct xpu_group *group, void *para1, void *para2, void *para3);
+#endif
+
+#endif
diff --git a/include/trace/events/ucc_sched.h b/include/trace/events/ucc_sched.h
new file mode 100644
index 000000000000..104a39b2f41c
--- /dev/null
+++ b/include/trace/events/ucc_sched.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ucc_sched
+
+#if !defined(_TRACE_UCC_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_UCC_SCHED_H
+
+#include <linux/tracepoint.h>
+#include <linux/binfmts.h>
+
+/*
+ * XXX the below ucc_sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
+ * adding ucc_sched_stat support to SCHED_FIFO/RR would be welcome.
+ */
+DECLARE_EVENT_CLASS(ucc_sched_stat_template,
+
+ TP_PROTO(struct vstream_info *vinfo, u64 delay),
+
+ TP_ARGS(vinfo, delay),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(int, cu_id)
+ __field(u32, vstreamId)
+ __field(u32, prio)
+ __field(u64, delay)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, vinfo->p->comm, TASK_COMM_LEN);
+ __entry->pid = vinfo->p->pid;
+ __entry->cu_id = vinfo->cu_id;
+ __entry->vstreamId = vinfo->vstreamId;
+ __entry->prio = vinfo->p->ucc_priority;
+ __entry->delay = delay;
+ ),
+
+ TP_printk("comm=%s pid=%d cu_id=%d vstreamId %u prio %u, delay=%llu [ns]",
+ __entry->comm, __entry->pid,
+ __entry->cu_id, __entry->vstreamId, __entry->prio,
+ (unsigned long long)__entry->delay)
+);
+
+DECLARE_EVENT_CLASS(ucc_sched_stat_template_1,
+
+ TP_PROTO(struct vstream_info *vinfo, u64 delay, int is_timeout),
+
+ TP_ARGS(vinfo, delay, is_timeout),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(int, cu_id)
+ __field(u32, vstreamId)
+ __field(u64, delay)
+ __field(int, is_timeout)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, vinfo->p->comm, TASK_COMM_LEN);
+ __entry->pid = vinfo->p->pid;
+ __entry->cu_id = vinfo->cu_id;
+ __entry->vstreamId = vinfo->vstreamId;
+ __entry->delay = delay;
+ __entry->is_timeout = is_timeout;
+ ),
+
+ TP_printk("comm=%s pid=%d cu_id=%d vstreamId %u, delay=%llu [ns]:%d",
+ __entry->comm, __entry->pid,
+ __entry->cu_id, __entry->vstreamId,
+ (unsigned long long)__entry->delay,
+ __entry->is_timeout)
+);
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+DEFINE_EVENT(ucc_sched_stat_template, ucc_sched_stat_wait,
+ TP_PROTO(struct vstream_info *vinfo, u64 delay),
+ TP_ARGS(vinfo, delay));
+
+DEFINE_EVENT(ucc_sched_stat_template, ucc_sched_stat_preempt,
+ TP_PROTO(struct vstream_info *vinfo, u64 delay),
+ TP_ARGS(vinfo, delay));
+
+DEFINE_EVENT(ucc_sched_stat_template_1, ucc_sched_stat_run,
+ TP_PROTO(struct vstream_info *vinfo, u64 delay, int is_timeout),
+ TP_ARGS(vinfo, delay, is_timeout));
+
+TRACE_EVENT(ucc_sched_switch,
+
+ TP_PROTO(int preempt,
+ struct vstream_info *next),
+
+ TP_ARGS(preempt, next),
+
+ TP_STRUCT__entry(
+ __field(int, cu_id)
+ __field(u32, next_vstreamId)
+ __field(u32, next_prio)
+ __field(int, preempt)
+ ),
+
+ TP_fast_assign(
+ __entry->cu_id = next->cu_id;
+ __entry->next_vstreamId = next->vstreamId;
+ __entry->next_prio = next->p->ucc_priority;
+ __entry->preempt = preempt;
+ ),
+
+ TP_printk("cu_id=%d next_vstreamId %u next_prio %u preempt[%d]",
+ __entry->cu_id,
+ __entry->next_vstreamId, __entry->next_prio,
+ __entry->preempt)
+);
+#endif /* _TRACE_UCC_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/init/init_task.c b/init/init_task.c
index b312a045f4b9..c1a78b4da368 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -188,6 +188,10 @@ struct task_struct init_task
.fork_pid = 0,
},
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+ .ucc_priority = 1,
+ .ucc_step = 1,
+#endif
};
EXPORT_SYMBOL(init_task);
diff --git a/init/main.c b/init/main.c
index 50af60ff0ef6..7ed2e67d7011 100644
--- a/init/main.c
+++ b/init/main.c
@@ -66,6 +66,7 @@
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/sched/init.h>
+#include <linux/ucc_sched/ucc_sched.h>
#include <linux/signal.h>
#include <linux/idr.h>
#include <linux/kgdb.h>
@@ -599,6 +600,14 @@ asmlinkage __visible void __init start_kernel(void)
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init();
+
+#ifdef CONFIG_XPU_SCHEDULE
+ /*
+ * Set up the ucc scheduler, to enable heterogeneous scheduling.
+ */
+ ucc_sched_init();
+#endif
+
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
diff --git a/kernel/Makefile b/kernel/Makefile
index d0482bd27ba4..273fe481d303 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,6 +43,8 @@ obj-y += irq/
obj-y += rcu/
obj-y += livepatch/
obj-y += dma/
+obj-$(CONFIG_XPU_SCHEDULE) += ucc_sched/
+obj-$(CONFIG_XPU_UCC) += ucc/
obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
obj-$(CONFIG_FREEZER) += freezer.o
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 0612af002ae5..0f659b2ad251 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -19,6 +19,7 @@ endif
obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle.o fair.o rt.o deadline.o
obj-y += wait.o wait_bit.o swait.o completion.o
+obj-$(CONFIG_XPU_SCHEDULE) += ucc_sched.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67bda877bfa8..89348097b29a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2316,6 +2316,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
*/
p->prio = current->normal_prio;
+#ifdef CONFIG_XPU_SCHEDULE
+ p->ucc_priority = current->ucc_priority;
+ p->ucc_step = current->ucc_step;
+#endif
+
/*
* Revert to default priority/policy on fork if requested.
*/
diff --git a/kernel/sched/ucc_sched.c b/kernel/sched/ucc_sched.c
new file mode 100644
index 000000000000..646f120c3c34
--- /dev/null
+++ b/kernel/sched/ucc_sched.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ucc_sched.h>
+#include <linux/ucc_common.h>
+
+static DEFINE_MUTEX(revmap_mutex);
+
+static DEFINE_HASHTABLE(vrtsq_rtsq_revmap, VRTSQ_RTSQ_HASH_ORDER);
+
+/**
+ * struct vsqce_idx_revmap_data - reverse map from vstream id to XPU group.
+ * @vrtsdId: vstream id used as the hash key.
+ * @group: value for this entry.
+ * @hash_node: hash node list.
+ */
+struct vsqce_idx_revmap_data {
+ unsigned int vrtsdId;
+ struct xpu_group *group;
+ struct hlist_node hash_node;
+};
+
+struct xpu_group *select_sq(struct vstream_info *vstream_info)
+{
+ struct vsqce_idx_revmap_data *revmap_data;
+
+ /* find history */
+ mutex_lock(&revmap_mutex);
+ hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node,
+ (unsigned long)vstream_info->vstreamId) {
+ if (revmap_data && revmap_data->group) {
+ mutex_unlock(&revmap_mutex);
+ return revmap_data->group;
+ }
+ }
+ mutex_unlock(&revmap_mutex);
+
+ revmap_data = kzalloc(sizeof(struct vsqce_idx_revmap_data), GFP_KERNEL);
+ if (revmap_data == NULL)
+ return NULL;
+	/* find XPU group */
+	revmap_data->group = xpu_group_find(xpu_root, XPU_TYPE_NPU_310);
+	if (revmap_data->group == NULL) {
+		ucc_err("failed to find XPU group.\n");
+		goto err_free;
+	}
+	/* find device group */
+	revmap_data->group = xpu_group_find(revmap_data->group,
+			vstream_info->devId);
+	if (revmap_data->group == NULL) {
+		ucc_err("failed to find device group.\n");
+		goto err_free;
+	}
+	/* find tsgroup */
+	revmap_data->group = xpu_group_find(revmap_data->group,
+			vstream_info->tsId);
+	if (revmap_data->group == NULL) {
+		ucc_err("failed to find ts group.\n");
+		goto err_free;
+	}
+
+	/* select idle xcu */
+	revmap_data->group = xpu_idle_group_find(revmap_data->group);
+	if (revmap_data->group == NULL) {
+		ucc_err("failed to find idle rtsq group.\n");
+		goto err_free;
+	}
+
+ revmap_data->vrtsdId = vstream_info->vstreamId;
+ /* set group used : 1 */
+ revmap_data->group->used = 1;
+
+ mutex_lock(&revmap_mutex);
+ hash_add(vrtsq_rtsq_revmap, &revmap_data->hash_node,
+ (unsigned long)vstream_info->vstreamId);
+ mutex_unlock(&revmap_mutex);
+	return revmap_data->group;
+
+err_free:
+	kfree(revmap_data);
+	return NULL;
+}
+
+int ucc_process_task(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx,
+ int *sqenum)
+{
+ struct xpu_group *group = NULL;
+
+ if (vstream_info == NULL) {
+		ucc_err("vstream_info is NULL\n");
+ return -1;
+ }
+
+ group = select_sq(vstream_info);
+ if (group == NULL) {
+		ucc_err("failed to find group.\n");
+ return -1;
+ }
+ /* send sqe */
+ *sqenum = xpu_run(group, vstream_info, ctx);
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_process_task);
+
+int ucc_free_task(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx)
+{
+ struct vsqce_idx_revmap_data *revmap_data;
+
+ ucc_dequeue_task(vstream_info);
+
+ while (!ucc_xcu_is_sched(vstream_info->cu_id))
+ schedule_timeout_interruptible(10);
+
+ ucc_dump_statistics_info(&vstream_info->se);
+
+ mutex_lock(&revmap_mutex);
+ hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node,
+ (unsigned long)vstream_info->vstreamId) {
+ if (revmap_data &&
+ revmap_data->vrtsdId == vstream_info->vstreamId &&
+ revmap_data->group) {
+ xpu_finish(revmap_data->group, vstream_info, ctx);
+ /* set group unused : 0 */
+ revmap_data->group->used = 0;
+ hash_del(&revmap_data->hash_node);
+ kfree(revmap_data);
+ revmap_data = NULL;
+ break;
+ }
+ }
+ mutex_unlock(&revmap_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_free_task);
+
+int ucc_wait_cq(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx,
+ struct devdrv_report_para *arg, int *cqenum)
+{
+ struct vsqce_idx_revmap_data *revmap_data;
+
+ hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node,
+ (unsigned long)vstream_info->vstreamId) {
+ if (revmap_data &&
+ revmap_data->vrtsdId == vstream_info->vstreamId &&
+ revmap_data->group)
+ *cqenum = xpu_wait(revmap_data->group, vstream_info,
+ ctx, arg);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_wait_cq);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c7064f67f4a5..aeceb9e9c927 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -117,6 +117,10 @@ extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
extern int sysctl_nr_trim_pages;
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+extern int sysctl_ucc_sched_rcv_timeout_ms;
+#endif
+
/* Constants used for minimum and maximum */
#ifdef CONFIG_LOCKUP_DETECTOR
static int sixty = 60;
@@ -139,7 +143,7 @@ static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
-#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_QOS_SCHED_SMART_GRID)
+#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_QOS_SCHED_SMART_GRID) || defined(CONFIG_XPU_SCHEDULE)
static int hundred_thousand = 100000;
#endif
#ifdef CONFIG_PERF_EVENTS
@@ -352,6 +356,17 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_XPU_SCHEDULE
+ {
+ .procname = "ucc_sched_rcv_timeout",
+ .data = &sysctl_ucc_sched_rcv_timeout_ms,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &hundred_thousand,
+ },
+#endif
#ifdef CONFIG_SCHED_DEBUG
{
.procname = "sched_min_granularity_ns",
diff --git a/kernel/ucc/Kconfig b/kernel/ucc/Kconfig
new file mode 100644
index 000000000000..279c11f702b1
--- /dev/null
+++ b/kernel/ucc/Kconfig
@@ -0,0 +1,21 @@
+#
+# TODO: add description
+#
+
+config XPU_UCC
+ bool "ucc"
+ default n
+ depends on ARM64 || X86
+ help
+	  Say Y here if you want support for using XPU UCC. XPU UCC
+	  is a helper for XPU scheduling. UCC stands for
+	  Universal Converged Computing.
+
+
+config XPU_VSTREAM
+ bool "virtual submit queue and complete queue"
+ default n
+ depends on XPU_UCC
+ help
+	  Virtual submit queue and complete queue support for XPU.
+	  It is used to help XPU scheduling.
diff --git a/kernel/ucc/Makefile b/kernel/ucc/Makefile
new file mode 100644
index 000000000000..0e2735d2aef4
--- /dev/null
+++ b/kernel/ucc/Makefile
@@ -0,0 +1 @@
+obj-y += ascend_vstream.o vstream.o
diff --git a/kernel/ucc/ascend_vstream.c b/kernel/ucc/ascend_vstream.c
new file mode 100644
index 000000000000..d248aaff7639
--- /dev/null
+++ b/kernel/ucc/ascend_vstream.c
@@ -0,0 +1,654 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/vstream.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/ucc_common.h>
+#include <linux/ucc_sched.h>
+
+DEFINE_MUTEX(vstreamId_Bitmap_mutex);
+static DECLARE_BITMAP(vstreamIdBitmap, DEVDRV_MAX_SQ_NUM);
+
+static DEFINE_MUTEX(vcqId_Bitmap_mutex);
+static DECLARE_BITMAP(vcqIdBitmap, DEVDRV_MAX_CQ_NUM);
+
+static DEFINE_MUTEX(revmap_mutex);
+
+static struct vstream_info *vstreamContainer[DEVDRV_MAX_SQ_NUM];
+static struct vcq_map_table *vsqcqMapTable[DEVDRV_MAX_CQ_NUM];
+
+#define MAX_SQ_SIZE (MAX_VSTREAM_SIZE * MAX_VSTREAM_SLOT_SIZE)
+#define MAX_CQ_SIZE (MAX_VSTREAM_SIZE * MAX_CQ_SLOT_SIZE)
+
+#define SQ_USER_ADDR_OFFSET(id) ((unsigned long)REMAP_ALIGN(MAX_SQ_SIZE) * id)
+#define CQ_USER_ADDR_OFFSET(id) ((unsigned long)REMAP_ALIGN(MAX_CQ_SIZE) * id)
+
+#define SQ_VSTREAM_DATA(id) vstreamContainer[id]->vsqNode->vstreamData
+#define CQ_VSTREAM_DATA(id) vstreamContainer[id]->vcqNode->vstreamData
+
+static struct tsdrv_ctx *get_ctx(int fd)
+{
+ struct fd f;
+ struct davinci_intf_private_stru *file_private_data;
+ struct tsdrv_ctx *ctx = NULL;
+
+ f = fdget(fd);
+ if (!f.file)
+ goto out;
+
+ file_private_data = f.file->private_data;
+ if (!file_private_data)
+ goto out;
+
+ ctx = file_private_data->priv_filep.private_data;
+
+out:
+ fdput(f);
+ return ctx;
+}
+
+static struct vcq_map_table *vstream_get_map_table(uint32_t id)
+{
+ return vsqcqMapTable[id];
+}
+
+static void free_vstreamId(uint32_t vstreamId)
+{
+ mutex_lock(&vstreamId_Bitmap_mutex);
+ clear_bit(vstreamId, vstreamIdBitmap);
+ mutex_unlock(&vstreamId_Bitmap_mutex);
+}
+
+static void free_vcqId(uint32_t vcqId, uint32_t flag)
+{
+ mutex_lock(&vcqId_Bitmap_mutex);
+ if (!(flag & TSDRV_CQ_REUSE))
+ clear_bit(vcqId, vcqIdBitmap);
+ mutex_unlock(&vcqId_Bitmap_mutex);
+}
+
+static void vstream_free_map_table(uint32_t vcqId, uint32_t vstreamId,
+ uint32_t flag)
+{
+ struct vcq_map_table *freeTable = NULL;
+ struct vstream_id *vstreamIdNode = NULL;
+
+ freeTable = vstream_get_map_table(vcqId);
+ if (!freeTable) {
+ ucc_err("No map found for vcq:%d.\n", vcqId);
+ return;
+ }
+
+ list_for_each_entry(vstreamIdNode, &freeTable->vstreamId_list, list) {
+ if (vstreamIdNode->vstreamId == vstreamId) {
+ list_del(&vstreamIdNode->list);
+ kfree(vstreamIdNode);
+ break;
+ }
+ }
+ if (!(flag & TSDRV_CQ_REUSE)) {
+ kfree(freeTable->vcqNode->vstreamData);
+ kfree(freeTable->vcqNode);
+ kfree(freeTable);
+ }
+}
+
+static void vstream_alloc_ucc_se(struct ucc_se *se)
+{
+ memset(&se->statistics, 0, sizeof(se->statistics));
+ se->on_cu = 0;
+ se->state = SE_PREPARE;
+ se->flag = UCC_TIF_NONE;
+ se->prio = UCC_PRIO_HIGH;
+ se->step = UCC_STEP_SLOW;
+ raw_spin_lock_init(&se->se_lock);
+}
+
+static struct vstream_info *vstream_create_info(struct tsdrv_ctx *ctx,
+ struct normal_alloc_sqcq_para *para)
+{
+ struct vcq_map_table *mapTable = NULL;
+
+ struct vstream_info *vstream = kzalloc(sizeof(struct vstream_info),
+ GFP_KERNEL);
+ if (!vstream)
+ return NULL;
+
+ (void)memcpy(vstream->info, para->info,
+ sizeof(uint32_t) * SQCQ_RTS_INFO_LENGTH);
+
+ vstream->privdata = ctx;
+ vstream->tsId = para->tsId;
+ vstream->vstreamId = para->sqId;
+ vstream->vcqId = para->cqId;
+
+ mapTable = vstream_get_map_table(vstream->vcqId);
+ if (!mapTable || !mapTable->vcqNode) {
+ ucc_err("No map found for vcqId:%d.\n", vstream->vcqId);
+ goto free_vstream;
+ }
+ vstream->vcqNode = mapTable->vcqNode;
+ vstream->vsqNode = kmalloc(sizeof(struct vstream_node), GFP_KERNEL);
+ if (!vstream->vsqNode) {
+ ucc_err("Failed to alloc memory for vsqNode:%d.\n",
+ vstream->vstreamId);
+ goto free_vstream;
+ }
+ vstream->vsqNode->vstreamData = kmalloc(MAX_SQ_SIZE, GFP_KERNEL);
+ if (!vstream->vsqNode->vstreamData)
+ goto free_vsqNode;
+ vstream->vsqNode->id = vstream->vstreamId;
+ vstream->vsqNode->head = 0;
+ vstream->vsqNode->tail = 0;
+ vstream->vsqNode->credit = MAX_VSTREAM_SIZE;
+ raw_spin_lock_init(&vstream->vsqNode->spin_lock);
+ vstream->send_cnt = 0;
+ vstream->p = current;
+ vstream_alloc_ucc_se(&vstream->se);
+
+ return vstream;
+
+free_vsqNode:
+ kfree(vstream->vsqNode);
+
+free_vstream:
+ kfree(vstream);
+ return NULL;
+}
+
+struct vstream_info *vstream_get_info(uint32_t id)
+{
+ return vstreamContainer[id];
+}
+
+static void vstream_free_info(uint32_t id)
+{
+	struct vstream_info *freeInfo = vstream_get_info(id);
+
+	if (!freeInfo)
+		return;
+
+	ucc_set_vstream_state(freeInfo, SE_DEAD);
+
+	if (freeInfo->vsqNode)
+		kfree(freeInfo->vsqNode->vstreamData);
+
+	kfree(freeInfo->vsqNode);
+	kfree(freeInfo);
+}
+
+static int queue_pop_by_num(struct vstream_node *node, uint32_t pop_num)
+{
+ if (node->credit + pop_num > MAX_VSTREAM_SIZE) {
+ ucc_err("Queue usage out-of-bounds");
+ return -EACCES;
+ }
+
+ node->credit += pop_num;
+ node->head = (node->head + pop_num) % MAX_VSTREAM_SIZE;
+ return 0;
+}
+
+static int queue_pop_by_head(struct vstream_node *node, uint32_t head)
+{
+ int pop_num = (head - node->head + MAX_VSTREAM_SIZE) %
+ MAX_VSTREAM_SIZE;
+ return queue_pop_by_num(node, pop_num);
+}
+
+int update_vstream_head(struct vstream_info *vstream_info, int num)
+{
+ struct vstream_node *node = vstream_info->vsqNode;
+
+ raw_spin_lock(&node->spin_lock);
+ if (node->credit + num > MAX_VSTREAM_SIZE) {
+ raw_spin_unlock(&node->spin_lock);
+ return -1;
+ }
+
+ node->credit += num;
+ node->head = (node->head + num) % MAX_VSTREAM_SIZE;
+ raw_spin_unlock(&node->spin_lock);
+
+ return 0;
+}
+
+bool vstream_have_kernel(struct ucc_se *se)
+{
+ struct vstream_info *vinfo;
+
+ vinfo = container_of(se, struct vstream_info, se);
+ return vinfo->vsqNode->credit != MAX_VSTREAM_SIZE;
+}
+
+static int queue_push_by_num(struct vstream_node *node, uint32_t push_num)
+{
+	if (push_num > node->credit)
+ return -EACCES;
+
+ node->credit -= push_num;
+ node->tail = (node->tail + push_num) % MAX_VSTREAM_SIZE;
+ return 0;
+}
+
+static int queue_push_by_tail(struct vstream_node *node, uint32_t tail)
+{
+ int push_num = (tail - node->tail + MAX_VSTREAM_SIZE) %
+ MAX_VSTREAM_SIZE;
+ return queue_push_by_num(node, push_num);
+}
+
+static uint32_t vstream_alloc_vstreamId(void)
+{
+ uint32_t vstreamId = DEVDRV_MAX_SQ_NUM;
+
+ /* alloc vstreamId */
+ mutex_lock(&vstreamId_Bitmap_mutex);
+ vstreamId = find_first_zero_bit(vstreamIdBitmap, DEVDRV_MAX_SQ_NUM);
+ if (vstreamId == DEVDRV_MAX_SQ_NUM) {
+ ucc_err("vstreamId exhausted.\n");
+ mutex_unlock(&vstreamId_Bitmap_mutex);
+ return DEVDRV_MAX_SQ_NUM;
+ }
+ set_bit(vstreamId, vstreamIdBitmap);
+ mutex_unlock(&vstreamId_Bitmap_mutex);
+
+ return vstreamId;
+}
+
+static uint32_t vstream_alloc_vcqid(void)
+{
+ uint32_t vcqId = DEVDRV_MAX_CQ_NUM;
+
+ /* alloc vcqid */
+ mutex_lock(&vcqId_Bitmap_mutex);
+ vcqId = find_first_zero_bit(vcqIdBitmap, DEVDRV_MAX_CQ_NUM);
+ if (vcqId == DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vcqId has been used up.\n");
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ return DEVDRV_MAX_CQ_NUM;
+ }
+ set_bit(vcqId, vcqIdBitmap);
+ mutex_unlock(&vcqId_Bitmap_mutex);
+
+ ucc_info("vcqId = %d\n", vcqId);
+ return vcqId;
+}
+
+int vstream_map_pfnaddr(struct tsdrv_ctx *ctx,
+ struct normal_alloc_sqcq_para *para)
+{
+ int err = 0;
+ unsigned long vsqAddr;
+ unsigned long vcqAddr;
+ pgprot_t vm_page_prot;
+ struct vm_area_struct *vma = ctx->vma[para->tsId];
+
+ vsqAddr = vma->vm_start + SQ_USER_ADDR_OFFSET(para->sqId);
+ vm_page_prot = pgprot_device(vma->vm_page_prot);
+ err = remap_pfn_range(vma, vsqAddr,
+ virt_to_pfn(SQ_VSTREAM_DATA(para->sqId)),
+ MAX_SQ_SIZE, vm_page_prot);
+ if (err) {
+ ucc_err("remap_pfn_range failed,ret=%d.\n", err);
+ return -EFAULT;
+ }
+ if (!(para->flag & TSDRV_CQ_REUSE)) {
+ vcqAddr = vma->vm_start + DEVDRV_VM_CQ_MEM_OFFSET +
+ CQ_USER_ADDR_OFFSET(para->cqId);
+ err = remap_pfn_range(vma, vcqAddr,
+ virt_to_pfn(CQ_VSTREAM_DATA(para->sqId)),
+ MAX_CQ_SIZE, vm_page_prot);
+ if (err) {
+ ucc_err("remap_pfn_range failed,ret=%d.\n", err);
+ return -EFAULT;
+ }
+ }
+
+ return err;
+}
+
+void vstream_unmap_pfnaddr(struct tsdrv_ctx *ctx,
+ struct normal_free_sqcq_para *para)
+{
+ unsigned long vsqAddr;
+ unsigned long vcqAddr;
+ size_t cqSize = PAGE_ALIGN(MAX_CQ_SIZE);
+ struct vm_area_struct *vma = ctx->vma[para->tsId];
+
+ vsqAddr = vma->vm_start + SQ_USER_ADDR_OFFSET(para->sqId);
+ zap_vma_ptes(vma, vsqAddr, MAX_SQ_SIZE);
+
+ if (!(para->flag & TSDRV_CQ_REUSE)) {
+ vcqAddr = vma->vm_start + DEVDRV_VM_CQ_MEM_OFFSET +
+ CQ_USER_ADDR_OFFSET(para->cqId);
+ zap_vma_ptes(vma, vcqAddr, cqSize);
+ }
+}
+
+static int vstream_update_vcqtable(uint32_t vcqId, uint32_t vstreamId,
+ uint32_t flag)
+{
+ int err = -ENOSPC;
+ struct vcq_map_table *vcqTable = NULL;
+ struct vstream_id *vstreamIdNode = NULL;
+
+ if (!(flag & TSDRV_CQ_REUSE)) {
+ vcqTable = kmalloc(sizeof(struct vcq_map_table), GFP_KERNEL);
+ if (!vcqTable)
+ return -ENOMEM;
+
+ vcqTable->vcqId = vcqId;
+ vcqTable->vcqNode = kmalloc(sizeof(struct vstream_node),
+ GFP_KERNEL);
+ if (!vcqTable->vcqNode) {
+ err = -ENOMEM;
+ goto free_vcqTable;
+ }
+
+ vcqTable->vcqNode->vstreamData = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcqTable->vcqNode->vstreamData) {
+ err = -ENOMEM;
+ goto free_vcqNode;
+ }
+ vcqTable->vcqNode->id = vcqId;
+ vcqTable->vcqNode->head = 0;
+ vcqTable->vcqNode->tail = 0;
+ vcqTable->vcqNode->credit = MAX_VSTREAM_SIZE;
+ INIT_LIST_HEAD(&vcqTable->vstreamId_list);
+ vsqcqMapTable[vcqId] = vcqTable;
+ } else {
+ vcqTable = vsqcqMapTable[vcqId];
+ }
+ vstreamIdNode = kmalloc(sizeof(struct vstream_id), GFP_KERNEL);
+ if (!vstreamIdNode) {
+ err = -ENOMEM;
+
+ if (!(flag & TSDRV_CQ_REUSE))
+ goto free_vstreamData;
+ return err;
+ }
+ vstreamIdNode->vstreamId = vstreamId;
+ list_add(&vstreamIdNode->list, &vcqTable->vstreamId_list);
+
+ return 0;
+
+free_vstreamData:
+ kfree(vcqTable->vcqNode->vstreamData);
+
+free_vcqNode:
+ kfree(vcqTable->vcqNode);
+
+free_vcqTable:
+ kfree(vcqTable);
+
+ return err;
+}
+
+int ascend_vstream_alloc(struct vstream_args *arg)
+{
+ uint32_t vstreamId;
+ uint32_t vcqId = DEVDRV_MAX_CQ_NUM;
+ int err = -EINVAL;
+ struct vstream_info *vstream = NULL;
+ struct tsdrv_ctx *ctx = NULL;
+ struct normal_alloc_sqcq_para *sqcq_alloc_para = &arg->va_args.ascend;
+
+ ctx = get_ctx(sqcq_alloc_para->fd);
+ if (!ctx)
+ return err;
+
+ vstreamId = vstream_alloc_vstreamId();
+ if (vstreamId == DEVDRV_MAX_SQ_NUM) {
+ ucc_err("vstreamId alloc failed.\n");
+ return err;
+ }
+ if (!(sqcq_alloc_para->flag & TSDRV_CQ_REUSE))
+ vcqId = vstream_alloc_vcqid();
+ else
+ vcqId = sqcq_alloc_para->cqId;
+
+ if (vcqId >= DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vcqId alloc failed.\n");
+ goto free_vstreamIds;
+ }
+ err = vstream_update_vcqtable(vcqId, vstreamId, sqcq_alloc_para->flag);
+ if (err) {
+ ucc_err("vcqtable update failed, vcqId:%d, vstreamId:%d, flag:%d.\n",
+ vcqId, vstreamId, sqcq_alloc_para->flag);
+ goto free_vcqid;
+ }
+
+ sqcq_alloc_para->sqId = vstreamId;
+ sqcq_alloc_para->cqId = vcqId;
+ vstream = vstream_create_info(ctx, sqcq_alloc_para);
+ if (!vstream) {
+ ucc_err("vstream create failed: vcqId:%d, vstreamId:%d.\n",
+ vcqId, vstreamId);
+ err = -ENOSPC;
+ goto free_vcqtable;
+ }
+
+ vstream->devId = sqcq_alloc_para->devId;
+ vstreamContainer[vstreamId] = vstream;
+
+ vstream->group = select_sq(vstream);
+ if (!vstream->group) {
+ ucc_err("Failed to select sq\n");
+ err = -EINVAL;
+ goto free_vstream_info;
+ }
+
+ err = vstream_map_pfnaddr(ctx, sqcq_alloc_para);
+ if (err) {
+ ucc_err("vstream map failed, ret=%d.\n", err);
+ goto free_vstream_info;
+ }
+ return 0;
+
+free_vstream_info:
+ vstream_free_info(vstreamId);
+
+free_vcqtable:
+ vstream_free_map_table(vcqId, vstreamId, sqcq_alloc_para->flag);
+
+free_vcqid:
+ free_vcqId(vcqId, sqcq_alloc_para->flag);
+
+free_vstreamIds:
+ free_vstreamId(vstreamId);
+
+ return err;
+}
+
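+/*
+ * Free a vstream: release its task on the device, drop the CQ binding,
+ * unmap the user SQ/CQ pages and return both ids to their bitmaps.
+ */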
+int ascend_vstream_free(struct vstream_args *arg)
+{
+ int err = 0;
+ struct vstream_info *vstreamInfo = NULL;
+ struct normal_free_sqcq_para *sqcq_free_para = &arg->vf_args.ascend;
+ uint32_t vstreamId = sqcq_free_para->sqId;
+ uint32_t vcqId = sqcq_free_para->cqId;
+
+ if (vstreamId >= DEVDRV_MAX_SQ_NUM || vcqId >= DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vstream index out-of-range, vstreamId=%d, vcqId=%d.\n",
+ vstreamId, vcqId);
+ return -EPERM;
+ }
+
+ vstreamInfo = vstream_get_info(vstreamId);
+ if (!vstreamInfo) {
+ ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId);
+ return -EPERM;
+ }
+ err = ucc_free_task(vstreamInfo, vstreamInfo->privdata);
+
+ free_vcqId(vcqId, sqcq_free_para->flag);
+ vstream_free_map_table(vcqId, vstreamId, sqcq_free_para->flag);
+
+ vstream_unmap_pfnaddr(vstreamInfo->privdata, sqcq_free_para);
+
+ vstream_free_info(vstreamId);
+ free_vstreamId(vstreamId);
+ return err;
+}
+
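+/*
+ * Kick a vstream: publish the new SQ tail written by userspace and wake
+ * the ucc scheduler so the pending kernels get submitted.
+ */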
+int ascend_vstream_kick(struct vstream_args *arg)
+{
+ int err = 0;
+ struct tsdrv_sqcq_data_para *sqcq_data_para = &arg->vk_args.ascend;
+ int vstreamId = sqcq_data_para->id;
+ int tail = sqcq_data_para->val;
+	struct vstream_info *vstreamInfo = NULL;
+
+	vstreamInfo = vstream_get_info(vstreamId);
+	if (!vstreamInfo) {
+		ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId);
+		return -EINVAL;
+	}
+	/* Record the submitting task only after the lookup succeeds. */
+	vstreamInfo->p = current;
+
+ raw_spin_lock(&vstreamInfo->vsqNode->spin_lock);
+ err = queue_push_by_tail(vstreamInfo->vsqNode, tail);
+ if (err) {
+ raw_spin_unlock(&vstreamInfo->vsqNode->spin_lock);
+ ucc_err("queue_push_by_tail error, ret = %d\n", err);
+ return err;
+ }
+ raw_spin_unlock(&vstreamInfo->vsqNode->spin_lock);
+
+ err = ucc_wake_up(&vstreamInfo->se);
+ return err;
+}
+
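+/*
+ * Callback wait: walk every SQ bound to this vcq, poll the hardware for
+ * completed kernels, then push the harvested CQE count onto the virtual
+ * CQ and report the new tail back to userspace.
+ */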
+int ascend_callback_vstream_wait(struct vstream_args *arg)
+{
+ int err = 0;
+ int cqeNum = 0;
+ int cqeSum = 0;
+ struct vstream_info *vstreamInfo = NULL;
+ struct vcq_map_table *vcqTable = NULL;
+ struct vcq_map_table *waitTable = NULL;
+ struct vstream_id *vstreamIdNode = NULL;
+ struct devdrv_report_para *report_para = &arg->cvw_args;
+ uint32_t *sqlist;
+ uint32_t sqlist_num = 0;
+ uint32_t vstreamId, vcqId;
+
+ sqlist = kmalloc_array(DEVDRV_MAX_SQ_NUM, sizeof(uint32_t), GFP_KERNEL);
+ if (!sqlist)
+ return -ENOMEM;
+
+ vcqId = report_para->cq_id;
+ if (vcqId >= DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vcqId out-of-range, vcqId=%d.\n", vcqId);
+ err = -EPERM;
+ goto out;
+ }
+
+ mutex_lock(&vcqId_Bitmap_mutex);
+ waitTable = vstream_get_map_table(vcqId);
+ if (!waitTable) {
+ ucc_err("No map found for vcq:%d.\n", vcqId);
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ err = -EPERM;
+ goto out;
+ }
+
+ list_for_each_entry(vstreamIdNode, &waitTable->vstreamId_list, list)
+ sqlist[sqlist_num++] = vstreamIdNode->vstreamId;
+ mutex_unlock(&vcqId_Bitmap_mutex);
+
+	/* Poll the hardware CQ of every SQ bound to this vcq. */
+ for (vstreamId = 0; vstreamId < sqlist_num; vstreamId++) {
+ vstreamInfo = vstream_get_info(sqlist[vstreamId]);
+ if (!vstreamInfo)
+ continue;
+ err |= ucc_wait_cq(vstreamInfo, vstreamInfo->privdata,
+ report_para, &cqeNum);
+ cqeSum += cqeNum;
+ if (cqeNum)
+ break;
+ }
+
+	/* Publish the harvested CQEs to the virtual CQ. */
+ mutex_lock(&vcqId_Bitmap_mutex);
+ vcqTable = vstream_get_map_table(vcqId);
+	if (!vcqTable) {
+		ucc_err("No map found for vcq:%d.\n", vcqId);
+		mutex_unlock(&vcqId_Bitmap_mutex);
+		err = -EPERM;
+		goto out;
+	}
+
+ err = queue_push_by_num(vcqTable->vcqNode, cqeSum);
+ if (err) {
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ ucc_err("failed to queue_push_by_num, ret = %d.\n", err);
+ goto out;
+ }
+ report_para->cq_tail = vcqTable->vcqNode->tail;
+ mutex_unlock(&vcqId_Bitmap_mutex);
+
+out:
+ kfree(sqlist);
+ return err;
+}
+
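+/*
+ * Callback kick: userspace has consumed CQEs up to release_head, so pop
+ * them from the virtual CQ.
+ */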
+int ascend_callback_vstream_kick(struct vstream_args *arg)
+{
+ u32 vcqId, release_head;
+	struct vcq_map_table *vcqTable = NULL;
+ int err = 0;
+
+ vcqId = arg->cvk_args.id;
+ release_head = arg->cvk_args.val;
+ if (vcqId >= DEVDRV_MAX_CQ_NUM || release_head >= MAX_VSTREAM_SIZE) {
+ ucc_err("vstream index out-of-range, vcqId=%d, release_head=%d.\n",
+ vcqId, release_head);
+ return -EPERM;
+ }
+
+	mutex_lock(&vcqId_Bitmap_mutex);
+	/*
+	 * vcqId indexes the vcq map table, not the vstream container, so
+	 * resolve it through vstream_get_map_table() as the wait path does.
+	 */
+	vcqTable = vstream_get_map_table(vcqId);
+	if (!vcqTable) {
+		err = -EPERM;
+		goto out;
+	}
+
+	err = queue_pop_by_head(vcqTable->vcqNode, release_head);
+
+out:
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ return err;
+}
+
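+/* Report the current SQ head of a vstream back to userspace. */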
+int ascend_vstream_get_head(struct vstream_args *arg)
+{
+ u32 vstreamId = arg->vh_args.id;
+ struct vstream_info *vstreamInfo = NULL;
+
+ if (vstreamId >= DEVDRV_MAX_SQ_NUM) {
+ ucc_err("vstreamId out-of-range, vstreamId=%d.\n", vstreamId);
+ return -EINVAL;
+ }
+
+ vstreamInfo = vstream_get_info(vstreamId);
+ if (!vstreamInfo) {
+ ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId);
+ return -EINVAL;
+ }
+ arg->vh_args.val = vstreamInfo->vsqNode->head;
+
+ return 0;
+}
+
diff --git a/kernel/ucc/ascend_vstream.h b/kernel/ucc/ascend_vstream.h
new file mode 100644
index 000000000000..0cd200168495
--- /dev/null
+++ b/kernel/ucc/ascend_vstream.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _ASCEND_VSTREAM_H
+#define _ASCEND_VSTREAM_H
+
+int ascend_vstream_alloc(struct vstream_args *arg);
+int ascend_vstream_free(struct vstream_args *arg);
+int ascend_vstream_kick(struct vstream_args *arg);
+int ascend_callback_vstream_wait(struct vstream_args *arg);
+int ascend_callback_vstream_kick(struct vstream_args *arg);
+int ascend_vstream_get_head(struct vstream_args *arg);
+
+#endif /* _ASCEND_VSTREAM_H */
diff --git a/kernel/ucc/vstream.c b/kernel/ucc/vstream.c
new file mode 100644
index 000000000000..d4705f285b89
--- /dev/null
+++ b/kernel/ucc/vstream.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/syscalls.h>
+#include <linux/vstream.h>
+
+#include "ascend_vstream.h"
+
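+/*
+ * The AMDGPU hooks below are placeholders so the command table covers the
+ * full command range; they accept their arguments and do nothing yet.
+ */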
+static int amdgpu_vstream_alloc(struct vstream_args *arg)
+{
+ return 0;
+}
+static int amdgpu_vstream_free(struct vstream_args *arg)
+{
+ return 0;
+}
+static int amdgpu_vstream_kick(struct vstream_args *arg)
+{
+ return 0;
+}
+static int amdgpu_vstream_update(struct vstream_args *arg)
+{
+ return 0;
+}
+
+/*
+ * vstream_manage_cmd table
+ */
+static vstream_manage_t (*vstream_command_table[AMDGPU_MAX_COMMAND + 1]) = {
+ ascend_vstream_alloc, // ASCEND_VSTREAM_ALLOC
+ ascend_vstream_free, // ASCEND_VSTREAM_FREE
+ ascend_vstream_kick, // ASCEND_VSTREAM_KICK
+ ascend_callback_vstream_wait, // ASCEND_CALLBACK_VSTREAM_WAIT
+ ascend_callback_vstream_kick, // ASCEND_CALLBACK_VSTREAM_KICK
+ ascend_vstream_get_head, // ASCEND_VSTREAM_GET_HEAD
+ NULL, // ASCEND_MAX_COMMAND
+ amdgpu_vstream_alloc, // AMDGPU_VSTREAM_ALLOC
+ amdgpu_vstream_free, // AMDGPU_VSTREAM_FREE
+ amdgpu_vstream_kick, // AMDGPU_VSTREAM_KICK
+ amdgpu_vstream_update, // AMDGPU_VSTREAM_UPDATE
+ NULL // AMDGPU_MAX_COMMAND
+};
+
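+/*
+ * vstream_manage syscall: copy the argument block in, dispatch to the
+ * handler selected by cmd, and copy the (possibly updated) block back.
+ */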
+SYSCALL_DEFINE2(vstream_manage, struct vstream_args __user *, arg, int, cmd)
+{
+ int res = 0;
+ struct vstream_args vstream_arg;
+
+	if (cmd < 0 || cmd > AMDGPU_MAX_COMMAND ||
+	    !vstream_command_table[cmd])
+		return -EINVAL;
+
+ if (copy_from_user(&vstream_arg, arg, sizeof(struct vstream_args))) {
+ pr_err("copy_from_user failed\n");
+ return -EFAULT;
+ }
+ res = vstream_command_table[cmd](&vstream_arg);
+ if (copy_to_user(arg, &vstream_arg, sizeof(struct vstream_args))) {
+ pr_err("copy_to_user failed\n");
+ return -EFAULT;
+ }
+
+ return res;
+}
diff --git a/kernel/ucc_sched/Makefile b/kernel/ucc_sched/Makefile
new file mode 100644
index 000000000000..4a41f07d091c
--- /dev/null
+++ b/kernel/ucc_sched/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_XPU_SCHEDULE) += core.o
diff --git a/kernel/ucc_sched/core.c b/kernel/ucc_sched/core.c
new file mode 100644
index 000000000000..4c7f1f59aeb9
--- /dev/null
+++ b/kernel/ucc_sched/core.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Tue Jan 17 22:19:17 2023
+ */
+
+#include <uapi/linux/sched/types.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/ucc_sched.h>
+
+#include "ucc_sched.h"
+#include "../sched/sched.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ucc_sched.h>
+
+#define MAX_XCU_NUM (100)
+#define TS_SQ_TRANS_TASK_THRESHOLD (20)
+
+static struct xcu xcu_manager[MAX_XCU_NUM];
+static int num_active_xcu;
+raw_spinlock_t xcu_mgr_lock;
+int sysctl_ucc_sched_rcv_timeout_ms = 10;
+
+static struct task_struct vstream_idle_task;
+static struct vstream_info vstream_idle = {
+ .vstreamId = UINT_MAX,
+ .p = &vstream_idle_task,
+};
+
+struct sched_args {
+ int cu_id;
+};
+
+static inline int is_xcu_offline(struct xcu *cu)
+{
+ return cu->state == XCU_INACTIVE;
+}
+
+void ucc_set_vstream_state(struct vstream_info *vinfo, int state)
+{
+ vinfo->se.state = state;
+}
+
+static inline int should_se_run(struct ucc_se *se)
+{
+ return se->state != SE_BLOCK && se->state != SE_DEAD;
+}
+
+static inline void update_stats_run_start(struct xcu *cu,
+ struct ucc_se *se)
+{
+ u64 start;
+
+ if (!schedstat_enabled())
+ return;
+
+ start = ktime_get_boot_ns();
+ __schedstat_set(se->statistics.run_start, start);
+}
+
+static inline void update_stats_run_end(struct xcu *cu,
+ struct ucc_se *se)
+{
+ struct vstream_info *vinfo;
+ u64 delta;
+
+ if (!schedstat_enabled())
+ return;
+
+ delta = ktime_get_boot_ns() - schedstat_val(se->statistics.run_start);
+ vinfo = container_of(se, struct vstream_info, se);
+ trace_ucc_sched_stat_run(vinfo, delta, se->is_timeout);
+
+ __schedstat_set(se->statistics.run_max,
+ max(schedstat_val(se->statistics.run_max), delta));
+ __schedstat_inc(se->statistics.run_count);
+ __schedstat_add(se->statistics.run_sum, delta);
+ __schedstat_set(se->statistics.run_start, 0);
+}
+
+static inline void update_stats_preempt_start(struct xcu *cu,
+ struct ucc_se *se)
+{
+ u64 wait_start;
+
+ if (!schedstat_enabled())
+ return;
+
+ wait_start = ktime_get_boot_ns();
+ __schedstat_set(se->statistics.preempt_start, wait_start);
+}
+
+static inline void update_stats_wait_start(struct xcu *cu, struct ucc_se *se)
+{
+ u64 wait_start;
+
+ if (!schedstat_enabled())
+ return;
+
+ wait_start = ktime_get_boot_ns();
+ __schedstat_set(se->statistics.wait_start, wait_start);
+}
+
+static inline void update_stats_wait_end(struct xcu *cu, struct ucc_se *se)
+{
+ struct vstream_info *vinfo;
+ u64 delta, preempt_delta;
+
+ if (!schedstat_enabled())
+ return;
+
+ delta = ktime_get_boot_ns() - schedstat_val(se->statistics.wait_start);
+ vinfo = container_of(se, struct vstream_info, se);
+ trace_ucc_sched_stat_wait(vinfo, delta);
+
+ __schedstat_set(se->statistics.wait_max,
+ max(schedstat_val(se->statistics.wait_max), delta));
+ __schedstat_inc(se->statistics.wait_count);
+ __schedstat_add(se->statistics.wait_sum, delta);
+ __schedstat_set(se->statistics.wait_start, 0);
+
+ if (se->statistics.preempt_start) {
+ preempt_delta = ktime_get_boot_ns() -
+ schedstat_val(se->statistics.preempt_start);
+ trace_ucc_sched_stat_preempt(vinfo, preempt_delta);
+
+ __schedstat_set(se->statistics.preempt_max,
+ max(schedstat_val(se->statistics.preempt_max),
+ preempt_delta));
+ __schedstat_inc(se->statistics.preempt_count);
+ __schedstat_add(se->statistics.preempt_sum, preempt_delta);
+ __schedstat_set(se->statistics.preempt_start, 0);
+ }
+}
+
+void ucc_dump_statistics_info(struct ucc_se *se)
+{
+ struct vstream_info *vinfo = container_of(se, struct vstream_info, se);
+
+ pr_info("comm %s pid %d vstreamId %d kernel_sum %llu wait_count %llu wait_max %llu[ns] wait_sum %llu[ns] preempt_count %llu preempt_max %llu[ns] preempt_sum %llu[ns]\n",
+ vinfo->p->comm,
+ vinfo->p->pid,
+ vinfo->vstreamId,
+ vinfo->se.statistics.kernel_sum,
+ vinfo->se.statistics.wait_count,
+ vinfo->se.statistics.wait_max,
+ vinfo->se.statistics.wait_sum,
+ vinfo->se.statistics.preempt_count,
+ vinfo->se.statistics.preempt_max,
+ vinfo->se.statistics.preempt_sum);
+}
+
+static void put_prev_entity(struct xcu *cu, struct ucc_se *prev)
+{
+ if (!prev)
+ return;
+
+ if (prev->on_cu)
+ update_stats_wait_start(cu, prev);
+
+ prev->state = SE_READY;
+ cu->curr_se->state = SE_RUNNING;
+}
+
+static void set_next_entity(struct xcu *cu, struct ucc_se *se)
+{
+ if (se->on_cu && se != cu->curr_se)
+ update_stats_wait_end(cu, se);
+
+ cu->curr_se = se;
+}
+
+static void dequeue_ucc_se(struct ucc_se *se, struct xcu *cu)
+{
+ raw_spin_lock(&cu->xcu_lock);
+ if (!se->on_cu) {
+ raw_spin_unlock(&cu->xcu_lock);
+ return;
+ }
+
+ se->on_cu = 0;
+
+ list_del_init(&se->run_list);
+
+ if (list_empty(cu->queue + se->prio))
+ __clear_bit(se->prio, cu->bitmap);
+ cu->rt_nr_running--;
+
+ if (se != cu->curr_se)
+ update_stats_wait_end(cu, se);
+
+ if (cu->curr_se == se)
+ cu->curr_se = NULL;
+
+ raw_spin_unlock(&cu->xcu_lock);
+}
+
+static void enqueue_ucc_se(struct ucc_se *se, struct xcu *cu)
+{
+ struct list_head *queue = cu->queue + se->prio;
+
+ raw_spin_lock(&cu->xcu_lock);
+ if (se->on_cu) {
+ raw_spin_unlock(&cu->xcu_lock);
+ return;
+ }
+ se->on_cu = 1;
+ se->is_timeout = 0;
+ list_add_tail(&se->run_list, queue);
+ __set_bit(se->prio, cu->bitmap);
+ cu->rt_nr_running++;
+
+ update_stats_wait_start(cu, se);
+
+ raw_spin_unlock(&cu->xcu_lock);
+}
+
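+/*
+ * Pick the least-loaded XCU that matches the vstream's device and ts ids,
+ * judged by rt_nr_running at selection time.
+ */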
+static struct xcu *ucc_select_cu(struct ucc_se *se)
+{
+ struct vstream_info *vstream_info;
+ int min_nr_running = INT_MAX;
+ struct xcu *cu;
+ int select_cu = 0;
+ int cu_id;
+
+ vstream_info = container_of(se, struct vstream_info, se);
+ for (cu_id = 0; cu_id < num_active_xcu; cu_id++) {
+ cu = &xcu_manager[cu_id];
+
+ if (vstream_info->devId != cu->dev_id ||
+ vstream_info->tsId != cu->ts_id)
+ continue;
+
+ if (cu->rt_nr_running < min_nr_running) {
+ min_nr_running = cu->rt_nr_running;
+ select_cu = cu_id;
+ }
+ }
+
+ vstream_info->cu_id = select_cu;
+ return &xcu_manager[select_cu];
+}
+
+static int ucc_check_preempt(struct ucc_se *se, struct xcu *cu)
+{
+ struct vstream_info *vinfo_curr, *vinfo;
+ struct ucc_se *curr_se;
+
+ curr_se = cu->curr_se;
+ if (!curr_se)
+ return 1;
+
+ vinfo = container_of(se, struct vstream_info, se);
+ vinfo_curr = container_of(curr_se, struct vstream_info, se);
+ if (vinfo_curr->p->ucc_priority > vinfo->p->ucc_priority) {
+ update_stats_preempt_start(cu, se);
+ curr_se->flag = UCC_TIF_PREEMPT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline void ucc_wakeup_idle_worker(struct xcu *cu)
+{
+ wake_up_state(cu->worker, TASK_INTERRUPTIBLE);
+}
+
+static inline void ucc_wakeup_running_worker(struct xcu *cu)
+{
+ wake_up_state(cu->worker, TASK_UNINTERRUPTIBLE);
+}
+
+int ucc_schedule(int cu_id)
+{
+ struct xcu *cu;
+
+ cu = &xcu_manager[cu_id];
+ cu->is_wake = 1;
+ ucc_wakeup_running_worker(cu);
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_schedule);
+
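+/*
+ * Make an se runnable: mark it ready, enqueue it on a selected XCU and,
+ * if it should preempt the currently running se, wake that XCU's worker.
+ */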
+int ucc_wake_up(struct ucc_se *se)
+{
+ struct xcu *cu;
+
+ raw_spin_lock(&se->se_lock);
+ if (se->on_cu) {
+ raw_spin_unlock(&se->se_lock);
+ return 0;
+ }
+
+ if (se->state == SE_BLOCK)
+ se->state = SE_READY;
+
+ cu = ucc_select_cu(se);
+ if (!cu) {
+ raw_spin_unlock(&se->se_lock);
+ return -1;
+ }
+
+ enqueue_ucc_se(se, cu);
+ if (ucc_check_preempt(se, cu))
+ ucc_wakeup_idle_worker(cu);
+
+ raw_spin_unlock(&se->se_lock);
+
+ return 0;
+}
+
+static struct ucc_se *pick_next_ucc_se(struct xcu *cu)
+{
+ struct ucc_se *se;
+ struct list_head *queue;
+ int idx;
+
+ if (!cu->rt_nr_running)
+ return NULL;
+
+ idx = sched_find_first_bit(cu->bitmap);
+ BUG_ON(idx >= MAX_UCC_PRIO);
+
+ queue = cu->queue + idx;
+ se = list_entry(queue->next, struct ucc_se, run_list);
+
+ return se;
+}
+
+static int ucc_submit_kernel(struct xcu *cu, struct ucc_se *se)
+{
+ struct vstream_info *vstream_info;
+ struct xpu_group *group;
+ struct tsdrv_ctx *ctx;
+	int kernel_num;
+
+ vstream_info = container_of(se, struct vstream_info, se);
+ ctx = vstream_info->privdata;
+
+ group = vstream_info->group;
+
+ kernel_num = xpu_run(group, vstream_info, ctx);
+ if (kernel_num <= 0)
+ return kernel_num;
+
+	/* Advance the vstream head past the kernels just submitted. */
+	update_vstream_head(vstream_info, kernel_num);
+
+ return kernel_num;
+}
+
+static inline void ucc_wait_idle(struct xcu *cu)
+{
+ cu->state = XCU_IDLE;
+
+ do {
+ schedule_timeout_interruptible(1);
+ } while (cu->rt_nr_running == 0);
+
+ cu->state = XCU_BUSY;
+}
+
+static inline void ucc_wait_running(struct xcu *cu, struct ucc_se *se)
+{
+ int cnt = 1;
+
+ do {
+ schedule_timeout_uninterruptible(
+ msecs_to_jiffies(sysctl_ucc_sched_rcv_timeout_ms));
+ } while (cu->is_wake == 0 && --cnt > 0);
+
+ if (cnt == 0) {
+ __schedstat_inc(se->statistics.timeout_count);
+ se->is_timeout = 1;
+ }
+}
+
+static inline void clear_se_flag(struct ucc_se *se)
+{
+ if (se)
+ se->flag = UCC_TIF_NONE;
+}
+
+void ucc_dequeue_task(struct vstream_info *vInfo)
+{
+ struct xcu *cu = &xcu_manager[vInfo->cu_id];
+ struct ucc_se *se = &vInfo->se;
+
+ raw_spin_lock(&se->se_lock);
+ dequeue_ucc_se(se, cu);
+ raw_spin_unlock(&se->se_lock);
+}
+
+/*
+ * Dynamic padding: select kernels with no QoS conflicts with the current
+ * ucc_se to fill the cu. Currently a stub.
+ */
+static void dynamic_padding(struct xcu *cu, struct ucc_se *se)
+{
+}
+
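+/*
+ * Per-XCU scheduler loop: keep submitting kernels from the current se
+ * until its queue is empty or, once it is flagged for preemption and has
+ * used its ucc_step budget, switch to the highest-priority ready se;
+ * sleep in ucc_wait_idle() while the run queue is empty.
+ */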
+static int __ucc_schedule(void *args)
+{
+ struct sched_args *sargs = (struct sched_args *)args;
+ int cu_id = sargs->cu_id;
+ struct xcu *cu = &xcu_manager[cu_id];
+ struct ucc_se *se = NULL, *curr_se = NULL;
+ struct ucc_se *prev_se = NULL;
+ struct vstream_info *vinfo;
+ int send_cnt = 0;
+ int kernel_num, preempt;
+
+ while (!is_xcu_offline(cu)) {
+ raw_spin_lock(&cu->xcu_lock);
+ cu->is_sched = 0;
+ prev_se = cu->curr_se;
+
+ preempt = 0;
+ if (prev_se) {
+ if (prev_se->flag != UCC_TIF_PREEMPT)
+ goto submit_kernel;
+
+ vinfo = container_of(prev_se, struct vstream_info, se);
+ if (send_cnt < vinfo->p->ucc_step)
+ goto submit_kernel;
+
+ preempt = 1;
+ }
+
+ clear_se_flag(prev_se);
+ se = pick_next_ucc_se(cu);
+ if (!se) {
+ cu->is_sched = 1;
+ raw_spin_unlock(&cu->xcu_lock);
+ trace_ucc_sched_switch(0, &vstream_idle);
+ ucc_wait_idle(cu);
+ continue;
+ }
+
+ set_next_entity(cu, se);
+ if (se != prev_se) {
+ put_prev_entity(cu, prev_se);
+ vinfo = container_of(se, struct vstream_info, se);
+ trace_ucc_sched_switch(preempt, vinfo);
+ }
+ send_cnt = 0;
+submit_kernel:
+ curr_se = cu->curr_se;
+ dynamic_padding(cu, curr_se);
+ raw_spin_unlock(&cu->xcu_lock);
+
+ curr_se->is_timeout = 0;
+ kernel_num = ucc_submit_kernel(cu, curr_se);
+		/* No more kernels to submit: block this se. */
+ if (kernel_num <= 0 && !vstream_have_kernel(curr_se)) {
+ raw_spin_lock(&curr_se->se_lock);
+ curr_se->state = SE_BLOCK;
+ dequeue_ucc_se(curr_se, cu);
+ raw_spin_unlock(&curr_se->se_lock);
+ cu->is_sched = 1;
+ continue;
+ }
+ cu->is_sched = 1;
+
+ vinfo = container_of(curr_se, struct vstream_info, se);
+ if (vinfo->send_cnt > TS_SQ_TRANS_TASK_THRESHOLD) {
+ update_stats_run_start(cu, curr_se);
+			/* kernels have not finished yet */
+ if (!cu->is_wake)
+ ucc_wait_running(cu, curr_se);
+
+ update_stats_run_end(cu, curr_se);
+ cu->is_wake = 0;
+ vinfo->send_cnt = 0;
+ }
+
+ send_cnt += kernel_num;
+		schedstat_add(curr_se->statistics.kernel_sum, kernel_num);
+ }
+
+ return 0;
+}
+
+static void init_xcu_rq(struct xcu *cu)
+{
+ int i;
+
+ for (i = 0; i < MAX_UCC_PRIO; i++) {
+ INIT_LIST_HEAD(cu->queue + i);
+ __clear_bit(i, cu->bitmap);
+ }
+
+ /* delimiter for bitsearch: */
+ __set_bit(MAX_UCC_PRIO, cu->bitmap);
+ cu->rt_nr_running = 0;
+ raw_spin_lock_init(&cu->xcu_lock);
+}
+
+static int alloc_cu_id(void)
+{
+ int cu_id = -1;
+
+ raw_spin_lock(&xcu_mgr_lock);
+ if (num_active_xcu >= MAX_XCU_NUM) {
+ raw_spin_unlock(&xcu_mgr_lock);
+ return cu_id;
+ }
+
+ cu_id = num_active_xcu;
+ num_active_xcu++;
+ raw_spin_unlock(&xcu_mgr_lock);
+
+ return cu_id;
+}
+
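+/*
+ * Register cu_num compute units for (dev_id, ts_id): each gets an xcu
+ * slot, an initialized run queue and a dedicated SCHED_FIFO worker
+ * thread running __ucc_schedule().
+ */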
+int ucc_sched_register_xcu(int dev_id, int ts_id, int cu_num)
+{
+ int cu_id;
+ struct xcu *cu;
+ struct sched_args *args;
+ struct sched_param param = { .sched_priority = 1 };
+ char id_buf[16];
+ int i;
+
+ for (i = 0; i < cu_num; i++) {
+ cu_id = alloc_cu_id();
+ if (cu_id < 0) {
+ pr_err("alloc cu id failed\n");
+ return -1;
+ }
+
+ cu = &xcu_manager[cu_id];
+ cu->cu_id = cu_id;
+ cu->state = XCU_IDLE;
+ cu->curr_se = NULL;
+ cu->dev_id = dev_id;
+ cu->ts_id = ts_id;
+ cu->is_wake = 0;
+ init_xcu_rq(cu);
+
+ args = kzalloc(sizeof(struct sched_args), GFP_KERNEL);
+ if (!args)
+ return -1;
+
+ args->cu_id = cu->cu_id;
+ snprintf(id_buf, sizeof(id_buf), "%d:%d:%d",
+ cu->cu_id, cu->dev_id, cu->ts_id);
+		cu->worker = kthread_create_on_node(__ucc_schedule,
+						    (void *)args, NUMA_NO_NODE,
+						    "u_sched/%s", id_buf);
+		if (IS_ERR(cu->worker)) {
+			pr_err("create u_sched worker failed\n");
+			kfree(args);
+			return -1;
+		}
+		sched_setscheduler_nocheck(cu->worker, SCHED_FIFO, &param);
+		wake_up_process(cu->worker);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_sched_register_xcu);
+
+int ucc_sched_init(void)
+{
+ raw_spin_lock_init(&xcu_mgr_lock);
+ return 0;
+}
+
+int ucc_rt_nr_running(struct xcu *cu)
+{
+ return cu->rt_nr_running;
+}
+EXPORT_SYMBOL(ucc_rt_nr_running);
+
+struct xcu *ucc_get_xcu_by_id(int cu_id)
+{
+ return &xcu_manager[cu_id];
+}
+EXPORT_SYMBOL(ucc_get_xcu_by_id);
+
+int ucc_xcu_is_sched(int cu_id)
+{
+ return xcu_manager[cu_id].is_sched;
+}
+EXPORT_SYMBOL(ucc_xcu_is_sched);
diff --git a/kernel/ucc_sched/ucc_sched.h b/kernel/ucc_sched/ucc_sched.h
new file mode 100644
index 000000000000..30e2aa10cf2f
--- /dev/null
+++ b/kernel/ucc_sched/ucc_sched.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Tue Jan 17 22:27:22 2023
+ */
+#ifndef __UCC_SCHED_USCHED_H__
+#define __UCC_SCHED_USCHED_H__
+
+#include <linux/sched.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/vstream.h>
+
+/* For simplicity, only two priority levels are supported. */
+#define MAX_UCC_PRIO (2)
+
+enum xcu_state {
+ XCU_INACTIVE,
+ XCU_IDLE,
+ XCU_BUSY,
+ XCU_SUBMIT,
+};
+
+/*
+ * This is the abstraction object of the xpu computing unit.
+ */
+struct xcu {
+ int is_sched;
+ int cu_id;
+ int dev_id;
+ int ts_id;
+ int rt_nr_running;
+ int is_wake;
+ struct task_struct *worker;
+	/* One extra bit serves as the delimiter set by init_xcu_rq(). */
+	DECLARE_BITMAP(bitmap, MAX_UCC_PRIO + 1);
+ struct list_head queue[MAX_UCC_PRIO];
+ enum xcu_state state;
+ struct ucc_se *curr_se;
+ raw_spinlock_t xcu_lock;
+};
+
+#endif
--
2.34.1