Kernel mailing list archive, September 2023

add drm support for Inspur BMC
by Hongchen Zhang 13 Sep '23
LoongArch inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I80YFC
------------------------------------------
Add DRM support for the Inspur BMC display controller.
Signed-off-by: Hongchen Zhang <zhanghongchen(a)loongson.cn>
---
arch/loongarch/configs/loongson3_defconfig | 4 +-
drivers/gpu/drm/Kconfig | 2 +
drivers/gpu/drm/Makefile | 1 +
drivers/gpu/drm/inspur/Kconfig | 11 +
drivers/gpu/drm/inspur/Makefile | 5 +
drivers/gpu/drm/inspur/inspur_cursor.c | 58 +++
drivers/gpu/drm/inspur/inspur_drm_de.c | 513 +++++++++++++++++++++
drivers/gpu/drm/inspur/inspur_drm_drv.c | 456 ++++++++++++++++++
drivers/gpu/drm/inspur/inspur_drm_drv.h | 116 +++++
drivers/gpu/drm/inspur/inspur_drm_regs.h | 223 +++++++++
drivers/gpu/drm/inspur/inspur_drm_vdac.c | 117 +++++
drivers/gpu/drm/inspur/inspur_ttm.c | 36 ++
12 files changed, 1539 insertions(+), 3 deletions(-)
create mode 100644 drivers/gpu/drm/inspur/Kconfig
create mode 100644 drivers/gpu/drm/inspur/Makefile
create mode 100644 drivers/gpu/drm/inspur/inspur_cursor.c
create mode 100644 drivers/gpu/drm/inspur/inspur_drm_de.c
create mode 100644 drivers/gpu/drm/inspur/inspur_drm_drv.c
create mode 100644 drivers/gpu/drm/inspur/inspur_drm_drv.h
create mode 100644 drivers/gpu/drm/inspur/inspur_drm_regs.h
create mode 100644 drivers/gpu/drm/inspur/inspur_drm_vdac.c
create mode 100644 drivers/gpu/drm/inspur/inspur_ttm.c
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 6e0adea947f5..ec53e95bf30d 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -386,7 +386,6 @@ CONFIG_IP6_NF_SECURITY=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_IP6_NF_TARGET_NPT=m
-CONFIG_DECNET_NF_GRABULATOR=m
CONFIG_NF_TABLES_BRIDGE=m
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_BRIDGE_REJECT=m
@@ -458,8 +457,6 @@ CONFIG_NET_DSA_TAG_SJA1105=m
CONFIG_NET_DSA_TAG_TRAILER=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_VLAN_8021Q_MVRP=y
-CONFIG_DECNET=m
-CONFIG_DECNET_ROUTER=y
CONFIG_LLC2=m
CONFIG_ATALK=m
CONFIG_DEV_APPLETALK=m
@@ -1504,6 +1501,7 @@ CONFIG_DRM_NOUVEAU=m
CONFIG_DRM_VKMS=m
CONFIG_DRM_UDL=m
CONFIG_DRM_AST=y
+CONFIG_DRM_INSPUR=m
CONFIG_DRM_MGAG200=m
CONFIG_DRM_QXL=m
CONFIG_DRM_BOCHS=m
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index b37e6660dd4e..f6dcb60be551 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -315,6 +315,8 @@ source "drivers/gpu/drm/ast/Kconfig"
source "drivers/gpu/drm/loongson/Kconfig"
+source "drivers/gpu/drm/inspur/Kconfig"
+
source "drivers/gpu/drm/mgag200/Kconfig"
source "drivers/gpu/drm/armada/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index e9dd6847c9fa..e806bda8650a 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -125,3 +125,4 @@ obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
obj-$(CONFIG_DRM_MCDE) += mcde/
obj-$(CONFIG_DRM_TIDSS) += tidss/
obj-y += xlnx/
+obj-$(CONFIG_DRM_INSPUR) += inspur/
diff --git a/drivers/gpu/drm/inspur/Kconfig b/drivers/gpu/drm/inspur/Kconfig
new file mode 100644
index 000000000000..7c9ab5ad77ab
--- /dev/null
+++ b/drivers/gpu/drm/inspur/Kconfig
@@ -0,0 +1,11 @@
+config DRM_INSPUR
+ tristate "DRM Support for Inspur BMC"
+ depends on DRM && PCI && MMU
+ select DRM_KMS_HELPER
+ select DRM_VRAM_HELPER
+
+ help
+ Choose this option if you have an Inspur SoC chipset.
+ If M is selected, the module will be called inspur-drm.
+ If you use GNOME 3, set "WaylandEnable=false" in
+ /etc/gdm3/custom.conf and reboot.
diff --git a/drivers/gpu/drm/inspur/Makefile b/drivers/gpu/drm/inspur/Makefile
new file mode 100644
index 000000000000..31a5bfe79214
--- /dev/null
+++ b/drivers/gpu/drm/inspur/Makefile
@@ -0,0 +1,5 @@
+
+inspur-drm-y := inspur_drm_drv.o inspur_drm_de.o \
+ inspur_drm_vdac.o inspur_ttm.o inspur_cursor.o
+
+obj-$(CONFIG_DRM_INSPUR) += inspur-drm.o
diff --git a/drivers/gpu/drm/inspur/inspur_cursor.c b/drivers/gpu/drm/inspur/inspur_cursor.c
new file mode 100644
index 000000000000..e84136cbf4f7
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_cursor.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/pci.h>
+#include "inspur_drm_drv.h"
+#include "inspur_drm_regs.h"
+
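+/*
+ * Convert a 64x64 ARGB8888 cursor image into the hardware's 2-bit
+ * monochrome cursor format, packing four pixels per output byte:
+ * pixels with alpha below 0xe0 become 0 (transparent); opaque pixels
+ * become 1 or 2 depending on whether the low colour byte is below 0x80,
+ * selecting one of the two hardware cursor colours.
+ */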
+void colorcur2monocur(void *data, void *out)
+{
+ unsigned int *col = (unsigned int *)data;
+ unsigned char *mono = (unsigned char *)out;
+ unsigned char pixel = 0;
+ char bit_values;
+ int i;
+
+ for (i = 0; i < 64 * 64; i++) {
+ if (*col >> 24 < 0xe0) {
+ bit_values = 0;
+ } else {
+ int val = *col & 0xff;
+
+ if (val < 0x80)
+ bit_values = 1;
+ else
+ bit_values = 2;
+ }
+ col++;
+ /* Copy bits into cursor byte */
+ switch (i & 3) {
+ case 0:
+ pixel = bit_values;
+ break;
+
+ case 1:
+ pixel |= bit_values << 2;
+ break;
+
+ case 2:
+ pixel |= bit_values << 4;
+ break;
+
+ case 3:
+ pixel |= bit_values << 6;
+ *mono = pixel;
+ mono++;
+ pixel = 0;
+ break;
+ }
+ }
+}
+
+#define HW_FLAG_OFFSET 0x01ffff00
+#define HW_FLAG_ENABLE 0x1bd40750
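+/*
+ * Read the KVM hardware-cursor flag from the reserved area at the end
+ * of VRAM. Note that this currently always returns 0 (the flag value is
+ * only logged), so callers never enable the hardware-cursor paths.
+ */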
+unsigned char getKVMHWCursorSetting(struct inspur_drm_private *priv)
+{
+ unsigned int value = *(unsigned int *)(priv->fb_map + HW_FLAG_OFFSET);
+
+ DRM_DEBUG_KMS("HW_FLAG = %x\n", value);
+ return 0;
+}
diff --git a/drivers/gpu/drm/inspur/inspur_drm_de.c b/drivers/gpu/drm/inspur/inspur_drm_de.c
new file mode 100644
index 000000000000..de31bb79129b
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_drm_de.c
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_fourcc.h>
+
+#include "inspur_drm_drv.h"
+#include "inspur_drm_regs.h"
+
+struct inspur_display_pll_config {
+ unsigned long hdisplay;
+ unsigned long vdisplay;
+ u32 pll1_config_value;
+ u32 pll2_config_value;
+};
+
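+/*
+ * Fixed mode-to-PLL lookup table: only the resolutions listed here are
+ * accepted by inspur_crtc_mode_valid(), and get_pll_config() falls back
+ * to the 25 MHz (640x480) entry for anything else.
+ */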
+static const struct inspur_display_pll_config inspur_pll_table[] = {
+ {640, 480, CRT_PLL1_NS_25MHZ, CRT_PLL2_NS_25MHZ},
+ {800, 600, CRT_PLL1_NS_40MHZ, CRT_PLL2_NS_40MHZ},
+ {1024, 768, CRT_PLL1_NS_65MHZ, CRT_PLL2_NS_65MHZ},
+ {1280, 800, CRT_PLL1_NS_83MHZ, CRT_PLL2_NS_83MHZ},
+ {1280, 1024, CRT_PLL1_NS_108MHZ, CRT_PLL2_NS_108MHZ},
+ {1440, 900, CRT_PLL1_NS_106MHZ, CRT_PLL2_NS_106MHZ},
+ {1680, 1050, CRT_PLL1_NS_146MHZ, CRT_PLL2_NS_146MHZ},
+ {1920, 1080, CRT_PLL1_NS_148MHZ, CRT_PLL2_NS_148MHZ},
+ {1920, 1200, CRT_PLL1_NS_193MHZ, CRT_PLL2_NS_193MHZ},
+};
+
+#define PADDING(align, data) (((data) + (align) - 1) & (~((align) - 1)))
+
+static int inspur_plane_atomic_check(struct drm_plane *plane,
+ struct drm_plane_state *state)
+{
+ struct drm_framebuffer *fb = state->fb;
+ struct drm_crtc *crtc = state->crtc;
+ struct drm_crtc_state *crtc_state;
+ u32 src_w = state->src_w >> 16;
+ u32 src_h = state->src_h >> 16;
+
+ if (!crtc || !fb)
+ return 0;
+
+ crtc_state = drm_atomic_get_crtc_state(state->state, crtc);
+ if (IS_ERR(crtc_state))
+ return PTR_ERR(crtc_state);
+
+ if (src_w != state->crtc_w || src_h != state->crtc_h) {
+ DRM_DEBUG_ATOMIC("scale not support\n");
+ return -EINVAL;
+ }
+
+ if (state->crtc_x < 0 || state->crtc_y < 0) {
+ DRM_DEBUG_ATOMIC("crtc_x/y of drm_plane state is invalid\n");
+ return -EINVAL;
+ }
+
+ if (!crtc_state->enable)
+ return 0;
+
+ if (state->crtc_x + state->crtc_w >
+ crtc_state->adjusted_mode.hdisplay ||
+ state->crtc_y + state->crtc_h >
+ crtc_state->adjusted_mode.vdisplay) {
+ DRM_DEBUG_ATOMIC("visible portion of plane is invalid\n");
+ return -EINVAL;
+ }
+
+ if (state->fb->pitches[0] % 128 != 0) {
+ DRM_DEBUG_ATOMIC("wrong stride with 128-byte aligned\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void inspur_plane_atomic_update(struct drm_plane *plane,
+ struct drm_plane_state *old_state)
+{
+ struct drm_plane_state *state = plane->state;
+ u32 reg;
+ int ret;
+ s64 gpu_addr = 0;
+ unsigned int line_l;
+ struct inspur_drm_private *priv = plane->dev->dev_private;
+ struct drm_gem_vram_object *gbo;
+
+ if (!state->fb)
+ return;
+
+ gbo = drm_gem_vram_of_gem(state->fb->obj[0]);
+
+ ret = drm_gem_vram_pin(gbo, DRM_GEM_VRAM_PL_FLAG_VRAM);
+ if (ret) {
+ DRM_ERROR("failed to pin bo: %d", ret);
+ return;
+ }
+ gpu_addr = drm_gem_vram_offset(gbo);
+ if (gpu_addr < 0) {
+ drm_gem_vram_unpin(gbo);
+ return;
+ }
+
+ writel(gpu_addr, priv->mmio + INSPUR_CRT_FB_ADDRESS);
+
+ reg = state->fb->width * (state->fb->format->cpp[0]);
+
+ line_l = state->fb->pitches[0];
+ writel(INSPUR_FIELD(INSPUR_CRT_FB_WIDTH_WIDTH, reg) |
+ INSPUR_FIELD(INSPUR_CRT_FB_WIDTH_OFFS, line_l),
+ priv->mmio + INSPUR_CRT_FB_WIDTH);
+
+ /* Set pixel format: the DISP_CTL format field is cpp * 8 / 16, i.e. 1 for 16 bpp, 2 for 32 bpp */
+ reg = readl(priv->mmio + INSPUR_CRT_DISP_CTL);
+ reg &= ~INSPUR_CRT_DISP_CTL_FORMAT_MASK;
+ reg |= INSPUR_FIELD(INSPUR_CRT_DISP_CTL_FORMAT,
+ state->fb->format->cpp[0] * 8 / 16);
+ writel(reg, priv->mmio + INSPUR_CRT_DISP_CTL);
+}
+
+static const u32 channel_formats1[] = {
+ DRM_FORMAT_RGB565, DRM_FORMAT_BGR565, DRM_FORMAT_RGB888,
+ DRM_FORMAT_BGR888, DRM_FORMAT_XRGB8888, DRM_FORMAT_XBGR8888,
+ DRM_FORMAT_RGBA8888, DRM_FORMAT_BGRA8888, DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_ABGR8888
+};
+
+static struct drm_plane_funcs inspur_plane_funcs = {
+ .update_plane = drm_atomic_helper_update_plane,
+ .disable_plane = drm_atomic_helper_disable_plane,
+ .destroy = drm_plane_cleanup,
+ .reset = drm_atomic_helper_plane_reset,
+ .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
+};
+
+static const struct drm_plane_helper_funcs inspur_plane_helper_funcs = {
+ .atomic_check = inspur_plane_atomic_check,
+ .atomic_update = inspur_plane_atomic_update,
+};
+
+static struct drm_plane *inspur_plane_init(struct inspur_drm_private *priv)
+{
+ struct drm_device *dev = priv->dev;
+ struct drm_plane *plane;
+ int ret = 0;
+
+ plane = devm_kzalloc(dev->dev, sizeof(*plane), GFP_KERNEL);
+ if (!plane) {
+ DRM_ERROR("failed to alloc memory when init plane\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ ret = drm_universal_plane_init(dev, plane, 1, &inspur_plane_funcs,
+ channel_formats1,
+ ARRAY_SIZE(channel_formats1),
+ NULL,
+ DRM_PLANE_TYPE_PRIMARY,
+ NULL);
+ if (ret) {
+ DRM_ERROR("failed to init plane: %d\n", ret);
+ return ERR_PTR(ret);
+ }
+
+ drm_plane_helper_add(plane, &inspur_plane_helper_funcs);
+ return plane;
+}
+
+static void inspur_crtc_dpms(struct drm_crtc *crtc, int dpms)
+{
+ struct inspur_drm_private *priv = crtc->dev->dev_private;
+ unsigned int reg;
+
+ reg = readl(priv->mmio + INSPUR_CRT_DISP_CTL);
+ reg &= ~INSPUR_CRT_DISP_CTL_DPMS_MASK;
+ reg |= INSPUR_FIELD(INSPUR_CRT_DISP_CTL_DPMS, dpms);
+ reg &= ~INSPUR_CRT_DISP_CTL_TIMING_MASK;
+ if (dpms == INSPUR_CRT_DPMS_ON)
+ reg |= INSPUR_CRT_DISP_CTL_TIMING(1);
+ writel(reg, priv->mmio + INSPUR_CRT_DISP_CTL);
+}
+
+
+static void inspur_crtc_atomic_enable(struct drm_crtc *crtc,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
+ struct drm_atomic_state *state)
+#else
+ struct drm_crtc_state *old_state)
+#endif
+{
+ unsigned int reg;
+ struct inspur_drm_private *priv = crtc->dev->dev_private;
+
+ inspur_set_power_mode(priv, INSPUR_PW_MODE_CTL_MODE_MODE0);
+
+ /* Enable display power gate & LOCALMEM power gate*/
+ reg = readl(priv->mmio + INSPUR_CURRENT_GATE);
+ reg &= ~INSPUR_CURR_GATE_LOCALMEM_MASK;
+ reg &= ~INSPUR_CURR_GATE_DISPLAY_MASK;
+ reg |= INSPUR_CURR_GATE_LOCALMEM(1);
+ reg |= INSPUR_CURR_GATE_DISPLAY(1);
+ inspur_set_current_gate(priv, reg);
+ inspur_crtc_dpms(crtc, INSPUR_CRT_DPMS_ON);
+}
+
+static void inspur_crtc_atomic_disable(struct drm_crtc *crtc,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
+ struct drm_atomic_state *state)
+#else
+ struct drm_crtc_state *old_state)
+#endif
+{
+ unsigned int reg;
+ struct inspur_drm_private *priv = crtc->dev->dev_private;
+
+ inspur_crtc_dpms(crtc, INSPUR_CRT_DPMS_OFF);
+
+ inspur_set_power_mode(priv, INSPUR_PW_MODE_CTL_MODE_SLEEP);
+
+ /* Enable display power gate & LOCALMEM power gate*/
+ reg = readl(priv->mmio + INSPUR_CURRENT_GATE);
+ reg &= ~INSPUR_CURR_GATE_LOCALMEM_MASK;
+ reg &= ~INSPUR_CURR_GATE_DISPLAY_MASK;
+ reg |= INSPUR_CURR_GATE_LOCALMEM(0);
+ reg |= INSPUR_CURR_GATE_DISPLAY(0);
+ inspur_set_current_gate(priv, reg);
+}
+
+static enum drm_mode_status
+inspur_crtc_mode_valid(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode)
+{
+ int i = 0;
+ int vrefresh = drm_mode_vrefresh(mode);
+
+ if (vrefresh < 59 || vrefresh > 61)
+ return MODE_NOCLOCK;
+
+ for (i = 0; i < ARRAY_SIZE(inspur_pll_table); i++) {
+ if (inspur_pll_table[i].hdisplay == mode->hdisplay &&
+ inspur_pll_table[i].vdisplay == mode->vdisplay)
+ return MODE_OK;
+ }
+
+ return MODE_BAD;
+}
+
+static void set_vclock_inspur(struct drm_device *dev, unsigned long pll)
+{
+ u32 val;
+ struct inspur_drm_private *priv = dev->dev_private;
+
+ val = readl(priv->mmio + CRT_PLL1_NS);
+ val &= ~(CRT_PLL1_NS_OUTER_BYPASS(1));
+ writel(val, priv->mmio + CRT_PLL1_NS);
+
+ val = CRT_PLL1_NS_INTER_BYPASS(1) | CRT_PLL1_NS_POWERON(1);
+ writel(val, priv->mmio + CRT_PLL1_NS);
+
+ writel(pll, priv->mmio + CRT_PLL1_NS);
+
+ usleep_range(1000, 2000);
+
+ val = pll & ~(CRT_PLL1_NS_POWERON(1));
+ writel(val, priv->mmio + CRT_PLL1_NS);
+
+ usleep_range(1000, 2000);
+
+ val &= ~(CRT_PLL1_NS_INTER_BYPASS(1));
+ writel(val, priv->mmio + CRT_PLL1_NS);
+
+ usleep_range(1000, 2000);
+
+ val |= CRT_PLL1_NS_OUTER_BYPASS(1);
+ writel(val, priv->mmio + CRT_PLL1_NS);
+}
+
+static void get_pll_config(unsigned long x, unsigned long y,
+ u32 *pll1, u32 *pll2)
+{
+ int i;
+ int count = ARRAY_SIZE(inspur_pll_table);
+
+ for (i = 0; i < count; i++) {
+ if (inspur_pll_table[i].hdisplay == x &&
+ inspur_pll_table[i].vdisplay == y) {
+ *pll1 = inspur_pll_table[i].pll1_config_value;
+ *pll2 = inspur_pll_table[i].pll2_config_value;
+ return;
+ }
+ }
+
+ /* if found none, we use default value */
+ *pll1 = CRT_PLL1_NS_25MHZ;
+ *pll2 = CRT_PLL2_NS_25MHZ;
+}
+
+/*
+ * This function takes care of the extra registers and bit fields
+ * required to set up a mode on the board.
+ * Explanation of the Display Control register: the FPGA only supports
+ * 7 predefined pixel clocks, and the clock select is in bits 4:0 of
+ * register 0x802a8.
+ */
+static unsigned int display_ctrl_adjust(struct drm_device *dev,
+ struct drm_display_mode *mode,
+ unsigned int ctrl)
+{
+ unsigned long x, y;
+ u32 pll1; /* bit[31:0] of PLL */
+ u32 pll2; /* bit[63:32] of PLL */
+ struct inspur_drm_private *priv = dev->dev_private;
+
+ x = mode->hdisplay;
+ y = mode->vdisplay;
+
+ get_pll_config(x, y, &pll1, &pll2);
+ writel(pll2, priv->mmio + CRT_PLL2_NS);
+ set_vclock_inspur(dev, pll1);
+
+ /*
+ * The inspur chip has to set up the top-left and bottom-right
+ * registers as well.
+ * Note that a normal chip only uses those two registers for
+ * auto-centering mode.
+ */
+ writel(INSPUR_FIELD(INSPUR_CRT_AUTO_CENTERING_TL_TOP, 0) |
+ INSPUR_FIELD(INSPUR_CRT_AUTO_CENTERING_TL_LEFT, 0),
+ priv->mmio + INSPUR_CRT_AUTO_CENTERING_TL);
+
+ writel(INSPUR_FIELD(INSPUR_CRT_AUTO_CENTERING_BR_BOTTOM, y - 1) |
+ INSPUR_FIELD(INSPUR_CRT_AUTO_CENTERING_BR_RIGHT, x - 1),
+ priv->mmio + INSPUR_CRT_AUTO_CENTERING_BR);
+
+ /*
+ * Assume common fields in ctrl have been properly set before
+ * calling this function.
+ * This function only sets the extra fields in ctrl.
+ */
+
+ /* Set bit 25 of display controller: Select CRT or VGA clock */
+ ctrl &= ~INSPUR_CRT_DISP_CTL_CRTSELECT_MASK;
+ ctrl &= ~INSPUR_CRT_DISP_CTL_CLOCK_PHASE_MASK;
+
+ ctrl |= INSPUR_CRT_DISP_CTL_CRTSELECT(INSPUR_CRTSELECT_CRT);
+
+ /* clock_phase_polarity is 0 */
+ ctrl |= INSPUR_CRT_DISP_CTL_CLOCK_PHASE(0);
+
+ writel(ctrl, priv->mmio + INSPUR_CRT_DISP_CTL);
+
+ return ctrl;
+}
+
+static void inspur_crtc_mode_set_nofb(struct drm_crtc *crtc)
+{
+ unsigned int val;
+ struct drm_display_mode *mode = &crtc->state->mode;
+ struct drm_device *dev = crtc->dev;
+ struct inspur_drm_private *priv = dev->dev_private;
+ int width = mode->hsync_end - mode->hsync_start;
+ int height = mode->vsync_end - mode->vsync_start;
+
+ //writel(format_pll_reg(), priv->mmio + INSPUR_CRT_PLL_CTRL);
+ writel(INSPUR_FIELD(INSPUR_CRT_HORZ_TOTAL_TOTAL, mode->htotal - 1) |
+ INSPUR_FIELD(INSPUR_CRT_HORZ_TOTAL_DISP_END, mode->hdisplay - 1),
+ priv->mmio + INSPUR_CRT_HORZ_TOTAL);
+
+ writel(INSPUR_FIELD(INSPUR_CRT_HORZ_SYNC_WIDTH, width) |
+ INSPUR_FIELD(INSPUR_CRT_HORZ_SYNC_START, mode->hsync_start - 1),
+ priv->mmio + INSPUR_CRT_HORZ_SYNC);
+
+ writel(INSPUR_FIELD(INSPUR_CRT_VERT_TOTAL_TOTAL, mode->vtotal - 1) |
+ INSPUR_FIELD(INSPUR_CRT_VERT_TOTAL_DISP_END, mode->vdisplay - 1),
+ priv->mmio + INSPUR_CRT_VERT_TOTAL);
+
+ writel(INSPUR_FIELD(INSPUR_CRT_VERT_SYNC_HEIGHT, height) |
+ INSPUR_FIELD(INSPUR_CRT_VERT_SYNC_START, mode->vsync_start - 1),
+ priv->mmio + INSPUR_CRT_VERT_SYNC);
+
+ val = INSPUR_FIELD(INSPUR_CRT_DISP_CTL_VSYNC_PHASE, 0);
+ val |= INSPUR_FIELD(INSPUR_CRT_DISP_CTL_HSYNC_PHASE, 0);
+ val |= INSPUR_CRT_DISP_CTL_TIMING(1);
+ val |= INSPUR_CRT_DISP_CTL_PLANE(1);
+
+ display_ctrl_adjust(dev, mode, val);
+}
+
+static void inspur_crtc_atomic_begin(struct drm_crtc *crtc,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
+ struct drm_atomic_state *state)
+#else
+ struct drm_crtc_state *old_state)
+#endif
+{
+ unsigned int reg;
+ struct drm_device *dev = crtc->dev;
+ struct inspur_drm_private *priv = dev->dev_private;
+
+ inspur_set_power_mode(priv, INSPUR_PW_MODE_CTL_MODE_MODE0);
+
+ /* Enable display power gate & LOCALMEM power gate*/
+ reg = readl(priv->mmio + INSPUR_CURRENT_GATE);
+ reg &= ~INSPUR_CURR_GATE_DISPLAY_MASK;
+ reg &= ~INSPUR_CURR_GATE_LOCALMEM_MASK;
+ reg |= INSPUR_CURR_GATE_DISPLAY(1);
+ reg |= INSPUR_CURR_GATE_LOCALMEM(1);
+ inspur_set_current_gate(priv, reg);
+
+ /* We can add more initialization as needed. */
+}
+
+static void inspur_crtc_atomic_flush(struct drm_crtc *crtc,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 11, 0)
+ struct drm_atomic_state *state)
+#else
+ struct drm_crtc_state *old_state)
+#endif
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&crtc->dev->event_lock, flags);
+ if (crtc->state->event)
+ drm_crtc_send_vblank_event(crtc, crtc->state->event);
+ crtc->state->event = NULL;
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+}
+
+static int inspur_crtc_enable_vblank(struct drm_crtc *crtc)
+{
+ struct inspur_drm_private *priv = crtc->dev->dev_private;
+
+ writel(INSPUR_RAW_INTERRUPT_EN_VBLANK(1),
+ priv->mmio + INSPUR_RAW_INTERRUPT_EN);
+
+ return 0;
+}
+
+static void inspur_crtc_disable_vblank(struct drm_crtc *crtc)
+{
+ struct inspur_drm_private *priv = crtc->dev->dev_private;
+
+ writel(INSPUR_RAW_INTERRUPT_EN_VBLANK(0),
+ priv->mmio + INSPUR_RAW_INTERRUPT_EN);
+}
+
+static const struct drm_crtc_funcs inspur_crtc_funcs = {
+ .page_flip = drm_atomic_helper_page_flip,
+ .set_config = drm_atomic_helper_set_config,
+ .destroy = drm_crtc_cleanup,
+ .reset = drm_atomic_helper_crtc_reset,
+ .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+ .enable_vblank = inspur_crtc_enable_vblank,
+ .disable_vblank = inspur_crtc_disable_vblank,
+
+};
+
+static const struct drm_crtc_helper_funcs inspur_crtc_helper_funcs = {
+ .mode_set_nofb = inspur_crtc_mode_set_nofb,
+ .atomic_begin = inspur_crtc_atomic_begin,
+ .atomic_flush = inspur_crtc_atomic_flush,
+ .atomic_enable = inspur_crtc_atomic_enable,
+ .atomic_disable = inspur_crtc_atomic_disable,
+ .mode_valid = inspur_crtc_mode_valid,
+};
+
+int inspur_de_init(struct inspur_drm_private *priv)
+{
+ struct drm_device *dev = priv->dev;
+ struct drm_crtc *crtc;
+ struct drm_plane *plane;
+ int ret;
+
+ plane = inspur_plane_init(priv);
+ if (IS_ERR(plane)) {
+ DRM_ERROR("failed to create plane: %ld\n", PTR_ERR(plane));
+ return PTR_ERR(plane);
+ }
+
+ crtc = devm_kzalloc(dev->dev, sizeof(*crtc), GFP_KERNEL);
+ if (!crtc) {
+ DRM_ERROR("failed to alloc memory when init crtc\n");
+ return -ENOMEM;
+ }
+
+ ret = drm_crtc_init_with_planes(dev, crtc, plane,
+ NULL, &inspur_crtc_funcs, NULL);
+ if (ret) {
+ DRM_ERROR("failed to init crtc: %d\n", ret);
+ return ret;
+ }
+
+ ret = drm_mode_crtc_set_gamma_size(crtc, 256);
+ if (ret) {
+ DRM_ERROR("failed to set gamma size: %d\n", ret);
+ return ret;
+ }
+ drm_crtc_helper_add(crtc, &inspur_crtc_helper_funcs);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/inspur/inspur_drm_drv.c b/drivers/gpu/drm/inspur/inspur_drm_drv.c
new file mode 100644
index 000000000000..d7026e1df167
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_drm_drv.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * Author:
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/console.h>
+#include <linux/module.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_probe_helper.h>
+
+#include "inspur_drm_drv.h"
+#include "inspur_drm_regs.h"
+
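+/*
+ * The last 2 MiB of the BAR0 framebuffer is kept out of the DRM VRAM
+ * pool; judging by the name, it is reserved for the BMC's KVM (remote
+ * console) data.
+ */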
+#define MEM_SIZE_RESERVE4KVM 0x200000
+
+
+DEFINE_DRM_GEM_FOPS(inspur_fops);
+irqreturn_t inspur_drm_interrupt(int irq, void *arg)
+{
+ struct drm_device *dev = (struct drm_device *)arg;
+ struct inspur_drm_private *priv =
+ (struct inspur_drm_private *)dev->dev_private;
+ u32 status;
+
+ status = readl(priv->mmio + INSPUR_RAW_INTERRUPT);
+
+ if (status & INSPUR_RAW_INTERRUPT_VBLANK(1)) {
+ writel(INSPUR_RAW_INTERRUPT_VBLANK(1),
+ priv->mmio + INSPUR_RAW_INTERRUPT);
+ drm_handle_vblank(dev, 0);
+ }
+
+ return IRQ_HANDLED;
+}
+
+
+
+static struct drm_driver inspur_driver = {
+ .driver_features = DRIVER_GEM | DRIVER_MODESET |
+ DRIVER_ATOMIC | DRIVER_HAVE_IRQ,
+
+ .fops = &inspur_fops,
+ .name = "inspur",
+ .date = "20230425",
+ .desc = "inspur drm driver",
+ .major = 2,
+ .minor = 2,
+ //.gem_free_object_unlocked = inspur_gem_free_object,
+ .dumb_create = inspur_dumb_create,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0)
+ .dumb_map_offset = drm_gem_ttm_dumb_map_offset,
+#else
+ .dumb_map_offset = drm_gem_vram_driver_dumb_mmap_offset,
+#endif
+};
+
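+/*
+ * Kick out any firmware/platform framebuffer bound to our PCI BARs.
+ * Note that the apertures_struct filled in below is never passed to the
+ * helpers; all three variants derive the ranges from the pdev directly.
+ */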
+static void inspur_remove_framebuffers(struct pci_dev *pdev)
+{
+ struct apertures_struct *ap;
+
+ ap = alloc_apertures(1);
+ if (!ap)
+ return;
+
+ ap->ranges[0].base = pci_resource_start(pdev, 0);
+ ap->ranges[0].size = pci_resource_len(pdev, 0);
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 15, 0)
+ drm_aperture_remove_conflicting_pci_framebuffers(pdev, &inspur_driver);
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0)
+ drm_aperture_remove_conflicting_pci_framebuffers(pdev, "inspurdrmfb");
+#else
+ drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, "inspurdrmfb");
+#endif
+
+ kfree(ap);
+}
+
+static int __maybe_unused inspur_pm_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct inspur_drm_private *priv = drm_dev->dev_private;
+
+ drm_kms_helper_poll_disable(drm_dev);
+ priv->suspend_state = drm_atomic_helper_suspend(drm_dev);
+ if (IS_ERR(priv->suspend_state)) {
+ DRM_ERROR("drm_atomic_helper_suspend failed: %ld\n",
+ PTR_ERR(priv->suspend_state));
+ drm_kms_helper_poll_enable(drm_dev);
+ return PTR_ERR(priv->suspend_state);
+ }
+
+ return 0;
+}
+
+static int __maybe_unused inspur_pm_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct drm_device *drm_dev = pci_get_drvdata(pdev);
+ struct inspur_drm_private *priv = drm_dev->dev_private;
+
+ drm_atomic_helper_resume(drm_dev, priv->suspend_state);
+ drm_kms_helper_poll_enable(drm_dev);
+
+ return 0;
+}
+
+static const struct dev_pm_ops inspur_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(inspur_pm_suspend,
+ inspur_pm_resume)
+};
+
+static int inspur_kms_init(struct inspur_drm_private *priv)
+{
+ int ret;
+
+ drm_mode_config_init(priv->dev);
+ priv->mode_config_initialized = true;
+
+ priv->dev->mode_config.min_width = 0;
+ priv->dev->mode_config.min_height = 0;
+ priv->dev->mode_config.max_width = 1920;
+ priv->dev->mode_config.max_height = 1200;
+
+ priv->dev->mode_config.fb_base = priv->fb_base;
+ priv->dev->mode_config.preferred_depth = 32;
+ priv->dev->mode_config.prefer_shadow = 1;
+
+ if (getKVMHWCursorSetting(priv)) {
+ priv->dev->mode_config.cursor_width = 64;
+ priv->dev->mode_config.cursor_height = 64;
+ }
+
+ priv->dev->mode_config.funcs = (void *)&inspur_mode_funcs;
+
+ ret = inspur_de_init(priv);
+ if (ret) {
+ DRM_ERROR("failed to init de: %d\n", ret);
+ return ret;
+ }
+
+ ret = inspur_vdac_init(priv);
+ if (ret) {
+ DRM_ERROR("failed to init vdac: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void inspur_kms_fini(struct inspur_drm_private *priv)
+{
+ if (priv->mode_config_initialized) {
+ drm_mode_config_cleanup(priv->dev);
+ priv->mode_config_initialized = false;
+ }
+}
+
+/*
+ * It can operate in one of three modes: 0, 1 or Sleep.
+ */
+void inspur_set_power_mode(struct inspur_drm_private *priv,
+ unsigned int power_mode)
+{
+ unsigned int control_value = 0;
+ void __iomem *mmio = priv->mmio;
+ unsigned int input = 1;
+
+ if (power_mode > INSPUR_PW_MODE_CTL_MODE_SLEEP)
+ return;
+
+ if (power_mode == INSPUR_PW_MODE_CTL_MODE_SLEEP)
+ input = 0;
+
+ control_value = readl(mmio + INSPUR_POWER_MODE_CTRL);
+ control_value &= ~(INSPUR_PW_MODE_CTL_MODE_MASK |
+ INSPUR_PW_MODE_CTL_OSC_INPUT_MASK);
+ control_value |= INSPUR_FIELD(INSPUR_PW_MODE_CTL_MODE, power_mode);
+ control_value |= INSPUR_FIELD(INSPUR_PW_MODE_CTL_OSC_INPUT, input);
+ writel(control_value, mmio + INSPUR_POWER_MODE_CTRL);
+}
+
+void inspur_set_current_gate(struct inspur_drm_private *priv, unsigned int gate)
+{
+ unsigned int gate_reg;
+ unsigned int mode;
+ void __iomem *mmio = priv->mmio;
+
+ /* Get current power mode. */
+ mode = (readl(mmio + INSPUR_POWER_MODE_CTRL) &
+ INSPUR_PW_MODE_CTL_MODE_MASK) >> INSPUR_PW_MODE_CTL_MODE_SHIFT;
+
+ switch (mode) {
+ case INSPUR_PW_MODE_CTL_MODE_MODE0:
+ gate_reg = INSPUR_MODE0_GATE;
+ break;
+
+ case INSPUR_PW_MODE_CTL_MODE_MODE1:
+ gate_reg = INSPUR_MODE1_GATE;
+ break;
+
+ default:
+ gate_reg = INSPUR_MODE0_GATE;
+ break;
+ }
+ writel(gate, mmio + gate_reg);
+}
+
+static void inspur_hw_config(struct inspur_drm_private *priv)
+{
+ unsigned int reg;
+
+ /* On hardware reset, power mode 0 is default. */
+ inspur_set_power_mode(priv, INSPUR_PW_MODE_CTL_MODE_MODE0);
+
+ /* Enable display power gate & LOCALMEM power gate*/
+ reg = readl(priv->mmio + INSPUR_CURRENT_GATE);
+ reg &= ~INSPUR_CURR_GATE_DISPLAY_MASK;
+ reg &= ~INSPUR_CURR_GATE_LOCALMEM_MASK;
+ reg |= INSPUR_CURR_GATE_DISPLAY(1);
+ reg |= INSPUR_CURR_GATE_LOCALMEM(1);
+
+ inspur_set_current_gate(priv, reg);
+
+ /*
+ * Reset the memory controller. If the memory controller
+ * is not reset in the chip, the system might hang when software
+ * accesses the memory. The memory should be reset after
+ * changing the MXCLK.
+ */
+ reg = readl(priv->mmio + INSPUR_MISC_CTRL);
+ reg &= ~INSPUR_MSCCTL_LOCALMEM_RESET_MASK;
+ reg |= INSPUR_MSCCTL_LOCALMEM_RESET(0);
+ writel(reg, priv->mmio + INSPUR_MISC_CTRL);
+
+ reg &= ~INSPUR_MSCCTL_LOCALMEM_RESET_MASK;
+ reg |= INSPUR_MSCCTL_LOCALMEM_RESET(1);
+
+ writel(reg, priv->mmio + INSPUR_MISC_CTRL);
+}
+
+static int inspur_hw_map(struct inspur_drm_private *priv)
+{
+ struct drm_device *dev = priv->dev;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ resource_size_t addr, size, ioaddr, iosize;
+
+ ioaddr = pci_resource_start(pdev, 1);
+ iosize = pci_resource_len(pdev, 1);
+ priv->mmio = devm_ioremap(dev->dev, ioaddr, iosize);
+ if (!priv->mmio) {
+ DRM_ERROR("Cannot map mmio region\n");
+ return -ENOMEM;
+ }
+
+ addr = pci_resource_start(pdev, 0);
+ size = pci_resource_len(pdev, 0);
+ priv->fb_map = devm_ioremap(dev->dev, addr, size);
+ if (!priv->fb_map) {
+ DRM_ERROR("Cannot map framebuffer\n");
+ return -ENOMEM;
+ }
+ priv->fb_base = addr;
+ priv->fb_size = size - MEM_SIZE_RESERVE4KVM;
+
+ return 0;
+}
+
+static void inspur_hw_unmap(struct inspur_drm_private *priv)
+{
+ struct drm_device *dev = priv->dev;
+
+ if (priv->mmio) {
+ devm_iounmap(dev->dev, priv->mmio);
+ priv->mmio = NULL;
+ }
+ if (priv->fb_map) {
+ devm_iounmap(dev->dev, priv->fb_map);
+ priv->fb_map = NULL;
+ }
+}
+
+static int inspur_hw_init(struct inspur_drm_private *priv)
+{
+ int ret;
+
+ ret = inspur_hw_map(priv);
+ if (ret)
+ return ret;
+
+ inspur_hw_config(priv);
+
+ return 0;
+}
+
+void inspur_unload(struct drm_device *dev)
+{
+ struct inspur_drm_private *priv = dev->dev_private;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+
+ drm_atomic_helper_shutdown(dev);
+
+ free_irq(pdev->irq, dev);
+
+ inspur_kms_fini(priv);
+ inspur_hw_unmap(priv);
+ pci_disable_msi(to_pci_dev(dev->dev));
+ dev->dev_private = NULL;
+}
+
+int inspur_load(struct drm_device *dev, unsigned long flags)
+{
+ struct inspur_drm_private *priv;
+ struct pci_dev *pdev = to_pci_dev(dev->dev);
+ int ret;
+
+ priv = devm_kzalloc(dev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ DRM_ERROR("no memory to allocate for inspur_drm_private\n");
+ return -ENOMEM;
+ }
+ dev->dev_private = priv;
+ priv->dev = dev;
+
+ ret = inspur_hw_init(priv);
+ if (ret)
+ goto err;
+
+ ret = drmm_vram_helper_init(dev, pci_resource_start(pdev, 0), priv->fb_size);
+ if (ret) {
+ drm_err(dev, "Error initializing VRAM MM; %d\n", ret);
+ goto err;
+ }
+ ret = inspur_kms_init(priv);
+ if (ret)
+ goto err;
+
+
+ /* reset all the states of crtc/plane/encoder/connector */
+ drm_mode_config_reset(dev);
+
+ if (getKVMHWCursorSetting(priv)) {
+#if 0
+ inspur_bo_create(dev, PAGE_ALIGN(1024), 0, 0, &priv->cursor.cursor_1);
+ inspur_bo_create(dev, PAGE_ALIGN(1024), 0, 0, &priv->cursor.cursor_2);
+ if (!priv->cursor.cursor_1 || !priv->cursor.cursor_2) {
+ priv->cursor.cursor_1 = NULL;
+ priv->cursor.cursor_2 = NULL;
+ DRM_ERROR("Could not allocate space for cursors. Not doing hardware cursors.\n");
+ }
+#endif
+ }
+
+ return 0;
+
+err:
+ inspur_unload(dev);
+ DRM_ERROR("failed to initialize drm driver: %d\n", ret);
+ return ret;
+}
+
+static int inspur_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int ret = 0;
+ struct inspur_drm_private *priv;
+ struct drm_device *dev;
+
+ inspur_remove_framebuffers(pdev);
+
+ dev = drm_dev_alloc(&inspur_driver, &pdev->dev);
+ if (IS_ERR(dev)) {
+ DRM_ERROR("failed to allocate drm_device\n");
+ return PTR_ERR(dev);
+ }
+
+ pci_set_drvdata(pdev, dev);
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ drm_err(dev, "failed to enable pci device: %d\n", ret);
+ return ret;
+ }
+ ret = inspur_load(dev, ent->driver_data);
+ if (ret)
+ goto err_return;
+
+ ret = drm_dev_register(dev, ent->driver_data);
+ if (ret)
+ goto err_inspur_driver_unload;
+
+ drm_fbdev_generic_setup(dev, dev->mode_config.preferred_depth);
+
+ return 0;
+err_inspur_driver_unload:
+ inspur_unload(dev);
+err_return:
+ return ret;
+}
+
+static void inspur_pci_remove(struct pci_dev *pdev)
+{
+ struct drm_device *dev = pci_get_drvdata(pdev);
+
+ drm_put_dev(dev);
+ pci_disable_device(pdev);
+}
+
+static void inspur_pci_shutdown(struct pci_dev *pdev)
+{
+ inspur_pci_remove(pdev);
+}
+
+static struct pci_device_id inspur_pci_table[] = {
+ {0x1bd4, 0x0750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ {0,}
+};
+
+static struct pci_driver inspur_pci_driver = {
+ .name = "inspur-drm",
+ .id_table = inspur_pci_table,
+ .probe = inspur_pci_probe,
+ .remove = inspur_pci_remove,
+ .shutdown = inspur_pci_shutdown,
+ .driver.pm = &inspur_pm_ops,
+};
+
+static int __init inspur_init(void)
+{
+ return pci_register_driver(&inspur_pci_driver);
+}
+
+static void __exit inspur_exit(void)
+{
+ return pci_unregister_driver(&inspur_pci_driver);
+}
+
+module_init(inspur_init);
+module_exit(inspur_exit);
+
+MODULE_DEVICE_TABLE(pci, inspur_pci_table);
+MODULE_AUTHOR("");
+MODULE_DESCRIPTION("DRM Driver for INSPUR");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/inspur/inspur_drm_drv.h b/drivers/gpu/drm/inspur/inspur_drm_drv.h
new file mode 100644
index 000000000000..b1a20f1b7df2
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_drm_drv.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef INSPUR_DRM_DRV_H
+#define INSPUR_DRM_DRV_H
+
+#include <linux/version.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_vram_helper.h>
+#include <linux/pci.h>
+#include <drm/drm_vblank.h>
+#include <drm/drm_drv.h>
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 14, 0)
+#include <drm/drm_aperture.h>
+#endif
+
+#include <linux/delay.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+struct drm_device;
+struct drm_gem_object;
+
+#define inspur_framebuffer drm_framebuffer
+#define BPP16_RED 0x0000f800
+#define BPP16_GREEN 0x000007e0
+#define BPP16_BLUE 0x0000001f
+#define BPP16_WHITE 0x0000ffff
+#define BPP16_GRAY 0x00008410
+#define BPP16_YELLOW 0x0000ffe0
+#define BPP16_CYAN 0x000007ff
+#define BPP16_PINK 0x0000f81f
+#define BPP16_BLACK 0x00000000
+struct inspur_fbdev {
+ struct drm_fb_helper helper;
+ struct inspur_framebuffer *fb;
+ int size;
+};
+
+struct inspur_cursor {
+ struct drm_gem_vram_object *gbo[2];
+ unsigned int next_index;
+};
+
+struct inspur_drm_private {
+ /* hw */
+ void __iomem *mmio;
+ void __iomem *fb_map;
+ unsigned long fb_base;
+ unsigned long fb_size;
+
+ /* drm */
+ struct drm_device *dev;
+ bool mode_config_initialized;
+ struct drm_atomic_state *suspend_state;
+
+ /* fbdev */
+ struct inspur_fbdev *fbdev;
+
+ /* hw cursor */
+ struct inspur_cursor cursor;
+};
+
+#define to_inspur_framebuffer(x) container_of(x, struct inspur_framebuffer, fb)
+
+
+void inspur_set_power_mode(struct inspur_drm_private *priv,
+ unsigned int power_mode);
+void inspur_set_current_gate(struct inspur_drm_private *priv,
+ unsigned int gate);
+int inspur_load(struct drm_device *dev, unsigned long flags);
+void inspur_unload(struct drm_device *dev);
+
+int inspur_de_init(struct inspur_drm_private *priv);
+int inspur_vdac_init(struct inspur_drm_private *priv);
+int inspur_fbdev_init(struct inspur_drm_private *priv);
+void inspur_fbdev_fini(struct inspur_drm_private *priv);
+
+int inspur_gem_create(struct drm_device *dev, u32 size, bool iskernel, struct drm_gem_object **obj);
+struct inspur_framebuffer *
+inspur_framebuffer_init(struct drm_device *dev,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj);
+
+int inspur_mm_init(struct inspur_drm_private *inspur);
+void inspur_mm_fini(struct inspur_drm_private *inspur);
+int inspur_dumb_create(struct drm_file *file, struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+
+extern const struct drm_mode_config_funcs inspur_mode_funcs;
+
+/* inspur_drm_cursor.c */
+int inspur_cursor_init(struct inspur_drm_private *priv);
+void inspur_cursor_fini(struct inspur_drm_private *priv);
+int inspur_crtc_cursor_set(struct drm_crtc *crtc,
+ struct drm_file *file_priv,
+ uint32_t handle, uint32_t width,
+ uint32_t height);
+int inspur_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
+unsigned char getKVMHWCursorSetting(struct inspur_drm_private *priv);
+void colorcur2monocur(void *data, void *out);
+
+
+#endif
diff --git a/drivers/gpu/drm/inspur/inspur_drm_regs.h b/drivers/gpu/drm/inspur/inspur_drm_regs.h
new file mode 100644
index 000000000000..a28dfd1285d7
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_drm_regs.h
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#ifndef INSPUR_DRM_HW_H
+#define INSPUR_DRM_HW_H
+
+/* register definition */
+#define INSPUR_MISC_CTRL 0x4
+
+#define INSPUR_MSCCTL_LOCALMEM_RESET(x) ((x) << 6)
+#define INSPUR_MSCCTL_LOCALMEM_RESET_MASK 0x40
+
+#define INSPUR_CURRENT_GATE 0x000040
+#define INSPUR_CURR_GATE_DISPLAY(x) ((x) << 2)
+#define INSPUR_CURR_GATE_DISPLAY_MASK 0x4
+
+#define INSPUR_CURR_GATE_LOCALMEM(x) ((x) << 1)
+#define INSPUR_CURR_GATE_LOCALMEM_MASK 0x2
+
+#define INSPUR_MODE0_GATE 0x000044
+#define INSPUR_MODE1_GATE 0x000048
+#define INSPUR_POWER_MODE_CTRL 0x00004C
+
+#define INSPUR_PW_MODE_CTL_OSC_INPUT(x) ((x) << 3)
+#define INSPUR_PW_MODE_CTL_OSC_INPUT_MASK 0x8
+
+#define INSPUR_PW_MODE_CTL_MODE(x) ((x) << 0)
+#define INSPUR_PW_MODE_CTL_MODE_MASK 0x03
+#define INSPUR_PW_MODE_CTL_MODE_SHIFT 0
+
+#define INSPUR_PW_MODE_CTL_MODE_MODE0 0
+#define INSPUR_PW_MODE_CTL_MODE_MODE1 1
+#define INSPUR_PW_MODE_CTL_MODE_SLEEP 2
+
+//#define INSPUR_CRT_PLL_CTRL 0x000060
+
+#define INSPUR_PLL_CTRL_BYPASS(x) ((x) << 18)
+#define INSPUR_PLL_CTRL_BYPASS_MASK 0x40000
+
+#define INSPUR_PLL_CTRL_POWER(x) ((x) << 17)
+#define INSPUR_PLL_CTRL_POWER_MASK 0x20000
+
+#define INSPUR_PLL_CTRL_INPUT(x) ((x) << 16)
+#define INSPUR_PLL_CTRL_INPUT_MASK 0x10000
+
+#define INSPUR_PLL_CTRL_POD(x) ((x) << 14)
+#define INSPUR_PLL_CTRL_POD_MASK 0xC000
+
+#define INSPUR_PLL_CTRL_OD(x) ((x) << 12)
+#define INSPUR_PLL_CTRL_OD_MASK 0x3000
+
+#define INSPUR_PLL_CTRL_N(x) ((x) << 8)
+#define INSPUR_PLL_CTRL_N_MASK 0xF00
+
+#define INSPUR_PLL_CTRL_M(x) ((x) << 0)
+#define INSPUR_PLL_CTRL_M_MASK 0xFF
+
+#define INSPUR_CRT_DISP_CTL 0x80200
+
+
+#define INSPUR_CRT_DISP_CTL_DPMS(x) ((x) << 30)
+#define INSPUR_CRT_DISP_CTL_DPMS_MASK 0xc0000000
+
+#define INSPUR_CRT_DPMS_ON 0
+#define INSPUR_CRT_DPMS_OFF 3
+
+
+#define INSPUR_CRT_DISP_CTL_CRTSELECT(x) ((x) << 25)
+#define INSPUR_CRT_DISP_CTL_CRTSELECT_MASK 0x2000000
+
+#define INSPUR_CRTSELECT_CRT 1
+
+#define INSPUR_CRT_DISP_CTL_CLOCK_PHASE(x) ((x) << 14)
+#define INSPUR_CRT_DISP_CTL_CLOCK_PHASE_MASK 0x4000
+
+#define INSPUR_CRT_DISP_CTL_VSYNC_PHASE(x) ((x) << 13)
+#define INSPUR_CRT_DISP_CTL_VSYNC_PHASE_MASK 0x2000
+
+#define INSPUR_CRT_DISP_CTL_HSYNC_PHASE(x) ((x) << 12)
+#define INSPUR_CRT_DISP_CTL_HSYNC_PHASE_MASK 0x1000
+
+#define INSPUR_CRT_DISP_CTL_TIMING(x) ((x) << 8)
+#define INSPUR_CRT_DISP_CTL_TIMING_MASK 0x100
+
+#define INSPUR_CRT_DISP_CTL_PLANE(x) ((x) << 2)
+#define INSPUR_CRT_DISP_CTL_PLANE_MASK 4
+
+#define INSPUR_CRT_DISP_CTL_FORMAT(x) ((x) << 0)
+#define INSPUR_CRT_DISP_CTL_FORMAT_MASK 0x03
+
+#define INSPUR_CRT_FB_ADDRESS 0x080204
+
+#define INSPUR_CRT_FB_WIDTH 0x080208
+#define INSPUR_CRT_FB_WIDTH_WIDTH(x) ((x) << 16)
+#define INSPUR_CRT_FB_WIDTH_WIDTH_MASK 0x3FFF0000
+#define INSPUR_CRT_FB_WIDTH_OFFS(x) ((x) << 0)
+#define INSPUR_CRT_FB_WIDTH_OFFS_MASK 0x3FFF
+
+#define INSPUR_CRT_HORZ_TOTAL 0x08020C
+#define INSPUR_CRT_HORZ_TOTAL_TOTAL(x) ((x) << 16)
+#define INSPUR_CRT_HORZ_TOTAL_TOTAL_MASK 0xFFF0000
+
+#define INSPUR_CRT_HORZ_TOTAL_DISP_END(x) ((x) << 0)
+#define INSPUR_CRT_HORZ_TOTAL_DISP_END_MASK 0xFFF
+
+#define INSPUR_CRT_HORZ_SYNC 0x080210
+#define INSPUR_CRT_HORZ_SYNC_WIDTH(x) ((x) << 16)
+#define INSPUR_CRT_HORZ_SYNC_WIDTH_MASK 0xFF0000
+
+#define INSPUR_CRT_HORZ_SYNC_START(x) ((x) << 0)
+#define INSPUR_CRT_HORZ_SYNC_START_MASK 0xFFF
+
+#define INSPUR_CRT_VERT_TOTAL 0x080214
+#define INSPUR_CRT_VERT_TOTAL_TOTAL(x) ((x) << 16)
+#define INSPUR_CRT_VERT_TOTAL_TOTAL_MASK 0x7FFF0000
+
+#define INSPUR_CRT_VERT_TOTAL_DISP_END(x) ((x) << 0)
+#define INSPUR_CRT_VERT_TOTAL_DISP_END_MASK 0x7FF
+
+#define INSPUR_CRT_VERT_SYNC 0x080218
+#define INSPUR_CRT_VERT_SYNC_HEIGHT(x) ((x) << 16)
+#define INSPUR_CRT_VERT_SYNC_HEIGHT_MASK 0x3F0000
+
+#define INSPUR_CRT_VERT_SYNC_START(x) ((x) << 0)
+#define INSPUR_CRT_VERT_SYNC_START_MASK 0x7FF
+
+/* Hardware Cursor */
+#define INSPUR_HWC_ADDRESS 0x080230
+#define INSPUR_HWC_ADDRESS_ENABLE(x) ((x) << 31)
+#define INSPUR_HWC_ADDRESS_ENABLE_MASK 0x80000000
+#define INSPUR_HWC_ADDRESS_ADDRESS(x) ((x) << 0)
+#define INSPUR_HWC_ADDRESS_ADDRESS_MASK 0xFFFFFFF
+
+#define INSPUR_HWC_LOCATION 0x080234
+#define INSPUR_HWC_LOCATION_TOP(x) ((x) << 27)
+#define INSPUR_HWC_LOCATION_TOP_MASK 0x8000000
+#define INSPUR_HWC_LOCATION_Y(x) ((x) << 16)
+#define INSPUR_HWC_LOCATION_Y_MASK 0x7FF0000
+#define INSPUR_HWC_LOCATION_LEFT(x) ((x) << 11)
+#define INSPUR_HWC_LOCATION_LEFT_MASK 0x800
+#define INSPUR_HWC_LOCATION_X(x) ((x) << 0)
+#define INSPUR_HWC_LOCATION_X_MASK 0x7FF
+
+#define INSPUR_HWC_COLOR_12 0x080238
+#define INSPUR_HWC_COLOR_12_2_RGB(x) ((x) << 16)
+#define INSPUR_HWC_COLOR_12_2_RGB_MASK 0xFFFF0000
+#define INSPUR_HWC_COLOR_12_1_RGB(x) ((x) << 0)
+#define INSPUR_HWC_COLOR_12_1_RGB_MASK 0xFFFF
+
+#define INSPUR_HWC_COLOR_3 0x08023C
+#define INSPUR_HWC_COLOR_3_RGB(x) ((x) << 0)
+#define INSPUR_HWC_COLOR_3_RGB_MASK 0xFFFF
+
+/* Auto Centering */
+#define INSPUR_CRT_AUTO_CENTERING_TL 0x080280
+#define INSPUR_CRT_AUTO_CENTERING_TL_TOP(x) ((x) << 16)
+#define INSPUR_CRT_AUTO_CENTERING_TL_TOP_MASK 0x7FF0000
+
+#define INSPUR_CRT_AUTO_CENTERING_TL_LEFT(x) ((x) << 0)
+#define INSPUR_CRT_AUTO_CENTERING_TL_LEFT_MASK 0x7FF
+
+#define INSPUR_CRT_AUTO_CENTERING_BR 0x080284
+#define INSPUR_CRT_AUTO_CENTERING_BR_BOTTOM(x) ((x) << 16)
+#define INSPUR_CRT_AUTO_CENTERING_BR_BOTTOM_MASK 0x7FF0000
+
+#define INSPUR_CRT_AUTO_CENTERING_BR_RIGHT(x) ((x) << 0)
+#define INSPUR_CRT_AUTO_CENTERING_BR_RIGHT_MASK 0x7FF
+
+/* register to control panel output */
+#define INSPUR_DISPLAY_CONTROL_HISILE 0x80288
+#define INSPUR_DISPLAY_CONTROL_FPVDDEN(x) ((x) << 0)
+#define INSPUR_DISPLAY_CONTROL_PANELDATE(x) ((x) << 1)
+#define INSPUR_DISPLAY_CONTROL_FPEN(x) ((x) << 2)
+#define INSPUR_DISPLAY_CONTROL_VBIASEN(x) ((x) << 3)
+
+#define INSPUR_RAW_INTERRUPT 0x80290
+#define INSPUR_RAW_INTERRUPT_VBLANK(x) ((x) << 2)
+#define INSPUR_RAW_INTERRUPT_VBLANK_MASK 0x4
+
+#define INSPUR_RAW_INTERRUPT_EN 0x80298
+#define INSPUR_RAW_INTERRUPT_EN_VBLANK(x) ((x) << 2)
+#define INSPUR_RAW_INTERRUPT_EN_VBLANK_MASK 0x4
+
+/* register and values for PLL control */
+#define CRT_PLL1_NS 0x802a8
+#define CRT_PLL1_NS_OUTER_BYPASS(x) ((x) << 30)
+#define CRT_PLL1_NS_INTER_BYPASS(x) ((x) << 29)
+#define CRT_PLL1_NS_POWERON(x) ((x) << 24)
+
+#define CRT_PLL1_NS_25MHZ 0x00006691 //640x480
+#define CRT_PLL1_NS_40MHZ 0x00004580 //800x600
+#define CRT_PLL1_NS_65MHZ 0x00002568 //1024x768
+#define CRT_PLL1_NS_83MHZ 0x000027bb //1280x800
+#define CRT_PLL1_NS_106MHZ 0x000027ef //1440x900
+#define CRT_PLL1_NS_108MHZ 0x000027f2 //1280x1024
+#define CRT_PLL1_NS_146MHZ 0x00001575 //1680x1050
+#define CRT_PLL1_NS_148MHZ 0x0000145f //1920x1080
+#define CRT_PLL1_NS_193MHZ 0x000018f7 //1920x1200
+
+#define CRT_PLL2_NS 0x802ac
+#define CRT_PLL2_NS_25MHZ 0x0
+#define CRT_PLL2_NS_40MHZ 0x0
+#define CRT_PLL2_NS_65MHZ 0x0
+#define CRT_PLL2_NS_83MHZ 0x0
+#define CRT_PLL2_NS_106MHZ 0x0
+#define CRT_PLL2_NS_108MHZ 0x0
+#define CRT_PLL2_NS_146MHZ 0x0
+#define CRT_PLL2_NS_148MHZ 0x0
+#define CRT_PLL2_NS_193MHZ 0x0
+
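+/*
+ * Place a value into a register field and clamp it to the field's mask,
+ * e.g. INSPUR_FIELD(INSPUR_CRT_DISP_CTL_FORMAT, 2) yields (2 << 0) & 0x03.
+ */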
+#define INSPUR_FIELD(field, value) (field(value) & field##_MASK)
+#endif
diff --git a/drivers/gpu/drm/inspur/inspur_drm_vdac.c b/drivers/gpu/drm/inspur/inspur_drm_vdac.c
new file mode 100644
index 000000000000..20e22ef02546
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_drm_vdac.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_probe_helper.h>
+
+#include "inspur_drm_drv.h"
+#include "inspur_drm_regs.h"
+
+static int inspur_connector_get_modes(struct drm_connector *connector)
+{
+ int count;
+
+ count = drm_add_modes_noedid(connector,
+ connector->dev->mode_config.max_width,
+ connector->dev->mode_config.max_height);
+ drm_set_preferred_mode(connector, 1024, 768);
+ return count;
+}
+
+static int inspur_connector_mode_valid(struct drm_connector *connector,
+ struct drm_display_mode *mode)
+{
+ return MODE_OK;
+}
+
+static const struct drm_connector_helper_funcs
+ inspur_connector_helper_funcs = {
+ .get_modes = inspur_connector_get_modes,
+ .mode_valid = inspur_connector_mode_valid,
+};
+
+static const struct drm_connector_funcs inspur_connector_funcs = {
+ .fill_modes = drm_helper_probe_single_connector_modes,
+ .destroy = drm_connector_cleanup,
+ .reset = drm_atomic_helper_connector_reset,
+ .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+ .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static void inspur_encoder_mode_set(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adj_mode)
+{
+ u32 reg;
+ struct drm_device *dev = encoder->dev;
+ struct inspur_drm_private *priv = dev->dev_private;
+
+ reg = readl(priv->mmio + INSPUR_DISPLAY_CONTROL_HISILE);
+ reg |= INSPUR_DISPLAY_CONTROL_FPVDDEN(1);
+ reg |= INSPUR_DISPLAY_CONTROL_PANELDATE(1);
+ reg |= INSPUR_DISPLAY_CONTROL_FPEN(1);
+ reg |= INSPUR_DISPLAY_CONTROL_VBIASEN(1);
+ writel(reg, priv->mmio + INSPUR_DISPLAY_CONTROL_HISILE);
+}
+
+static const struct drm_encoder_helper_funcs inspur_encoder_helper_funcs = {
+ .mode_set = inspur_encoder_mode_set,
+};
+
+static const struct drm_encoder_funcs inspur_encoder_funcs = {
+ .destroy = drm_encoder_cleanup,
+};
+
+int inspur_vdac_init(struct inspur_drm_private *priv)
+{
+ struct drm_device *dev = priv->dev;
+ struct drm_encoder *encoder;
+ struct drm_connector *connector;
+ int ret;
+
+ encoder = devm_kzalloc(dev->dev, sizeof(*encoder), GFP_KERNEL);
+ if (!encoder) {
+ DRM_ERROR("failed to alloc memory when init encoder\n");
+ return -ENOMEM;
+ }
+
+ encoder->possible_crtcs = 0x1;
+ ret = drm_encoder_init(dev, encoder, &inspur_encoder_funcs,
+ DRM_MODE_ENCODER_DAC, NULL);
+ if (ret) {
+ DRM_ERROR("failed to init encoder: %d\n", ret);
+ return ret;
+ }
+
+ drm_encoder_helper_add(encoder, &inspur_encoder_helper_funcs);
+
+ connector = devm_kzalloc(dev->dev, sizeof(*connector), GFP_KERNEL);
+ if (!connector) {
+ DRM_ERROR("failed to alloc memory when init connector\n");
+ return -ENOMEM;
+ }
+
+ ret = drm_connector_init(dev, connector,
+ &inspur_connector_funcs,
+ DRM_MODE_CONNECTOR_VGA);
+ if (ret) {
+ DRM_ERROR("failed to init connector: %d\n", ret);
+ return ret;
+ }
+ drm_connector_helper_add(connector, &inspur_connector_helper_funcs);
+
+ drm_connector_register(connector);
+ drm_connector_attach_encoder(connector, encoder);
+ return 0;
+}
diff --git a/drivers/gpu/drm/inspur/inspur_ttm.c b/drivers/gpu/drm/inspur/inspur_ttm.c
new file mode 100644
index 000000000000..5757120597e9
--- /dev/null
+++ b/drivers/gpu/drm/inspur/inspur_ttm.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* INSPUR SoC drm driver
+ *
+ * Based on the smi drm driver.
+ *
+ * Copyright (c) 2020 SMI Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <drm/drm_atomic_helper.h>
+
+#include "inspur_drm_drv.h"
+
+
+int inspur_dumb_create(struct drm_file *file, struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+
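+ /*
+ * Dumb buffers come from the VRAM helper with a 128-byte pitch
+ * alignment, matching the stride check in inspur_plane_atomic_check().
+ */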
+ return drm_gem_vram_fill_create_dumb(file, dev, 0, 128, args);
+}
+
+
+
+
+
+const struct drm_mode_config_funcs inspur_mode_funcs = {
+ .atomic_check = drm_atomic_helper_check,
+ .atomic_commit = drm_atomic_helper_commit,
+ .fb_create = drm_gem_fb_create,
+ .mode_valid = drm_vram_helper_mode_valid,
+};
--
2.33.0

[PATCH OLK-5.10] LoongArch: Fix module relocation error with binutils 2.41
by Hongchen Zhang 13 Sep '23
From: Huacai Chen <chenhuacai(a)loongson.cn>
stable inclusion
from stable-v6.5-rc4
commit 03c53eb90c0c61885b2175adf8675fb56df7f8db
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I80YEI
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=…
---------------------------
Binutils 2.41 enables linker relaxation by default, but the kernel
module loader doesn't support that, so just disable it. Otherwise we
get errors such as this when loading modules:
"Unknown relocation type 102"
As an alternative, we could add linker relaxation support to the kernel
module loader. But that would be a relatively large amount of complexity
that may or may not bring a similar gain, and we don't really want to
include this linker pass in the kernel.
Reviewed-by: WANG Xuerui <git(a)xen0n.name>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/Makefile | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 345dc10576d4..a0f194da592b 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -55,6 +55,8 @@ LDFLAGS_vmlinux += -G0 -static -n -nostdlib
ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
cflags-y += -mexplicit-relocs
KBUILD_CFLAGS_KERNEL += -mdirect-extern-access
+KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
+KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
else
cflags-y += $(call cc-option,-mno-explicit-relocs)
KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel
--
2.33.0

LoongArch: Fix the write_fcsr() macro
From: Qi Hu <huqi(a)loongson.cn>
linux-next inclusion
from next-20230616
commit 346dc929623cef70ff7832a4fa0ffd1b696e312a
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I80YEI
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/…
---------------------------
The "write_fcsr()" macro uses wrong the positions for val and dest in
asm. Fix it!
Reported-by: Miao HAO <haomiao19(a)mails.ucas.ac.cn>
Signed-off-by: Qi Hu <huqi(a)loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai(a)loongson.cn>
---
arch/loongarch/include/asm/loongarch.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h
index 33a8fa446ba9..0b8c1bde008f 100644
--- a/arch/loongarch/include/asm/loongarch.h
+++ b/arch/loongarch/include/asm/loongarch.h
@@ -1521,7 +1521,7 @@ __BUILD_CSR_OP(tlbidx)
#define write_fcsr(dest, val) \
do { \
__asm__ __volatile__( \
- " movgr2fcsr %0, "__stringify(dest)" \n" \
+ " movgr2fcsr "__stringify(dest)", %0 \n" \
: : "r" (val)); \
} while (0)
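For illustration, movgr2fcsr moves a general-purpose register into a
floating-point control/status register, so the FCSR name must come
first. A rough sketch of the corrected expansion, assuming a
hypothetical destination that stringifies to $fcsr0 (the exact spelling
comes from __stringify(dest)):
__asm__ __volatile__(
	"	movgr2fcsr	$fcsr0, %0	\n"	/* destination FCSR first, then the GPR holding val */
	: : "r" (val));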
--
2.33.0

[PATCH openEuler-1.0-LTS] netfilter: nftables: exthdr: fix 4-byte stack OOB write
by Zhengchao Shao 13 Sep '23
From: Florian Westphal <fw(a)strlen.de>
mainline inclusion
from mainline-v6.6-rc1
commit fd94d9dadee58e09b49075240fe83423eb1dcd36
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I80I0G
CVE: CVE-2023-4881
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
If priv->len is a multiple of 4, then dst[len / 4] can write past
the destination array, which leads to stack corruption.
This construct is necessary to clean the remainder of the register
in case ->len is NOT a multiple of the register size, so make it
conditional just like nft_payload.c does.
The bug was added in the 4.1 cycle and then copied/inherited when
tcp/sctp and ip option support was added.
Bug reported by Zero Day Initiative project (ZDI-CAN-21950,
ZDI-CAN-21951, ZDI-CAN-21961).
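To make the overflow concrete, here is a minimal sketch, not from the
patch itself: it assumes a hypothetical four-register destination with
NFT_REG32_SIZE == 4, and src stands in for the packet bytes being copied.
	u32 dest[4];            /* 16-byte destination area */
	unsigned int len = 16;  /* priv->len, a multiple of the register size */

	/* old code: unconditional padding writes dest[4], 4 bytes past the array */
	dest[len / NFT_REG32_SIZE] = 0;

	/* fixed code: pad only when len leaves a partially-filled register */
	if (len % NFT_REG32_SIZE)
		dest[len / NFT_REG32_SIZE] = 0;
	memcpy(dest, src, len);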
Fixes: 49499c3e6e18 ("netfilter: nf_tables: switch registers to 32 bit addressing")
Fixes: 935b7f643018 ("netfilter: nft_exthdr: add TCP option matching")
Fixes: 133dc203d77d ("netfilter: nft_exthdr: Support SCTP chunks")
Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options")
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Conflicts:
net/netfilter/nft_exthdr.c
Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com>
---
net/netfilter/nft_exthdr.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 93fee4106019..07dd5a723d79 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -36,6 +36,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
return opt[offset + 1];
}
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
+ if (len % NFT_REG32_SIZE)
+ dest[len / NFT_REG32_SIZE] = 0;
+
+ return skb_copy_bits(skb, offset, dest, len);
+}
+
static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -57,8 +65,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -114,7 +121,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
*dest = 1;
} else {
- dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
memcpy(dest, opt + offset, priv->len);
}
--
2.34.1

[PATCH OLK-5.10] netfilter: nftables: exthdr: fix 4-byte stack OOB write
by Zhengchao Shao 13 Sep '23
From: Florian Westphal <fw(a)strlen.de>
mainline inclusion
from mainline-v6.6-rc1
commit fd94d9dadee58e09b49075240fe83423eb1dcd36
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I80I0G
CVE: CVE-2023-4881
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
If priv->len is a multiple of 4, then dst[len / 4] can write past
the destination array, which leads to stack corruption.
This construct is necessary to clean the remainder of the register
in case ->len is NOT a multiple of the register size, so make it
conditional just like nft_payload.c does.
The bug was added in the 4.1 cycle and then copied/inherited when
tcp/sctp and ip option support was added.
Bug reported by Zero Day Initiative project (ZDI-CAN-21950,
ZDI-CAN-21951, ZDI-CAN-21961).
Fixes: 49499c3e6e18 ("netfilter: nf_tables: switch registers to 32 bit addressing")
Fixes: 935b7f643018 ("netfilter: nft_exthdr: add TCP option matching")
Fixes: 133dc203d77d ("netfilter: nft_exthdr: Support SCTP chunks")
Fixes: dbb5281a1f84 ("netfilter: nf_tables: add support for matching IPv4 options")
Signed-off-by: Florian Westphal <fw(a)strlen.de>
Conflicts:
net/netfilter/nft_exthdr.c
Signed-off-by: Zhengchao Shao <shaozhengchao(a)huawei.com>
---
net/netfilter/nft_exthdr.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 670dd146fb2b..ca268293cfa1 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -33,6 +33,14 @@ static unsigned int optlen(const u8 *opt, unsigned int offset)
return opt[offset + 1];
}
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
+ if (len % NFT_REG32_SIZE)
+ dest[len / NFT_REG32_SIZE] = 0;
+
+ return skb_copy_bits(skb, offset, dest, len);
+}
+
static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -54,8 +62,7 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -151,8 +158,7 @@ static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
@@ -208,7 +214,8 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
*dest = 1;
} else {
- dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
memcpy(dest, opt + offset, priv->len);
}
--
2.34.1

[PATCH openEuler-22.03-LTS-SP1] io_uring: ensure IOPOLL locks around deferred work
by Zhihao Cheng 13 Sep '23
From: Jens Axboe <axboe(a)kernel.dk>
stable inclusion
from stable-v5.10.188
commit 810e401b34c4c4c244d8b93b9947ea5b3d4d49f8
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7KXLN
CVE: CVE-2023-21400
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
No direct upstream commit exists for this issue. It was fixed in
5.18 as part of a larger rework of the completion side.
io_commit_cqring() writes the CQ ring tail to make it visible, but it
also kicks off any deferred work we have. A ring setup with IOPOLL
does not need any locking around the CQ ring updates, as we're always
under the ctx uring_lock. But if we have deferred work that needs
processing, then io_queue_deferred() assumes that the completion_lock
is held, as it is for !IOPOLL.
Add a lockdep assertion to check and document this fact, and have
io_iopoll_complete() check if we have deferred work and run that
separately with the appropriate lock grabbed.
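In outline (a sketch of the resulting flow, mirroring the hunks below
rather than defining them), the IOPOLL completion path becomes:

	/* flush deferred work under completion_lock, then publish the
	 * CQ ring tail; the tail update itself needs no extra locking
	 * on the IOPOLL path because ctx->uring_lock is already held.
	 */
	if (io_commit_needs_flush(ctx)) {
		spin_lock(&ctx->completion_lock);
		__io_commit_cqring_flush(ctx);
		spin_unlock(&ctx->completion_lock);
	}
	__io_commit_cqring(ctx);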
Cc: stable(a)vger.kernel.org # 5.10, 5.15
Reported-by: dghost david <daviduniverse18(a)gmail.com>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Lin Yujun <linyujun809(a)huawei.com>
Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
---
io_uring/io_uring.c | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3d35f5d13666..781af0b05d8c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1521,6 +1521,8 @@ static void io_kill_timeout(struct io_kiocb *req, int status)
static void io_queue_deferred(struct io_ring_ctx *ctx)
{
+ lockdep_assert_held(&ctx->completion_lock);
+
while (!list_empty(&ctx->defer_list)) {
struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
struct io_defer_entry, list);
@@ -1572,14 +1574,24 @@ static void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
io_queue_deferred(ctx);
}
-static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+static inline bool io_commit_needs_flush(struct io_ring_ctx *ctx)
+{
+ return ctx->off_timeout_used || ctx->drain_active;
+}
+
+static inline void __io_commit_cqring(struct io_ring_ctx *ctx)
{
- if (unlikely(ctx->off_timeout_used || ctx->drain_active))
- __io_commit_cqring_flush(ctx);
/* order cqe stores with ring update */
smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
}
+static inline void io_commit_cqring(struct io_ring_ctx *ctx)
+{
+ if (unlikely(io_commit_needs_flush(ctx)))
+ __io_commit_cqring_flush(ctx);
+ __io_commit_cqring(ctx);
+}
+
static inline bool io_sqring_full(struct io_ring_ctx *ctx)
{
struct io_rings *r = ctx->rings;
@@ -2509,7 +2521,12 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
io_req_free_batch(&rb, req, &ctx->submit_state);
}
- io_commit_cqring(ctx);
+ if (io_commit_needs_flush(ctx)) {
+ spin_lock(&ctx->completion_lock);
+ __io_commit_cqring_flush(ctx);
+ spin_unlock(&ctx->completion_lock);
+ }
+ __io_commit_cqring(ctx);
io_cqring_ev_posted_iopoll(ctx);
io_req_free_batch_finish(ctx, &rb);
}
--
2.31.1
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I80YXE
CVE: NA
----------------------------------------
Add UCC support for XPU.
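A minimal usage sketch of the xpu_group API introduced below (illustrative
only; the ids and the work/ctx arguments are hypothetical placeholders):

	/* build a two-level device/ts hierarchy under xpu_root and
	 * dispatch work through the group's ->opt->run() callback.
	 */
	struct xpu_group *dev_grp, *ts_grp;

	dev_grp = xpu_group_alloc_and_attach(xpu_root, 0);	/* device 0 */
	ts_grp = dev_grp ? xpu_group_alloc_and_attach(dev_grp, 0) : NULL;
	if (ts_grp)
		xpu_run(ts_grp, work, ctx);	/* calls group->opt->run() */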
Signed-off-by: Chen Hui <judy.chenhui(a)huawei.com>
Signed-off-by: Yang Yanchao <yangyanchao6(a)huawei.com>
Signed-off-by: Hui Tang <tanghui20(a)huawei.com>
Signed-off-by: Guan Jing <guanjing6(a)huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie(a)huawei.com>
---
Kconfig | 2 +
drivers/Kconfig | 2 +
drivers/Makefile | 1 +
drivers/xpu/Kconfig | 9 +
drivers/xpu/Makefile | 1 +
drivers/xpu/xpu_group.c | 175 ++++++++
fs/proc/base.c | 102 ++++-
include/linux/sched.h | 3 +
include/linux/ucc_common.h | 21 +
include/linux/ucc_kfd.h | 110 +++++
include/linux/ucc_sched.h | 36 ++
include/linux/ucc_sched/ucc_sched.h | 71 +++
include/linux/ucc_ts.h | 254 +++++++++++
include/linux/vstream.h | 123 ++++++
include/linux/xpu_group.h | 66 +++
include/trace/events/ucc_sched.h | 120 +++++
init/init_task.c | 4 +
init/main.c | 9 +
kernel/Makefile | 2 +
kernel/sched/Makefile | 1 +
kernel/sched/core.c | 5 +
kernel/sched/ucc_sched.c | 148 +++++++
kernel/sysctl.c | 17 +-
kernel/ucc/Kconfig | 21 +
kernel/ucc/Makefile | 1 +
kernel/ucc/ascend_vstream.c | 654 ++++++++++++++++++++++++++++
kernel/ucc/ascend_vstream.h | 13 +
kernel/ucc/vstream.c | 62 +++
kernel/ucc_sched/Makefile | 1 +
kernel/ucc_sched/core.c | 591 +++++++++++++++++++++++++
kernel/ucc_sched/ucc_sched.h | 43 ++
31 files changed, 2666 insertions(+), 2 deletions(-)
create mode 100644 drivers/xpu/Kconfig
create mode 100644 drivers/xpu/Makefile
create mode 100644 drivers/xpu/xpu_group.c
create mode 100644 include/linux/ucc_common.h
create mode 100644 include/linux/ucc_kfd.h
create mode 100644 include/linux/ucc_sched.h
create mode 100644 include/linux/ucc_sched/ucc_sched.h
create mode 100644 include/linux/ucc_ts.h
create mode 100644 include/linux/vstream.h
create mode 100644 include/linux/xpu_group.h
create mode 100644 include/trace/events/ucc_sched.h
create mode 100644 kernel/sched/ucc_sched.c
create mode 100644 kernel/ucc/Kconfig
create mode 100644 kernel/ucc/Makefile
create mode 100644 kernel/ucc/ascend_vstream.c
create mode 100644 kernel/ucc/ascend_vstream.h
create mode 100644 kernel/ucc/vstream.c
create mode 100644 kernel/ucc_sched/Makefile
create mode 100644 kernel/ucc_sched/core.c
create mode 100644 kernel/ucc_sched/ucc_sched.h
diff --git a/Kconfig b/Kconfig
index 48a80beab685..8e558777fb54 100644
--- a/Kconfig
+++ b/Kconfig
@@ -30,3 +30,5 @@ source "crypto/Kconfig"
source "lib/Kconfig"
source "lib/Kconfig.debug"
+
+source "kernel/ucc/Kconfig"
diff --git a/drivers/Kconfig b/drivers/Kconfig
index ab4d43923c4d..bd59e9e525ba 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -219,4 +219,6 @@ source "drivers/siox/Kconfig"
source "drivers/slimbus/Kconfig"
+source "drivers/xpu/Kconfig"
+
endmenu
diff --git a/drivers/Makefile b/drivers/Makefile
index 578f469f72fb..1130b2d92df1 100644
--- a/drivers/Makefile
+++ b/drivers/Makefile
@@ -186,3 +186,4 @@ obj-$(CONFIG_MULTIPLEXER) += mux/
obj-$(CONFIG_UNISYS_VISORBUS) += visorbus/
obj-$(CONFIG_SIOX) += siox/
obj-$(CONFIG_GNSS) += gnss/
+obj-$(CONFIG_XPU_SCHEDULE) += xpu/
diff --git a/drivers/xpu/Kconfig b/drivers/xpu/Kconfig
new file mode 100644
index 000000000000..c4a391d0039d
--- /dev/null
+++ b/drivers/xpu/Kconfig
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menuconfig XPU_SCHEDULE
+ bool "xpu schedule"
+ default n
+ help
+	  Support XPU scheduling. Say Y here if you want support for
+	  XPU scheduling.
+
diff --git a/drivers/xpu/Makefile b/drivers/xpu/Makefile
new file mode 100644
index 000000000000..9edc6dcdd4d0
--- /dev/null
+++ b/drivers/xpu/Makefile
@@ -0,0 +1 @@
+obj-y += xpu_group.o
diff --git a/drivers/xpu/xpu_group.c b/drivers/xpu/xpu_group.c
new file mode 100644
index 000000000000..53a598db0615
--- /dev/null
+++ b/drivers/xpu/xpu_group.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/xpu_group.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+
+extern int ucc_rt_nr_running(struct xcu *cu);
+static DECLARE_RWSEM(xpu_group_rwsem);
+
+static struct xpu_capability xpu_capability_root;
+
+struct xpu_group __xpu_root = {
+ .type = XPU_TYPE_ROOT,
+ .capability = &xpu_capability_root,
+
+ .next_layer = IDR_INIT(next_layer),
+};
+
+struct xpu_group *xpu_root = &__xpu_root;
+EXPORT_SYMBOL(xpu_root);
+
+int __xpu_group_attach(struct xpu_group *new_group,
+ struct xpu_group *previous_group)
+{
+ int id = new_group->id;
+
+ if (id == -1)
+ id = idr_alloc(&previous_group->next_layer, new_group,
+ 0, INT_MAX, GFP_KERNEL);
+ else
+ id = idr_alloc(&previous_group->next_layer, new_group,
+ id, id + 1, GFP_KERNEL);
+ if (id < 0)
+ return -EEXIST;
+
+ new_group->id = id;
+ new_group->previous_layer = previous_group;
+
+ return 0;
+}
+
+int xpu_group_attach(struct xpu_group *new_group,
+ struct xpu_group *previous_group)
+{
+ int ret;
+
+ down_write(&xpu_group_rwsem);
+ ret = __xpu_group_attach(new_group, previous_group);
+ up_write(&xpu_group_rwsem);
+ return ret;
+}
+EXPORT_SYMBOL(xpu_group_attach);
+
+struct xpu_group *xpu_group_alloc_and_attach(struct xpu_group *previous_group,
+ int id)
+{
+ struct xpu_group *new = xpu_group_alloc();
+
+ if (!new) {
+ pr_err("alloc xpu_group failed\n");
+ return NULL;
+ }
+
+ new->id = id;
+
+	if (xpu_group_attach(new, previous_group)) {
+		kfree(new);
+		return NULL;
+	}
+
+ return new;
+}
+EXPORT_SYMBOL(xpu_group_alloc_and_attach);
+
+int __xpu_group_detach(struct xpu_group *group)
+{
+ idr_remove(&group->previous_layer->next_layer, group->id);
+ return 0;
+}
+
+int xpu_group_detach(struct xpu_group *group)
+{
+ int ret;
+
+ down_write(&xpu_group_rwsem);
+ ret = __xpu_group_detach(group);
+ up_write(&xpu_group_rwsem);
+ return ret;
+}
+EXPORT_SYMBOL(xpu_group_detach);
+
+struct xpu_group *__xpu_group_find(struct xpu_group *group, int id)
+{
+ return idr_find(&group->next_layer, id);
+}
+
+struct xpu_group *xpu_group_find(struct xpu_group *group, int id)
+{
+ struct xpu_group *p;
+
+ down_read(&xpu_group_rwsem);
+ p = __xpu_group_find(group, id);
+ up_read(&xpu_group_rwsem);
+
+ return p;
+}
+EXPORT_SYMBOL(xpu_group_find);
+
+
+struct xpu_group *xpu_idle_group_find(struct xpu_group *group)
+{
+ struct xpu_group *entry_group;
+ int id;
+
+ down_read(&xpu_group_rwsem);
+ idr_for_each_entry(&group->next_layer, entry_group, id) {
+ if (!entry_group->used) {
+ up_read(&xpu_group_rwsem);
+ return entry_group;
+ }
+ }
+ up_read(&xpu_group_rwsem);
+
+ return NULL;
+}
+
+int xpu_run(struct xpu_group *group, void *para1, void *para2)
+{
+ int ret = 0;
+
+ if (group->opt && group->opt->run)
+ ret = group->opt->run(group, para1, para2);
+
+ return ret;
+}
+
+int xpu_finish(struct xpu_group *group, void *para1, void *para2)
+{
+ if (group->opt && group->opt->finish)
+ return group->opt->finish(group, para1, para2);
+
+ return 0;
+}
+
+int xpu_wait(struct xpu_group *group, void *para1, void *para2, void *para3)
+{
+ if (group->opt && group->opt->wait)
+ return group->opt->wait(group, para1, para2, para3);
+
+ return 0;
+}
+
+int xpu_complete(struct xpu_group *group, void *para1, void *para2, void *para3)
+{
+ if (group->opt && group->opt->complete)
+ return group->opt->complete(group, para1, para2, para3);
+
+ return 0;
+}
+
+struct xpu_group *xpu_group_alloc(void)
+{
+ struct xpu_group *node = kzalloc(sizeof(*node), GFP_KERNEL);
+
+ if (!node)
+ return NULL;
+
+ node->type = XPU_TYPE_CUSTOM;
+ idr_init(&node->next_layer);
+
+ return node;
+}
+EXPORT_SYMBOL(xpu_group_alloc);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dc9841826264..516eee1ae952 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -770,7 +770,6 @@ static const struct file_operations proc_single_file_operations = {
.release = single_release,
};
-
struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode)
{
struct task_struct *task = get_proc_task(inode);
@@ -1546,6 +1545,99 @@ static const struct file_operations proc_pid_sched_operations = {
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+static ssize_t ucc_step_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char numbuf[PROC_NUMBUF];
+ ssize_t len;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+
+ len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->ucc_step);
+
+ put_task_struct(task);
+
+ return simple_read_from_buffer(buf, count, ppos, numbuf, len);
+}
+
+static ssize_t ucc_step_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ int err;
+ unsigned int ucc_step;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+	err = kstrtouint_from_user(buf, count, 0, &ucc_step);
+	if (err) {
+		put_task_struct(p);
+		return err;
+	}
+
+ p->ucc_step = ucc_step;
+ put_task_struct(p);
+
+ return count;
+}
+
+static const struct file_operations ucc_step_operations = {
+ .write = ucc_step_write,
+ .read = ucc_step_read,
+};
+
+static ssize_t ucc_priority_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct task_struct *task;
+ char numbuf[PROC_NUMBUF];
+ ssize_t len;
+
+ task = get_proc_task(file_inode(file));
+ if (!task)
+ return -ESRCH;
+
+ len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->ucc_priority);
+
+ put_task_struct(task);
+
+ return simple_read_from_buffer(buf, count, ppos, numbuf, len);
+}
+
+static ssize_t ucc_priority_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ int err;
+ unsigned int ucc_priority;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+	err = kstrtouint_from_user(buf, count, 0, &ucc_priority);
+	if (err) {
+		put_task_struct(p);
+		return err;
+	}
+
+ p->ucc_priority = ucc_priority;
+ put_task_struct(p);
+
+ return count;
+}
+
+static const struct file_operations ucc_priority_operations = {
+ .write = ucc_priority_write,
+ .read = ucc_priority_read,
+};
+
+#endif
+
#ifdef CONFIG_SCHED_AUTOGROUP
/*
* Print out autogroup related information:
@@ -3151,6 +3243,10 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_ASCEND_SHARE_POOL
ONE("sp_group", S_IRUGO, proc_sp_group_state),
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+ REG("ucc_priority", 0644, ucc_priority_operations),
+ REG("ucc_step", 0644, ucc_step_operations),
+#endif
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
@@ -3537,6 +3633,10 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_ASCEND_SHARE_POOL
ONE("sp_group", S_IRUGO, proc_sp_group_state),
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+ REG("ucc_priority", 0644, ucc_priority_operations),
+ REG("ucc_step", 0644, ucc_step_operations),
+#endif
};
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8fd8c5b7cdc6..175659be95f3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1281,6 +1281,9 @@ struct task_struct {
#if !defined(__GENKSYMS__)
#if defined(CONFIG_QOS_SCHED_SMART_GRID)
struct sched_grid_qos *grid_qos;
+#elif defined(CONFIG_XPU_SCHEDULE)
+ u32 ucc_priority;
+ u32 ucc_step;
#else
KABI_RESERVE(8)
#endif
diff --git a/include/linux/ucc_common.h b/include/linux/ucc_common.h
new file mode 100644
index 000000000000..3875c2226d24
--- /dev/null
+++ b/include/linux/ucc_common.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _UCC_COMMON_H
+#define _UCC_COMMON_H
+
+/*
+ * UCC Print Function
+ */
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define ucc_err(fmt, ...) printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
+
+#define ucc_warn(fmt, ...) printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
+
+#define ucc_info(fmt, ...) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+
+#define ucc_dbg(fmt, ...) printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
+
+#endif
diff --git a/include/linux/ucc_kfd.h b/include/linux/ucc_kfd.h
new file mode 100644
index 000000000000..07eedc2fd5f2
--- /dev/null
+++ b/include/linux/ucc_kfd.h
@@ -0,0 +1,110 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef KFD_PRIV_H_INCLUDED
+#define KFD_PRIV_H_INCLUDED
+
+#include <linux/mmu_notifier.h>
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/mmu_notifier.h>
+#include <linux/idr.h>
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+struct process_queue_manager;
+struct kfd_process;
+struct kfd_signal_page;
+
+struct process_queue_manager {
+ struct kfd_process *process;
+ struct list_head queues;
+ unsigned long *queue_slot_bitmap;
+};
+
+struct kfd_signal_page {
+ uint64_t *kernel_address;
+ uint64_t __user *user_address;
+ bool need_to_free_pages;
+};
+
+/* Process data */
+struct kfd_process {
+ struct hlist_node kfd_processes;
+ void *mm;
+ struct kref ref;
+ struct work_struct release_work;
+ struct mutex mutex;
+ struct task_struct *lead_thread;
+ struct mmu_notifier mmu_notifier;
+/* TODO: check if use right branch */
+ struct rcu_head rcu;
+ uint16_t pasid;
+ struct list_head per_device_data;
+ struct process_queue_manager pqm;
+ bool is_32bit_user_mode;
+ struct mutex event_mutex;
+ struct idr event_idr;
+ struct kfd_signal_page *signal_page;
+ size_t signal_mapped_size;
+ size_t signal_event_count;
+ bool signal_event_limit_reached;
+/* TODO: check if use right branch */
+ struct rb_root bo_interval_tree;
+ void *kgd_process_info;
+ struct dma_fence *ef;
+ struct delayed_work eviction_work;
+ struct delayed_work restore_work;
+ unsigned int last_eviction_seqno;
+ unsigned long last_restore_timestamp;
+ unsigned long last_evict_timestamp;
+ bool debug_trap_enabled;
+ uint32_t trap_debug_wave_launch_mode;
+ struct file *dbg_ev_file;
+ uint32_t allocated_debug_watch_point_bitmask;
+ struct kobject *kobj;
+ struct kobject *kobj_queues;
+ struct attribute attr_pasid;
+ bool has_cwsr;
+ uint64_t exception_enable_mask;
+ uint64_t exception_status;
+};
+
+struct kfd_ioctl_create_queue_args {
+ __u64 ring_base_address; /* to KFD */
+ __u64 write_pointer_address; /* from KFD */
+ __u64 read_pointer_address; /* from KFD */
+ __u64 doorbell_offset; /* from KFD */
+
+ __u32 ring_size; /* to KFD */
+ __u32 gpu_id; /* to KFD */
+ __u32 queue_type; /* to KFD */
+ __u32 queue_percentage; /* to KFD */
+ __u32 queue_priority; /* to KFD */
+ __u32 queue_id; /* from KFD */
+
+ __u64 eop_buffer_address; /* to KFD */
+ __u64 eop_buffer_size; /* to KFD */
+ __u64 ctx_save_restore_address; /* to KFD */
+ __u32 ctx_save_restore_size; /* to KFD */
+ __u32 ctl_stack_size; /* to KFD */
+};
+
+struct kfd_ioctl_destroy_queue_args {
+ __u32 queue_id; /* to KFD */
+ __u32 pad;
+};
+
+struct kfd_ioctl_update_queue_args {
+ __u64 ring_base_address; /* to KFD */
+
+ __u32 queue_id; /* to KFD */
+ __u32 ring_size; /* to KFD */
+ __u32 queue_percentage; /* to KFD */
+ __u32 queue_priority; /* to KFD */
+};
+#endif
diff --git a/include/linux/ucc_sched.h b/include/linux/ucc_sched.h
new file mode 100644
index 000000000000..5b170545f7c2
--- /dev/null
+++ b/include/linux/ucc_sched.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LINUX_UCC_SCHED_H__
+#define __LINUX_UCC_SCHED_H__
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/hash.h>
+#include <linux/rculist.h>
+#include <linux/idr.h>
+#include <linux/xpu_group.h>
+#include <linux/hashtable.h>
+#include <linux/vstream.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#define VRTSQ_RTSQ_HASH_ORDER 6
+
+#ifdef CONFIG_XPU_SCHEDULE
+int ucc_process_task(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx,
+ int *sqenum);
+int ucc_free_task(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx);
+int ucc_wait_cq(struct vstream_info *vsqcq_info, struct tsdrv_ctx *ctx,
+ struct devdrv_report_para *arg, int *sqenum);
+struct xpu_group *select_sq(struct vstream_info *vstream_info);
+int ucc_sched_register_xcu(int dev_id, int ts_id, int cu_num);
+void ucc_set_vstream_state(struct vstream_info *vinfo, int state);
+void ucc_dequeue_task(struct vstream_info *vInfo);
+int ucc_rt_nr_running(struct xcu *cu);
+struct xcu *ucc_get_xcu_by_id(int cu_id);
+int ucc_xcu_is_sched(int cu_id);
+void ucc_dump_statistics_info(struct ucc_se *se);
+#endif
+
+#endif
diff --git a/include/linux/ucc_sched/ucc_sched.h b/include/linux/ucc_sched/ucc_sched.h
new file mode 100644
index 000000000000..6edd8930e09e
--- /dev/null
+++ b/include/linux/ucc_sched/ucc_sched.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2019. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Mon Jan 30 14:29:19 2023
+ */
+
+#ifndef __LINUX_UCC_SCHED_USCHED_H__
+#define __LINUX_UCC_SCHED_USCHED_H__
+
+enum ucc_se_state {
+ SE_PREPARE,
+ SE_READY,
+ SE_RUNNING,
+ SE_BLOCK,
+ SE_DEAD,
+};
+
+enum ucc_se_flag {
+ UCC_TIF_NONE,
+ UCC_TIF_PREEMPT,
+ UCC_TIF_BALANCE,
+};
+
+enum ucc_se_prio {
+ UCC_PRIO_HIGH,
+ UCC_PRIO_LOW,
+};
+
+enum ucc_se_step {
+ UCC_STEP_SLOW = 1,
+ UCC_STEP_FAST = 10,
+};
+
+struct ucc_statistics {
+ u64 wait_start;
+ u64 wait_max;
+ u64 wait_count;
+ u64 wait_sum;
+
+ u64 preempt_start;
+ u64 preempt_max;
+ u64 preempt_count;
+ u64 preempt_sum;
+
+ u64 kernel_sum;
+ u64 timeout_count;
+
+ u64 run_start;
+ u64 run_max;
+ u64 run_count;
+ u64 run_sum;
+};
+
+struct ucc_se {
+ int on_cu;
+ struct list_head run_list;
+ enum ucc_se_state state;
+ enum ucc_se_flag flag;
+ enum ucc_se_prio prio;
+ enum ucc_se_step step;
+ raw_spinlock_t se_lock;
+ struct ucc_statistics statistics;
+ int is_timeout;
+};
+
+int ucc_sched_init(void);
+int ucc_schedule(int cu_id);
+int ucc_wake_up(struct ucc_se *se);
+
+#endif
diff --git a/include/linux/ucc_ts.h b/include/linux/ucc_ts.h
new file mode 100644
index 000000000000..7280ccca1059
--- /dev/null
+++ b/include/linux/ucc_ts.h
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef TS_H
+#define TS_H
+
+#include <linux/file.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+
+#define DEVDRV_MAX_SQ_DEPTH (1024)
+#define DEVDRV_SQ_SLOT_SIZE (64)
+
+#define DEVDRV_MAX_SQ_NUM (512 - 1)
+#define DEVDRV_MAX_CQ_NUM (352 - 1)
+
+#define DEVDRV_MAX_TS_NUM (1)
+
+#define REMAP_ALIGN_SIZE (64 * 1024)
+#define REMAP_ALIGN_MASK (~(REMAP_ALIGN_SIZE - 1))
+#define REMAP_ALIGN(x) (((x) + REMAP_ALIGN_SIZE - 1) & \
+ REMAP_ALIGN_MASK)
+
+#define DEVDRV_DB_SPACE_SIZE (1024 * 4096)
+
+#define SQCQ_RTS_INFO_LENGTH 5
+#define SQCQ_RESV_LENGTH 8
+
+#define DEVDRV_CBCQ_MAX_GID 128
+
+enum phy_sqcq_type {
+ NORMAL_SQCQ_TYPE = 0,
+ CALLBACK_SQCQ_TYPE,
+ LOGIC_SQCQ_TYPE,
+ SHM_SQCQ_TYPE,
+ DFX_SQCQ_TYPE,
+ TS_SQCQ_TYPE,
+ KERNEL_SQCQ_TYPE,
+};
+
+struct notifier_operations {
+ int (*notifier_call)(struct file *file_op, unsigned long mode);
+};
+
+#define MAX_DEVICE_COUNT 64
+
+struct davinci_intf_stru {
+ atomic_t count;
+ struct mutex dmutex;
+ struct cdev cdev;
+ struct device *device;
+ struct list_head process_list;
+ struct list_head module_list;
+ unsigned int device_status[MAX_DEVICE_COUNT];
+ cpumask_var_t cpumask;
+};
+
+#define DAVINIC_MODULE_NAME_MAX 256
+struct davinci_intf_private_stru {
+ char module_name[DAVINIC_MODULE_NAME_MAX];
+ unsigned int device_id;
+ pid_t owner_pid;
+ int close_flag;
+ atomic_t work_count;
+ int release_status;
+ struct mutex fmutex;
+ const struct file_operations fops;
+ struct notifier_operations notifier;
+ struct davinci_intf_stru *device_cb;
+ struct file priv_filep;
+ unsigned int free_type;
+};
+
+enum sqcq_alloc_status {
+ SQCQ_INACTIVE = 0,
+ SQCQ_ACTIVE
+};
+
+struct devdrv_ts_sq_info {
+ enum phy_sqcq_type type;
+ pid_t tgid;
+ u32 head;
+ u32 tail;
+ u32 credit;
+ u32 index;
+ int uio_fd;
+
+ u8 *uio_addr;
+ int uio_size;
+
+ enum sqcq_alloc_status alloc_status;
+ u64 send_count;
+
+ void *sq_sub;
+};
+
+struct devdrv_ts_cq_info {
+ enum phy_sqcq_type type;
+ pid_t tgid;
+ u32 vfid;
+
+ u32 head;
+ u32 tail;
+ u32 release_head; /* runtime read cq head value */
+ u32 index;
+ u32 phase;
+ u32 int_flag;
+
+ int uio_fd;
+
+ u8 *uio_addr;
+ int uio_size;
+
+ enum sqcq_alloc_status alloc_status;
+ u64 receive_count;
+
+ void *cq_sub;
+
+ void (*complete_handle)(struct devdrv_ts_cq_info *cq_info);
+
+ u8 slot_size;
+};
+
+#define DEVDRV_SQ_INFO_OCCUPY_SIZE \
+ (sizeof(struct devdrv_ts_sq_info) * DEVDRV_MAX_SQ_NUM)
+#define DEVDRV_CQ_INFO_OCCUPY_SIZE \
+ (sizeof(struct devdrv_ts_cq_info) * DEVDRV_MAX_CQ_NUM)
+
+#define DEVDRV_MAX_INFO_SIZE \
+ (DEVDRV_SQ_INFO_OCCUPY_SIZE + DEVDRV_CQ_INFO_OCCUPY_SIZE)
+#define DEVDRV_VM_SQ_MEM_OFFSET 0
+#define DEVDRV_VM_SQ_SLOT_SIZE \
+ REMAP_ALIGN(DEVDRV_MAX_SQ_DEPTH * DEVDRV_SQ_SLOT_SIZE)
+#define DEVDRV_VM_SQ_MEM_SIZE \
+ (DEVDRV_VM_SQ_SLOT_SIZE * DEVDRV_MAX_SQ_NUM)
+
+#define DEVDRV_VM_INFO_MEM_OFFSET \
+ (DEVDRV_VM_SQ_MEM_OFFSET + DEVDRV_VM_SQ_MEM_SIZE)
+#define DEVDRV_VM_INFO_MEM_SIZE REMAP_ALIGN(DEVDRV_MAX_INFO_SIZE)
+
+#define DEVDRV_VM_DB_MEM_OFFSET \
+ (DEVDRV_VM_INFO_MEM_OFFSET + DEVDRV_VM_INFO_MEM_SIZE)
+#define DEVDRV_VM_DB_MEM_SIZE REMAP_ALIGN(DEVDRV_DB_SPACE_SIZE)
+
+#define DEVDRV_VM_CQ_MEM_OFFSET \
+ (DEVDRV_VM_DB_MEM_OFFSET + DEVDRV_VM_DB_MEM_SIZE)
+
+enum tsdrv_id_type {
+ TSDRV_STREAM_ID,
+ TSDRV_NOTIFY_ID,
+ TSDRV_MODEL_ID,
+ TSDRV_EVENT_SW_ID, /* should use for event alloc/free/inquiry res_num*/
+ TSDRV_EVENT_HW_ID,
+ TSDRV_IPC_EVENT_ID,
+ TSDRV_SQ_ID,
+ TSDRV_CQ_ID,
+ TSDRV_PCQ_ID,
+ TSDRV_MAX_ID,
+};
+
+#define TSDRV_CQ_REUSE 0x00000001
+#define TSDRV_SQ_REUSE 0x00000002
+
+struct normal_alloc_sqcq_para {
+ uint32_t fd;
+ uint32_t tsId;
+ uint32_t devId;
+ uint32_t sqeSize;
+ uint32_t cqeSize;
+ uint32_t sqeDepth;
+ uint32_t cqeDepth;
+ uint32_t grpId;
+ uint32_t flag;
+ uint32_t sqId;
+ uint32_t cqId;
+ uint32_t priority;
+ uint32_t info[SQCQ_RTS_INFO_LENGTH];
+ uint32_t res[SQCQ_RESV_LENGTH];
+};
+
+struct normal_free_sqcq_para {
+ uint32_t tsId;
+ uint32_t flag;
+ uint32_t sqId;
+ uint32_t cqId;
+ uint32_t res[SQCQ_RESV_LENGTH];
+};
+
+struct tsdrv_sqcq_data_para {
+ uint32_t id;
+ uint32_t val;
+};
+
+struct devdrv_report_para {
+ int timeout;
+ u32 cq_tail;
+ u32 cq_id;
+};
+
+struct tsdrv_ts_id_ctx {
+ u32 id_num;
+ struct list_head id_list;
+ spinlock_t id_lock;
+};
+struct tsdrv_ts_ctx {
+ u32 tsid;
+ atomic_t status;
+ u32 send_count;
+ u64 receive_count;
+
+ int32_t cq_tail_updated;
+ wait_queue_head_t report_wait;
+
+ struct work_struct recycle_work;
+
+ wait_queue_head_t cbcq_wait[DEVDRV_CBCQ_MAX_GID];
+
+ void *shm_sqcq_ctx;
+ void *logic_sqcq_ctx;
+ void *sync_cb_sqcq_ctx; // mini callback
+
+ struct tsdrv_ts_id_ctx id_ctx[TSDRV_MAX_ID];
+
+ /* only used by vm */
+ u32 vcqid;
+ u32 wait_queue_inited;
+ u32 cq_report_status;
+ int32_t cq_tail;
+ spinlock_t ctx_lock;
+
+	u32 recycle_cbsqcq_num; // mini callback
+};
+
+//Context Delivers
+struct tsdrv_ctx {
+ u32 ctx_index;
+ atomic_t status;
+ atomic_t type;
+ pid_t tgid;
+ pid_t pid;
+ int32_t ssid;
+ u32 thread_bind_irq_num;
+ u32 mirror_ctx_status;
+ struct rb_node node;
+ struct list_head list;
+ struct vm_area_struct *vma[DEVDRV_MAX_TS_NUM];
+ spinlock_t ctx_lock;
+ struct mutex mutex_lock;
+ struct tsdrv_ts_ctx ts_ctx[DEVDRV_MAX_TS_NUM];
+
+ u64 unique_id; /* mark unique processes for vm */
+};
+
+#endif
diff --git a/include/linux/vstream.h b/include/linux/vstream.h
new file mode 100644
index 000000000000..14d799296053
--- /dev/null
+++ b/include/linux/vstream.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VSTREAM_H
+#define _LINUX_VSTREAM_H
+
+#include <linux/ucc_kfd.h>
+#include <linux/ucc_sched/ucc_sched.h>
+#include <linux/ucc_ts.h>
+
+#define MAX_VSTREAM_SIZE 1024
+#define MAX_VSTREAM_SLOT_SIZE 64
+#define MAX_CQ_SLOT_SIZE 12
+
+/*
+ * XXX_VSTREAM_ALLOC: alloc a vstream, buffer for tasks
+ * XXX_VSTREAM_FREE: free a vstream
+ * XXX_VSTREAM_KICK: there are tasks to be executed in the vstream
+ * XXX_VSTREAM_UPDATE: update information for an existing vstream
+ * XXX_CALLBACK_VSTREAM_WAIT: waiting for callback tasks
+ * XXX_CALLBACK_VSTREAM_KICK: callback tasks have been executed
+ *
+ * NOTE: Callback vstream is only for Ascend now. We do not need
+ * CALLBACK_VSTREAM_ALLOC because the callback vstream will be
+ * allocated together with the vstream on Ascend.
+ */
+enum VSTREAM_COMMAND {
+ /* vstream command for Ascend */
+ ASCEND_VSTREAM_ALLOC = 0,
+ ASCEND_VSTREAM_FREE,
+ ASCEND_VSTREAM_KICK,
+ ASCEND_CALLBACK_VSTREAM_WAIT,
+ ASCEND_CALLBACK_VSTREAM_KICK,
+ ASCEND_VSTREAM_GET_HEAD,
+ ASCEND_MAX_COMMAND,
+
+ /* vstream command for amdgpu */
+ AMDGPU_VSTREAM_ALLOC = ASCEND_MAX_COMMAND + 1,
+ AMDGPU_VSTREAM_FREE,
+ AMDGPU_VSTREAM_KICK,
+ AMDGPU_VSTREAM_UPDATE,
+ AMDGPU_MAX_COMMAND,
+};
+
+struct vstream_alloc_args {
+ union {
+ /* For Ascend */
+ struct normal_alloc_sqcq_para ascend;
+ /* For amdgpu */
+ struct kfd_ioctl_create_queue_args amdgpu;
+ };
+};
+
+struct vstream_free_args {
+ union {
+ /* For Ascend */
+ struct normal_free_sqcq_para ascend;
+ /* For amdgpu */
+ struct kfd_ioctl_destroy_queue_args amdgpu;
+ };
+};
+
+struct vstream_kick_args {
+ union {
+ /* For Ascend */
+ struct tsdrv_sqcq_data_para ascend;
+ /* For amdgpu */
+ };
+};
+
+struct vstream_args {
+ union {
+ struct vstream_alloc_args va_args;
+ struct vstream_free_args vf_args;
+ struct vstream_kick_args vk_args;
+ struct kfd_ioctl_update_queue_args vu_args;
+ struct tsdrv_sqcq_data_para vh_args;
+ struct devdrv_report_para cvw_args;
+ struct tsdrv_sqcq_data_para cvk_args;
+ };
+};
+
+struct vstream_node {
+ uint32_t id;
+ uint32_t head;
+ uint32_t tail;
+ uint32_t credit;
+ void *vstreamData;
+ raw_spinlock_t spin_lock;
+};
+
+struct vstream_id {
+ uint32_t vstreamId;
+ struct list_head list;
+};
+
+struct vcq_map_table {
+ uint32_t vcqId;
+ struct vstream_node *vcqNode;
+ struct list_head vstreamId_list;
+};
+
+struct vstream_info {
+ uint32_t vstreamId; //key
+ uint32_t vcqId;
+ uint32_t devId;
+ uint32_t tsId;
+ struct ucc_se se;
+ //TODO::check name
+ struct vstream_node *vsqNode;
+ struct vstream_node *vcqNode;
+ void *privdata;
+ uint32_t info[SQCQ_RTS_INFO_LENGTH];
+ int cu_id;
+ struct xpu_group *group;
+ int send_cnt;
+ struct task_struct *p;
+};
+
+typedef int vstream_manage_t(struct vstream_args *arg);
+int update_vstream_head(struct vstream_info *vstream_info, int num);
+struct vstream_info *vstream_get_info(uint32_t id);
+bool vstream_have_kernel(struct ucc_se *se);
+
+#endif /* _LINUX_VSTREAM_H */
diff --git a/include/linux/xpu_group.h b/include/linux/xpu_group.h
new file mode 100644
index 000000000000..5e3a96b15f9c
--- /dev/null
+++ b/include/linux/xpu_group.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __XPU_GROUP_H__
+#define __XPU_GROUP_H__
+#include <linux/idr.h>
+
+struct xpu_group;
+struct xcu;
+
+enum xpu_type {
+ XPU_TYPE_ROOT,
+ XPU_TYPE_TASK_QUEUE,
+ XPU_TYPE_NPU_310,
+ XPU_TYPE_CUSTOM,
+};
+
+enum xpu_capability_type {
+ TYPE_1,
+ XPU_CAPABILITY_TYPE_NR,
+};
+
+struct xpu_capability {
+ unsigned long capacities[XPU_CAPABILITY_TYPE_NR];
+};
+
+struct xpu_operation {
+ int (*run)(struct xpu_group *group, void *para1, void *para2);
+ int (*finish)(struct xpu_group *group, void *para1, void *para2);
+ int (*wait)(struct xpu_group *group, void *para1, void *para2,
+ void *para3);
+ int (*complete)(struct xpu_group *group, void *para1, void *para2,
+ void *para3);
+};
+
+struct xpu_group {
+ int id;
+ enum xpu_type type;
+ struct xpu_capability *capability;
+
+ struct xpu_group *previous_layer;
+ struct idr next_layer;
+
+ struct xpu_operation *opt;
+
+ int used;
+
+ void *data;
+};
+
+extern struct xpu_group *xpu_root;
+
+#ifdef CONFIG_XPU_SCHEDULE
+int xpu_group_attach(struct xpu_group *new_group,
+ struct xpu_group *previous_group);
+int xpu_group_detach(struct xpu_group *group);
+struct xpu_group *xpu_group_find(struct xpu_group *group, int id);
+struct xpu_group *xpu_idle_group_find(struct xpu_group *group);
+struct xpu_group *xpu_group_alloc(void);
+struct xpu_group *xpu_group_alloc_and_attach(struct xpu_group *previous_group,
+ int id);
+int xpu_run(struct xpu_group *group, void *para1, void *para2);
+int xpu_finish(struct xpu_group *group, void *para1, void *para2);
+int xpu_wait(struct xpu_group *group, void *para1, void *para2, void *para3);
+#endif
+
+#endif
diff --git a/include/trace/events/ucc_sched.h b/include/trace/events/ucc_sched.h
new file mode 100644
index 000000000000..104a39b2f41c
--- /dev/null
+++ b/include/trace/events/ucc_sched.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM ucc_sched
+
+#if !defined(_TRACE_UCC_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_UCC_SCHED_H
+
+#include <linux/tracepoint.h>
+#include <linux/binfmts.h>
+
+/*
+ * XXX the below ucc_sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
+ * adding ucc_sched_stat support to SCHED_FIFO/RR would be welcome.
+ */
+DECLARE_EVENT_CLASS(ucc_sched_stat_template,
+
+ TP_PROTO(struct vstream_info *vinfo, u64 delay),
+
+ TP_ARGS(vinfo, delay),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(int, cu_id)
+ __field(u32, vstreamId)
+ __field(u32, prio)
+ __field(u64, delay)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, vinfo->p->comm, TASK_COMM_LEN);
+ __entry->pid = vinfo->p->pid;
+ __entry->cu_id = vinfo->cu_id;
+ __entry->vstreamId = vinfo->vstreamId;
+ __entry->prio = vinfo->p->ucc_priority;
+ __entry->delay = delay;
+ ),
+
+ TP_printk("comm=%s pid=%d cu_id=%d vstreamId %u prio %u, delay=%llu [ns]",
+ __entry->comm, __entry->pid,
+ __entry->cu_id, __entry->vstreamId, __entry->prio,
+ (unsigned long long)__entry->delay)
+);
+
+DECLARE_EVENT_CLASS(ucc_sched_stat_template_1,
+
+ TP_PROTO(struct vstream_info *vinfo, u64 delay, int is_timeout),
+
+ TP_ARGS(vinfo, delay, is_timeout),
+
+ TP_STRUCT__entry(
+ __array(char, comm, TASK_COMM_LEN)
+ __field(pid_t, pid)
+ __field(int, cu_id)
+ __field(u32, vstreamId)
+ __field(u64, delay)
+ __field(int, is_timeout)
+ ),
+
+ TP_fast_assign(
+ memcpy(__entry->comm, vinfo->p->comm, TASK_COMM_LEN);
+ __entry->pid = vinfo->p->pid;
+ __entry->cu_id = vinfo->cu_id;
+ __entry->vstreamId = vinfo->vstreamId;
+ __entry->delay = delay;
+ __entry->is_timeout = is_timeout;
+ ),
+
+ TP_printk("comm=%s pid=%d cu_id=%d vstreamId %u, delay=%llu [ns]:%d",
+ __entry->comm, __entry->pid,
+ __entry->cu_id, __entry->vstreamId,
+ (unsigned long long)__entry->delay,
+ __entry->is_timeout)
+);
+/*
+ * Tracepoint for accounting wait time (time the task is runnable
+ * but not actually running due to scheduler contention).
+ */
+DEFINE_EVENT(ucc_sched_stat_template, ucc_sched_stat_wait,
+ TP_PROTO(struct vstream_info *vinfo, u64 delay),
+ TP_ARGS(vinfo, delay));
+
+DEFINE_EVENT(ucc_sched_stat_template, ucc_sched_stat_preempt,
+ TP_PROTO(struct vstream_info *vinfo, u64 delay),
+ TP_ARGS(vinfo, delay));
+
+DEFINE_EVENT(ucc_sched_stat_template_1, ucc_sched_stat_run,
+ TP_PROTO(struct vstream_info *vinfo, u64 delay, int is_timeout),
+ TP_ARGS(vinfo, delay, is_timeout));
+
+TRACE_EVENT(ucc_sched_switch,
+
+ TP_PROTO(int preempt,
+ struct vstream_info *next),
+
+ TP_ARGS(preempt, next),
+
+ TP_STRUCT__entry(
+ __field(int, cu_id)
+ __field(u32, next_vstreamId)
+ __field(u32, next_prio)
+ __field(int, preempt)
+ ),
+
+ TP_fast_assign(
+ __entry->cu_id = next->cu_id;
+ __entry->next_vstreamId = next->vstreamId;
+ __entry->next_prio = next->p->ucc_priority;
+ __entry->preempt = preempt;
+ ),
+
+ TP_printk("cu_id=%d next_vstreamId %u next_prio %u preempt[%d]",
+ __entry->cu_id,
+ __entry->next_vstreamId, __entry->next_prio,
+ __entry->preempt)
+);
+#endif /* _TRACE_UCC_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/init/init_task.c b/init/init_task.c
index b312a045f4b9..c1a78b4da368 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -188,6 +188,10 @@ struct task_struct init_task
.fork_pid = 0,
},
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+ .ucc_priority = 1,
+ .ucc_step = 1,
+#endif
};
EXPORT_SYMBOL(init_task);
diff --git a/init/main.c b/init/main.c
index 50af60ff0ef6..7ed2e67d7011 100644
--- a/init/main.c
+++ b/init/main.c
@@ -66,6 +66,7 @@
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/sched/init.h>
+#include <linux/ucc_sched/ucc_sched.h>
#include <linux/signal.h>
#include <linux/idr.h>
#include <linux/kgdb.h>
@@ -599,6 +600,14 @@ asmlinkage __visible void __init start_kernel(void)
* time - but meanwhile we still have a functioning scheduler.
*/
sched_init();
+
+#ifdef CONFIG_XPU_SCHEDULE
+ /*
+ * Set up the ucc scheduler, to enable heterogeneous scheduling.
+ */
+ ucc_sched_init();
+#endif
+
/*
* Disable preemption - early bootup scheduling is extremely
* fragile until we cpu_idle() for the first time.
diff --git a/kernel/Makefile b/kernel/Makefile
index d0482bd27ba4..273fe481d303 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -43,6 +43,8 @@ obj-y += irq/
obj-y += rcu/
obj-y += livepatch/
obj-y += dma/
+obj-$(CONFIG_XPU_SCHEDULE) += ucc_sched/
+obj-$(CONFIG_XPU_UCC) += ucc/
obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
obj-$(CONFIG_FREEZER) += freezer.o
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 0612af002ae5..0f659b2ad251 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -19,6 +19,7 @@ endif
obj-y += core.o loadavg.o clock.o cputime.o
obj-y += idle.o fair.o rt.o deadline.o
obj-y += wait.o wait_bit.o swait.o completion.o
+obj-$(CONFIG_XPU_SCHEDULE) += ucc_sched.o
obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o topology.o stop_task.o pelt.o
obj-$(CONFIG_SCHED_AUTOGROUP) += autogroup.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 67bda877bfa8..89348097b29a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2316,6 +2316,11 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
*/
p->prio = current->normal_prio;
+#ifdef CONFIG_XPU_SCHEDULE
+ p->ucc_priority = current->ucc_priority;
+ p->ucc_step = current->ucc_step;
+#endif
+
/*
* Revert to default priority/policy on fork if requested.
*/
diff --git a/kernel/sched/ucc_sched.c b/kernel/sched/ucc_sched.c
new file mode 100644
index 000000000000..646f120c3c34
--- /dev/null
+++ b/kernel/sched/ucc_sched.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ucc_sched.h>
+#include <linux/ucc_common.h>
+
+static DEFINE_MUTEX(revmap_mutex);
+
+static DEFINE_HASHTABLE(vrtsq_rtsq_revmap, VRTSQ_RTSQ_HASH_ORDER);
+
+/**
+ * struct vsqce_idx_revmap_data - reverse map from vstream id to XPU group.
+ * @vrtsdId: vstream id used as the hash key.
+ * @group: value for this entry.
+ * @hash_node: hash node list.
+ */
+struct vsqce_idx_revmap_data {
+ unsigned int vrtsdId;
+ struct xpu_group *group;
+ struct hlist_node hash_node;
+};
+
+struct xpu_group *select_sq(struct vstream_info *vstream_info)
+{
+ struct vsqce_idx_revmap_data *revmap_data;
+
+ /* find history */
+ mutex_lock(&revmap_mutex);
+ hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node,
+ (unsigned long)vstream_info->vstreamId) {
+ if (revmap_data && revmap_data->group) {
+ mutex_unlock(&revmap_mutex);
+ return revmap_data->group;
+ }
+ }
+ mutex_unlock(&revmap_mutex);
+
+ revmap_data = kzalloc(sizeof(struct vsqce_idx_revmap_data), GFP_KERNEL);
+ if (revmap_data == NULL)
+ return NULL;
+	/* find XPU group */
+	revmap_data->group = xpu_group_find(xpu_root, XPU_TYPE_NPU_310);
+	if (revmap_data->group == NULL) {
+		ucc_err("failed to find XPU group.\n");
+		goto err_free;
+	}
+	/* find device group */
+	revmap_data->group = xpu_group_find(revmap_data->group,
+			vstream_info->devId);
+	if (revmap_data->group == NULL) {
+		ucc_err("failed to find device group.\n");
+		goto err_free;
+	}
+	/* find tsgroup */
+	revmap_data->group = xpu_group_find(revmap_data->group,
+			vstream_info->tsId);
+	if (revmap_data->group == NULL) {
+		ucc_err("failed to find ts group.\n");
+		goto err_free;
+	}
+
+	/* select idle xcu */
+	revmap_data->group = xpu_idle_group_find(revmap_data->group);
+	if (revmap_data->group == NULL) {
+		ucc_err("failed to find idle rtsq group.\n");
+		goto err_free;
+	}
+
+ revmap_data->vrtsdId = vstream_info->vstreamId;
+ /* set group used : 1 */
+ revmap_data->group->used = 1;
+
+ mutex_lock(&revmap_mutex);
+ hash_add(vrtsq_rtsq_revmap, &revmap_data->hash_node,
+ (unsigned long)vstream_info->vstreamId);
+ mutex_unlock(&revmap_mutex);
+	return revmap_data->group;
+
+err_free:
+	kfree(revmap_data);
+	return NULL;
+}
+
+int ucc_process_task(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx,
+ int *sqenum)
+{
+ struct xpu_group *group = NULL;
+
+ if (vstream_info == NULL) {
+		ucc_err("vstream_info is NULL\n");
+ return -1;
+ }
+
+ group = select_sq(vstream_info);
+ if (group == NULL) {
+		ucc_err("failed to find group.\n");
+ return -1;
+ }
+ /* send sqe */
+ *sqenum = xpu_run(group, vstream_info, ctx);
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_process_task);
+
+int ucc_free_task(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx)
+{
+ struct vsqce_idx_revmap_data *revmap_data;
+
+ ucc_dequeue_task(vstream_info);
+
+ while (!ucc_xcu_is_sched(vstream_info->cu_id))
+ schedule_timeout_interruptible(10);
+
+ ucc_dump_statistics_info(&vstream_info->se);
+
+ mutex_lock(&revmap_mutex);
+ hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node,
+ (unsigned long)vstream_info->vstreamId) {
+ if (revmap_data &&
+ revmap_data->vrtsdId == vstream_info->vstreamId &&
+ revmap_data->group) {
+ xpu_finish(revmap_data->group, vstream_info, ctx);
+ /* set group unused : 0 */
+ revmap_data->group->used = 0;
+ hash_del(&revmap_data->hash_node);
+ kfree(revmap_data);
+ revmap_data = NULL;
+ break;
+ }
+ }
+ mutex_unlock(&revmap_mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_free_task);
+
+int ucc_wait_cq(struct vstream_info *vstream_info, struct tsdrv_ctx *ctx,
+ struct devdrv_report_para *arg, int *cqenum)
+{
+ struct vsqce_idx_revmap_data *revmap_data;
+
+ hash_for_each_possible(vrtsq_rtsq_revmap, revmap_data, hash_node,
+ (unsigned long)vstream_info->vstreamId) {
+ if (revmap_data &&
+ revmap_data->vrtsdId == vstream_info->vstreamId &&
+ revmap_data->group)
+ *cqenum = xpu_wait(revmap_data->group, vstream_info,
+ ctx, arg);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_wait_cq);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c7064f67f4a5..aeceb9e9c927 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -117,6 +117,10 @@ extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
extern int sysctl_nr_trim_pages;
#endif
+#ifdef CONFIG_XPU_SCHEDULE
+extern int sysctl_ucc_sched_rcv_timeout_ms;
+#endif
+
/* Constants used for minimum and maximum */
#ifdef CONFIG_LOCKUP_DETECTOR
static int sixty = 60;
@@ -139,7 +143,7 @@ static int one_thousand = 1000;
#ifdef CONFIG_PRINTK
static int ten_thousand = 10000;
#endif
-#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_QOS_SCHED_SMART_GRID)
+#if defined(CONFIG_QOS_SCHED) || defined(CONFIG_QOS_SCHED_SMART_GRID) || defined(CONFIG_XPU_SCHEDULE)
static int hundred_thousand = 100000;
#endif
#ifdef CONFIG_PERF_EVENTS
@@ -352,6 +356,17 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+#ifdef CONFIG_XPU_SCHEDULE
+ {
+ .procname = "ucc_sched_rcv_timeout",
+ .data = &sysctl_ucc_sched_rcv_timeout_ms,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &hundred_thousand,
+ },
+#endif
#ifdef CONFIG_SCHED_DEBUG
{
.procname = "sched_min_granularity_ns",
diff --git a/kernel/ucc/Kconfig b/kernel/ucc/Kconfig
new file mode 100644
index 000000000000..279c11f702b1
--- /dev/null
+++ b/kernel/ucc/Kconfig
@@ -0,0 +1,21 @@
+#
+# TODO: add description
+#
+
+config XPU_UCC
+ bool "ucc"
+ default n
+ depends on ARM64 || X86
+ help
+	  Say Y here if you want support for using XPU UCC. XPU UCC
+	  is a helper for XPU scheduling. UCC stands for
+	  Universal Converged Computing.
+
+
+config XPU_VSTREAM
+ bool "virtual submit queue and complete queue"
+ default n
+ depends on XPU_UCC
+ help
+	  Virtual submit queue and complete queue support for XPU.
+	  It is used to help XPU scheduling.
diff --git a/kernel/ucc/Makefile b/kernel/ucc/Makefile
new file mode 100644
index 000000000000..0e2735d2aef4
--- /dev/null
+++ b/kernel/ucc/Makefile
@@ -0,0 +1 @@
+obj-y += ascend_vstream.o vstream.o
diff --git a/kernel/ucc/ascend_vstream.c b/kernel/ucc/ascend_vstream.c
new file mode 100644
index 000000000000..d248aaff7639
--- /dev/null
+++ b/kernel/ucc/ascend_vstream.c
@@ -0,0 +1,654 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/syscalls.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/vstream.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/ucc_common.h>
+#include <linux/ucc_sched.h>
+
+DEFINE_MUTEX(vstreamId_Bitmap_mutex);
+static DECLARE_BITMAP(vstreamIdBitmap, DEVDRV_MAX_SQ_NUM);
+
+static DEFINE_MUTEX(vcqId_Bitmap_mutex);
+static DECLARE_BITMAP(vcqIdBitmap, DEVDRV_MAX_CQ_NUM);
+
+static DEFINE_MUTEX(revmap_mutex);
+
+static struct vstream_info *vstreamContainer[DEVDRV_MAX_SQ_NUM];
+static struct vcq_map_table *vsqcqMapTable[DEVDRV_MAX_CQ_NUM];
+
+#define MAX_SQ_SIZE (MAX_VSTREAM_SIZE * MAX_VSTREAM_SLOT_SIZE)
+#define MAX_CQ_SIZE (MAX_VSTREAM_SIZE * MAX_CQ_SLOT_SIZE)
+
+#define SQ_USER_ADDR_OFFSET(id) ((unsigned long)REMAP_ALIGN(MAX_SQ_SIZE) * id)
+#define CQ_USER_ADDR_OFFSET(id) ((unsigned long)REMAP_ALIGN(MAX_CQ_SIZE) * id)
+
+#define SQ_VSTREAM_DATA(id) vstreamContainer[id]->vsqNode->vstreamData
+#define CQ_VSTREAM_DATA(id) vstreamContainer[id]->vcqNode->vstreamData
+
+static struct tsdrv_ctx *get_ctx(int fd)
+{
+ struct fd f;
+ struct davinci_intf_private_stru *file_private_data;
+ struct tsdrv_ctx *ctx = NULL;
+
+ f = fdget(fd);
+ if (!f.file)
+ goto out;
+
+ file_private_data = f.file->private_data;
+ if (!file_private_data)
+ goto out;
+
+ ctx = file_private_data->priv_filep.private_data;
+
+out:
+ fdput(f);
+ return ctx;
+}
+
+static struct vcq_map_table *vstream_get_map_table(uint32_t id)
+{
+ return vsqcqMapTable[id];
+}
+
+static void free_vstreamId(uint32_t vstreamId)
+{
+ mutex_lock(&vstreamId_Bitmap_mutex);
+ clear_bit(vstreamId, vstreamIdBitmap);
+ mutex_unlock(&vstreamId_Bitmap_mutex);
+}
+
+static void free_vcqId(uint32_t vcqId, uint32_t flag)
+{
+ mutex_lock(&vcqId_Bitmap_mutex);
+ if (!(flag & TSDRV_CQ_REUSE))
+ clear_bit(vcqId, vcqIdBitmap);
+ mutex_unlock(&vcqId_Bitmap_mutex);
+}
+
+static void vstream_free_map_table(uint32_t vcqId, uint32_t vstreamId,
+ uint32_t flag)
+{
+ struct vcq_map_table *freeTable = NULL;
+ struct vstream_id *vstreamIdNode = NULL;
+
+ freeTable = vstream_get_map_table(vcqId);
+ if (!freeTable) {
+ ucc_err("No map found for vcq:%d.\n", vcqId);
+ return;
+ }
+
+ list_for_each_entry(vstreamIdNode, &freeTable->vstreamId_list, list) {
+ if (vstreamIdNode->vstreamId == vstreamId) {
+ list_del(&vstreamIdNode->list);
+ kfree(vstreamIdNode);
+ break;
+ }
+ }
+ if (!(flag & TSDRV_CQ_REUSE)) {
+ kfree(freeTable->vcqNode->vstreamData);
+ kfree(freeTable->vcqNode);
+ kfree(freeTable);
+ }
+}
+
+static void vstream_alloc_ucc_se(struct ucc_se *se)
+{
+ memset(&se->statistics, 0, sizeof(se->statistics));
+ se->on_cu = 0;
+ se->state = SE_PREPARE;
+ se->flag = UCC_TIF_NONE;
+ se->prio = UCC_PRIO_HIGH;
+ se->step = UCC_STEP_SLOW;
+ raw_spin_lock_init(&se->se_lock);
+}
+
+static struct vstream_info *vstream_create_info(struct tsdrv_ctx *ctx,
+ struct normal_alloc_sqcq_para *para)
+{
+ struct vcq_map_table *mapTable = NULL;
+
+ struct vstream_info *vstream = kzalloc(sizeof(struct vstream_info),
+ GFP_KERNEL);
+ if (!vstream)
+ return NULL;
+
+ (void)memcpy(vstream->info, para->info,
+ sizeof(uint32_t) * SQCQ_RTS_INFO_LENGTH);
+
+ vstream->privdata = ctx;
+ vstream->tsId = para->tsId;
+ vstream->vstreamId = para->sqId;
+ vstream->vcqId = para->cqId;
+
+ mapTable = vstream_get_map_table(vstream->vcqId);
+ if (!mapTable || !mapTable->vcqNode) {
+ ucc_err("No map found for vcqId:%d.\n", vstream->vcqId);
+ goto free_vstream;
+ }
+ vstream->vcqNode = mapTable->vcqNode;
+ vstream->vsqNode = kmalloc(sizeof(struct vstream_node), GFP_KERNEL);
+ if (!vstream->vsqNode) {
+ ucc_err("Failed to alloc memory for vsqNode:%d.\n",
+ vstream->vstreamId);
+ goto free_vstream;
+ }
+ vstream->vsqNode->vstreamData = kmalloc(MAX_SQ_SIZE, GFP_KERNEL);
+ if (!vstream->vsqNode->vstreamData)
+ goto free_vsqNode;
+ vstream->vsqNode->id = vstream->vstreamId;
+ vstream->vsqNode->head = 0;
+ vstream->vsqNode->tail = 0;
+ vstream->vsqNode->credit = MAX_VSTREAM_SIZE;
+ raw_spin_lock_init(&vstream->vsqNode->spin_lock);
+ vstream->send_cnt = 0;
+ vstream->p = current;
+ vstream_alloc_ucc_se(&vstream->se);
+
+ return vstream;
+
+free_vsqNode:
+ kfree(vstream->vsqNode);
+
+free_vstream:
+ kfree(vstream);
+ return NULL;
+}
+
+struct vstream_info *vstream_get_info(uint32_t id)
+{
+ return vstreamContainer[id];
+}
+
+static void vstream_free_info(uint32_t id)
+{
+	struct vstream_info *freeInfo = vstream_get_info(id);
+
+	if (!freeInfo)
+		return;
+
+	ucc_set_vstream_state(freeInfo, SE_DEAD);
+
+	if (freeInfo->vsqNode)
+		kfree(freeInfo->vsqNode->vstreamData);
+
+	kfree(freeInfo->vsqNode);
+	kfree(freeInfo);
+}
+
+static int queue_pop_by_num(struct vstream_node *node, uint32_t pop_num)
+{
+ if (node->credit + pop_num > MAX_VSTREAM_SIZE) {
+ ucc_err("Queue usage out-of-bounds");
+ return -EACCES;
+ }
+
+ node->credit += pop_num;
+ node->head = (node->head + pop_num) % MAX_VSTREAM_SIZE;
+ return 0;
+}
+
+static int queue_pop_by_head(struct vstream_node *node, uint32_t head)
+{
+ int pop_num = (head - node->head + MAX_VSTREAM_SIZE) %
+ MAX_VSTREAM_SIZE;
+ return queue_pop_by_num(node, pop_num);
+}
+
+int update_vstream_head(struct vstream_info *vstream_info, int num)
+{
+ struct vstream_node *node = vstream_info->vsqNode;
+
+ raw_spin_lock(&node->spin_lock);
+ if (node->credit + num > MAX_VSTREAM_SIZE) {
+ raw_spin_unlock(&node->spin_lock);
+ return -1;
+ }
+
+ node->credit += num;
+ node->head = (node->head + num) % MAX_VSTREAM_SIZE;
+ raw_spin_unlock(&node->spin_lock);
+
+ return 0;
+}
+
+bool vstream_have_kernel(struct ucc_se *se)
+{
+ struct vstream_info *vinfo;
+
+ vinfo = container_of(se, struct vstream_info, se);
+ return vinfo->vsqNode->credit != MAX_VSTREAM_SIZE;
+}
+
+static int queue_push_by_num(struct vstream_node *node, uint32_t push_num)
+{
+	if (push_num > node->credit)
+ return -EACCES;
+
+ node->credit -= push_num;
+ node->tail = (node->tail + push_num) % MAX_VSTREAM_SIZE;
+ return 0;
+}
+
+static int queue_push_by_tail(struct vstream_node *node, uint32_t tail)
+{
+ int push_num = (tail - node->tail + MAX_VSTREAM_SIZE) %
+ MAX_VSTREAM_SIZE;
+ return queue_push_by_num(node, push_num);
+}
+
+static uint32_t vstream_alloc_vstreamId(void)
+{
+ uint32_t vstreamId = DEVDRV_MAX_SQ_NUM;
+
+ /* alloc vstreamId */
+ mutex_lock(&vstreamId_Bitmap_mutex);
+ vstreamId = find_first_zero_bit(vstreamIdBitmap, DEVDRV_MAX_SQ_NUM);
+ if (vstreamId == DEVDRV_MAX_SQ_NUM) {
+ ucc_err("vstreamId exhausted.\n");
+ mutex_unlock(&vstreamId_Bitmap_mutex);
+ return DEVDRV_MAX_SQ_NUM;
+ }
+ set_bit(vstreamId, vstreamIdBitmap);
+ mutex_unlock(&vstreamId_Bitmap_mutex);
+
+ return vstreamId;
+}
+
+static uint32_t vstream_alloc_vcqid(void)
+{
+ uint32_t vcqId = DEVDRV_MAX_CQ_NUM;
+
+ /* alloc vcqid */
+ mutex_lock(&vcqId_Bitmap_mutex);
+ vcqId = find_first_zero_bit(vcqIdBitmap, DEVDRV_MAX_CQ_NUM);
+ if (vcqId == DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vcqId has been used up.\n");
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ return DEVDRV_MAX_CQ_NUM;
+ }
+ set_bit(vcqId, vcqIdBitmap);
+ mutex_unlock(&vcqId_Bitmap_mutex);
+
+ ucc_info("vcqId = %d\n", vcqId);
+ return vcqId;
+}
+
+int vstream_map_pfnaddr(struct tsdrv_ctx *ctx,
+ struct normal_alloc_sqcq_para *para)
+{
+ int err = 0;
+ unsigned long vsqAddr;
+ unsigned long vcqAddr;
+ pgprot_t vm_page_prot;
+ struct vm_area_struct *vma = ctx->vma[para->tsId];
+
+ vsqAddr = vma->vm_start + SQ_USER_ADDR_OFFSET(para->sqId);
+ vm_page_prot = pgprot_device(vma->vm_page_prot);
+ err = remap_pfn_range(vma, vsqAddr,
+ virt_to_pfn(SQ_VSTREAM_DATA(para->sqId)),
+ MAX_SQ_SIZE, vm_page_prot);
+ if (err) {
+ ucc_err("remap_pfn_range failed,ret=%d.\n", err);
+ return -EFAULT;
+ }
+ if (!(para->flag & TSDRV_CQ_REUSE)) {
+ vcqAddr = vma->vm_start + DEVDRV_VM_CQ_MEM_OFFSET +
+ CQ_USER_ADDR_OFFSET(para->cqId);
+ err = remap_pfn_range(vma, vcqAddr,
+ virt_to_pfn(CQ_VSTREAM_DATA(para->sqId)),
+ MAX_CQ_SIZE, vm_page_prot);
+ if (err) {
+ ucc_err("remap_pfn_range failed,ret=%d.\n", err);
+ return -EFAULT;
+ }
+ }
+
+ return err;
+}
+
+void vstream_unmap_pfnaddr(struct tsdrv_ctx *ctx,
+ struct normal_free_sqcq_para *para)
+{
+ unsigned long vsqAddr;
+ unsigned long vcqAddr;
+ size_t cqSize = PAGE_ALIGN(MAX_CQ_SIZE);
+ struct vm_area_struct *vma = ctx->vma[para->tsId];
+
+ vsqAddr = vma->vm_start + SQ_USER_ADDR_OFFSET(para->sqId);
+ zap_vma_ptes(vma, vsqAddr, MAX_SQ_SIZE);
+
+ if (!(para->flag & TSDRV_CQ_REUSE)) {
+ vcqAddr = vma->vm_start + DEVDRV_VM_CQ_MEM_OFFSET +
+ CQ_USER_ADDR_OFFSET(para->cqId);
+ zap_vma_ptes(vma, vcqAddr, cqSize);
+ }
+}
+
+static int vstream_update_vcqtable(uint32_t vcqId, uint32_t vstreamId,
+ uint32_t flag)
+{
+ int err = -ENOSPC;
+ struct vcq_map_table *vcqTable = NULL;
+ struct vstream_id *vstreamIdNode = NULL;
+
+ if (!(flag & TSDRV_CQ_REUSE)) {
+ vcqTable = kmalloc(sizeof(struct vcq_map_table), GFP_KERNEL);
+ if (!vcqTable)
+ return -ENOMEM;
+
+ vcqTable->vcqId = vcqId;
+ vcqTable->vcqNode = kmalloc(sizeof(struct vstream_node),
+ GFP_KERNEL);
+ if (!vcqTable->vcqNode) {
+ err = -ENOMEM;
+ goto free_vcqTable;
+ }
+
+ vcqTable->vcqNode->vstreamData = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!vcqTable->vcqNode->vstreamData) {
+ err = -ENOMEM;
+ goto free_vcqNode;
+ }
+ vcqTable->vcqNode->id = vcqId;
+ vcqTable->vcqNode->head = 0;
+ vcqTable->vcqNode->tail = 0;
+ vcqTable->vcqNode->credit = MAX_VSTREAM_SIZE;
+ INIT_LIST_HEAD(&vcqTable->vstreamId_list);
+ vsqcqMapTable[vcqId] = vcqTable;
+ } else {
+ vcqTable = vsqcqMapTable[vcqId];
+ }
+ vstreamIdNode = kmalloc(sizeof(struct vstream_id), GFP_KERNEL);
+ if (!vstreamIdNode) {
+ err = -ENOMEM;
+
+ if (!(flag & TSDRV_CQ_REUSE))
+ goto free_vstreamData;
+ return err;
+ }
+ vstreamIdNode->vstreamId = vstreamId;
+ list_add(&vstreamIdNode->list, &vcqTable->vstreamId_list);
+
+ return 0;
+
+free_vstreamData:
+ kfree(vcqTable->vcqNode->vstreamData);
+
+free_vcqNode:
+ kfree(vcqTable->vcqNode);
+
+free_vcqTable:
+ kfree(vcqTable);
+
+ return err;
+}
+
+int ascend_vstream_alloc(struct vstream_args *arg)
+{
+ uint32_t vstreamId;
+ uint32_t vcqId = DEVDRV_MAX_CQ_NUM;
+ int err = -EINVAL;
+ struct vstream_info *vstream = NULL;
+ struct tsdrv_ctx *ctx = NULL;
+ struct normal_alloc_sqcq_para *sqcq_alloc_para = &arg->va_args.ascend;
+
+ ctx = get_ctx(sqcq_alloc_para->fd);
+ if (!ctx)
+ return err;
+
+ vstreamId = vstream_alloc_vstreamId();
+ if (vstreamId == DEVDRV_MAX_SQ_NUM) {
+ ucc_err("vstreamId alloc failed.\n");
+ return err;
+ }
+ if (!(sqcq_alloc_para->flag & TSDRV_CQ_REUSE))
+ vcqId = vstream_alloc_vcqid();
+ else
+ vcqId = sqcq_alloc_para->cqId;
+
+ if (vcqId >= DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vcqId alloc failed.\n");
+ goto free_vstreamIds;
+ }
+ err = vstream_update_vcqtable(vcqId, vstreamId, sqcq_alloc_para->flag);
+ if (err) {
+ ucc_err("vcqtable update failed, vcqId:%d, vstreamId:%d, flag:%d.\n",
+ vcqId, vstreamId, sqcq_alloc_para->flag);
+ goto free_vcqid;
+ }
+
+ sqcq_alloc_para->sqId = vstreamId;
+ sqcq_alloc_para->cqId = vcqId;
+ vstream = vstream_create_info(ctx, sqcq_alloc_para);
+ if (!vstream) {
+ ucc_err("vstream create failed: vcqId:%d, vstreamId:%d.\n",
+ vcqId, vstreamId);
+ err = -ENOSPC;
+ goto free_vcqtable;
+ }
+
+ vstream->devId = sqcq_alloc_para->devId;
+ vstreamContainer[vstreamId] = vstream;
+
+ vstream->group = select_sq(vstream);
+ if (!vstream->group) {
+ ucc_err("Failed to select sq\n");
+ err = -EINVAL;
+ goto free_vstream_info;
+ }
+
+ err = vstream_map_pfnaddr(ctx, sqcq_alloc_para);
+ if (err) {
+ ucc_err("vstream map failed, ret=%d.\n", err);
+ goto free_vstream_info;
+ }
+ return 0;
+
+free_vstream_info:
+ vstream_free_info(vstreamId);
+
+free_vcqtable:
+ vstream_free_map_table(vcqId, vstreamId, sqcq_alloc_para->flag);
+
+free_vcqid:
+ free_vcqId(vcqId, sqcq_alloc_para->flag);
+
+free_vstreamIds:
+ free_vstreamId(vstreamId);
+
+ return err;
+}
+
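+/*
+ * Free a vstream: release its task on the device, drop the CQ binding,
+ * unmap the user SQ/CQ pages and return both ids to their bitmaps.
+ */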
+int ascend_vstream_free(struct vstream_args *arg)
+{
+ int err = 0;
+ struct vstream_info *vstreamInfo = NULL;
+ struct normal_free_sqcq_para *sqcq_free_para = &arg->vf_args.ascend;
+ uint32_t vstreamId = sqcq_free_para->sqId;
+ uint32_t vcqId = sqcq_free_para->cqId;
+
+ if (vstreamId >= DEVDRV_MAX_SQ_NUM || vcqId >= DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vstream index out-of-range, vstreamId=%d, vcqId=%d.\n",
+ vstreamId, vcqId);
+ return -EPERM;
+ }
+
+ vstreamInfo = vstream_get_info(vstreamId);
+ if (!vstreamInfo) {
+ ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId);
+ return -EPERM;
+ }
+ err = ucc_free_task(vstreamInfo, vstreamInfo->privdata);
+
+ free_vcqId(vcqId, sqcq_free_para->flag);
+ vstream_free_map_table(vcqId, vstreamId, sqcq_free_para->flag);
+
+ vstream_unmap_pfnaddr(vstreamInfo->privdata, sqcq_free_para);
+
+ vstream_free_info(vstreamId);
+ free_vstreamId(vstreamId);
+ return err;
+}
+
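+/*
+ * Kick a vstream: publish the new SQ tail written by userspace and wake
+ * the ucc scheduler so the pending kernels get submitted.
+ */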
+int ascend_vstream_kick(struct vstream_args *arg)
+{
+ int err = 0;
+ struct tsdrv_sqcq_data_para *sqcq_data_para = &arg->vk_args.ascend;
+ int vstreamId = sqcq_data_para->id;
+ int tail = sqcq_data_para->val;
+	struct vstream_info *vstreamInfo = NULL;
+
+	vstreamInfo = vstream_get_info(vstreamId);
+	if (!vstreamInfo) {
+		ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId);
+		return -EINVAL;
+	}
+	/* Record the submitting task only after the lookup succeeds. */
+	vstreamInfo->p = current;
+
+ raw_spin_lock(&vstreamInfo->vsqNode->spin_lock);
+ err = queue_push_by_tail(vstreamInfo->vsqNode, tail);
+ if (err) {
+ raw_spin_unlock(&vstreamInfo->vsqNode->spin_lock);
+ ucc_err("queue_push_by_tail error, ret = %d\n", err);
+ return err;
+ }
+ raw_spin_unlock(&vstreamInfo->vsqNode->spin_lock);
+
+ err = ucc_wake_up(&vstreamInfo->se);
+ return err;
+}
+
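+/*
+ * Callback wait: walk every SQ bound to this vcq, poll the hardware for
+ * completed kernels, then push the harvested CQE count onto the virtual
+ * CQ and report the new tail back to userspace.
+ */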
+int ascend_callback_vstream_wait(struct vstream_args *arg)
+{
+ int err = 0;
+ int cqeNum = 0;
+ int cqeSum = 0;
+ struct vstream_info *vstreamInfo = NULL;
+ struct vcq_map_table *vcqTable = NULL;
+ struct vcq_map_table *waitTable = NULL;
+ struct vstream_id *vstreamIdNode = NULL;
+ struct devdrv_report_para *report_para = &arg->cvw_args;
+ uint32_t *sqlist;
+ uint32_t sqlist_num = 0;
+ uint32_t vstreamId, vcqId;
+
+ sqlist = kmalloc_array(DEVDRV_MAX_SQ_NUM, sizeof(uint32_t), GFP_KERNEL);
+ if (!sqlist)
+ return -ENOMEM;
+
+ vcqId = report_para->cq_id;
+ if (vcqId >= DEVDRV_MAX_CQ_NUM) {
+ ucc_err("vcqId out-of-range, vcqId=%d.\n", vcqId);
+ err = -EPERM;
+ goto out;
+ }
+
+ mutex_lock(&vcqId_Bitmap_mutex);
+ waitTable = vstream_get_map_table(vcqId);
+ if (!waitTable) {
+ ucc_err("No map found for vcq:%d.\n", vcqId);
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ err = -EPERM;
+ goto out;
+ }
+
+ list_for_each_entry(vstreamIdNode, &waitTable->vstreamId_list, list)
+ sqlist[sqlist_num++] = vstreamIdNode->vstreamId;
+ mutex_unlock(&vcqId_Bitmap_mutex);
+
+	/* Poll the hardware CQ of every SQ bound to this vcq. */
+ for (vstreamId = 0; vstreamId < sqlist_num; vstreamId++) {
+ vstreamInfo = vstream_get_info(sqlist[vstreamId]);
+ if (!vstreamInfo)
+ continue;
+ err |= ucc_wait_cq(vstreamInfo, vstreamInfo->privdata,
+ report_para, &cqeNum);
+ cqeSum += cqeNum;
+ if (cqeNum)
+ break;
+ }
+
+	/* Publish the harvested CQEs to the virtual CQ. */
+ mutex_lock(&vcqId_Bitmap_mutex);
+ vcqTable = vstream_get_map_table(vcqId);
+	if (!vcqTable) {
+		ucc_err("No map found for vcq:%d.\n", vcqId);
+		mutex_unlock(&vcqId_Bitmap_mutex);
+		err = -EPERM;
+		goto out;
+	}
+
+ err = queue_push_by_num(vcqTable->vcqNode, cqeSum);
+ if (err) {
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ ucc_err("failed to queue_push_by_num, ret = %d.\n", err);
+ goto out;
+ }
+ report_para->cq_tail = vcqTable->vcqNode->tail;
+ mutex_unlock(&vcqId_Bitmap_mutex);
+
+out:
+ kfree(sqlist);
+ return err;
+}
+
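+/*
+ * Callback kick: userspace has consumed CQEs up to release_head, so pop
+ * them from the virtual CQ.
+ */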
+int ascend_callback_vstream_kick(struct vstream_args *arg)
+{
+ u32 vcqId, release_head;
+	struct vcq_map_table *vcqTable = NULL;
+ int err = 0;
+
+ vcqId = arg->cvk_args.id;
+ release_head = arg->cvk_args.val;
+ if (vcqId >= DEVDRV_MAX_CQ_NUM || release_head >= MAX_VSTREAM_SIZE) {
+ ucc_err("vstream index out-of-range, vcqId=%d, release_head=%d.\n",
+ vcqId, release_head);
+ return -EPERM;
+ }
+
+	mutex_lock(&vcqId_Bitmap_mutex);
+	/*
+	 * vcqId indexes the vcq map table, not the vstream container, so
+	 * resolve it through vstream_get_map_table() as the wait path does.
+	 */
+	vcqTable = vstream_get_map_table(vcqId);
+	if (!vcqTable) {
+		err = -EPERM;
+		goto out;
+	}
+
+	err = queue_pop_by_head(vcqTable->vcqNode, release_head);
+
+out:
+ mutex_unlock(&vcqId_Bitmap_mutex);
+ return err;
+}
+
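+/* Report the current SQ head of a vstream back to userspace. */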
+int ascend_vstream_get_head(struct vstream_args *arg)
+{
+ u32 vstreamId = arg->vh_args.id;
+ struct vstream_info *vstreamInfo = NULL;
+
+ if (vstreamId >= DEVDRV_MAX_SQ_NUM) {
+ ucc_err("vstreamId out-of-range, vstreamId=%d.\n", vstreamId);
+ return -EINVAL;
+ }
+
+ vstreamInfo = vstream_get_info(vstreamId);
+ if (!vstreamInfo) {
+ ucc_err("vstreamInfo get failed, vstreamId=%d.\n", vstreamId);
+ return -EINVAL;
+ }
+ arg->vh_args.val = vstreamInfo->vsqNode->head;
+
+ return 0;
+}
+
diff --git a/kernel/ucc/ascend_vstream.h b/kernel/ucc/ascend_vstream.h
new file mode 100644
index 000000000000..0cd200168495
--- /dev/null
+++ b/kernel/ucc/ascend_vstream.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _ASCEND_VSTREAM_H
+#define _ASCEND_VSTREAM_H
+
+int ascend_vstream_alloc(struct vstream_args *arg);
+int ascend_vstream_free(struct vstream_args *arg);
+int ascend_vstream_kick(struct vstream_args *arg);
+int ascend_callback_vstream_wait(struct vstream_args *arg);
+int ascend_callback_vstream_kick(struct vstream_args *arg);
+int ascend_vstream_get_head(struct vstream_args *arg);
+
+#endif /* _ASCEND_VSTREAM_H */
diff --git a/kernel/ucc/vstream.c b/kernel/ucc/vstream.c
new file mode 100644
index 000000000000..d4705f285b89
--- /dev/null
+++ b/kernel/ucc/vstream.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/syscalls.h>
+#include <linux/vstream.h>
+
+#include "ascend_vstream.h"
+
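+/*
+ * The AMDGPU hooks below are placeholders so the command table covers the
+ * full command range; they accept their arguments and do nothing yet.
+ */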
+static int amdgpu_vstream_alloc(struct vstream_args *arg)
+{
+ return 0;
+}
+static int amdgpu_vstream_free(struct vstream_args *arg)
+{
+ return 0;
+}
+static int amdgpu_vstream_kick(struct vstream_args *arg)
+{
+ return 0;
+}
+static int amdgpu_vstream_update(struct vstream_args *arg)
+{
+ return 0;
+}
+
+/*
+ * vstream_manage_cmd table
+ */
+static vstream_manage_t (*vstream_command_table[AMDGPU_MAX_COMMAND + 1]) = {
+ ascend_vstream_alloc, // ASCEND_VSTREAM_ALLOC
+ ascend_vstream_free, // ASCEND_VSTREAM_FREE
+ ascend_vstream_kick, // ASCEND_VSTREAM_KICK
+ ascend_callback_vstream_wait, // ASCEND_CALLBACK_VSTREAM_WAIT
+ ascend_callback_vstream_kick, // ASCEND_CALLBACK_VSTREAM_KICK
+ ascend_vstream_get_head, // ASCEND_VSTREAM_GET_HEAD
+ NULL, // ASCEND_MAX_COMMAND
+ amdgpu_vstream_alloc, // AMDGPU_VSTREAM_ALLOC
+ amdgpu_vstream_free, // AMDGPU_VSTREAM_FREE
+ amdgpu_vstream_kick, // AMDGPU_VSTREAM_KICK
+ amdgpu_vstream_update, // AMDGPU_VSTREAM_UPDATE
+ NULL // AMDGPU_MAX_COMMAND
+};
+
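+/*
+ * vstream_manage syscall: copy the argument block in, dispatch to the
+ * handler selected by cmd, and copy the (possibly updated) block back.
+ */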
+SYSCALL_DEFINE2(vstream_manage, struct vstream_args __user *, arg, int, cmd)
+{
+ int res = 0;
+ struct vstream_args vstream_arg;
+
+	if (cmd < 0 || cmd > AMDGPU_MAX_COMMAND ||
+	    !vstream_command_table[cmd])
+		return -EINVAL;
+
+ if (copy_from_user(&vstream_arg, arg, sizeof(struct vstream_args))) {
+ pr_err("copy_from_user failed\n");
+ return -EFAULT;
+ }
+ res = vstream_command_table[cmd](&vstream_arg);
+ if (copy_to_user(arg, &vstream_arg, sizeof(struct vstream_args))) {
+ pr_err("copy_to_user failed\n");
+ return -EFAULT;
+ }
+
+ return res;
+}
diff --git a/kernel/ucc_sched/Makefile b/kernel/ucc_sched/Makefile
new file mode 100644
index 000000000000..4a41f07d091c
--- /dev/null
+++ b/kernel/ucc_sched/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_XPU_SCHEDULE) += core.o
diff --git a/kernel/ucc_sched/core.c b/kernel/ucc_sched/core.c
new file mode 100644
index 000000000000..4c7f1f59aeb9
--- /dev/null
+++ b/kernel/ucc_sched/core.c
@@ -0,0 +1,591 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Tue Jan 17 22:19:17 2023
+ */
+
+#include <uapi/linux/sched/types.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+#include <linux/ucc_sched.h>
+
+#include "ucc_sched.h"
+#include "../sched/sched.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ucc_sched.h>
+
+#define MAX_XCU_NUM (100)
+#define TS_SQ_TRANS_TASK_THRESHOLD (20)
+
+static struct xcu xcu_manager[MAX_XCU_NUM];
+static int num_active_xcu;
+raw_spinlock_t xcu_mgr_lock;
+int sysctl_ucc_sched_rcv_timeout_ms = 10;
+
+static struct task_struct vstream_idle_task;
+static struct vstream_info vstream_idle = {
+ .vstreamId = UINT_MAX,
+ .p = &vstream_idle_task,
+};
+
+struct sched_args {
+ int cu_id;
+};
+
+static inline int is_xcu_offline(struct xcu *cu)
+{
+ return cu->state == XCU_INACTIVE;
+}
+
+void ucc_set_vstream_state(struct vstream_info *vinfo, int state)
+{
+ vinfo->se.state = state;
+}
+
+static inline int should_se_run(struct ucc_se *se)
+{
+ return se->state != SE_BLOCK && se->state != SE_DEAD;
+}
+
+static inline void update_stats_run_start(struct xcu *cu,
+ struct ucc_se *se)
+{
+ u64 start;
+
+ if (!schedstat_enabled())
+ return;
+
+ start = ktime_get_boot_ns();
+ __schedstat_set(se->statistics.run_start, start);
+}
+
+static inline void update_stats_run_end(struct xcu *cu,
+ struct ucc_se *se)
+{
+ struct vstream_info *vinfo;
+ u64 delta;
+
+ if (!schedstat_enabled())
+ return;
+
+ delta = ktime_get_boot_ns() - schedstat_val(se->statistics.run_start);
+ vinfo = container_of(se, struct vstream_info, se);
+ trace_ucc_sched_stat_run(vinfo, delta, se->is_timeout);
+
+ __schedstat_set(se->statistics.run_max,
+ max(schedstat_val(se->statistics.run_max), delta));
+ __schedstat_inc(se->statistics.run_count);
+ __schedstat_add(se->statistics.run_sum, delta);
+ __schedstat_set(se->statistics.run_start, 0);
+}
+
+static inline void update_stats_preempt_start(struct xcu *cu,
+ struct ucc_se *se)
+{
+ u64 wait_start;
+
+ if (!schedstat_enabled())
+ return;
+
+ wait_start = ktime_get_boot_ns();
+ __schedstat_set(se->statistics.preempt_start, wait_start);
+}
+
+static inline void update_stats_wait_start(struct xcu *cu, struct ucc_se *se)
+{
+ u64 wait_start;
+
+ if (!schedstat_enabled())
+ return;
+
+ wait_start = ktime_get_boot_ns();
+ __schedstat_set(se->statistics.wait_start, wait_start);
+}
+
+static inline void update_stats_wait_end(struct xcu *cu, struct ucc_se *se)
+{
+ struct vstream_info *vinfo;
+ u64 delta, preempt_delta;
+
+ if (!schedstat_enabled())
+ return;
+
+ delta = ktime_get_boot_ns() - schedstat_val(se->statistics.wait_start);
+ vinfo = container_of(se, struct vstream_info, se);
+ trace_ucc_sched_stat_wait(vinfo, delta);
+
+ __schedstat_set(se->statistics.wait_max,
+ max(schedstat_val(se->statistics.wait_max), delta));
+ __schedstat_inc(se->statistics.wait_count);
+ __schedstat_add(se->statistics.wait_sum, delta);
+ __schedstat_set(se->statistics.wait_start, 0);
+
+ if (se->statistics.preempt_start) {
+ preempt_delta = ktime_get_boot_ns() -
+ schedstat_val(se->statistics.preempt_start);
+ trace_ucc_sched_stat_preempt(vinfo, preempt_delta);
+
+ __schedstat_set(se->statistics.preempt_max,
+ max(schedstat_val(se->statistics.preempt_max),
+ preempt_delta));
+ __schedstat_inc(se->statistics.preempt_count);
+ __schedstat_add(se->statistics.preempt_sum, preempt_delta);
+ __schedstat_set(se->statistics.preempt_start, 0);
+ }
+}
+
+void ucc_dump_statistics_info(struct ucc_se *se)
+{
+ struct vstream_info *vinfo = container_of(se, struct vstream_info, se);
+
+ pr_info("comm %s pid %d vstreamId %d kernel_sum %llu wait_count %llu wait_max %llu[ns] wait_sum %llu[ns] preempt_count %llu preempt_max %llu[ns] preempt_sum %llu[ns]\n",
+ vinfo->p->comm,
+ vinfo->p->pid,
+ vinfo->vstreamId,
+ vinfo->se.statistics.kernel_sum,
+ vinfo->se.statistics.wait_count,
+ vinfo->se.statistics.wait_max,
+ vinfo->se.statistics.wait_sum,
+ vinfo->se.statistics.preempt_count,
+ vinfo->se.statistics.preempt_max,
+ vinfo->se.statistics.preempt_sum);
+}
+
+static void put_prev_entity(struct xcu *cu, struct ucc_se *prev)
+{
+ if (!prev)
+ return;
+
+ if (prev->on_cu)
+ update_stats_wait_start(cu, prev);
+
+ prev->state = SE_READY;
+ cu->curr_se->state = SE_RUNNING;
+}
+
+static void set_next_entity(struct xcu *cu, struct ucc_se *se)
+{
+ if (se->on_cu && se != cu->curr_se)
+ update_stats_wait_end(cu, se);
+
+ cu->curr_se = se;
+}
+
+static void dequeue_ucc_se(struct ucc_se *se, struct xcu *cu)
+{
+ raw_spin_lock(&cu->xcu_lock);
+ if (!se->on_cu) {
+ raw_spin_unlock(&cu->xcu_lock);
+ return;
+ }
+
+ se->on_cu = 0;
+
+ list_del_init(&se->run_list);
+
+ if (list_empty(cu->queue + se->prio))
+ __clear_bit(se->prio, cu->bitmap);
+ cu->rt_nr_running--;
+
+ if (se != cu->curr_se)
+ update_stats_wait_end(cu, se);
+
+ if (cu->curr_se == se)
+ cu->curr_se = NULL;
+
+ raw_spin_unlock(&cu->xcu_lock);
+}
+
+static void enqueue_ucc_se(struct ucc_se *se, struct xcu *cu)
+{
+ struct list_head *queue = cu->queue + se->prio;
+
+ raw_spin_lock(&cu->xcu_lock);
+ if (se->on_cu) {
+ raw_spin_unlock(&cu->xcu_lock);
+ return;
+ }
+ se->on_cu = 1;
+ se->is_timeout = 0;
+ list_add_tail(&se->run_list, queue);
+ __set_bit(se->prio, cu->bitmap);
+ cu->rt_nr_running++;
+
+ update_stats_wait_start(cu, se);
+
+ raw_spin_unlock(&cu->xcu_lock);
+}
+
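+/*
+ * Pick the least-loaded XCU that matches the vstream's device and ts ids,
+ * judged by rt_nr_running at selection time.
+ */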
+static struct xcu *ucc_select_cu(struct ucc_se *se)
+{
+ struct vstream_info *vstream_info;
+ int min_nr_running = INT_MAX;
+ struct xcu *cu;
+ int select_cu = 0;
+ int cu_id;
+
+ vstream_info = container_of(se, struct vstream_info, se);
+ for (cu_id = 0; cu_id < num_active_xcu; cu_id++) {
+ cu = &xcu_manager[cu_id];
+
+ if (vstream_info->devId != cu->dev_id ||
+ vstream_info->tsId != cu->ts_id)
+ continue;
+
+ if (cu->rt_nr_running < min_nr_running) {
+ min_nr_running = cu->rt_nr_running;
+ select_cu = cu_id;
+ }
+ }
+
+ vstream_info->cu_id = select_cu;
+ return &xcu_manager[select_cu];
+}
+
+static int ucc_check_preempt(struct ucc_se *se, struct xcu *cu)
+{
+ struct vstream_info *vinfo_curr, *vinfo;
+ struct ucc_se *curr_se;
+
+ curr_se = cu->curr_se;
+ if (!curr_se)
+ return 1;
+
+ vinfo = container_of(se, struct vstream_info, se);
+ vinfo_curr = container_of(curr_se, struct vstream_info, se);
+ if (vinfo_curr->p->ucc_priority > vinfo->p->ucc_priority) {
+ update_stats_preempt_start(cu, se);
+ curr_se->flag = UCC_TIF_PREEMPT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline void ucc_wakeup_idle_worker(struct xcu *cu)
+{
+ wake_up_state(cu->worker, TASK_INTERRUPTIBLE);
+}
+
+static inline void ucc_wakeup_running_worker(struct xcu *cu)
+{
+ wake_up_state(cu->worker, TASK_UNINTERRUPTIBLE);
+}
+
+int ucc_schedule(int cu_id)
+{
+ struct xcu *cu;
+
+ cu = &xcu_manager[cu_id];
+ cu->is_wake = 1;
+ ucc_wakeup_running_worker(cu);
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_schedule);
+
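+/*
+ * Make an se runnable: mark it ready, enqueue it on a selected XCU and,
+ * if it should preempt the currently running se, wake that XCU's worker.
+ */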
+int ucc_wake_up(struct ucc_se *se)
+{
+ struct xcu *cu;
+
+ raw_spin_lock(&se->se_lock);
+ if (se->on_cu) {
+ raw_spin_unlock(&se->se_lock);
+ return 0;
+ }
+
+ if (se->state == SE_BLOCK)
+ se->state = SE_READY;
+
+ cu = ucc_select_cu(se);
+ if (!cu) {
+ raw_spin_unlock(&se->se_lock);
+ return -1;
+ }
+
+ enqueue_ucc_se(se, cu);
+ if (ucc_check_preempt(se, cu))
+ ucc_wakeup_idle_worker(cu);
+
+ raw_spin_unlock(&se->se_lock);
+
+ return 0;
+}
+
+static struct ucc_se *pick_next_ucc_se(struct xcu *cu)
+{
+ struct ucc_se *se;
+ struct list_head *queue;
+ int idx;
+
+ if (!cu->rt_nr_running)
+ return NULL;
+
+ idx = sched_find_first_bit(cu->bitmap);
+ BUG_ON(idx >= MAX_UCC_PRIO);
+
+ queue = cu->queue + idx;
+ se = list_entry(queue->next, struct ucc_se, run_list);
+
+ return se;
+}
+
+static int ucc_submit_kernel(struct xcu *cu, struct ucc_se *se)
+{
+ struct vstream_info *vstream_info;
+ struct xpu_group *group;
+ struct tsdrv_ctx *ctx;
+	int kernel_num;
+
+ vstream_info = container_of(se, struct vstream_info, se);
+ ctx = vstream_info->privdata;
+
+ group = vstream_info->group;
+
+ kernel_num = xpu_run(group, vstream_info, ctx);
+ if (kernel_num <= 0)
+ return kernel_num;
+
+	/* Advance the vstream head past the kernels just submitted. */
+	update_vstream_head(vstream_info, kernel_num);
+
+ return kernel_num;
+}
+
+static inline void ucc_wait_idle(struct xcu *cu)
+{
+ cu->state = XCU_IDLE;
+
+ do {
+ schedule_timeout_interruptible(1);
+ } while (cu->rt_nr_running == 0);
+
+ cu->state = XCU_BUSY;
+}
+
+static inline void ucc_wait_running(struct xcu *cu, struct ucc_se *se)
+{
+ int cnt = 1;
+
+ do {
+ schedule_timeout_uninterruptible(
+ msecs_to_jiffies(sysctl_ucc_sched_rcv_timeout_ms));
+ } while (cu->is_wake == 0 && --cnt > 0);
+
+ if (cnt == 0) {
+ __schedstat_inc(se->statistics.timeout_count);
+ se->is_timeout = 1;
+ }
+}
+
+static inline void clear_se_flag(struct ucc_se *se)
+{
+ if (se)
+ se->flag = UCC_TIF_NONE;
+}
+
+void ucc_dequeue_task(struct vstream_info *vInfo)
+{
+ struct xcu *cu = &xcu_manager[vInfo->cu_id];
+ struct ucc_se *se = &vInfo->se;
+
+ raw_spin_lock(&se->se_lock);
+ dequeue_ucc_se(se, cu);
+ raw_spin_unlock(&se->se_lock);
+}
+
+/*
+ * Dynamic padding: select kernels with no QoS conflicts with the current
+ * ucc_se to fill the cu. Currently a stub.
+ */
+static void dynamic_padding(struct xcu *cu, struct ucc_se *se)
+{
+}
+
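+/*
+ * Per-XCU scheduler loop: keep submitting kernels from the current se
+ * until its queue is empty or, once it is flagged for preemption and has
+ * used its ucc_step budget, switch to the highest-priority ready se;
+ * sleep in ucc_wait_idle() while the run queue is empty.
+ */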
+static int __ucc_schedule(void *args)
+{
+ struct sched_args *sargs = (struct sched_args *)args;
+ int cu_id = sargs->cu_id;
+ struct xcu *cu = &xcu_manager[cu_id];
+ struct ucc_se *se = NULL, *curr_se = NULL;
+ struct ucc_se *prev_se = NULL;
+ struct vstream_info *vinfo;
+ int send_cnt = 0;
+ int kernel_num, preempt;
+
+ while (!is_xcu_offline(cu)) {
+ raw_spin_lock(&cu->xcu_lock);
+ cu->is_sched = 0;
+ prev_se = cu->curr_se;
+
+ preempt = 0;
+ if (prev_se) {
+ if (prev_se->flag != UCC_TIF_PREEMPT)
+ goto submit_kernel;
+
+ vinfo = container_of(prev_se, struct vstream_info, se);
+ if (send_cnt < vinfo->p->ucc_step)
+ goto submit_kernel;
+
+ preempt = 1;
+ }
+
+ clear_se_flag(prev_se);
+ se = pick_next_ucc_se(cu);
+ if (!se) {
+ cu->is_sched = 1;
+ raw_spin_unlock(&cu->xcu_lock);
+ trace_ucc_sched_switch(0, &vstream_idle);
+ ucc_wait_idle(cu);
+ continue;
+ }
+
+ set_next_entity(cu, se);
+ if (se != prev_se) {
+ put_prev_entity(cu, prev_se);
+ vinfo = container_of(se, struct vstream_info, se);
+ trace_ucc_sched_switch(preempt, vinfo);
+ }
+ send_cnt = 0;
+submit_kernel:
+ curr_se = cu->curr_se;
+ dynamic_padding(cu, curr_se);
+ raw_spin_unlock(&cu->xcu_lock);
+
+ curr_se->is_timeout = 0;
+ kernel_num = ucc_submit_kernel(cu, curr_se);
+		/* No more kernels to submit: block this se. */
+ if (kernel_num <= 0 && !vstream_have_kernel(curr_se)) {
+ raw_spin_lock(&curr_se->se_lock);
+ curr_se->state = SE_BLOCK;
+ dequeue_ucc_se(curr_se, cu);
+ raw_spin_unlock(&curr_se->se_lock);
+ cu->is_sched = 1;
+ continue;
+ }
+ cu->is_sched = 1;
+
+ vinfo = container_of(curr_se, struct vstream_info, se);
+ if (vinfo->send_cnt > TS_SQ_TRANS_TASK_THRESHOLD) {
+ update_stats_run_start(cu, curr_se);
+			/* kernels have not finished yet */
+ if (!cu->is_wake)
+ ucc_wait_running(cu, curr_se);
+
+ update_stats_run_end(cu, curr_se);
+ cu->is_wake = 0;
+ vinfo->send_cnt = 0;
+ }
+
+ send_cnt += kernel_num;
+		schedstat_add(curr_se->statistics.kernel_sum, kernel_num);
+ }
+
+ return 0;
+}
+
+static void init_xcu_rq(struct xcu *cu)
+{
+ int i;
+
+ for (i = 0; i < MAX_UCC_PRIO; i++) {
+ INIT_LIST_HEAD(cu->queue + i);
+ __clear_bit(i, cu->bitmap);
+ }
+
+ /* delimiter for bitsearch: */
+ __set_bit(MAX_UCC_PRIO, cu->bitmap);
+ cu->rt_nr_running = 0;
+ raw_spin_lock_init(&cu->xcu_lock);
+}
+
+static int alloc_cu_id(void)
+{
+ int cu_id = -1;
+
+ raw_spin_lock(&xcu_mgr_lock);
+ if (num_active_xcu >= MAX_XCU_NUM) {
+ raw_spin_unlock(&xcu_mgr_lock);
+ return cu_id;
+ }
+
+ cu_id = num_active_xcu;
+ num_active_xcu++;
+ raw_spin_unlock(&xcu_mgr_lock);
+
+ return cu_id;
+}
+
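+/*
+ * Register cu_num compute units for (dev_id, ts_id): each gets an xcu
+ * slot, an initialized run queue and a dedicated SCHED_FIFO worker
+ * thread running __ucc_schedule().
+ */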
+int ucc_sched_register_xcu(int dev_id, int ts_id, int cu_num)
+{
+ int cu_id;
+ struct xcu *cu;
+ struct sched_args *args;
+ struct sched_param param = { .sched_priority = 1 };
+ char id_buf[16];
+ int i;
+
+ for (i = 0; i < cu_num; i++) {
+ cu_id = alloc_cu_id();
+ if (cu_id < 0) {
+ pr_err("alloc cu id failed\n");
+ return -1;
+ }
+
+ cu = &xcu_manager[cu_id];
+ cu->cu_id = cu_id;
+ cu->state = XCU_IDLE;
+ cu->curr_se = NULL;
+ cu->dev_id = dev_id;
+ cu->ts_id = ts_id;
+ cu->is_wake = 0;
+ init_xcu_rq(cu);
+
+ args = kzalloc(sizeof(struct sched_args), GFP_KERNEL);
+ if (!args)
+ return -1;
+
+ args->cu_id = cu->cu_id;
+ snprintf(id_buf, sizeof(id_buf), "%d:%d:%d",
+ cu->cu_id, cu->dev_id, cu->ts_id);
+		cu->worker = kthread_create_on_node(__ucc_schedule,
+						    (void *)args, NUMA_NO_NODE,
+						    "u_sched/%s", id_buf);
+		if (IS_ERR(cu->worker)) {
+			pr_err("create u_sched worker failed\n");
+			kfree(args);
+			return -1;
+		}
+		sched_setscheduler_nocheck(cu->worker, SCHED_FIFO, &param);
+		wake_up_process(cu->worker);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(ucc_sched_register_xcu);
+
+int ucc_sched_init(void)
+{
+ raw_spin_lock_init(&xcu_mgr_lock);
+ return 0;
+}
+
+int ucc_rt_nr_running(struct xcu *cu)
+{
+ return cu->rt_nr_running;
+}
+EXPORT_SYMBOL(ucc_rt_nr_running);
+
+struct xcu *ucc_get_xcu_by_id(int cu_id)
+{
+ return &xcu_manager[cu_id];
+}
+EXPORT_SYMBOL(ucc_get_xcu_by_id);
+
+int ucc_xcu_is_sched(int cu_id)
+{
+ return xcu_manager[cu_id].is_sched;
+}
+EXPORT_SYMBOL(ucc_xcu_is_sched);
diff --git a/kernel/ucc_sched/ucc_sched.h b/kernel/ucc_sched/ucc_sched.h
new file mode 100644
index 000000000000..30e2aa10cf2f
--- /dev/null
+++ b/kernel/ucc_sched/ucc_sched.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) Huawei Technologies Co., Ltd. 2023. All rights reserved.
+ * Author: Huawei OS Kernel Lab
+ * Create: Tue Jan 17 22:27:22 2023
+ */
+#ifndef __UCC_SCHED_USCHED_H__
+#define __UCC_SCHED_USCHED_H__
+
+#include <linux/sched.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/vstream.h>
+
+/* For simplicity, only two priority levels are supported. */
+#define MAX_UCC_PRIO (2)
+
+enum xcu_state {
+ XCU_INACTIVE,
+ XCU_IDLE,
+ XCU_BUSY,
+ XCU_SUBMIT,
+};
+
+/*
+ * This is the abstraction object of the xpu computing unit.
+ */
+struct xcu {
+ int is_sched;
+ int cu_id;
+ int dev_id;
+ int ts_id;
+ int rt_nr_running;
+ int is_wake;
+ struct task_struct *worker;
+	/* One extra bit serves as the delimiter set by init_xcu_rq(). */
+	DECLARE_BITMAP(bitmap, MAX_UCC_PRIO + 1);
+ struct list_head queue[MAX_UCC_PRIO];
+ enum xcu_state state;
+ struct ucc_se *curr_se;
+ raw_spinlock_t xcu_lock;
+};
+
+#endif
--
2.34.1