From: Ma Hai mahai1@huawei.com
mainline inclusion from mainline-v5.12-rc2 commit 4eb4d99dfe3018d86f4529112aa7082f43b6996a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4M6CD?from=project-issue CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/dr...
----------------------------------------------------------------------
Add a driver implementation to support the i2c driver algorithms for bit-shift adapters, so that hibmc can use the interface provided by drm to read the EDID.
Signed-off-by: Ma Hai mahai1@huawei.com Signed-off-by: Tian Tao tiantao6@hisilicon.com Reviewed-by: Thomas Zimmermann tzimmermann@suse.de Reviewed-by: Li Dongming lidongming5@huawei.com Link: https://patchwork.freedesktop.org/patch/msgid/1600778670-60370-2-git-send-em... Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/gpu/drm/hisilicon/hibmc/Makefile | 2 +- .../gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h | 25 ++++- .../gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c | 99 +++++++++++++++++++ .../gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c | 2 +- 4 files changed, 125 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c
diff --git a/drivers/gpu/drm/hisilicon/hibmc/Makefile b/drivers/gpu/drm/hisilicon/hibmc/Makefile index f99132715597..684ef794eb7c 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Makefile +++ b/drivers/gpu/drm/hisilicon/hibmc/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_ttm.o +hibmc-drm-y := hibmc_drm_drv.o hibmc_drm_de.o hibmc_drm_vdac.o hibmc_ttm.o hibmc_drm_i2c.o
obj-$(CONFIG_DRM_HISI_HIBMC) += hibmc-drm.o diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h index 197485e2fe0b..87d2aad0bb5e 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h @@ -14,11 +14,23 @@ #ifndef HIBMC_DRM_DRV_H #define HIBMC_DRM_DRV_H
+#include <linux/gpio/consumer.h> +#include <linux/i2c-algo-bit.h> +#include <linux/i2c.h> + +#include <drm/drm_edid.h> #include <drm/drm_fb_helper.h> #include <drm/drm_framebuffer.h>
struct drm_device;
+struct hibmc_connector { + struct drm_connector base; + + struct i2c_adapter adapter; + struct i2c_algo_bit_data bit_data; +}; + struct hibmc_drm_private { /* hw */ void __iomem *mmio; @@ -31,10 +43,20 @@ struct hibmc_drm_private { struct drm_plane primary_plane; struct drm_crtc crtc; struct drm_encoder encoder; - struct drm_connector connector; + struct hibmc_connector connector; bool mode_config_initialized; };
+static inline struct hibmc_connector *to_hibmc_connector(struct drm_connector *connector) +{ + return container_of(connector, struct hibmc_connector, base); +} + +static inline struct hibmc_drm_private *to_hibmc_drm_private(struct drm_device *dev) +{ + return dev->dev_private; +} + void hibmc_set_power_mode(struct hibmc_drm_private *priv, unsigned int power_mode); void hibmc_set_current_gate(struct hibmc_drm_private *priv, @@ -47,6 +69,7 @@ int hibmc_mm_init(struct hibmc_drm_private *hibmc); void hibmc_mm_fini(struct hibmc_drm_private *hibmc); int hibmc_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args); +int hibmc_ddc_create(struct drm_device *drm_dev, struct hibmc_connector *connector);
extern const struct drm_mode_config_funcs hibmc_mode_funcs;
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c new file mode 100644 index 000000000000..86d712090d87 --- /dev/null +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_i2c.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Hisilicon Hibmc SoC drm driver + * + * Based on the bochs drm driver. + * + * Copyright (c) 2016 Huawei Limited. + * + * Author: + * Tian Tao tiantao6@hisilicon.com + */ + +#include <linux/delay.h> +#include <linux/pci.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_probe_helper.h> + +#include "hibmc_drm_drv.h" + +#define GPIO_DATA 0x0802A0 +#define GPIO_DATA_DIRECTION 0x0802A4 + +#define I2C_SCL_MASK BIT(0) +#define I2C_SDA_MASK BIT(1) + +static void hibmc_set_i2c_signal(void *data, u32 mask, int value) +{ + struct hibmc_connector *hibmc_connector = data; + struct hibmc_drm_private *priv = to_hibmc_drm_private(hibmc_connector->base.dev); + u32 tmp_dir = readl(priv->mmio + GPIO_DATA_DIRECTION); + + if (value) { + tmp_dir &= ~mask; + writel(tmp_dir, priv->mmio + GPIO_DATA_DIRECTION); + } else { + u32 tmp_data = readl(priv->mmio + GPIO_DATA); + + tmp_data &= ~mask; + writel(tmp_data, priv->mmio + GPIO_DATA); + + tmp_dir |= mask; + writel(tmp_dir, priv->mmio + GPIO_DATA_DIRECTION); + } +} + +static int hibmc_get_i2c_signal(void *data, u32 mask) +{ + struct hibmc_connector *hibmc_connector = data; + struct hibmc_drm_private *priv = to_hibmc_drm_private(hibmc_connector->base.dev); + u32 tmp_dir = readl(priv->mmio + GPIO_DATA_DIRECTION); + + if ((tmp_dir & mask) != mask) { + tmp_dir &= ~mask; + writel(tmp_dir, priv->mmio + GPIO_DATA_DIRECTION); + } + + return (readl(priv->mmio + GPIO_DATA) & mask) ? 
1 : 0; +} + +static void hibmc_ddc_setsda(void *data, int state) +{ + hibmc_set_i2c_signal(data, I2C_SDA_MASK, state); +} + +static void hibmc_ddc_setscl(void *data, int state) +{ + hibmc_set_i2c_signal(data, I2C_SCL_MASK, state); +} + +static int hibmc_ddc_getsda(void *data) +{ + return hibmc_get_i2c_signal(data, I2C_SDA_MASK); +} + +static int hibmc_ddc_getscl(void *data) +{ + return hibmc_get_i2c_signal(data, I2C_SCL_MASK); +} + +int hibmc_ddc_create(struct drm_device *drm_dev, + struct hibmc_connector *connector) +{ + connector->adapter.owner = THIS_MODULE; + connector->adapter.class = I2C_CLASS_DDC; + snprintf(connector->adapter.name, I2C_NAME_SIZE, "HIS i2c bit bus"); + connector->adapter.dev.parent = &drm_dev->pdev->dev; + i2c_set_adapdata(&connector->adapter, connector); + connector->adapter.algo_data = &connector->bit_data; + + connector->bit_data.udelay = 20; + connector->bit_data.timeout = usecs_to_jiffies(2000); + connector->bit_data.data = connector; + connector->bit_data.setsda = hibmc_ddc_setsda; + connector->bit_data.setscl = hibmc_ddc_setscl; + connector->bit_data.getsda = hibmc_ddc_getsda; + connector->bit_data.getscl = hibmc_ddc_getscl; + + return i2c_bit_add_bus(&connector->adapter); +} diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index 376a05ddbc2f..c8b14afbcbed 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -78,7 +78,7 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv) { struct drm_device *dev = priv->dev; struct drm_encoder *encoder = &priv->encoder; - struct drm_connector *connector = &priv->connector; + struct drm_connector *connector = &priv->connector.base; int ret;
encoder->possible_crtcs = 0x1;
From: Ma Hai mahai1@huawei.com
mainline inclusion from mainline-v5.12-rc2 commit a0d078d06e516184e2f575f3803935697b5e3ac6 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4M6CD?from=project-issue CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/dr...
----------------------------------------------------------------------
Use drm_get_edid() to get the resolution; if that fails, fall back to a fixed resolution. Rewrite the destroy callback function to release resources.
Signed-off-by: Ma Hai mahai1@huawei.com Signed-off-by: Tian Tao tiantao6@hisilicon.com Reviewed-by: Thomas Zimmermann tzimmermann@suse.de Reviewed-by: Li Dongming lidongming5@huawei.com Link: https://patchwork.freedesktop.org/patch/msgid/1600778670-60370-3-git-send-em... Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- .../gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c | 37 +++++++++++++++++-- 1 file changed, 33 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index c8b14afbcbed..61edae1837b2 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -21,12 +21,24 @@ static int hibmc_connector_get_modes(struct drm_connector *connector) { int count; + void *edid; + struct hibmc_connector *hibmc_connector = to_hibmc_connector(connector); + + edid = drm_get_edid(connector, &hibmc_connector->adapter); + if (edid) { + drm_connector_update_edid_property(connector, edid); + count = drm_add_edid_modes(connector, edid); + if (count) + goto out; + }
count = drm_add_modes_noedid(connector, connector->dev->mode_config.max_width, connector->dev->mode_config.max_height); drm_set_preferred_mode(connector, 1024, 768);
+out: + kfree(edid); return count; }
@@ -36,6 +48,14 @@ static enum drm_mode_status hibmc_connector_mode_valid(struct drm_connector *con return MODE_OK; }
+static void hibmc_connector_destroy(struct drm_connector *connector) +{ + struct hibmc_connector *hibmc_connector = to_hibmc_connector(connector); + + i2c_del_adapter(&hibmc_connector->adapter); + drm_connector_cleanup(connector); +} + static const struct drm_connector_helper_funcs hibmc_connector_helper_funcs = { .get_modes = hibmc_connector_get_modes, @@ -44,7 +64,7 @@ static const struct drm_connector_helper_funcs
static const struct drm_connector_funcs hibmc_connector_funcs = { .fill_modes = drm_helper_probe_single_connector_modes, - .destroy = drm_connector_cleanup, + .destroy = hibmc_connector_destroy, .reset = drm_atomic_helper_connector_reset, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, @@ -77,10 +97,17 @@ static const struct drm_encoder_funcs hibmc_encoder_funcs = { int hibmc_vdac_init(struct hibmc_drm_private *priv) { struct drm_device *dev = priv->dev; + struct hibmc_connector *hibmc_connector = &priv->connector; struct drm_encoder *encoder = &priv->encoder; - struct drm_connector *connector = &priv->connector.base; + struct drm_connector *connector = &hibmc_connector->base; int ret;
+ ret = hibmc_ddc_create(dev, hibmc_connector); + if (ret) { + drm_err(dev, "failed to create ddc: %d\n", ret); + return ret; + } + encoder->possible_crtcs = 0x1; ret = drm_encoder_init(dev, encoder, &hibmc_encoder_funcs, DRM_MODE_ENCODER_DAC, NULL); @@ -91,8 +118,10 @@ int hibmc_vdac_init(struct hibmc_drm_private *priv)
drm_encoder_helper_add(encoder, &hibmc_encoder_helper_funcs);
- ret = drm_connector_init(dev, connector, &hibmc_connector_funcs, - DRM_MODE_CONNECTOR_VGA); + ret = drm_connector_init_with_ddc(dev, connector, + &hibmc_connector_funcs, + DRM_MODE_CONNECTOR_VGA, + &hibmc_connector->adapter); if (ret) { drm_err(dev, "failed to init connector: %d\n", ret); return ret;
From: Ye Bin yebin10@huawei.com
hulk inclusion category: bugfix bugzilla: 185870 https://gitee.com/openeuler/kernel/issues/I4M8IW?from=project-issue CVE: NA
-----------------------------------------------------------
The maximum linear address is U32_MAX. When KASLR is enabled, the smaller of the initrd end address and U32_MAX should be used when calculating the size of the initrd.
Fixes: c1a0cdb549be ("ARM: decompressor: add KASLR support") Signed-off-by: Ye Bin yebin10@huawei.com Signed-off-by: yangerkun yangerkun@huawei.com Signed-off-by: Cui GaoSheng cuigaosheng1@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm/boot/compressed/kaslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/boot/compressed/kaslr.c b/arch/arm/boot/compressed/kaslr.c index ff81a2c31233..3b37c0441574 100644 --- a/arch/arm/boot/compressed/kaslr.c +++ b/arch/arm/boot/compressed/kaslr.c @@ -410,7 +410,7 @@ u32 kaslr_early_init(u32 *kaslr_offset, u32 image_base, u32 image_size, } if (start != 0 && end != 0 && start < U32_MAX) { regions.initrd_start = start; - regions.initrd_size = max_t(u64, end, U32_MAX) - start; + regions.initrd_size = min_t(u64, end, U32_MAX) - start; } }
From: Paul Moore paul@paul-moore.com
maillist inclusion category: bugfix bugzilla: 185904 https://gitee.com/openeuler/kernel/issues/I4N958?from=project-issue CVE: NA
Reference: https://patchwork.kernel.org/project/linux-audit/patch/163942029335.62691.71...
-------------------------------------------------------------------
If the audit daemon were ever to get stuck in a stopped state, the kernel's kauditd_thread() could get blocked attempting to send audit records to the userspace audit daemon. With the kernel thread blocked, it is possible that the audit queue could grow unbounded, as certain audit record generating events must be exempt from the queue limits or else the system enters a deadlock state.
This patch resolves this problem by lowering the kernel thread's socket sending timeout from MAX_SCHEDULE_TIMEOUT to HZ/10 and tweaking the kauditd_send_queue() function to better manage the various audit queues when connection problems occur between the kernel and the audit daemon. With this patch, the backlog may temporarily grow beyond the defined limits when the audit daemon is stopped and the system is under heavy audit pressure, but kauditd_thread() will continue to make progress and drain the queues as it would for other connection problems. For example, with the audit daemon put into a stopped state and the system configured to audit every syscall, it was still possible to shut down the system without a kernel panic, deadlock, etc.; granted, the system was slow to shut down, but that is to be expected given the extreme pressure of recording every syscall.
The timeout value of HZ/10 was chosen primarily through experimentation and this developer's "gut feeling". There is likely no one perfect value, but as this scenario is limited in scope (root privileges would be needed to send SIGSTOP to the audit daemon), it is likely not worth exposing this as a tunable at present. This can always be done at a later date if it proves necessary.
Cc: stable@vger.kernel.org Fixes: 5b52330bbfe63 ("audit: fix auditd/kernel connection state tracking") Reported-by: Gaosheng Cui cuigaosheng1@huawei.com Tested-by: Gaosheng Cui cuigaosheng1@huawei.com Reviewed-by: Richard Guy Briggs rgb@redhat.com Signed-off-by: Paul Moore paul@paul-moore.com Signed-off-by: Cui GaoSheng cuigaosheng1@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- kernel/audit.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c index 68cee3bc8cfe..d784000921da 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -718,7 +718,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, { int rc = 0; struct sk_buff *skb; - static unsigned int failed = 0; + unsigned int failed = 0;
/* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. sk and portid) */ @@ -735,32 +735,30 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, continue; }
+retry: /* grab an extra skb reference in case of error */ skb_get(skb); rc = netlink_unicast(sk, skb, portid, 0); if (rc < 0) { - /* fatal failure for our queue flush attempt? */ + /* send failed - try a few times unless fatal error */ if (++failed >= retry_limit || rc == -ECONNREFUSED || rc == -EPERM) { - /* yes - error processing for the queue */ sk = NULL; if (err_hook) (*err_hook)(skb); - if (!skb_hook) - goto out; - /* keep processing with the skb_hook */ + if (rc == -EAGAIN) + rc = 0; + /* continue to drain the queue */ continue; } else - /* no - requeue to preserve ordering */ - skb_queue_head(queue, skb); + goto retry; } else { - /* it worked - drop the extra reference and continue */ + /* skb sent - drop the extra reference and continue */ consume_skb(skb); failed = 0; } }
-out: return (rc >= 0 ? 0 : rc); }
@@ -1609,7 +1607,8 @@ static int __net_init audit_net_init(struct net *net) audit_panic("cannot initialize netlink socket in namespace"); return -ENOMEM; } - aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + /* limit the timeout in case auditd is blocked/stopped */ + aunet->sk->sk_sndtimeo = HZ / 10;
return 0; }
From: Paul Moore paul@paul-moore.com
maillist inclusion category: bugfix bugzilla: 185906 https://gitee.com/openeuler/kernel/issues/I4N958?from=project-issue CVE: NA
Reference: https://patchwork.kernel.org/project/linux-audit/patch/163949858723.23091.53...
-------------------------------------------------------------------
Due to the audit control mutex necessary for serializing audit userspace messages we haven't been able to block/penalize userspace processes that attempt to send audit records while the system is under audit pressure. The result is that privileged userspace applications have a priority boost with respect to audit as they are not bound by the same audit queue throttling as the other tasks on the system.
This patch attempts to restore some balance to the system when under audit pressure by blocking these privileged userspace tasks after they have finished their audit processing, and dropped the audit control mutex, but before they return to userspace.
Reported-by: Gaosheng Cui cuigaosheng1@huawei.com Tested-by: Gaosheng Cui cuigaosheng1@huawei.com Reviewed-by: Richard Guy Briggs rgb@redhat.com Signed-off-by: Paul Moore paul@paul-moore.com Signed-off-by: Cui GaoSheng cuigaosheng1@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- kernel/audit.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/kernel/audit.c b/kernel/audit.c index d784000921da..2a38cbaf3ddb 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1540,6 +1540,20 @@ static void audit_receive(struct sk_buff *skb) nlh = nlmsg_next(nlh, &len); } audit_ctl_unlock(); + + /* can't block with the ctrl lock, so penalize the sender now */ + if (audit_backlog_limit && + (skb_queue_len(&audit_queue) > audit_backlog_limit)) { + DECLARE_WAITQUEUE(wait, current); + + /* wake kauditd to try and flush the queue */ + wake_up_interruptible(&kauditd_wait); + + add_wait_queue_exclusive(&audit_backlog_wait, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(audit_backlog_wait_time); + remove_wait_queue(&audit_backlog_wait, &wait); + } }
/* Log information about who is connecting to the audit multicast socket */ @@ -1824,7 +1838,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() * using a PID anchored in the caller's namespace * 2. generator holding the audit_cmd_mutex - we don't want to block - * while holding the mutex */ + * while holding the mutex, although we do penalize the sender + * later in audit_receive() when it is safe to block + */ if (!(auditd_test_task(current) || audit_ctl_owner_current())) { long stime = audit_backlog_wait_time;
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4J96Y CVE: NA
-------------------------------------------------
Set CONFIG_NR_CPUS in openeuler_defconfig to 4096 to meet the scalability requirements for arm64.
Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Reviewed-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/configs/openeuler_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 73cd7d9c9a78..66124b991d96 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -380,7 +380,7 @@ CONFIG_ARM64_PA_BITS=52 CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SCHED_MC=y CONFIG_SCHED_SMT=y -CONFIG_NR_CPUS=1024 +CONFIG_NR_CPUS=4096 CONFIG_HOTPLUG_CPU=y # CONFIG_ARM64_BOOTPARAM_HOTPLUG_CPU0 is not set CONFIG_NUMA=y
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4J6G3 CVE: NA
-------------------------------------------------
A Phytium 4-processor server may have 32 NUMA nodes. To support future scalability, set CONFIG_NODES_SHIFT to 7 in the arm64 openeuler_defconfig, raising the maximum number of NUMA nodes available on the target system to 128 in advance.
Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Reviewed-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/configs/openeuler_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 66124b991d96..3d66cdd5826a 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -384,7 +384,7 @@ CONFIG_NR_CPUS=4096 CONFIG_HOTPLUG_CPU=y # CONFIG_ARM64_BOOTPARAM_HOTPLUG_CPU0 is not set CONFIG_NUMA=y -CONFIG_NODES_SHIFT=4 +CONFIG_NODES_SHIFT=7 CONFIG_USE_PERCPU_NUMA_NODE_ID=y CONFIG_HAVE_SETUP_PER_CPU_AREA=y CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK=y
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4HDHZ CVE: NA
-------------------------------------------------
For arm64 openeuler_defconfig: Change default page size from 64K to 4K, then enable CONFIG_ARM64_VA_BITS_48.
Following configs are involved: CONFIG_ARM64_PAGE_SHIFT=12 CONFIG_ARM64_CONT_PTE_SHIFT=4 CONFIG_ARM64_CONT_PMD_SHIFT=4 CONFIG_ARCH_MMAP_RND_BITS_MIN=18 CONFIG_ARCH_MMAP_RND_BITS_MAX=33 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=11 CONFIG_PGTABLE_LEVELS=4 CONFIG_ARM64_4K_PAGES=y CONFIG_ARM64_64K_PAGES=n CONFIG_ARM64_VA_BITS_48=y CONFIG_ARM64_VA_BITS_52=n CONFIG_ARM64_VA_BITS=48 CONFIG_ARM64_PA_BITS_48=y CONFIG_ARM64_PA_BITS_52=n CONFIG_ARM64_PA_BITS=48 CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_FORCE_MAX_ZONEORDER=11 CONFIG_ARCH_MMAP_RND_BITS=18 CONFIG_ARCH_MMAP_RND_COMPAT_BITS=11
Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Reviewed-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/configs/openeuler_defconfig | 38 ++++++++++++++------------ 1 file changed, 20 insertions(+), 18 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 3d66cdd5826a..589364f741de 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -250,12 +250,12 @@ CONFIG_TRACEPOINTS=y CONFIG_ARM64=y CONFIG_64BIT=y CONFIG_MMU=y -CONFIG_ARM64_PAGE_SHIFT=16 -CONFIG_ARM64_CONT_PTE_SHIFT=5 -CONFIG_ARM64_CONT_PMD_SHIFT=5 -CONFIG_ARCH_MMAP_RND_BITS_MIN=14 -CONFIG_ARCH_MMAP_RND_BITS_MAX=14 -CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=7 +CONFIG_ARM64_PAGE_SHIFT=12 +CONFIG_ARM64_CONT_PTE_SHIFT=4 +CONFIG_ARM64_CONT_PMD_SHIFT=4 +CONFIG_ARCH_MMAP_RND_BITS_MIN=18 +CONFIG_ARCH_MMAP_RND_BITS_MAX=33 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MIN=11 CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX=16 CONFIG_STACKTRACE_SUPPORT=y CONFIG_ILLEGAL_POINTER_VALUE=0xdead000000000000 @@ -273,7 +273,7 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTREMOVE=y CONFIG_SMP=y CONFIG_KERNEL_MODE_NEON=y CONFIG_FIX_EARLYCON_MEM=y -CONFIG_PGTABLE_LEVELS=3 +CONFIG_PGTABLE_LEVELS=4 CONFIG_ARCH_SUPPORTS_UPROBES=y CONFIG_ARCH_PROC_KCORE_TEXT=y
@@ -365,17 +365,18 @@ CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH=y CONFIG_SOCIONEXT_SYNQUACER_PREITS=y # end of ARM errata workarounds via the alternatives framework
-# CONFIG_ARM64_4K_PAGES is not set +CONFIG_ARM64_4K_PAGES=y # CONFIG_ARM64_16K_PAGES is not set -CONFIG_ARM64_64K_PAGES=y +# CONFIG_ARM64_64K_PAGES is not set +# CONFIG_ARM64_VA_BITS_39 is not set # CONFIG_ARM64_VA_BITS_42 is not set -# CONFIG_ARM64_VA_BITS_48 is not set -CONFIG_ARM64_VA_BITS_52=y +CONFIG_ARM64_VA_BITS_48=y +# CONFIG_ARM64_VA_BITS_52 is not set # CONFIG_ARM64_FORCE_52BIT is not set -CONFIG_ARM64_VA_BITS=52 -# CONFIG_ARM64_PA_BITS_48 is not set -CONFIG_ARM64_PA_BITS_52=y -CONFIG_ARM64_PA_BITS=52 +CONFIG_ARM64_VA_BITS=48 +CONFIG_ARM64_PA_BITS_48=y +# CONFIG_ARM64_PA_BITS_52 is not set +CONFIG_ARM64_PA_BITS=48 # CONFIG_CPU_BIG_ENDIAN is not set CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_SCHED_MC=y @@ -402,6 +403,7 @@ CONFIG_ARCH_SELECT_MEMORY_MODEL=y CONFIG_HAVE_ARCH_PFN_VALID=y CONFIG_HW_PERF_EVENTS=y CONFIG_SYS_SUPPORTS_HUGETLBFS=y +CONFIG_ARCH_WANT_HUGE_PMD_SHARE=y CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK=y CONFIG_PARAVIRT=y @@ -411,7 +413,7 @@ CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y CONFIG_ARM64_CPU_PARK=y # CONFIG_XEN is not set -CONFIG_FORCE_MAX_ZONEORDER=14 +CONFIG_FORCE_MAX_ZONEORDER=11 CONFIG_UNMAP_KERNEL_AT_EL0=y CONFIG_RODATA_FULL_DEFAULT_ENABLED=y CONFIG_ARM64_PMEM_RESERVE=y @@ -774,9 +776,9 @@ CONFIG_HAVE_MOD_ARCH_SPECIFIC=y CONFIG_MODULES_USE_ELF_RELA=y CONFIG_ARCH_HAS_ELF_RANDOMIZE=y CONFIG_HAVE_ARCH_MMAP_RND_BITS=y -CONFIG_ARCH_MMAP_RND_BITS=14 +CONFIG_ARCH_MMAP_RND_BITS=18 CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS=y -CONFIG_ARCH_MMAP_RND_COMPAT_BITS=7 +CONFIG_ARCH_MMAP_RND_COMPAT_BITS=11 CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT=y CONFIG_CLONE_BACKWARDS=y CONFIG_OLD_SIGSUSPEND3=y
From: Lijun Fang fanglijun3@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4M24Q CVE: NA
-------------------------------------------------
DvPP stands for DaVinci Video Pre-Processor. Add the new configs ASCEND_FEATURES and ASCEND_DVPP_MMAP to enable the DvPP features for the Ascend platform.
The DvPP can only use a limited range of virtual addresses, just as the Ascend310/910 can only use a 4 GB range of virtual addresses. Therefore, add a new mmap flag named MAP_DVPP so that memory for the DvPP processor can be allocated through the mmap syscall. The new flag is only valid on the Ascend platform.
Memory for the DvPP should be allocated like this:
addr = mmap(NULL, length, PROT_READ, MAP_ANONYMOUS | MAP_DVPP, -1, 0);
Signed-off-by: Lijun Fang fanglijun3@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/Kconfig | 29 ++++++++++++++++++++ drivers/char/svm.c | 11 ++++++-- fs/hugetlbfs/inode.c | 16 +++++++++++ include/linux/mman.h | 64 ++++++++++++++++++++++++++++++++++++++++++++ mm/mmap.c | 40 +++++++++++++++++++++++++++ 5 files changed, 158 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 08a93ca8f0d9..df90a6e05ad2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1994,6 +1994,35 @@ config STACKPROTECTOR_PER_TASK def_bool y depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_SYSREG
+menuconfig ASCEND_FEATURES + bool "Support Ascend Features" + depends on ARM64 + help + The Ascend chip use the Hisilicon DaVinci architecture, and mainly + focus on AI and machine leanring area, contains many external features. + + Enable this config to enable selective list of these features. + + If unsure, say Y + +if ASCEND_FEATURES + +config ASCEND_DVPP_MMAP + bool "Enable support for the DvPP mmap" + default y + help + The DvPP means Davinci Video Pre-Processor, are mainly consist of VDEC + (Video Decode), VENC(Video Encode), JPEG D/E (Decode/Encode), PNGD + (PNG Decode) and VPC (Video Process) processors. + + The DvPP could only use a limit range of virtual address, just like the + Ascend310/910 could only use the limit range of virtual address (default + 4 GB), so add a new mmap flag which is named MAP_DVPP to allocate the + special memory for DvPP processor, the new flag is only valid for Ascend + platform. + +endif + endmenu
menu "Boot options" diff --git a/drivers/char/svm.c b/drivers/char/svm.c index 531c765e4415..b85283118417 100644 --- a/drivers/char/svm.c +++ b/drivers/char/svm.c @@ -1433,6 +1433,9 @@ static unsigned long svm_get_unmapped_area(struct file *file,
addr = ALIGN(addr, len);
+ if (dvpp_mmap_check(addr, len, flags)) + return -ENOMEM; + vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && (vma == NULL || addr + len <= vm_start_gap(vma))) @@ -1442,7 +1445,8 @@ static unsigned long svm_get_unmapped_area(struct file *file, info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; info.low_limit = max(PAGE_SIZE, mmap_min_addr); - info.high_limit = mm->mmap_base; + info.high_limit = ((mm->mmap_base <= DVPP_MMAP_BASE) ? + mm->mmap_base : DVPP_MMAP_BASE); info.align_mask = ((len >> PAGE_SHIFT) - 1) << PAGE_SHIFT; info.align_offset = pgoff << PAGE_SHIFT;
@@ -1452,7 +1456,10 @@ static unsigned long svm_get_unmapped_area(struct file *file, VM_BUG_ON(addr != -ENOMEM); info.flags = 0; info.low_limit = TASK_UNMAPPED_BASE; - info.high_limit = TASK_SIZE; + info.high_limit = DVPP_MMAP_BASE; + + if (enable_mmap_dvpp) + dvpp_mmap_get_area(&info, flags);
addr = vm_unmapped_area(&info); } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 2e2e4983f1ba..246858ea0a52 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -254,6 +254,10 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, info.high_limit = TASK_SIZE; info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; + + if (enable_mmap_dvpp) + dvpp_mmap_get_area(&info, flags); + return vm_unmapped_area(&info); }
@@ -270,6 +274,10 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr, info.high_limit = current->mm->mmap_base; info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; + + if (enable_mmap_dvpp) + dvpp_mmap_get_area(&info, flags); + addr = vm_unmapped_area(&info);
/* @@ -283,6 +291,10 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr, info.flags = 0; info.low_limit = current->mm->mmap_base; info.high_limit = TASK_SIZE; + + if (enable_mmap_dvpp) + dvpp_mmap_get_area(&info, flags); + addr = vm_unmapped_area(&info); }
@@ -310,6 +322,10 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) { addr = ALIGN(addr, huge_page_size(h)); + + if (dvpp_mmap_check(addr, len, flags)) + return -ENOMEM; + vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && (!vma || addr + len <= vm_start_gap(vma))) diff --git a/include/linux/mman.h b/include/linux/mman.h index 7908bf3e5696..f13546c357e1 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -23,6 +23,70 @@ static inline void set_vm_checknode(vm_flags_t *vm_flags, unsigned long flags) {} #endif
+extern int enable_mmap_dvpp; +/* + * Enable MAP_32BIT for Ascend Platform + */ +#ifdef CONFIG_ASCEND_DVPP_MMAP + +#define MAP_DVPP 0x200 + +#define DVPP_MMAP_SIZE (0x100000000UL) +#define DVPP_MMAP_BASE (TASK_SIZE - DVPP_MMAP_SIZE) + +static inline int dvpp_mmap_check(unsigned long addr, unsigned long len, + unsigned long flags) +{ + if (enable_mmap_dvpp && (flags & MAP_DVPP) && + (addr < DVPP_MMAP_BASE + DVPP_MMAP_SIZE) && + (addr > DVPP_MMAP_BASE)) + return -EINVAL; + else + return 0; +} + +static inline void dvpp_mmap_get_area(struct vm_unmapped_area_info *info, + unsigned long flags) +{ + if (flags & MAP_DVPP) { + info->low_limit = DVPP_MMAP_BASE; + info->high_limit = DVPP_MMAP_BASE + DVPP_MMAP_SIZE; + } else { + info->low_limit = max(info->low_limit, TASK_UNMAPPED_BASE); + info->high_limit = min(info->high_limit, DVPP_MMAP_BASE); + } +} + +static inline int dvpp_mmap_zone(unsigned long addr) +{ + if (addr >= DVPP_MMAP_BASE) + return 1; + else + return 0; +} +#else + +#define MAP_DVPP (0) + +static inline int dvpp_mmap_check(unsigned long addr, unsigned long len, + unsigned long flags) +{ + return 0; +} + +static inline void dvpp_mmap_get_area(struct vm_unmapped_area_info *info, + unsigned long flags) +{ +} + +static inline int dvpp_mmap_zone(unsigned long addr) { return 0; } + +#define DVPP_MMAP_BASE (0) + +#define DVPP_MMAP_SIZE (0) + +#endif + /* * Arrange for legacy / undefined architecture specific flags to be * ignored by mmap handling code. diff --git a/mm/mmap.c b/mm/mmap.c index a208057be6f1..3991634121d7 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2409,6 +2409,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
if (addr) { addr = PAGE_ALIGN(addr); + + if (dvpp_mmap_check(addr, len, flags)) + return -ENOMEM; + vma = find_vma_prev(mm, addr, &prev); if (mmap_end - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vm_start_gap(vma)) && @@ -2422,6 +2426,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, info.high_limit = mmap_end; info.align_mask = 0; info.align_offset = 0; + + if (enable_mmap_dvpp) + dvpp_mmap_get_area(&info, flags); + return vm_unmapped_area(&info); } #endif @@ -2451,6 +2459,10 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, /* requesting a specific address */ if (addr) { addr = PAGE_ALIGN(addr); + + if (dvpp_mmap_check(addr, len, flags)) + return -ENOMEM; + vma = find_vma_prev(mm, addr, &prev); if (mmap_end - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vm_start_gap(vma)) && @@ -2464,6 +2476,10 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); info.align_mask = 0; info.align_offset = 0; + + if (enable_mmap_dvpp) + dvpp_mmap_get_area(&info, flags); + addr = vm_unmapped_area(&info);
/* @@ -2477,6 +2493,10 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, info.flags = 0; info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = mmap_end; + + if (enable_mmap_dvpp) + dvpp_mmap_get_area(&info, flags); + addr = vm_unmapped_area(&info); }
@@ -4069,3 +4089,23 @@ static int __meminit init_reserve_notifier(void) return 0; } subsys_initcall(init_reserve_notifier); + + +/* + * Enable the MAP_32BIT (mmaps and hugetlb). + */ +int enable_mmap_dvpp __read_mostly; + +#ifdef CONFIG_ASCEND_DVPP_MMAP + +static int __init ascend_enable_mmap_dvpp(char *s) +{ + enable_mmap_dvpp = 1; + + pr_info("Ascend enable dvpp mmap features\n"); + + return 1; +} +__setup("enable_mmap_dvpp", ascend_enable_mmap_dvpp); + +#endif
From: Lijun Fang fanglijun3@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4M24Q CVE: NA
-------------------------------------------------
Enable CONFIG_ASCEND_DVPP_MMAP by default (set to y), together with CONFIG_ASCEND_FEATURES, in the arm64 openeuler_defconfig.
Signed-off-by: Lijun Fang fanglijun3@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/configs/openeuler_defconfig | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 589364f741de..17bc8750bba7 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -475,6 +475,8 @@ CONFIG_ARM64_PSEUDO_NMI=y CONFIG_RELOCATABLE=y CONFIG_RANDOMIZE_BASE=y CONFIG_RANDOMIZE_MODULE_REGION_FULL=y +CONFIG_ASCEND_FEATURES=y +CONFIG_ASCEND_DVPP_MMAP=y # end of Kernel Features
#
From: Yufen Yu yuyufen@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4JYYO?from=project-issue CVE: NA
---------------------------
We get a NULL pointer dereference oops when testing raid1 as follows:
mdadm -CR /dev/md1 -l 1 -n 2 /dev/sd[ab]
mdadm /dev/md1 -f /dev/sda mdadm /dev/md1 -r /dev/sda mdadm /dev/md1 -a /dev/sda sleep 5 mdadm /dev/md1 -f /dev/sdb mdadm /dev/md1 -r /dev/sdb mdadm /dev/md1 -a /dev/sdb
After a disk (/dev/sda) has been removed, we add the disk to the raid array again, which triggers a recovery action. Since the rdev's current state is 'spare', read/write bios can be issued to the disk.
Then we set the other disk (/dev/sdb) faulty. Since the raid array is now in a degraded state and /dev/sdb is the only 'In_sync' disk, raid1_error() will return without actually marking the disk Faulty.
However, that can interrupt the recovery action, and md_check_recovery will try to call remove_and_add_spares() to remove the spare disk. The race between remove_and_add_spares() and raid1_write_request() shown below can then cause a NULL pointer dereference of conf->mirrors[i].rdev:
raid1_write_request()               md_check_recovery             raid1_error()
rcu_read_lock()
rdev != NULL
!test_bit(Faulty, &rdev->flags)

                                                                  conf->recovery_disabled=
                                                                    mddev->recovery_disabled;
                                                                  return busy

                                    remove_and_add_spares
                                    raid1_remove_disk
                                    rdev->nr_pending == 0

atomic_inc(&rdev->nr_pending);
rcu_read_unlock()

                                    p->rdev=NULL

conf->mirrors[i].rdev->data_offset
NULL pointer deref!!!

                                    if (!test_bit(RemoveSynchronized, &rdev->flags))
                                        synchronize_rcu();
                                    p->rdev=rdev
To fix the race condition, we add a new flag 'WantRemove' for rdev. Before accessing conf->mirrors[i].rdev, we need to ensure that the rdev does not have the 'WantRemove' bit set.
Link: https://marc.info/?l=linux-raid&m=156412052717709&w=2
Reported-by: Zou Wei zou_wei@huawei.com Signed-off-by: Yufen Yu yuyufen@huawei.com Conflict: drivers/md/md.h Signed-off-by: Laibin Qiu qiulaibin@huawei.com Reviewed-by: yuyufen yuyufen@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/md/md.h | 4 ++++ drivers/md/raid1.c | 28 ++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 6 deletions(-)
diff --git a/drivers/md/md.h b/drivers/md/md.h index c94811cf2600..766ecfb0ff5c 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -213,6 +213,10 @@ enum flag_bits { * check if there is collision between raid1 * serial bios. */ + WantRemove, /* Before set conf->mirrors[i] as NULL, + * we set the bit first, avoiding access the + * conf->mirrors[i] after it set NULL. + */ };
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index fb31e5dd54a6..da6772f49f07 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -641,7 +641,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect rdev = rcu_dereference(conf->mirrors[disk].rdev); if (r1_bio->bios[disk] == IO_BLOCKED || rdev == NULL - || test_bit(Faulty, &rdev->flags)) + || test_bit(Faulty, &rdev->flags) + || test_bit(WantRemove, &rdev->flags)) continue; if (!test_bit(In_sync, &rdev->flags) && rdev->recovery_offset < this_sector + sectors) @@ -770,7 +771,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
if (best_disk >= 0) { rdev = rcu_dereference(conf->mirrors[best_disk].rdev); - if (!rdev) + if (!rdev || test_bit(Faulty, &rdev->flags) + || test_bit(WantRemove, &rdev->flags)) goto retry; atomic_inc(&rdev->nr_pending); sectors = best_good_sectors; @@ -1382,7 +1384,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio, break; } r1_bio->bios[i] = NULL; - if (!rdev || test_bit(Faulty, &rdev->flags)) { + if (!rdev || test_bit(Faulty, &rdev->flags) + || test_bit(WantRemove, &rdev->flags)) { if (i < conf->raid_disks) set_bit(R1BIO_Degraded, &r1_bio->state); continue; @@ -1759,6 +1762,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
p->head_position = 0; rdev->raid_disk = mirror; + clear_bit(WantRemove, &rdev->flags); err = 0; /* As all devices are equivalent, we don't need a full recovery * if this was recently any drive of the array @@ -1773,6 +1777,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) /* Add this device as a replacement */ clear_bit(In_sync, &rdev->flags); set_bit(Replacement, &rdev->flags); + clear_bit(WantRemove, &rdev->flags); rdev->raid_disk = mirror; err = 0; conf->fullsync = 1; @@ -1812,16 +1817,26 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev) err = -EBUSY; goto abort; } - p->rdev = NULL; + + /* + * Before set p->rdev = NULL, we set WantRemove bit avoiding + * race between rdev remove and issue bio, which can cause + * NULL pointer deference of rdev by conf->mirrors[i].rdev. + */ + set_bit(WantRemove, &rdev->flags); + if (!test_bit(RemoveSynchronized, &rdev->flags)) { synchronize_rcu(); if (atomic_read(&rdev->nr_pending)) { /* lost the race, try later */ err = -EBUSY; - p->rdev = rdev; + clear_bit(WantRemove, &rdev->flags); goto abort; } } + + p->rdev = NULL; + if (conf->mirrors[conf->raid_disks + number].rdev) { /* We just removed a device that is being replaced. * Move down the replacement. We drain all IO before @@ -2716,7 +2731,8 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev == NULL || - test_bit(Faulty, &rdev->flags)) { + test_bit(Faulty, &rdev->flags) || + test_bit(WantRemove, &rdev->flags)) { if (i < conf->raid_disks) still_degraded = 1; } else if (!test_bit(In_sync, &rdev->flags)) {
From: Laibin Qiu qiulaibin@huawei.com
hulk inclusion category: bugfix bugzilla: 185857 https://gitee.com/openeuler/kernel/issues/I4MMUW CVE: NA
--------------------------------
We disable wbt by setting WBT_STATE_OFF_DEFAULT in wbt_disable_default() when switching the elevator to bfq. When a scsi device is then removed, wbt is re-enabled by wbt_enable_default(). If rwb->enable_state changes between wbt_wait() and wbt_track() while a write request is being submitted, the request is tracked without having been counted as inflight.
The following is the scenario that triggered the problem.
T1                            T2                            T3
                              elevator_switch_mq
                              bfq_init_queue
                              wbt_disable_default
                              <= Set rwb->enable_state (OFF)
Submit_bio
blk_mq_make_request
rq_qos_throttle
<= rwb->enable_state (OFF)
                                                            scsi_remove_device
                                                            sd_remove
                                                            del_gendisk
                                                            blk_unregister_queue
                                                            elv_unregister_queue
                                                            wbt_enable_default
                                                            <= Set rwb->enable_state (ON)
rq_qos_track
<= rwb->enable_state (ON)
^^^^^^ this request will be marked WBT_TRACKED without an inflight add, which will
lead to rqw->inflight dropping to -1 in wbt_done() and trigger an IO hang.
Fix this by moving wbt_enable_default() from elv_unregister_queue() to bfq_exit_queue(), so that wbt is only re-enabled when bfq exits. Fixes: 76a8040817b4b ("blk-wbt: make sure throttle is enabled properly") Signed-off-by: Laibin Qiu qiulaibin@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- block/bfq-iosched.c | 4 ++++ block/elevator.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index aa1a808fa072..70f2aeadd21c 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -6369,6 +6369,7 @@ static void bfq_exit_queue(struct elevator_queue *e) { struct bfq_data *bfqd = e->elevator_data; struct bfq_queue *bfqq, *n; + struct request_queue *q = bfqd->queue;
hrtimer_cancel(&bfqd->idle_slice_timer);
@@ -6392,6 +6393,9 @@ static void bfq_exit_queue(struct elevator_queue *e) #endif
kfree(bfqd); + + /* Re-enable throttling in case elevator disabled it */ + wbt_enable_default(q); }
static void bfq_init_root_group(struct bfq_group *root_group, diff --git a/block/elevator.c b/block/elevator.c index 65dfc7559a36..76f70f679a1b 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -523,8 +523,6 @@ void elv_unregister_queue(struct request_queue *q) kobject_del(&e->kobj);
e->registered = 0; - /* Re-enable throttling in case elevator disabled it */ - wbt_enable_default(q); } }
From: Xingang Wang wangxingang5@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4L735 CVE: NA
-------------------------------------------------
This introduces support for setting and getting device configuration in the iommu private driver. For example, when the smmu mpam configuration needs to be set in the smmu driver, these interfaces will help.
Signed-off-by: Xingang Wang wangxingang5@huawei.com Reviewed-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 +++++++++++++++++ drivers/iommu/iommu.c | 22 +++++++++++++++++++++ include/linux/iommu.h | 18 +++++++++++++++++ 3 files changed, 58 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b9b2232b6b83..be4b66ccdd05 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4081,6 +4081,22 @@ static int arm_smmu_device_domain_type(struct device *dev) } #endif
+static int arm_smmu_device_get_config(struct device *dev, int type, void *data) +{ + switch (type) { + default: + return -EINVAL; + } +} + +static int arm_smmu_device_set_config(struct device *dev, int type, void *data) +{ + switch (type) { + default: + return -EINVAL; + } +} + static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, @@ -4122,6 +4138,8 @@ static struct iommu_ops arm_smmu_ops = { #ifdef CONFIG_SMMU_BYPASS_DEV .def_domain_type = arm_smmu_device_domain_type, #endif + .dev_get_config = arm_smmu_device_get_config, + .dev_set_config = arm_smmu_device_set_config, .pgsize_bitmap = -1UL, /* Restricted during device attach */ };
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9adb9d2502ae..25b3b8386ca9 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -3567,3 +3567,25 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle) return ops->sva_get_pasid(handle); } EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); + +int iommu_dev_set_config(struct device *dev, int type, void *data) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_set_config) + return ops->dev_set_config(dev, type, data); + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(iommu_dev_set_config); + +int iommu_dev_get_config(struct device *dev, int type, void *data) +{ + const struct iommu_ops *ops = dev->bus->iommu_ops; + + if (ops && ops->dev_get_config) + return ops->dev_get_config(dev, type, data); + + return -ENODEV; +} +EXPORT_SYMBOL_GPL(iommu_dev_get_config); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d899e7a5f234..ed12f5cac0b4 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -348,6 +348,9 @@ struct iommu_ops { dma_addr_t giova, phys_addr_t gpa, size_t size); void (*unbind_guest_msi)(struct iommu_domain *domain, dma_addr_t giova);
+ int (*dev_get_config)(struct device *dev, int type, void *data); + int (*dev_set_config)(struct device *dev, int type, void *data); + unsigned long pgsize_bitmap; struct module *owner; }; @@ -584,6 +587,9 @@ extern int iommu_clear_dirty_log(struct iommu_domain *domain, unsigned long iova unsigned long base_iova, unsigned long bitmap_pgshift);
+extern int iommu_dev_set_config(struct device *dev, int type, void *data); +extern int iommu_dev_get_config(struct device *dev, int type, void *data); + /* Window handling function prototypes */ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t offset, u64 size, @@ -1215,6 +1221,18 @@ int iommu_bind_guest_msi(struct iommu_domain *domain, static inline void iommu_unbind_guest_msi(struct iommu_domain *domain, dma_addr_t giova) {}
+static inline +int iommu_dev_set_config(struct device *dev, int type, void *data) +{ + return -ENODEV; +} + +static inline +int iommu_dev_get_config(struct device *dev, int type, void *data) +{ + return -ENODEV; +} + #endif /* CONFIG_IOMMU_API */
/**
From: Xingang Wang wangxingang5@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4L735 CVE: NA
-------------------------------------------------
To support limiting the qos of a device, the partid and pmg need to be set in the SMMU STE/CD context. This introduces support for the SMMU mpam feature and adds an interface to set the mpam configuration in the STE/CD.
Signed-off-by: Xingang Wang wangxingang5@huawei.com Reviewed-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 116 ++++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 19 ++++ include/linux/arm-smmu.h | 17 +++ 3 files changed, 152 insertions(+) create mode 100644 include/linux/arm-smmu.h
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index be4b66ccdd05..b8595658ad33 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -11,6 +11,7 @@
#include <linux/acpi.h> #include <linux/acpi_iort.h> +#include <linux/arm-smmu.h> #include <linux/bitops.h> #include <linux/crash_dump.h> #include <linux/delay.h> @@ -4081,6 +4082,111 @@ static int arm_smmu_device_domain_type(struct device *dev) } #endif
+static int arm_smmu_set_mpam(struct arm_smmu_device *smmu, + int sid, int ssid, int partid, int pmg, int s1mpam) +{ + struct arm_smmu_master *master = arm_smmu_find_master(smmu, sid); + struct arm_smmu_domain *domain = master ? master->domain : NULL; + u64 val; + __le64 *ste, *cd; + + struct arm_smmu_cmdq_ent prefetch_cmd = { + .opcode = CMDQ_OP_PREFETCH_CFG, + .prefetch = { + .sid = sid, + }, + }; + + if (WARN_ON(!domain)) + return -EINVAL; + if (WARN_ON(!domain->s1_cfg.set)) + return -EINVAL; + if (WARN_ON(ssid >= (1 << domain->s1_cfg.s1cdmax))) + return -E2BIG; + + if (!(smmu->features & ARM_SMMU_FEAT_MPAM)) + return -ENODEV; + + if (partid > smmu->mpam_partid_max || pmg > smmu->mpam_pmg_max) { + dev_err(smmu->dev, + "mpam rmid out of range: partid[0, %d] pmg[0, %d]\n", + smmu->mpam_partid_max, smmu->mpam_pmg_max); + return -ERANGE; + } + + /* get ste ptr */ + ste = arm_smmu_get_step_for_sid(smmu, sid); + + /* write s1mpam to ste */ + val = le64_to_cpu(ste[1]); + val &= ~STRTAB_STE_1_S1MPAM; + val |= FIELD_PREP(STRTAB_STE_1_S1MPAM, s1mpam); + WRITE_ONCE(ste[1], cpu_to_le64(val)); + + val = le64_to_cpu(ste[4]); + val &= ~STRTAB_STE_4_PARTID_MASK; + val |= FIELD_PREP(STRTAB_STE_4_PARTID_MASK, partid); + WRITE_ONCE(ste[4], cpu_to_le64(val)); + + val = le64_to_cpu(ste[5]); + val &= ~STRTAB_STE_5_PMG_MASK; + val |= FIELD_PREP(STRTAB_STE_5_PMG_MASK, pmg); + WRITE_ONCE(ste[5], cpu_to_le64(val)); + arm_smmu_sync_ste_for_sid(smmu, sid); + + /* do not modify cd table which owned by guest */ + if (domain->stage == ARM_SMMU_DOMAIN_NESTED) { + dev_err(smmu->dev, + "mpam: smmu cd is owned by guest, not modified\n"); + return 0; + } + + /* get cd ptr */ + cd = arm_smmu_get_cd_ptr(domain, ssid); + if (s1mpam && WARN_ON(!cd)) + return -ENOMEM; + + val = le64_to_cpu(cd[5]); + val &= ~CTXDESC_CD_5_PARTID_MASK; + val &= ~CTXDESC_CD_5_PMG_MASK; + val |= FIELD_PREP(CTXDESC_CD_5_PARTID_MASK, partid); + val |= FIELD_PREP(CTXDESC_CD_5_PMG_MASK, pmg); + WRITE_ONCE(cd[5], 
cpu_to_le64(val)); + arm_smmu_sync_cd(domain, ssid, true); + + /* It's likely that we'll want to use the new STE soon */ + if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) + arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); + + dev_info(smmu->dev, "partid %d, pmg %d\n", partid, pmg); + + return 0; +} + +static int arm_smmu_device_set_mpam(struct device *dev, + struct arm_smmu_mpam *mpam) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + int ret; + + if (WARN_ON(!master) || WARN_ON(!mpam)) + return -EINVAL; + + if (mpam->flags & ARM_SMMU_DEV_SET_MPAM) { + ret = arm_smmu_set_mpam(master->domain->smmu, + master->streams->id, + mpam->pasid, + mpam->partid, + mpam->pmg, + mpam->s1mpam); + if (ret < 0) + return ret; + } + + return 0; + +} + static int arm_smmu_device_get_config(struct device *dev, int type, void *data) { switch (type) { @@ -4092,6 +4198,8 @@ static int arm_smmu_device_get_config(struct device *dev, int type, void *data) static int arm_smmu_device_set_config(struct device *dev, int type, void *data) { switch (type) { + case ARM_SMMU_MPAM: + return arm_smmu_device_set_mpam(dev, data); default: return -EINVAL; } @@ -5210,6 +5318,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) if (FIELD_GET(IDR3_RIL, reg)) smmu->features |= ARM_SMMU_FEAT_RANGE_INV;
+ if (reg & IDR3_MPAM) { + reg = readl_relaxed(smmu->base + ARM_SMMU_MPAMIDR); + smmu->mpam_partid_max = FIELD_GET(MPAMIDR_PARTID_MAX, reg); + smmu->mpam_pmg_max = FIELD_GET(MPAMIDR_PMG_MAX, reg); + if (smmu->mpam_partid_max || smmu->mpam_pmg_max) + smmu->features |= ARM_SMMU_FEAT_MPAM; + } + /* IDR5 */ reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 785451c62730..7ea791a13e9b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -61,6 +61,8 @@ #define IDR3_BBML1 1 #define IDR3_BBML2 2 #define IDR3_RIL (1 << 10) +#define IDR3_MPAM (1 << 7) +#define ARM_SMMU_IDR3_CFG 0x140C
#define ARM_SMMU_IDR5 0x14 #define IDR5_STALL_MAX GENMASK(31, 16) @@ -162,6 +164,10 @@ #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
+#define ARM_SMMU_MPAMIDR 0x130 +#define MPAMIDR_PMG_MAX GENMASK(23, 16) +#define MPAMIDR_PARTID_MAX GENMASK(15, 0) + #define ARM_SMMU_IDR6 0x190 #define IDR6_LOG2NUMP GENMASK(27, 24) #define IDR6_LOG2NUMQ GENMASK(19, 16) @@ -258,6 +264,7 @@ #define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
#define STRTAB_STE_1_PPAR (1UL << 18) +#define STRTAB_STE_1_S1MPAM (1UL << 26) #define STRTAB_STE_1_S1STALLD (1UL << 27)
#define STRTAB_STE_1_EATS GENMASK_ULL(29, 28) @@ -290,6 +297,11 @@
#define STRTAB_STE_3_S2TTB_MASK GENMASK_ULL(51, 4)
+#define STRTAB_STE_4_PARTID_MASK GENMASK_ULL(31, 16) + +#define STRTAB_STE_5_MPAM_NS (1UL << 8) +#define STRTAB_STE_5_PMG_MASK GENMASK_ULL(7, 0) + /* * Context descriptors. * @@ -331,6 +343,9 @@
#define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4)
+#define CTXDESC_CD_5_PARTID_MASK GENMASK_ULL(47, 32) +#define CTXDESC_CD_5_PMG_MASK GENMASK_ULL(55, 48) + /* * When the SMMU only supports linear context descriptor tables, pick a * reasonable size limit (64kB). @@ -698,6 +713,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_BBML1 (1 << 21) #define ARM_SMMU_FEAT_BBML2 (1 << 22) #define ARM_SMMU_FEAT_ECMDQ (1 << 23) +#define ARM_SMMU_FEAT_MPAM (1 << 24) u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -739,6 +755,9 @@ struct arm_smmu_device {
struct rb_root streams; struct mutex streams_mutex; + + unsigned int mpam_partid_max; + unsigned int mpam_pmg_max; };
struct arm_smmu_stream { diff --git a/include/linux/arm-smmu.h b/include/linux/arm-smmu.h new file mode 100644 index 000000000000..f1f0dfda7d70 --- /dev/null +++ b/include/linux/arm-smmu.h @@ -0,0 +1,17 @@ +#ifndef _ARM_SMMU_H_ +#define _ARM_SMMU_H_ + +enum arm_smmu_device_config_type { + ARM_SMMU_MPAM = 0, +}; + +struct arm_smmu_mpam { +#define ARM_SMMU_DEV_SET_MPAM (1 << 0) + int flags; + int pasid; + int partid; + int pmg; + int s1mpam; +}; + +#endif
From: Xingang Wang wangxingang5@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4L735 CVE: NA
-------------------------------------------------
Add an interface to get the mpam configuration of the CD/STE context, using s1mpam to indicate whether the partid and pmg come from the CD or the STE.
Signed-off-by: Xingang Wang wangxingang5@huawei.com Reviewed-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 70 +++++++++++++++++++++ include/linux/arm-smmu.h | 1 + 2 files changed, 71 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b8595658ad33..7d5ba739b22b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4187,9 +4187,79 @@ static int arm_smmu_device_set_mpam(struct device *dev,
}
+static int arm_smmu_get_mpam(struct arm_smmu_device *smmu, + int sid, int ssid, int *partid, int *pmg, int *s1mpam) +{ + struct arm_smmu_master *master = arm_smmu_find_master(smmu, sid); + struct arm_smmu_domain *domain = master ? master->domain : NULL; + u64 val; + __le64 *ste, *cd; + + if (WARN_ON(!domain)) + return -EINVAL; + if (WARN_ON(!domain->s1_cfg.set)) + return -EINVAL; + if (WARN_ON(ssid >= (1 << domain->s1_cfg.s1cdmax))) + return -E2BIG; + + if (!(smmu->features & ARM_SMMU_FEAT_MPAM)) + return -ENODEV; + + /* get ste ptr */ + ste = arm_smmu_get_step_for_sid(smmu, sid); + + val = le64_to_cpu(ste[4]); + *partid = FIELD_GET(STRTAB_STE_4_PARTID_MASK, val); + + val = le64_to_cpu(ste[5]); + *pmg = FIELD_GET(STRTAB_STE_5_PMG_MASK, val); + + val = le64_to_cpu(ste[1]); + *s1mpam = FIELD_GET(STRTAB_STE_1_S1MPAM, val); + /* return STE mpam configuration when s1mpam == 0 */ + if (!(*s1mpam)) + return 0; + + /* get cd ptr */ + cd = arm_smmu_get_cd_ptr(domain, ssid); + if (WARN_ON(!cd)) + return -ENOMEM; + + val = le64_to_cpu(cd[5]); + *partid = FIELD_GET(CTXDESC_CD_5_PARTID_MASK, val); + *pmg = FIELD_GET(CTXDESC_CD_5_PMG_MASK, val); + + return 0; +} + +static int arm_smmu_device_get_mpam(struct device *dev, + struct arm_smmu_mpam *mpam) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + int ret; + + if (WARN_ON(!master) || WARN_ON(!mpam)) + return -EINVAL; + + if (mpam->flags & ARM_SMMU_DEV_GET_MPAM) { + ret = arm_smmu_get_mpam(master->domain->smmu, + master->streams->id, + mpam->pasid, + &mpam->partid, + &mpam->pmg, + &mpam->s1mpam); + if (ret < 0) + return ret; + } + + return 0; +} + static int arm_smmu_device_get_config(struct device *dev, int type, void *data) { switch (type) { + case ARM_SMMU_MPAM: + return arm_smmu_device_get_mpam(dev, data); default: return -EINVAL; } diff --git a/include/linux/arm-smmu.h b/include/linux/arm-smmu.h index f1f0dfda7d70..52982fd3d723 100644 --- a/include/linux/arm-smmu.h +++ b/include/linux/arm-smmu.h @@ -7,6 +7,7 
@@ enum arm_smmu_device_config_type {
struct arm_smmu_mpam { #define ARM_SMMU_DEV_SET_MPAM (1 << 0) +#define ARM_SMMU_DEV_GET_MPAM (1 << 1) int flags; int pasid; int partid;
From: Xingang Wang wangxingang5@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4L735 CVE: NA
-------------------------------------------------
The user_mpam_en configuration is used to enable/disable whether SMMU mpam configuration will be used. If user_mpam_en is 1, the memory requests across SMMU will not carry the SMMU mpam configuration.
Signed-off-by: Xingang Wang wangxingang5@huawei.com Reviewed-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 47 +++++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 ++ include/linux/arm-smmu.h | 3 ++ 3 files changed, 53 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 7d5ba739b22b..5a7e141d39cc 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4163,6 +4163,25 @@ static int arm_smmu_set_mpam(struct arm_smmu_device *smmu, return 0; }
+static int arm_smmu_set_dev_user_mpam_en(struct device *dev, int user_mpam_en) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu; + u32 reg, __iomem *cfg; + + if (WARN_ON(!master)) + return -EINVAL; + + smmu = master->domain->smmu; + cfg = smmu->base + ARM_SMMU_USER_CFG0; + + reg = readl_relaxed(cfg); + reg &= ~ARM_SMMU_USER_MPAM_EN; + reg |= FIELD_PREP(ARM_SMMU_USER_MPAM_EN, user_mpam_en); + writel(reg, cfg); + return 0; +} + static int arm_smmu_device_set_mpam(struct device *dev, struct arm_smmu_mpam *mpam) { @@ -4183,6 +4202,12 @@ static int arm_smmu_device_set_mpam(struct device *dev, return ret; }
+ if (mpam->flags & ARM_SMMU_DEV_SET_USER_MPAM_EN) { + ret = arm_smmu_set_dev_user_mpam_en(dev, mpam->user_mpam_en); + if (ret < 0) + return ret; + } + return 0;
} @@ -4232,6 +4257,22 @@ static int arm_smmu_get_mpam(struct arm_smmu_device *smmu, return 0; }
+static int arm_smmu_get_dev_user_mpam_en(struct device *dev, int *user_mpam_en) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu; + u32 reg; + + if (WARN_ON(!master)) + return -EINVAL; + + smmu = master->domain->smmu; + + reg = readl_relaxed(smmu->base + ARM_SMMU_USER_CFG0); + *user_mpam_en = FIELD_GET(ARM_SMMU_USER_MPAM_EN, reg); + return 0; +} + static int arm_smmu_device_get_mpam(struct device *dev, struct arm_smmu_mpam *mpam) { @@ -4252,6 +4293,12 @@ static int arm_smmu_device_get_mpam(struct device *dev, return ret; }
+ if (mpam->flags & ARM_SMMU_DEV_GET_USER_MPAM_EN) { + ret = arm_smmu_get_dev_user_mpam_en(dev, &mpam->user_mpam_en); + if (ret < 0) + return ret; + } + return 0; }
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 7ea791a13e9b..0be76a9c15c0 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -168,6 +168,9 @@ #define MPAMIDR_PMG_MAX GENMASK(23, 16) #define MPAMIDR_PARTID_MAX GENMASK(15, 0)
+#define ARM_SMMU_USER_CFG0 0xe00 +#define ARM_SMMU_USER_MPAM_EN (1UL << 30) + #define ARM_SMMU_IDR6 0x190 #define IDR6_LOG2NUMP GENMASK(27, 24) #define IDR6_LOG2NUMQ GENMASK(19, 16) diff --git a/include/linux/arm-smmu.h b/include/linux/arm-smmu.h index 52982fd3d723..502b86e7834a 100644 --- a/include/linux/arm-smmu.h +++ b/include/linux/arm-smmu.h @@ -8,11 +8,14 @@ enum arm_smmu_device_config_type { struct arm_smmu_mpam { #define ARM_SMMU_DEV_SET_MPAM (1 << 0) #define ARM_SMMU_DEV_GET_MPAM (1 << 1) +#define ARM_SMMU_DEV_SET_USER_MPAM_EN (1 << 2) +#define ARM_SMMU_DEV_GET_USER_MPAM_EN (1 << 3) int flags; int pasid; int partid; int pmg; int s1mpam; + int user_mpam_en; };
#endif
From: Xingang Wang wangxingang5@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4L735 CVE: NA
-------------------------------------------------
The rdt_mon_capable should be enabled when the msmon_mbwu feature is supported, so that the mpam monitor function can be effective.
Signed-off-by: Xingang Wang wangxingang5@huawei.com Reviewed-by: Wang ShaoBo bobo.shaobowang@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/kernel/mpam/mpam_setup.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/kernel/mpam/mpam_setup.c b/arch/arm64/kernel/mpam/mpam_setup.c index aae4a0be6304..d30910e0cda2 100644 --- a/arch/arm64/kernel/mpam/mpam_setup.c +++ b/arch/arm64/kernel/mpam/mpam_setup.c @@ -335,6 +335,7 @@ static void mpam_resctrl_pick_event_mbm_local(void)
if (mpam_has_feature(mpam_feat_msmon_mbwu, res->class->features)) { res->resctrl_res.mon_capable = true; + rdt_mon_capable = true; mpam_resctrl_events[QOS_L3_MBM_LOCAL_EVENT_ID] = *res; } }
From: Xingang Wang wangxingang5@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4L735 CVE: NA
-------------------------------------------------
The mpam driver controls the allocation of rmid, however there are some occasions where the partid and pmg of a group might come from elsewhere. This adds support for modifying a group's rmid. The sysfs rmid file is made writable, and a write interface is added to accept an rmid from the user. When the rmid from the user is valid and differs from the current one, the group is updated with the new rmid. If an error occurs, the group is rolled back to the old rmid.
Signed-off-by: Xingang Wang wangxingang5@huawei.com Reviewed-by: Wang ShaoBo bobo.shaobowang@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/kernel/mpam/mpam_resctrl.c | 128 ++++++++++++++++++++++++-- 1 file changed, 120 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/kernel/mpam/mpam_resctrl.c b/arch/arm64/kernel/mpam/mpam_resctrl.c index 0746800feb5d..7b8d77666516 100644 --- a/arch/arm64/kernel/mpam/mpam_resctrl.c +++ b/arch/arm64/kernel/mpam/mpam_resctrl.c @@ -803,6 +803,11 @@ static void unset_rmid_remap_bmp_occ(unsigned long *bmp) set_bit(0, bmp); }
+static int is_rmid_remap_bmp_bdr_set(unsigned long *bmp, int b) +{ + return (test_bit(b + 1, bmp) == 0) ? 1 : 0; +} + static void rmid_remap_bmp_bdr_set(unsigned long *bmp, int b) { set_bit(b + 1, bmp); @@ -909,11 +914,11 @@ static int rmid_to_partid_pmg(int rmid, int *partid, int *pmg) return 0; }
-static int __rmid_alloc(int partid) +static int __rmid_alloc(int partid, int pmg) { int stride = 0; int partid_sel = 0; - int ret, pmg; + int ret; int rmid[2] = {-1, -1}; unsigned long **cmp, **bmp;
@@ -928,10 +933,19 @@ static int __rmid_alloc(int partid) continue; set_rmid_remap_bmp_occ(*bmp);
- ret = rmid_remap_bmp_alloc_pmg(*bmp); - if (ret < 0) - goto out; - pmg = ret; + if (pmg >= 0) { + if (is_rmid_remap_bmp_bdr_set(*bmp, pmg)) { + ret = -EEXIST; + goto out; + } + rmid_remap_bmp_bdr_clear(*bmp, pmg); + } else { + ret = rmid_remap_bmp_alloc_pmg(*bmp); + if (ret < 0) + goto out; + pmg = ret; + } + rmid[stride] = to_rmid(partid + stride, pmg); if (STRIDE_INC_CHK(stride)) break; @@ -971,7 +985,7 @@ static int __rmid_alloc(int partid)
int rmid_alloc(int partid) { - return __rmid_alloc(partid); + return __rmid_alloc(partid, -1); }
void rmid_free(int rmid) @@ -1809,6 +1823,103 @@ static int resctrl_group_rmid_show(struct kernfs_open_file *of, return ret; }
+static ssize_t resctrl_group_rmid_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct rdtgroup *rdtgrp; + int ret = 0; + int partid; + int pmg; + int rmid; + int old_rmid; + int old_reqpartid; + struct task_struct *p, *t; + + if (kstrtoint(strstrip(buf), 0, &rmid) || rmid < 0) + return -EINVAL; + + rdtgrp = resctrl_group_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto unlock; + } + + rdt_last_cmd_clear(); + + if (rmid == 0 || rdtgrp->mon.rmid == 0) { + ret = -EINVAL; + goto unlock; + } + + ret = rmid_to_partid_pmg(rmid, &partid, &pmg); + if (ret < 0) { + ret = -EINVAL; + goto unlock; + } + + if (rmid == rdtgrp->mon.rmid) + goto unlock; + + if (rdtgrp->type != RDTCTRL_GROUP || + !list_empty(&rdtgrp->mon.crdtgrp_list)) { + rdt_last_cmd_puts("unsupported operation\n"); + goto unlock; + } + + ret = __rmid_alloc(partid, pmg); + if (ret < 0) { + rdt_last_cmd_puts("set rmid failed\n"); + goto unlock; + } + + old_rmid = rdtgrp->mon.rmid; + old_reqpartid = rdtgrp->closid.reqpartid; + + /* + * we use intpartid as group control, use reqpartid for config + * synchronization and monitor, only update the reqpartid + */ + rdtgrp->closid.reqpartid = partid; + rdtgrp->mon.rmid = rmid; + + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { + if (t->closid == rdtgrp->closid.intpartid) { + ret = __resctrl_group_move_task(t, rdtgrp); + if (ret) { + read_unlock(&tasklist_lock); + goto rollback; + } + } + } + read_unlock(&tasklist_lock); + + update_closid_rmid(&rdtgrp->cpu_mask, rdtgrp); + rmid_free(old_rmid); + +unlock: + resctrl_group_kn_unlock(of->kn); + if (ret) + return ret; + + return nbytes; + +rollback: + rdtgrp->mon.rmid = old_rmid; + rdtgrp->closid.reqpartid = old_reqpartid; + + read_lock(&tasklist_lock); + for_each_process_thread(p, t) { + if (t->closid == rdtgrp->closid.intpartid) + WARN_ON_ONCE(__resctrl_group_move_task(t, rdtgrp)); + } + read_unlock(&tasklist_lock); + + rmid_free(rmid); + 
resctrl_group_kn_unlock(of->kn); + return ret; +} + /* rdtgroup information files for one cache resource. */ static struct rftype res_specific_files[] = { { @@ -1908,8 +2019,9 @@ static struct rftype res_specific_files[] = { }, { .name = "rmid", - .mode = 0444, + .mode = 0644, .kf_ops = &resctrl_group_kf_single_ops, + .write = resctrl_group_rmid_write, .seq_show = resctrl_group_rmid_show, .fflags = RFTYPE_BASE, },
From: Xingang Wang wangxingang5@huawei.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4L735 CVE: NA
-------------------------------------------------
When a group's rmid changes, as introduced by 6bbf2791b ("mpam: Add support for group rmid modify"), the sysfs monitor data file rmid needs to be updated as well. This adds support for updating the rmid for monitoring and then resyncing the group configuration. When the update fails, roll back to the previous rmid.
Signed-off-by: Xingang Wang wangxingang5@huawei.com Reviewed-by: Wang ShaoBo bobo.shaobowang@huawei.com Reviewed-by: Weilong Chen chenweilong@huawei.com Acked-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/include/asm/resctrl.h | 1 + arch/arm64/kernel/mpam/mpam_ctrlmon.c | 93 +++++++++++++++++++++++---- arch/arm64/kernel/mpam/mpam_resctrl.c | 21 ++++++ 3 files changed, 102 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/include/asm/resctrl.h b/arch/arm64/include/asm/resctrl.h index f2157df661af..f43fee368098 100644 --- a/arch/arm64/include/asm/resctrl.h +++ b/arch/arm64/include/asm/resctrl.h @@ -162,6 +162,7 @@ struct rdtgroup { atomic_t waitcount; enum rdt_group_type type; struct mongroup mon; + int resync; };
enum resctrl_ctrl_type { diff --git a/arch/arm64/kernel/mpam/mpam_ctrlmon.c b/arch/arm64/kernel/mpam/mpam_ctrlmon.c index b1d32d432556..724bed6a8e2c 100644 --- a/arch/arm64/kernel/mpam/mpam_ctrlmon.c +++ b/arch/arm64/kernel/mpam/mpam_ctrlmon.c @@ -185,6 +185,55 @@ resctrl_dom_ctrl_config(bool cdp_both_ctrl, struct resctrl_resource *r, } }
+/** + * Resync resctrl group domain ctrls, use rdtgrp->resync to indicate + * whether the resync procedure will be called. When resync==1, all + * domain ctrls of this group be synchronized again. This happens + * when rmid of this group is changed, and all configurations need to + * be remapped again accordingly. + */ +static void resctrl_group_resync_domain_ctrls(struct rdtgroup *rdtgrp, + struct resctrl_resource *r, struct rdt_domain *dom) +{ + int i; + int staged_start, staged_end; + struct resctrl_staged_config *cfg; + struct sd_closid closid; + struct list_head *head; + struct rdtgroup *entry; + struct msr_param para; + bool cdp_both_ctrl; + + cfg = dom->staged_cfg; + para.closid = &closid; + + staged_start = (r->cdp_enable) ? CDP_CODE : CDP_BOTH; + staged_end = (r->cdp_enable) ? CDP_DATA : CDP_BOTH; + + for (i = staged_start; i <= staged_end; i++) { + cdp_both_ctrl = cfg[i].cdp_both_ctrl; + /* + * for ctrl group configuration, hw_closid of cfg[i] equals + * to rdtgrp->closid.intpartid. + */ + closid.intpartid = hw_closid_val(cfg[i].hw_closid); + resctrl_cdp_mpamid_map_val(rdtgrp->closid.reqpartid, + cfg[i].conf_type, closid.reqpartid); + resctrl_dom_ctrl_config(cdp_both_ctrl, r, dom, &para); + + /* + * we should synchronize all child mon groups' + * configuration from this ctrl rdtgrp + */ + head = &rdtgrp->mon.crdtgrp_list; + list_for_each_entry(entry, head, mon.crdtgrp_list) { + resctrl_cdp_mpamid_map_val(entry->closid.reqpartid, + cfg[i].conf_type, closid.reqpartid); + resctrl_dom_ctrl_config(cdp_both_ctrl, r, dom, &para); + } + } +} + static void resctrl_group_update_domain_ctrls(struct rdtgroup *rdtgrp, struct resctrl_resource *r, struct rdt_domain *dom) { @@ -247,8 +296,12 @@ static int resctrl_group_update_domains(struct rdtgroup *rdtgrp, { struct rdt_domain *d;
- list_for_each_entry(d, &r->domains, list) - resctrl_group_update_domain_ctrls(rdtgrp, r, d); + list_for_each_entry(d, &r->domains, list) { + if (rdtgrp->resync) + resctrl_group_resync_domain_ctrls(rdtgrp, r, d); + else + resctrl_group_update_domain_ctrls(rdtgrp, r, d); + }
return 0; } @@ -663,20 +716,31 @@ static int resctrl_mkdir_mondata_dom(struct kernfs_node *parent_kn,
md.u.cdp_both_mon = s->cdp_mc_both;
+ if (!parent_kn) { + pr_err("%s: error parent_kn null\n", __func__); + return -EINVAL; + } + snprintf(name, sizeof(name), "mon_%s_%02d", s->name, d->id); - kn = __kernfs_create_file(parent_kn, name, 0444, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, - &kf_mondata_ops, md.priv, NULL, NULL); - if (IS_ERR(kn)) - return PTR_ERR(kn); - - ret = resctrl_group_kn_set_ugid(kn); - if (ret) { - pr_info("%s: create name %s, error ret %d\n", __func__, name, ret); - kernfs_remove(kn); - return ret; + kn = kernfs_find_and_get(parent_kn, name); + if (!kn) { + kn = __kernfs_create_file(parent_kn, name, 0444, + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, + &kf_mondata_ops, md.priv, NULL, NULL); + if (IS_ERR(kn)) + return PTR_ERR(kn); + + ret = resctrl_group_kn_set_ugid(kn); + if (ret) { + pr_info("%s: create name %s, error ret %d\n", + __func__, name, ret); + kernfs_remove(kn); + return ret; + } }
+ kn->priv = md.priv; + /* Could we remove the MATCH_* param ? */ rr->mon_write(d, md.priv);
@@ -954,5 +1018,8 @@ int resctrl_update_groups_config(struct rdtgroup *rdtgrp) } }
+ /* after resync all configurations, restore resync to 0 */ + rdtgrp->resync = 0; + return ret; } diff --git a/arch/arm64/kernel/mpam/mpam_resctrl.c b/arch/arm64/kernel/mpam/mpam_resctrl.c index 7b8d77666516..a9b99a0f347f 100644 --- a/arch/arm64/kernel/mpam/mpam_resctrl.c +++ b/arch/arm64/kernel/mpam/mpam_resctrl.c @@ -1882,6 +1882,21 @@ static ssize_t resctrl_group_rmid_write(struct kernfs_open_file *of, rdtgrp->closid.reqpartid = partid; rdtgrp->mon.rmid = rmid;
+ /* update rmid for mondata */ + ret = resctrl_mkdir_mondata_all_subdir(rdtgrp->mon.mon_data_kn, rdtgrp); + if (ret) { + rdt_last_cmd_puts("update rmid for mondata failed\n"); + goto rollback; + } + + /* resync groups configuration */ + rdtgrp->resync = 1; + ret = resctrl_update_groups_config(rdtgrp); + if (ret) { + rdt_last_cmd_puts("update groups config failed\n"); + goto rollback; + } + read_lock(&tasklist_lock); for_each_process_thread(p, t) { if (t->closid == rdtgrp->closid.intpartid) { @@ -1908,6 +1923,12 @@ static ssize_t resctrl_group_rmid_write(struct kernfs_open_file *of, rdtgrp->mon.rmid = old_rmid; rdtgrp->closid.reqpartid = old_reqpartid;
+ /* the old rmid is valid, so mkdir mondata here won't fail */ + resctrl_mkdir_mondata_all_subdir(rdtgrp->mon.mon_data_kn, rdtgrp); + + rdtgrp->resync = 1; + WARN_ON_ONCE(resctrl_update_groups_config(rdtgrp)); + read_lock(&tasklist_lock); for_each_process_thread(p, t) { if (t->closid == rdtgrp->closid.intpartid)