From: Mimi Zohar zohar@linux.ibm.com
stable inclusion from stable-5.10.47 commit 3051f230f19feb02dfe5b36794f8c883b576e184 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 0c18f29aae7ce3dadd26d8ee3505d07cc982df75 ]
Irrespective as to whether CONFIG_MODULE_SIG is configured, specifying "module.sig_enforce=1" on the boot command line sets "sig_enforce". Only allow "sig_enforce" to be set when CONFIG_MODULE_SIG is configured.
This patch makes the presence of /sys/module/module/parameters/sig_enforce dependent on CONFIG_MODULE_SIG=y.
Fixes: fda784e50aac ("module: export module signature enforcement status") Reported-by: Nayna Jain nayna@linux.ibm.com Tested-by: Mimi Zohar zohar@linux.ibm.com Tested-by: Jessica Yu jeyu@kernel.org Signed-off-by: Mimi Zohar zohar@linux.ibm.com Signed-off-by: Jessica Yu jeyu@kernel.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- kernel/module.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/kernel/module.c b/kernel/module.c index a54dda703feb..c5af21dcb873 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -272,9 +272,18 @@ static void module_assert_mutex_or_preempt(void) #endif }
+#ifdef CONFIG_MODULE_SIG static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644);
+void set_module_sig_enforced(void) +{ + sig_enforce = true; +} +#else +#define sig_enforce false +#endif + /* * Export sig_enforce kernel cmdline parameter to allow other subsystems rely * on that instead of directly to CONFIG_MODULE_SIG_FORCE config. @@ -285,11 +294,6 @@ bool is_module_sig_enforced(void) } EXPORT_SYMBOL(is_module_sig_enforced);
-void set_module_sig_enforced(void) -{ - sig_enforce = true; -} - /* Block module loading/unloading? */ int modules_disabled = 0; core_param(nomodule, modules_disabled, bint, 0);
From: Yifan Zhang yifan1.zhang@amd.com
stable inclusion from stable-5.10.47 commit 1bd81429d53ded4e111616c755a64fad80849354 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit ee5468b9f1d3bf48082eed351dace14598e8ca39 upstream.
This reverts commit 4cbbe34807938e6e494e535a68d5ff64edac3f20.
Reason for revert: side effect of enlarging CP_MEC_DOORBELL_RANGE may cause some APUs fail to enter gfxoff in certain user cases.
Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Acked-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1859d293ef71..fb15e8b5af32 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3619,12 +3619,8 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - /* If GC has entered CGPG, ringing doorbell > first page doesn't - * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround - * this issue. - */ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell.size - 4)); + (adev->doorbell_index.userqueue_end * 2) << 2); }
WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
From: Yifan Zhang yifan1.zhang@amd.com
stable inclusion from stable-5.10.47 commit fea853aca3210c21dfcf07bb82d501b7fd1900a7 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit baacf52a473b24e10322b67757ddb92ab8d86717 upstream.
This reverts commit 1c0b0efd148d5b24c4932ddb3fa03c8edd6097b3.
Reason for revert: Side effect of enlarging CP_MEC_DOORBELL_RANGE may cause some APUs fail to enter gfxoff in certain user cases.
Signed-off-by: Yifan Zhang yifan1.zhang@amd.com Acked-by: Alex Deucher alexander.deucher@amd.com Signed-off-by: Alex Deucher alexander.deucher@amd.com Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 3c92dacbc24a..fc8da5fed779 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -6590,12 +6590,8 @@ static int gfx_v10_0_kiq_init_register(struct amdgpu_ring *ring) if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, (adev->doorbell_index.kiq * 2) << 2); - /* If GC has entered CGPG, ringing doorbell > first page doesn't - * wakeup GC. Enlarge CP_MEC_DOORBELL_RANGE_UPPER to workaround - * this issue. - */ WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (adev->doorbell.size - 4)); + (adev->doorbell_index.userqueue_end * 2) << 2); }
WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
From: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com
stable inclusion from stable-5.10.47 commit 3ef0ca0ec995fe5012d70a31f01826e12f323d0e bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 1815d9c86e3090477fbde066ff314a7e9721ee0f upstream.
While checking the master status of the DRM file in drm_is_current_master(), the device's master mutex should be held. Without the mutex, the pointer fpriv->master may be freed concurrently by another process calling drm_setmaster_ioctl(). This could lead to use-after-free errors when the pointer is subsequently dereferenced in drm_lease_owner().
The callers of drm_is_current_master() from drm_auth.c hold the device's master mutex, but external callers do not. Hence, we implement drm_is_current_master_locked() to be used within drm_auth.c, and modify drm_is_current_master() to grab the device's master mutex before checking the master status.
Reported-by: Daniel Vetter daniel.vetter@ffwll.ch Signed-off-by: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com Reviewed-by: Emil Velikov emil.l.velikov@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter daniel.vetter@ffwll.ch Link: https://patchwork.freedesktop.org/patch/msgid/20210620110327.4964-2-desmondc... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/gpu/drm/drm_auth.c | 51 ++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 232abbba3686..86d4b72e95cb 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -61,6 +61,35 @@ * trusted clients. */
+static bool drm_is_current_master_locked(struct drm_file *fpriv) +{ + lockdep_assert_held_once(&fpriv->master->dev->master_mutex); + + return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master; +} + +/** + * drm_is_current_master - checks whether @priv is the current master + * @fpriv: DRM file private + * + * Checks whether @fpriv is current master on its device. This decides whether a + * client is allowed to run DRM_MASTER IOCTLs. + * + * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting + * - the current master is assumed to own the non-shareable display hardware. + */ +bool drm_is_current_master(struct drm_file *fpriv) +{ + bool ret; + + mutex_lock(&fpriv->master->dev->master_mutex); + ret = drm_is_current_master_locked(fpriv); + mutex_unlock(&fpriv->master->dev->master_mutex); + + return ret; +} +EXPORT_SYMBOL(drm_is_current_master); + int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_auth *auth = data; @@ -223,7 +252,7 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data, if (ret) goto out_unlock;
- if (drm_is_current_master(file_priv)) + if (drm_is_current_master_locked(file_priv)) goto out_unlock;
if (dev->master) { @@ -272,7 +301,7 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data, if (ret) goto out_unlock;
- if (!drm_is_current_master(file_priv)) { + if (!drm_is_current_master_locked(file_priv)) { ret = -EINVAL; goto out_unlock; } @@ -321,7 +350,7 @@ void drm_master_release(struct drm_file *file_priv) if (file_priv->magic) idr_remove(&file_priv->master->magic_map, file_priv->magic);
- if (!drm_is_current_master(file_priv)) + if (!drm_is_current_master_locked(file_priv)) goto out;
drm_legacy_lock_master_cleanup(dev, master); @@ -342,22 +371,6 @@ void drm_master_release(struct drm_file *file_priv) mutex_unlock(&dev->master_mutex); }
-/** - * drm_is_current_master - checks whether @priv is the current master - * @fpriv: DRM file private - * - * Checks whether @fpriv is current master on its device. This decides whether a - * client is allowed to run DRM_MASTER IOCTLs. - * - * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting - * - the current master is assumed to own the non-shareable display hardware. - */ -bool drm_is_current_master(struct drm_file *fpriv) -{ - return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master; -} -EXPORT_SYMBOL(drm_is_current_master); - /** * drm_master_get - reference a master pointer * @master: &struct drm_master
From: Christian König christian.koenig@amd.com
stable inclusion from stable-5.10.47 commit bcfea2412f4b3fa38ae91dd1640b7c63b3a8295e bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 17b11f71795abdce46f62a808f906857e525cea8 upstream.
We actually need to wait for the moving fence after pinning the BO to make sure that the pin is completed.
v2: grab the lock while waiting
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch References: https://lore.kernel.org/dri-devel/20210621151758.2347474-1-daniel.vetter@ffw... CC: stable@kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20210622114506.106349-1-christ... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/gpu/drm/nouveau/nouveau_prime.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index b2ecb91f8ddc..5f5b87f99546 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -111,7 +111,22 @@ int nouveau_gem_prime_pin(struct drm_gem_object *obj) if (ret) return -EINVAL;
- return 0; + ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); + if (ret) + goto error; + + if (nvbo->bo.moving) + ret = dma_fence_wait(nvbo->bo.moving, true); + + ttm_bo_unreserve(&nvbo->bo); + if (ret) + goto error; + + return ret; + +error: + nouveau_bo_unpin(nvbo); + return ret; }
void nouveau_gem_prime_unpin(struct drm_gem_object *obj)
From: Christian König christian.koenig@amd.com
stable inclusion from stable-5.10.47 commit 694bb36aa75da5fa433ab0601989d58d71cf7f10 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 4b41726aae563273bb4b4a9462ba51ce4d372f78 upstream.
We actually need to wait for the moving fence after pinning the BO to make sure that the pin is completed.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch References: https://lore.kernel.org/dri-devel/20210621151758.2347474-1-daniel.vetter@ffw... CC: stable@kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20210622114506.106349-2-christ... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/gpu/drm/radeon/radeon_prime.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index b9de0e51c0be..cbad81578190 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -94,9 +94,19 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj)
/* pin buffer into GTT */ ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL); - if (likely(ret == 0)) - bo->prime_shared_count++; - + if (unlikely(ret)) + goto error; + + if (bo->tbo.moving) { + ret = dma_fence_wait(bo->tbo.moving, false); + if (unlikely(ret)) { + radeon_bo_unpin(bo); + goto error; + } + } + + bo->prime_shared_count++; +error: radeon_bo_unreserve(bo); return ret; }
From: Christian König christian.koenig@amd.com
stable inclusion from stable-5.10.47 commit 3d6c4f78ec6165dd89e0799dd26987b9d9c07ccd bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 8ddf5b9bb479570a3825d70fecfb9399bc15700c upstream.
We actually need to wait for the moving fence after pinning the BO to make sure that the pin is completed.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Daniel Vetter daniel.vetter@ffwll.ch References: https://lore.kernel.org/dri-devel/20210621151758.2347474-1-daniel.vetter@ffw... CC: stable@kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20210622114506.106349-3-christ... Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 1b56dbc1f304..e93ccdc5faf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -238,9 +238,21 @@ static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach) { struct drm_gem_object *obj = attach->dmabuf->priv; struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + int r;
/* pin buffer into GTT */ - return amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT); + if (r) + return r; + + if (bo->tbo.moving) { + r = dma_fence_wait(bo->tbo.moving, true); + if (r) { + amdgpu_bo_unpin(bo); + return r; + } + } + return 0; }
/**
From: Arnd Bergmann arnd@arndb.de
stable inclusion from stable-5.10.47 commit b8fd230ae085747a556f0e4fddd8507a168edc1e bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit dad7b9896a5dbac5da8275d5a6147c65c81fb5f2 upstream.
When building the kernel wtih gcc-10 or higher using the CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y flag, the compiler picks a slightly different set of registers for the inline assembly in cpu_init() that subsequently results in a corrupt kernel stack as well as remaining in FIQ mode. If a banked register is used for the last argument, the wrong version of that register gets loaded into CPSR_c. When building in Arm mode, the arguments are passed as immediate values and the bug cannot happen.
This got introduced when Daniel reworked the FIQ handling and was technically always broken, but happened to work with both clang and gcc before gcc-10 as long as they picked one of the lower registers. This is probably an indication that still very few people build the kernel in Thumb2 mode.
Marek pointed out the problem on IRC, Arnd narrowed it down to this inline assembly and Russell pinpointed the exact bug.
Change the constraints to force the final mode switch to use a non-banked register for the argument to ensure that the correct constant gets loaded. Another alternative would be to always use registers for the constant arguments to avoid the #ifdef that has now become more complex.
Cc: stable@vger.kernel.org # v3.18+ Cc: Daniel Thompson daniel.thompson@linaro.org Reported-by: Marek Vasut marek.vasut@gmail.com Acked-by: Ard Biesheuvel ardb@kernel.org Fixes: c0e7f7ee717e ("ARM: 8150/3: fiq: Replace default FIQ handler") Signed-off-by: Arnd Bergmann arnd@arndb.de Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm/kernel/setup.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index e37ebb7f1b0b..d7ae50b32c7b 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -545,9 +545,11 @@ void notrace cpu_init(void) * In Thumb-2, msr with an immediate value is not allowed. */ #ifdef CONFIG_THUMB2_KERNEL -#define PLC "r" +#define PLC_l "l" +#define PLC_r "r" #else -#define PLC "I" +#define PLC_l "I" +#define PLC_r "I" #endif
/* @@ -569,15 +571,15 @@ void notrace cpu_init(void) "msr cpsr_c, %9" : : "r" (stk), - PLC (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | IRQ_MODE), "I" (offsetof(struct stack, irq[0])), - PLC (PSR_F_BIT | PSR_I_BIT | ABT_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | ABT_MODE), "I" (offsetof(struct stack, abt[0])), - PLC (PSR_F_BIT | PSR_I_BIT | UND_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | UND_MODE), "I" (offsetof(struct stack, und[0])), - PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), + PLC_r (PSR_F_BIT | PSR_I_BIT | FIQ_MODE), "I" (offsetof(struct stack, fiq[0])), - PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE) + PLC_l (PSR_F_BIT | PSR_I_BIT | SVC_MODE) : "r14"); #endif }
From: Neil Armstrong narmstrong@baylibre.com
stable inclusion from stable-5.10.47 commit 03096a46019e733db2f438dc6c1eec64293174f5 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 103a5348c22c3fca8b96c735a9e353b8a0801842 upstream.
It has been reported that usage of memcpy() to/from an iomem mapping is invalid, and a recent arm64 memcpy update [1] triggers a memory abort when dram-access-quirk is used on the G12A/G12B platforms.
This adds a local sg_copy_to_buffer which makes usage of io versions of memcpy when dram-access-quirk is enabled.
[1] 285133040e6c ("arm64: Import latest memcpy()/memmove() implementation")
Fixes: acdc8e71d9bb ("mmc: meson-gx: add dram-access-quirk") Reported-by: Marek Szyprowski m.szyprowski@samsung.com Suggested-by: Mark Rutland mark.rutland@arm.com Signed-off-by: Neil Armstrong narmstrong@baylibre.com Tested-by: Marek Szyprowski m.szyprowski@samsung.com Link: https://lore.kernel.org/r/20210609150230.9291-1-narmstrong@baylibre.com Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/mmc/host/meson-gx-mmc.c | 50 +++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 5 deletions(-)
diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 4ec41579940a..d3f40c9a8c6c 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -165,6 +165,7 @@ struct meson_host {
unsigned int bounce_buf_size; void *bounce_buf; + void __iomem *bounce_iomem_buf; dma_addr_t bounce_dma_addr; struct sd_emmc_desc *descs; dma_addr_t descs_dma_addr; @@ -734,6 +735,47 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg) writel(start, host->regs + SD_EMMC_START); }
+/* local sg copy to buffer version with _to/fromio usage for dram_access_quirk */ +static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data, + size_t buflen, bool to_buffer) +{ + unsigned int sg_flags = SG_MITER_ATOMIC; + struct scatterlist *sgl = data->sg; + unsigned int nents = data->sg_len; + struct sg_mapping_iter miter; + unsigned int offset = 0; + + if (to_buffer) + sg_flags |= SG_MITER_FROM_SG; + else + sg_flags |= SG_MITER_TO_SG; + + sg_miter_start(&miter, sgl, nents, sg_flags); + + while ((offset < buflen) && sg_miter_next(&miter)) { + unsigned int len; + + len = min(miter.length, buflen - offset); + + /* When dram_access_quirk, the bounce buffer is a iomem mapping */ + if (host->dram_access_quirk) { + if (to_buffer) + memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len); + else + memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len); + } else { + if (to_buffer) + memcpy(host->bounce_buf + offset, miter.addr, len); + else + memcpy(miter.addr, host->bounce_buf + offset, len); + } + + offset += len; + } + + sg_miter_stop(&miter); +} + static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) { struct meson_host *host = mmc_priv(mmc); @@ -777,8 +819,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) if (data->flags & MMC_DATA_WRITE) { cmd_cfg |= CMD_CFG_DATA_WR; WARN_ON(xfer_bytes > host->bounce_buf_size); - sg_copy_to_buffer(data->sg, data->sg_len, - host->bounce_buf, xfer_bytes); + meson_mmc_copy_buffer(host, data, xfer_bytes, true); dma_wmb(); }
@@ -947,8 +988,7 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) if (meson_mmc_bounce_buf_read(data)) { xfer_bytes = data->blksz * data->blocks; WARN_ON(xfer_bytes > host->bounce_buf_size); - sg_copy_from_buffer(data->sg, data->sg_len, - host->bounce_buf, xfer_bytes); + meson_mmc_copy_buffer(host, data, xfer_bytes, false); }
next_cmd = meson_mmc_get_next_command(cmd); @@ -1168,7 +1208,7 @@ static int meson_mmc_probe(struct platform_device *pdev) * instead of the DDR memory */ host->bounce_buf_size = SD_EMMC_SRAM_DATA_BUF_LEN; - host->bounce_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF; + host->bounce_iomem_buf = host->regs + SD_EMMC_SRAM_DATA_BUF_OFF; host->bounce_dma_addr = res->start + SD_EMMC_SRAM_DATA_BUF_OFF; } else { /* data bounce buffer */
From: Nathan Chancellor nathan@kernel.org
stable inclusion from stable-5.10.47 commit 3bbdf5a6fcd2467a8e4854c6dba320369713bd0c bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit e607ff630c6053ecc67502677c0e50053d7892d4 upstream.
With the latest mkimage from U-Boot 2021.04, the generic defconfigs no longer build, failing with:
/usr/bin/mkimage: verify_header failed for FIT Image support with exit code 1
This is expected after the linked U-Boot commits because '@' is forbidden in the node names due to the way that libfdt treats nodes with the same prefix but different unit addresses.
Switch the '@' in the node name to '-'. Drop the unit addresses from the hash and kernel child nodes because there is only one node so they do not need to have a number to differentiate them.
Cc: stable@vger.kernel.org Link: https://source.denx.de/u-boot/u-boot/-/commit/79af75f7776fc20b0d7eb6afe1e27c... Link: https://source.denx.de/u-boot/u-boot/-/commit/3f04db891a353f4b127ed57279279f... Suggested-by: Simon Glass sjg@chromium.org Signed-off-by: Nathan Chancellor nathan@kernel.org Reviewed-by: Tom Rini trini@konsulko.com Signed-off-by: Thomas Bogendoerfer tsbogend@alpha.franken.de [nathan: Backport to 5.10, only apply to .its.S files that exist] Signed-off-by: Nathan Chancellor nathan@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/mips/generic/board-boston.its.S | 10 +++++----- arch/mips/generic/board-ni169445.its.S | 10 +++++----- arch/mips/generic/board-ocelot.its.S | 20 ++++++++++---------- arch/mips/generic/board-xilfpga.its.S | 10 +++++----- arch/mips/generic/vmlinux.its.S | 10 +++++----- 5 files changed, 30 insertions(+), 30 deletions(-)
diff --git a/arch/mips/generic/board-boston.its.S b/arch/mips/generic/board-boston.its.S index a7f51f97b910..c45ad2759421 100644 --- a/arch/mips/generic/board-boston.its.S +++ b/arch/mips/generic/board-boston.its.S @@ -1,22 +1,22 @@ / { images { - fdt@boston { + fdt-boston { description = "img,boston Device Tree"; data = /incbin/("boot/dts/img/boston.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; };
configurations { - conf@boston { + conf-boston { description = "Boston Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@boston"; + kernel = "kernel"; + fdt = "fdt-boston"; }; }; }; diff --git a/arch/mips/generic/board-ni169445.its.S b/arch/mips/generic/board-ni169445.its.S index e4cb4f95a8cc..0a2e8f7a8526 100644 --- a/arch/mips/generic/board-ni169445.its.S +++ b/arch/mips/generic/board-ni169445.its.S @@ -1,22 +1,22 @@ / { images { - fdt@ni169445 { + fdt-ni169445 { description = "NI 169445 device tree"; data = /incbin/("boot/dts/ni/169445.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; };
configurations { - conf@ni169445 { + conf-ni169445 { description = "NI 169445 Linux Kernel"; - kernel = "kernel@0"; - fdt = "fdt@ni169445"; + kernel = "kernel"; + fdt = "fdt-ni169445"; }; }; }; diff --git a/arch/mips/generic/board-ocelot.its.S b/arch/mips/generic/board-ocelot.its.S index 3da23988149a..8c7e3a1b68d3 100644 --- a/arch/mips/generic/board-ocelot.its.S +++ b/arch/mips/generic/board-ocelot.its.S @@ -1,40 +1,40 @@ /* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ / { images { - fdt@ocelot_pcb123 { + fdt-ocelot_pcb123 { description = "MSCC Ocelot PCB123 Device Tree"; data = /incbin/("boot/dts/mscc/ocelot_pcb123.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; };
- fdt@ocelot_pcb120 { + fdt-ocelot_pcb120 { description = "MSCC Ocelot PCB120 Device Tree"; data = /incbin/("boot/dts/mscc/ocelot_pcb120.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; };
configurations { - conf@ocelot_pcb123 { + conf-ocelot_pcb123 { description = "Ocelot Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@ocelot_pcb123"; + kernel = "kernel"; + fdt = "fdt-ocelot_pcb123"; };
- conf@ocelot_pcb120 { + conf-ocelot_pcb120 { description = "Ocelot Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@ocelot_pcb120"; + kernel = "kernel"; + fdt = "fdt-ocelot_pcb120"; }; }; }; diff --git a/arch/mips/generic/board-xilfpga.its.S b/arch/mips/generic/board-xilfpga.its.S index a2e773d3f14f..08c1e900eb4e 100644 --- a/arch/mips/generic/board-xilfpga.its.S +++ b/arch/mips/generic/board-xilfpga.its.S @@ -1,22 +1,22 @@ / { images { - fdt@xilfpga { + fdt-xilfpga { description = "MIPSfpga (xilfpga) Device Tree"; data = /incbin/("boot/dts/xilfpga/nexys4ddr.dtb"); type = "flat_dt"; arch = "mips"; compression = "none"; - hash@0 { + hash { algo = "sha1"; }; }; };
configurations { - conf@xilfpga { + conf-xilfpga { description = "MIPSfpga Linux kernel"; - kernel = "kernel@0"; - fdt = "fdt@xilfpga"; + kernel = "kernel"; + fdt = "fdt-xilfpga"; }; }; }; diff --git a/arch/mips/generic/vmlinux.its.S b/arch/mips/generic/vmlinux.its.S index 1a08438fd893..3e254676540f 100644 --- a/arch/mips/generic/vmlinux.its.S +++ b/arch/mips/generic/vmlinux.its.S @@ -6,7 +6,7 @@ #address-cells = <ADDR_CELLS>;
images { - kernel@0 { + kernel { description = KERNEL_NAME; data = /incbin/(VMLINUX_BINARY); type = "kernel"; @@ -15,18 +15,18 @@ compression = VMLINUX_COMPRESSION; load = /bits/ ADDR_BITS <VMLINUX_LOAD_ADDRESS>; entry = /bits/ ADDR_BITS <VMLINUX_ENTRY_ADDRESS>; - hash@0 { + hash { algo = "sha1"; }; }; };
configurations { - default = "conf@default"; + default = "conf-default";
- conf@default { + conf-default { description = "Generic Linux kernel"; - kernel = "kernel@0"; + kernel = "kernel"; }; }; };
From: Catalin Marinas catalin.marinas@arm.com
stable inclusion from stable-5.10.47 commit bd5d4df4dcc1f2d1ceac7b167df1e699a7f6deb7 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 791ab8b2e3db0c6e4295467d10398800ec29144c upstream.
Currently, the kernel assumes that if RAM starts above 32-bit (or zone_bits), there is still a ZONE_DMA/DMA32 at the bottom of the RAM and such constrained devices have a hardwired DMA offset. In practice, we haven't noticed any such hardware so let's assume that we can expand ZONE_DMA32 to the available memory if no RAM below 4GB. Similarly, ZONE_DMA is expanded to the 4GB limit if no RAM addressable by zone_bits.
Signed-off-by: Catalin Marinas catalin.marinas@arm.com Tested-by: Nicolas Saenz Julienne nsaenzjulienne@suse.de Reviewed-by: Nicolas Saenz Julienne nsaenzjulienne@suse.de Cc: Nicolas Saenz Julienne nsaenzjulienne@suse.de Cc: Robin Murphy robin.murphy@arm.com Link: https://lore.kernel.org/r/20201118185809.1078362-1-catalin.marinas@arm.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com Cc: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/arm64/mm/init.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 3800fb73db9c..3b9401ee9c58 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -230,14 +230,21 @@ static void __init reserve_quick_kexec(void) #endif
/* - * Return the maximum physical address for a zone with a given address size - * limit. It currently assumes that for memory starting above 4G, 32-bit - * devices will use a DMA offset. + * Return the maximum physical address for a zone accessible by the given bits + * limit. If DRAM starts above 32-bit, expand the zone to the maximum + * available memory, otherwise cap it at 32-bit. */ static phys_addr_t __init max_zone_phys(unsigned int zone_bits) { - phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits); - return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM()); + phys_addr_t zone_mask = DMA_BIT_MASK(zone_bits); + phys_addr_t phys_start = memblock_start_of_DRAM(); + + if (phys_start > U32_MAX) + zone_mask = PHYS_ADDR_MAX; + else if (phys_start > zone_mask) + zone_mask = U32_MAX; + + return min(zone_mask, memblock_end_of_DRAM() - 1) + 1; }
static void __init zone_sizes_init(unsigned long min, unsigned long max)
From: Haibo Chen haibo.chen@nxp.com
stable inclusion from stable-5.10.47 commit 4b06ebab4a828c03e9a1ba9bbced505560f8aab6 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit f422316c8e9d3c4aff3c56549dfb44a677d02f14 ]
Move the register operation after the clock enable, otherwise system will stuck when this driver probe.
Fixes: 71d80563b076 ("spi: spi-nxp-fspi: fix fspi panic by unexpected interrupts") Signed-off-by: Haibo Chen haibo.chen@nxp.com Link: https://lore.kernel.org/r/1623317073-25158-1-git-send-email-haibo.chen@nxp.c... Signed-off-by: Mark Brown broonie@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/spi/spi-nxp-fspi.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c index ab9035662717..bcc0b5a3a459 100644 --- a/drivers/spi/spi-nxp-fspi.c +++ b/drivers/spi/spi-nxp-fspi.c @@ -1033,12 +1033,6 @@ static int nxp_fspi_probe(struct platform_device *pdev) goto err_put_ctrl; }
- /* Clear potential interrupts */ - reg = fspi_readl(f, f->iobase + FSPI_INTR); - if (reg) - fspi_writel(f, reg, f->iobase + FSPI_INTR); - - /* find the resources - controller memory mapped space */ if (is_acpi_node(f->dev->fwnode)) res = platform_get_resource(pdev, IORESOURCE_MEM, 1); @@ -1076,6 +1070,11 @@ static int nxp_fspi_probe(struct platform_device *pdev) } }
+ /* Clear potential interrupts */ + reg = fspi_readl(f, f->iobase + FSPI_INTR); + if (reg) + fspi_writel(f, reg, f->iobase + FSPI_INTR); + /* find the irq */ ret = platform_get_irq(pdev, 0); if (ret < 0)
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
stable inclusion from stable-5.10.47 commit f11f9ff8a7c97b2a3990c7322304627d9b58d362 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 4d6035f9bf4ea12776322746a216e856dfe46698 ]
Revert commit 4514d991d992 ("PCI: PM: Do not read power state in pci_enable_device_flags()") that is reported to cause PCI device initialization issues on some systems.
BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=213481 https://gitee.com/openeuler/kernel/issues/I4DAKB Link: https://lore.kernel.org/linux-acpi/YNDoGICcg0V8HhpQ@eldamar.lan Reported-by: Michael phyre@rogers.com Reported-by: Salvatore Bonaccorso carnil@debian.org Fixes: 4514d991d992 ("PCI: PM: Do not read power state in pci_enable_device_flags()") Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/pci/pci.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-)
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d5d9ea864fe6..9e971fffeb6a 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1874,11 +1874,21 @@ static int pci_enable_device_flags(struct pci_dev *dev, unsigned long flags) int err; int i, bars = 0;
- if (atomic_inc_return(&dev->enable_cnt) > 1) { - pci_update_current_state(dev, dev->current_state); - return 0; /* already enabled */ + /* + * Power state could be unknown at this point, either due to a fresh + * boot or a device removal call. So get the current power state + * so that things like MSI message writing will behave as expected + * (e.g. if the device really is in D0 at enable time). + */ + if (dev->pm_cap) { + u16 pmcsr; + pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); + dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); }
+ if (atomic_inc_return(&dev->enable_cnt) > 1) + return 0; /* already enabled */ + bridge = pci_upstream_bridge(dev); if (bridge) pci_enable_bridge(bridge);
From: Maxime Ripard maxime@cerno.tech
stable inclusion from stable-5.10.47 commit f73aca83fd83ab6270ce170e3ac8268b90e48fec bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 411efa18e4b03840553ff58ad9b4621b82a30c04 ]
In order to access the HDMI controller, we need to make sure the HSM clock is enabled. If we were to access it with the clock disabled, the CPU would completely hang, resulting in an hard crash.
Since we have different code path that would require it, let's move that clock enable / disable to runtime_pm that will take care of the reference counting for us.
Fixes: 4f6e3d66ac52 ("drm/vc4: Add runtime PM support to the HDMI encoder driver") Signed-off-by: Maxime Ripard maxime@cerno.tech Reviewed-by: Dave Stevenson dave.stevenson@raspberrypi.com Link: https://patchwork.freedesktop.org/patch/msgid/20210525091059.234116-3-maxime... Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/gpu/drm/vc4/vc4_hdmi.c | 40 +++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 10 deletions(-)
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index af5f01eff872..5978e99a0e85 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -415,7 +415,6 @@ static void vc4_hdmi_encoder_post_crtc_powerdown(struct drm_encoder *encoder) HDMI_READ(HDMI_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE);
clk_disable_unprepare(vc4_hdmi->pixel_bvb_clock); - clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock);
ret = pm_runtime_put(&vc4_hdmi->pdev->dev); @@ -666,13 +665,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder) return; }
- ret = clk_prepare_enable(vc4_hdmi->hsm_clock); - if (ret) { - DRM_ERROR("Failed to turn on HSM clock: %d\n", ret); - clk_disable_unprepare(vc4_hdmi->pixel_clock); - return; - } - vc4_hdmi_cec_update_clk_div(vc4_hdmi);
/* @@ -683,7 +675,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder) (hsm_rate > VC4_HSM_MID_CLOCK ? 150000000 : 75000000)); if (ret) { DRM_ERROR("Failed to set pixel bvb clock rate: %d\n", ret); - clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); return; } @@ -691,7 +682,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder) ret = clk_prepare_enable(vc4_hdmi->pixel_bvb_clock); if (ret) { DRM_ERROR("Failed to turn on pixel bvb clock: %d\n", ret); - clk_disable_unprepare(vc4_hdmi->hsm_clock); clk_disable_unprepare(vc4_hdmi->pixel_clock); return; } @@ -1724,6 +1714,29 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) return 0; }
+#ifdef CONFIG_PM +static int vc4_hdmi_runtime_suspend(struct device *dev) +{ + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + + clk_disable_unprepare(vc4_hdmi->hsm_clock); + + return 0; +} + +static int vc4_hdmi_runtime_resume(struct device *dev) +{ + struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + int ret; + + ret = clk_prepare_enable(vc4_hdmi->hsm_clock); + if (ret) + return ret; + + return 0; +} +#endif + static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) { const struct vc4_hdmi_variant *variant = of_device_get_match_data(dev); @@ -1959,11 +1972,18 @@ static const struct of_device_id vc4_hdmi_dt_match[] = { {} };
+static const struct dev_pm_ops vc4_hdmi_pm_ops = { + SET_RUNTIME_PM_OPS(vc4_hdmi_runtime_suspend, + vc4_hdmi_runtime_resume, + NULL) +}; + struct platform_driver vc4_hdmi_driver = { .probe = vc4_hdmi_dev_probe, .remove = vc4_hdmi_dev_remove, .driver = { .name = "vc4_hdmi", .of_match_table = vc4_hdmi_dt_match, + .pm = &vc4_hdmi_pm_ops, }, };
From: Maxime Ripard maxime@cerno.tech
stable inclusion from stable-5.10.47 commit cf593548759465bb3c40cedfe0913a150ca6bda8 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 9984d6664ce9dcbbc713962539eaf7636ea246c2 ]
If the HPD GPIO is not available and drm_probe_ddc fails, we end up reading the HDMI_HOTPLUG register, but the controller might be powered off resulting in a CPU hang. Make sure we have the power domain and the HSM clock powered during the detect cycle to prevent the hang from happening.
Fixes: 4f6e3d66ac52 ("drm/vc4: Add runtime PM support to the HDMI encoder driver") Signed-off-by: Maxime Ripard maxime@cerno.tech Reviewed-by: Dave Stevenson dave.stevenson@raspberrypi.com Link: https://patchwork.freedesktop.org/patch/msgid/20210525091059.234116-4-maxime... Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/gpu/drm/vc4/vc4_hdmi.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 5978e99a0e85..88a8cb840cd5 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -146,6 +146,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); bool connected = false;
+ WARN_ON(pm_runtime_resume_and_get(&vc4_hdmi->pdev->dev)); + if (vc4_hdmi->hpd_gpio) { if (gpio_get_value_cansleep(vc4_hdmi->hpd_gpio) ^ vc4_hdmi->hpd_active_low) @@ -167,10 +169,12 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) } }
+ pm_runtime_put(&vc4_hdmi->pdev->dev); return connector_status_connected; }
cec_phys_addr_invalidate(vc4_hdmi->cec_adap); + pm_runtime_put(&vc4_hdmi->pdev->dev); return connector_status_disconnected; }
From: Peter Zijlstra peterz@infradead.org
stable inclusion from stable-5.10.47 commit cb83c99cf675f3d48a343f8a17d0b953996660cb bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 240001d4e3041832e8a2654adc3ccf1683132b92 ]
Fix:
vmlinux.o: warning: objtool: __do_fast_syscall_32()+0xf5: call to trace_hardirqs_off() leaves .noinstr.text section
Fixes: 5d5675df792f ("x86/entry: Fix entry/exit mismatch on failed fast 32-bit syscalls") Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lore.kernel.org/r/20210621120120.467898710@infradead.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/x86/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 2e4d91f3feea..93a3122cd15f 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -127,8 +127,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) /* User code screwed up. */ regs->ax = -EFAULT;
- instrumentation_end(); local_irq_disable(); + instrumentation_end(); irqentry_exit_to_user_mode(regs); return false; }
From: Peter Zijlstra peterz@infradead.org
stable inclusion from stable-5.10.47 commit 59aa5c91f86336b9bfdb10c1ab23fb08bb291e53 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 4c9c26f1e67648f41f28f8c997c5c9467a3dbbe4 ]
Fix:
vmlinux.o: warning: objtool: exc_xen_unknown_trap()+0x7: call to printk() leaves .noinstr.text section
Fixes: 2e92493637a0 ("x86/xen: avoid warning in Xen pv guest with CONFIG_AMD_MEM_ENCRYPT enabled") Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lore.kernel.org/r/20210621120120.606560778@infradead.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/x86/xen/enlighten_pv.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 8064df638222..d3cdf467d91f 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -586,8 +586,10 @@ DEFINE_IDTENTRY_RAW(xenpv_exc_debug) DEFINE_IDTENTRY_RAW(exc_xen_unknown_trap) { /* This should never happen and there is no way to handle it. */ + instrumentation_begin(); pr_err("Unknown trap in Xen PV mode."); BUG(); + instrumentation_end(); }
struct trap_array_entry {
From: Peter Zijlstra peterz@infradead.org
stable inclusion from stable-5.10.47 commit ca2acbd5483ff03a1c1ea0268d7a4acf0b3a13ee bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 49faa77759b211fff344898edc23bb780707fff5 ]
Better handle the failure paths.
vmlinux.o: warning: objtool: debug_locks_off()+0x23: call to console_verbose() leaves .noinstr.text section vmlinux.o: warning: objtool: debug_locks_off()+0x19: call to __kasan_check_write() leaves .noinstr.text section
debug_locks_off+0x19/0x40: instrument_atomic_write at include/linux/instrumented.h:86 (inlined by) __debug_locks_off at include/linux/debug_locks.h:17 (inlined by) debug_locks_off at lib/debug_locks.c:41
Fixes: 6eebad1ad303 ("lockdep: __always_inline more for noinstr") Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lore.kernel.org/r/20210621120120.784404944@infradead.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- include/linux/debug_locks.h | 2 ++ kernel/locking/lockdep.c | 4 +++- lib/debug_locks.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 2915f56ad421..edb5c186b0b7 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -27,8 +27,10 @@ extern int debug_locks_off(void); int __ret = 0; \ \ if (!oops_in_progress && unlikely(c)) { \ + instrumentation_begin(); \ if (debug_locks_off() && !debug_locks_silent) \ WARN(1, "DEBUG_LOCKS_WARN_ON(%s)", #c); \ + instrumentation_end(); \ __ret = 1; \ } \ __ret; \ diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 858b96b438ce..cdca007551e7 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -842,7 +842,7 @@ static int count_matching_names(struct lock_class *new_class) }
/* used from NMI context -- must be lockless */ -static __always_inline struct lock_class * +static noinstr struct lock_class * look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) { struct lockdep_subclass_key *key; @@ -850,12 +850,14 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) struct lock_class *class;
if (unlikely(subclass >= MAX_LOCKDEP_SUBCLASSES)) { + instrumentation_begin(); debug_locks_off(); printk(KERN_ERR "BUG: looking up invalid subclass: %u\n", subclass); printk(KERN_ERR "turning off the locking correctness validator.\n"); dump_stack(); + instrumentation_end(); return NULL; }
diff --git a/lib/debug_locks.c b/lib/debug_locks.c index 06d3135bd184..a75ee30b77cb 100644 --- a/lib/debug_locks.c +++ b/lib/debug_locks.c @@ -36,7 +36,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent); /* * Generic 'turn off all lock debugging' function: */ -noinstr int debug_locks_off(void) +int debug_locks_off(void) { if (debug_locks && __debug_locks_off()) { if (!debug_locks_silent) {
From: Like Xu like.xu@linux.intel.com
stable inclusion from stable-5.10.47 commit 56bc20e5fc64d55c194475692ee60893a3f452a5 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 488e13a489e9707a7e81e1991fdd1f20c0f04689 ]
If the kernel is compiled with the CONFIG_LOCKDEP option, the conditional might_sleep_if() deep in kmem_cache_alloc() will generate the following trace, and potentially cause a deadlock when another LBR event is added:
[] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:196 [] Call Trace: [] kmem_cache_alloc+0x36/0x250 [] intel_pmu_lbr_add+0x152/0x170 [] x86_pmu_add+0x83/0xd0
Make it symmetric with the release_lbr_buffers() call and mirror the existing DS buffers.
Fixes: c085fb8774 ("perf/x86/intel/lbr: Support XSAVES for arch LBR read") Signed-off-by: Like Xu like.xu@linux.intel.com [peterz: simplified] Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Tested-by: Kan Liang kan.liang@linux.intel.com Link: https://lkml.kernel.org/r/20210430052247.3079672-2-like.xu@linux.intel.com Signed-off-by: Sasha Levin sashal@kernel.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/x86/events/core.c | 6 ++++-- arch/x86/events/intel/lbr.c | 26 ++++++++++++++++++++------ arch/x86/events/perf_event.h | 6 ++++++ 3 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index a88c94d65693..b7f8ed87bfbc 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -372,10 +372,12 @@ int x86_reserve_hardware(void) if (!atomic_inc_not_zero(&pmc_refcount)) { mutex_lock(&pmc_reserve_mutex); if (atomic_read(&pmc_refcount) == 0) { - if (!reserve_pmc_hardware()) + if (!reserve_pmc_hardware()) { err = -EBUSY; - else + } else { reserve_ds_buffers(); + reserve_lbr_buffers(); + } } if (!err) atomic_inc(&pmc_refcount); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index e2b0efcba101..6c1231837382 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -658,7 +658,6 @@ static inline bool branch_user_callstack(unsigned br_sel)
void intel_pmu_lbr_add(struct perf_event *event) { - struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
if (!x86_pmu.lbr_nr) @@ -696,11 +695,6 @@ void intel_pmu_lbr_add(struct perf_event *event) perf_sched_cb_inc(event->ctx->pmu); if (!cpuc->lbr_users++ && !event->total_time_running) intel_pmu_lbr_reset(); - - if (static_cpu_has(X86_FEATURE_ARCH_LBR) && - kmem_cache && !cpuc->lbr_xsave && - (cpuc->lbr_users != cpuc->lbr_pebs_users)) - cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL); }
void release_lbr_buffers(void) @@ -721,6 +715,26 @@ void release_lbr_buffers(void) } }
+void reserve_lbr_buffers(void) +{ + struct kmem_cache *kmem_cache; + struct cpu_hw_events *cpuc; + int cpu; + + if (!static_cpu_has(X86_FEATURE_ARCH_LBR)) + return; + + for_each_possible_cpu(cpu) { + cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; + if (!kmem_cache || cpuc->lbr_xsave) + continue; + + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL, + cpu_to_node(cpu)); + } +} + void intel_pmu_lbr_del(struct perf_event *event) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 6a8edfe59b09..d4f2ea2d9a9e 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1122,6 +1122,8 @@ void reserve_ds_buffers(void);
void release_lbr_buffers(void);
+void reserve_lbr_buffers(void); + extern struct event_constraint bts_constraint; extern struct event_constraint vlbr_constraint;
@@ -1267,6 +1269,10 @@ static inline void release_lbr_buffers(void) { }
+static inline void reserve_lbr_buffers(void) +{ +} + static inline int intel_pmu_init(void) { return 0;
From: Thomas Gleixner tglx@linutronix.de
stable inclusion from stable-5.10.47 commit 4df9ed0edb9f6f60afcf1fd70640cebc7eceb3c7 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 7f049fbdd57f6ea71dc741d903c19c73b2f70950 ]
XRSTORS requires a valid xstate buffer to work correctly. XSAVES does not guarantee to write a fully valid buffer according to the SDM:
"XSAVES does not write to any parts of the XSAVE header other than the XSTATE_BV and XCOMP_BV fields."
XRSTORS triggers a #GP:
"If bytes 63:16 of the XSAVE header are not all zero."
It's dubious at best how this can work at all when the buffer is not zeroed before use.
Allocate the buffers with __GFP_ZERO to prevent XRSTORS failure.
Fixes: ce711ea3cab9 ("perf/x86/intel/lbr: Support XSAVES/XRSTORS for LBR context switch") Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lore.kernel.org/r/87wnr0wo2z.ffs@nanos.tec.linutronix.de Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/x86/events/intel/lbr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 6c1231837382..29ec4fe48507 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -730,7 +730,8 @@ void reserve_lbr_buffers(void) if (!kmem_cache || cpuc->lbr_xsave) continue;
- cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL, + cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, + GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)); } }
From: Yu Kuai yukuai3@huawei.com
stable inclusion from stable-5.10.47 commit 86f3e72dcb721675216a8a6fa5a3d669ba79c6a0 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 8982d48af36d2562c0f904736b0fc80efc9f2532 ]
pm_runtime_get_sync will increment pm usage counter even it failed. Forgetting to putting operation will result in reference leak here. Fix it by replacing it with pm_runtime_resume_and_get to keep usage counter balanced.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Yu Kuai yukuai3@huawei.com Link: https://lore.kernel.org/r/20210517081826.1564698-4-yukuai3@huawei.com Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/dma/xilinx/zynqmp_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma/xilinx/zynqmp_dma.c b/drivers/dma/xilinx/zynqmp_dma.c index d8419565b92c..5fecf5aa6e85 100644 --- a/drivers/dma/xilinx/zynqmp_dma.c +++ b/drivers/dma/xilinx/zynqmp_dma.c @@ -468,7 +468,7 @@ static int zynqmp_dma_alloc_chan_resources(struct dma_chan *dchan) struct zynqmp_dma_desc_sw *desc; int i, ret;
- ret = pm_runtime_get_sync(chan->dev); + ret = pm_runtime_resume_and_get(chan->dev); if (ret < 0) return ret;
From: Yu Kuai yukuai3@huawei.com
stable inclusion from stable-5.10.47 commit 13b245a7bd59359abb6e328254b7536265448b29 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 83eb4868d325b86e18509d0874e911497667cb54 ]
pm_runtime_get_sync will increment pm usage counter even it failed. Forgetting to putting operation will result in reference leak here. Fix it by replacing it with pm_runtime_resume_and_get to keep usage counter balanced.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Yu Kuai yukuai3@huawei.com Link: https://lore.kernel.org/r/20210517081826.1564698-2-yukuai3@huawei.com Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/dma/stm32-mdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 08cfbfab837b..9d473923712a 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -1448,7 +1448,7 @@ static int stm32_mdma_alloc_chan_resources(struct dma_chan *c) return -ENOMEM; }
- ret = pm_runtime_get_sync(dmadev->ddev.dev); + ret = pm_runtime_resume_and_get(dmadev->ddev.dev); if (ret < 0) return ret;
@@ -1714,7 +1714,7 @@ static int stm32_mdma_pm_suspend(struct device *dev) u32 ccr, id; int ret;
- ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) return ret;
From: Laurent Pinchart laurent.pinchart@ideasonboard.com
stable inclusion from stable-5.10.47 commit 524f70b30ef811ca82ebf36fd148bf2796454314 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 32828b82fb875b06511918b139d3a3cd93d34262 ]
The driver depends on both OF and IOMEM support, express those dependencies in Kconfig. This fixes a build failure on S390 reported by the 0day bot.
Reported-by: kernel test robot lkp@intel.com Signed-off-by: Laurent Pinchart laurent.pinchart@ideasonboard.com Tested-by: Jianqiang Chen jianqiang.chen@xilinx.com Reviewed-by: Jianqiang Chen jianqiang.chen@xilinx.com Link: https://lore.kernel.org/r/20210520152420.23986-2-laurent.pinchart@ideasonboa... Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/dma/Kconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index f2db761ee548..f28bb2334e74 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -693,6 +693,7 @@ config XILINX_ZYNQMP_DMA
config XILINX_ZYNQMP_DPDMA tristate "Xilinx DPDMA Engine" + depends on HAS_IOMEM && OF select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help
From: Laurent Pinchart laurent.pinchart@ideasonboard.com
stable inclusion from stable-5.10.47 commit b671b98169821836f12d646c85aafd6968a331a3 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 9f007e7b6643799e2a6538a5fe04f51c371c6657 ]
While the descriptor ID is stored in a 32-bit field in the hardware descriptor, only 16 bits are used by the hardware and are reported through the XILINX_DPDMA_CH_DESC_ID register. Failure to handle the wrap-around results in a descriptor ID mismatch after 65536 frames. Fix it.
Signed-off-by: Laurent Pinchart laurent.pinchart@ideasonboard.com Tested-by: Jianqiang Chen jianqiang.chen@xilinx.com Reviewed-by: Jianqiang Chen jianqiang.chen@xilinx.com Link: https://lore.kernel.org/r/20210520152420.23986-5-laurent.pinchart@ideasonboa... Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/dma/xilinx/xilinx_dpdma.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index ff7dfb3fdeb4..6c709803203a 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -113,6 +113,7 @@ #define XILINX_DPDMA_CH_VDO 0x020 #define XILINX_DPDMA_CH_PYLD_SZ 0x024 #define XILINX_DPDMA_CH_DESC_ID 0x028 +#define XILINX_DPDMA_CH_DESC_ID_MASK GENMASK(15, 0)
/* DPDMA descriptor fields */ #define XILINX_DPDMA_DESC_CONTROL_PREEMBLE 0xa5 @@ -866,7 +867,8 @@ static void xilinx_dpdma_chan_queue_transfer(struct xilinx_dpdma_chan *chan) * will be used, but it should be enough. */ list_for_each_entry(sw_desc, &desc->descriptors, node) - sw_desc->hw.desc_id = desc->vdesc.tx.cookie; + sw_desc->hw.desc_id = desc->vdesc.tx.cookie + & XILINX_DPDMA_CH_DESC_ID_MASK;
sw_desc = list_first_entry(&desc->descriptors, struct xilinx_dpdma_sw_desc, node); @@ -1086,7 +1088,8 @@ static void xilinx_dpdma_chan_vsync_irq(struct xilinx_dpdma_chan *chan) if (!chan->running || !pending) goto out;
- desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID); + desc_id = dpdma_read(chan->reg, XILINX_DPDMA_CH_DESC_ID) + & XILINX_DPDMA_CH_DESC_ID_MASK;
/* If the retrigger raced with vsync, retry at the next frame. */ sw_desc = list_first_entry(&pending->descriptors,
From: Johannes Berg johannes.berg@intel.com
stable inclusion from stable-5.10.47 commit 8cfe765afd5a8d127770780f45fd971a28897484 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 0ee4d55534f82a0624701d0bb9fc2304d4529086 ]
Syzbot reports that it's possible to hit this from userspace, by trying to add a station before any other connection setup has been done. Instead of trying to catch this in some other way simply remove the warning, that will appropriately reject the call from userspace.
Reported-by: syzbot+7716dbc401d9a437890d@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20210517164715.f537da276d17.Id05f40ec8761d6a8cc2df... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/mac80211/ieee80211_i.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index be40f6b16199..a83f0c2fcdf7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1445,7 +1445,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata) rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (WARN_ON_ONCE(!chanctx_conf)) { + if (!chanctx_conf) { rcu_read_unlock(); return NULL; }
From: Johannes Berg johannes.berg@intel.com
stable inclusion from stable-5.10.47 commit a9028333001f793b2724e8be42fce3336de2cf1c bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit bd18de517923903a177508fc8813f44e717b1c00 ]
Syzbot reports that we may be able to get into a situation where mac80211 has pending ACK frames on shutdown with hwsim. It appears that the reason for this is that syzbot uses the wmediumd hooks to intercept/injection frames, and may shut down hwsim, removing the radio(s), while frames are pending in the air simulation.
Clean out the pending queue when the interface is stopped, after this the frames can't be reported back to mac80211 properly anyway.
Reported-by: syzbot+a063bbf0b15737362592@syzkaller.appspotmail.com Link: https://lore.kernel.org/r/20210517170429.b0f85ab0eda1.Ie42a6ec6b940c971f3441... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/net/wireless/mac80211_hwsim.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 3b3fc7c9c91d..f147d4feedb9 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1623,8 +1623,13 @@ static int mac80211_hwsim_start(struct ieee80211_hw *hw) static void mac80211_hwsim_stop(struct ieee80211_hw *hw) { struct mac80211_hwsim_data *data = hw->priv; + data->started = false; hrtimer_cancel(&data->beacon_timer); + + while (!skb_queue_empty(&data->pending)) + ieee80211_free_txskb(hw, skb_dequeue(&data->pending)); + wiphy_dbg(hw->wiphy, "%s\n", __func__); }
From: Du Cheng ducheng2@gmail.com
stable inclusion from stable-5.10.47 commit 6a07cf36064afb14849d8e038fcca3538fe94bd4 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit a64b6a25dd9f984ed05fade603a00e2eae787d2f ]
If the userland switches back-and-forth between NL80211_IFTYPE_OCB and NL80211_IFTYPE_ADHOC via send_msg(NL80211_CMD_SET_INTERFACE), there is a chance where the cleanup cfg80211_leave_ocb() is not called. This leads to initialization of in-use memory (e.g. init u.ibss while in-use by u.ocb) due to a shared struct/union within ieee80211_sub_if_data:
struct ieee80211_sub_if_data { ... union { struct ieee80211_if_ap ap; struct ieee80211_if_vlan vlan; struct ieee80211_if_managed mgd; struct ieee80211_if_ibss ibss; // <- shares address struct ieee80211_if_mesh mesh; struct ieee80211_if_ocb ocb; // <- shares address struct ieee80211_if_mntr mntr; struct ieee80211_if_nan nan; } u; ... }
Therefore add handling of otype == NL80211_IFTYPE_OCB, during cfg80211_change_iface() to perform cleanup when leaving OCB mode.
link to syzkaller bug: https://syzkaller.appspot.com/bug?id=0612dbfa595bf4b9b680ff7b4948257b8e3732d...
Reported-by: syzbot+105896fac213f26056f9@syzkaller.appspotmail.com Signed-off-by: Du Cheng ducheng2@gmail.com Link: https://lore.kernel.org/r/20210428063941.105161-1-ducheng2@gmail.com Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/wireless/util.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/net/wireless/util.c b/net/wireless/util.c index 2731267fd0f9..4fb8d1b14e76 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1059,6 +1059,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_MESH_POINT: /* mesh should be handled? */ break; + case NL80211_IFTYPE_OCB: + cfg80211_leave_ocb(rdev, dev); + break; default: break; }
From: Zou Wei zou_wei@huawei.com
stable inclusion from stable-5.10.47 commit 78fa0f707d73c21621454ffd78cdfa1e2c7003c0 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit dea8464ddf553803382efb753b6727dbf3931d06 ]
pm_runtime_get_sync will increment pm usage counter even it failed. Forgetting to putting operation will result in reference leak here. Fix it by replacing it with pm_runtime_resume_and_get to keep usage counter balanced.
Reported-by: Hulk Robot hulkci@huawei.com Signed-off-by: Zou Wei zou_wei@huawei.com Reviewed-by: Laurent Pinchart laurent.pinchart@ideasonboard.com Reviewed-by: Geert Uytterhoeven geert+renesas@glider.be Link: https://lore.kernel.org/r/1622442963-54095-1-git-send-email-zou_wei@huawei.c... Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/dma/sh/rcar-dmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index a57705356e8b..991a7b5da29f 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1874,7 +1874,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
/* Enable runtime PM and initialize the device. */ pm_runtime_enable(&pdev->dev); - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret); return ret;
From: Guillaume Ranquet granquet@baylibre.com
stable inclusion from stable-5.10.47 commit 63fa5b2d4b55ebf4d12e7b4d536ae3e63117cd1a bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 0a2ff58f9f8f95526ecb0ccd7517fefceb96f661 ]
The desc_free handler assumed that the desc we want to free was always the current one associated with the channel.
This is seldom the case and this is causing use after free crashes in multiple places (tx/rx/terminate...).
BUG: KASAN: use-after-free in mtk_uart_apdma_rx_handler+0x120/0x304
Call trace: dump_backtrace+0x0/0x1b0 show_stack+0x24/0x34 dump_stack+0xe0/0x150 print_address_description+0x8c/0x55c __kasan_report+0x1b8/0x218 kasan_report+0x14/0x20 __asan_load4+0x98/0x9c mtk_uart_apdma_rx_handler+0x120/0x304 mtk_uart_apdma_irq_handler+0x50/0x80 __handle_irq_event_percpu+0xe0/0x210 handle_irq_event+0x8c/0x184 handle_fasteoi_irq+0x1d8/0x3ac __handle_domain_irq+0xb0/0x110 gic_handle_irq+0x50/0xb8 el0_irq_naked+0x60/0x6c
Allocated by task 3541: __kasan_kmalloc+0xf0/0x1b0 kasan_kmalloc+0x10/0x1c kmem_cache_alloc_trace+0x90/0x2dc mtk_uart_apdma_prep_slave_sg+0x6c/0x1a0 mtk8250_dma_rx_complete+0x220/0x2e4 vchan_complete+0x290/0x340 tasklet_action_common+0x220/0x298 tasklet_action+0x28/0x34 __do_softirq+0x158/0x35c
Freed by task 3541: __kasan_slab_free+0x154/0x224 kasan_slab_free+0x14/0x24 slab_free_freelist_hook+0xf8/0x15c kfree+0xb4/0x278 mtk_uart_apdma_desc_free+0x34/0x44 vchan_complete+0x1bc/0x340 tasklet_action_common+0x220/0x298 tasklet_action+0x28/0x34 __do_softirq+0x158/0x35c
The buggy address belongs to the object at ffff000063606800 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 176 bytes inside of 256-byte region [ffff000063606800, ffff000063606900) The buggy address belongs to the page: page:fffffe00016d8180 refcount:1 mapcount:0 mapping:ffff00000302f600 index:0x0 compound_mapcount: 0 flags: 0xffff00000010200(slab|head) raw: 0ffff00000010200 dead000000000100 dead000000000122 ffff00000302f600 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected
Signed-off-by: Guillaume Ranquet granquet@baylibre.com
Link: https://lore.kernel.org/r/20210513192642.29446-2-granquet@baylibre.com Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/dma/mediatek/mtk-uart-apdma.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index 27c07350971d..e38b67fc0c0c 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -131,10 +131,7 @@ static unsigned int mtk_uart_apdma_read(struct mtk_chan *c, unsigned int reg)
static void mtk_uart_apdma_desc_free(struct virt_dma_desc *vd) { - struct dma_chan *chan = vd->tx.chan; - struct mtk_chan *c = to_mtk_uart_apdma_chan(chan); - - kfree(c->desc); + kfree(container_of(vd, struct mtk_uart_apdma_desc, vd)); }
static void mtk_uart_apdma_start_tx(struct mtk_chan *c)
From: Guillaume Ranquet granquet@baylibre.com
stable inclusion from stable-5.10.47 commit 0f48f927718252cde6ca55a67efcd9ba4ed6ca21 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 2537b40b0a4f61d2c83900744fe89b09076be9c6 ]
Avoid issuing a new desc if one is still being processed as this can lead to some desc never being marked as completed.
Signed-off-by: Guillaume Ranquet granquet@baylibre.com
Link: https://lore.kernel.org/r/20210513192642.29446-3-granquet@baylibre.com Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/dma/mediatek/mtk-uart-apdma.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-)
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index e38b67fc0c0c..a09ab2dd3b46 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -204,14 +204,9 @@ static void mtk_uart_apdma_start_rx(struct mtk_chan *c)
static void mtk_uart_apdma_tx_handler(struct mtk_chan *c) { - struct mtk_uart_apdma_desc *d = c->desc; - mtk_uart_apdma_write(c, VFF_INT_FLAG, VFF_TX_INT_CLR_B); mtk_uart_apdma_write(c, VFF_INT_EN, VFF_INT_EN_CLR_B); mtk_uart_apdma_write(c, VFF_EN, VFF_EN_CLR_B); - - list_del(&d->vd.node); - vchan_cookie_complete(&d->vd); }
static void mtk_uart_apdma_rx_handler(struct mtk_chan *c) @@ -242,9 +237,17 @@ static void mtk_uart_apdma_rx_handler(struct mtk_chan *c)
c->rx_status = d->avail_len - cnt; mtk_uart_apdma_write(c, VFF_RPT, wg); +}
- list_del(&d->vd.node); - vchan_cookie_complete(&d->vd); +static void mtk_uart_apdma_chan_complete_handler(struct mtk_chan *c) +{ + struct mtk_uart_apdma_desc *d = c->desc; + + if (d) { + list_del(&d->vd.node); + vchan_cookie_complete(&d->vd); + c->desc = NULL; + } }
static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id) @@ -258,6 +261,7 @@ static irqreturn_t mtk_uart_apdma_irq_handler(int irq, void *dev_id) mtk_uart_apdma_rx_handler(c); else if (c->dir == DMA_MEM_TO_DEV) mtk_uart_apdma_tx_handler(c); + mtk_uart_apdma_chan_complete_handler(c); spin_unlock_irqrestore(&c->vc.lock, flags);
return IRQ_HANDLED; @@ -363,7 +367,7 @@ static void mtk_uart_apdma_issue_pending(struct dma_chan *chan) unsigned long flags;
spin_lock_irqsave(&c->vc.lock, flags); - if (vchan_issue_pending(&c->vc)) { + if (vchan_issue_pending(&c->vc) && !c->desc) { vd = vchan_next_desc(&c->vc); c->desc = to_mtk_uart_apdma_desc(&vd->tx);
From: Guillaume Ranquet granquet@baylibre.com
stable inclusion from stable-5.10.47 commit 93c2aac13b0856fa7e8bf7bd59476650963292fa bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 9041575348b21ade1fb74d790f1aac85d68198c7 ]
As recommended by the doc in: Documentation/drivers-api/dmaengine/provider.rst
Use GFP_NOWAIT to not deplete the emergency pool.
Signed-off-by: Guillaume Ranquet granquet@baylibre.com
Link: https://lore.kernel.org/r/20210513192642.29446-4-granquet@baylibre.com Signed-off-by: Vinod Koul vkoul@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/dma/mediatek/mtk-uart-apdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/dma/mediatek/mtk-uart-apdma.c b/drivers/dma/mediatek/mtk-uart-apdma.c index a09ab2dd3b46..375e7e647df6 100644 --- a/drivers/dma/mediatek/mtk-uart-apdma.c +++ b/drivers/dma/mediatek/mtk-uart-apdma.c @@ -349,7 +349,7 @@ static struct dma_async_tx_descriptor *mtk_uart_apdma_prep_slave_sg return NULL;
/* Now allocate and setup the descriptor */ - d = kzalloc(sizeof(*d), GFP_ATOMIC); + d = kzalloc(sizeof(*d), GFP_NOWAIT); if (!d) return NULL;
From: Zheng Yongjun zhengyongjun3@huawei.com
stable inclusion from stable-5.10.47 commit fedc4d4f548ce538d18ec87a63d5a618dc730117 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 5ac6b198d7e312bd10ebe7d58c64690dc59cc49a ]
When 'nla_parse_nested_deprecated' failed, it's no need to BUG() here, return -EINVAL is ok.
Signed-off-by: Zheng Yongjun zhengyongjun3@huawei.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/ipv4/devinet.c | 2 +- net/ipv6/addrconf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 123a6d39438f..7c1859777429 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1989,7 +1989,7 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL;
if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4c881f5d9080..884d430e23cb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5799,7 +5799,7 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL;
if (tb[IFLA_INET6_TOKEN]) { err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
From: Johannes Berg johannes.berg@intel.com
stable inclusion from stable-5.10.47 commit c2813d1966ba45ec205e57e94c53cd07805be2e1 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit a9799541ca34652d9996e45f80e8e03144c12949 ]
These are not permitted by the spec, just drop them.
Link: https://lore.kernel.org/r/20210609161305.23def022b750.Ibd6dd3cdce573dae262fc... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/mac80211/rx.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ef8ff0bc66f1..38b5695c2a0c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2250,17 +2250,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG;
- if (is_multicast_ether_addr(hdr->addr1)) { - I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount); - goto out_no_led; - } - if (rx->sta) cache = &rx->sta->frags;
if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) goto out;
+ if (is_multicast_ether_addr(hdr->addr1)) + return RX_DROP_MONITOR; + I802_DEBUG_INC(rx->local->rx_handlers_fragments);
if (skb_linearize(rx->skb)) @@ -2386,7 +2384,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
out: ieee80211_led_rx(rx->local); - out_no_led: if (rx->sta) rx->sta->rx_stats.packets++; return RX_CONTINUE;
From: Austin Kim austindh.kim@gmail.com
stable inclusion from stable-5.10.47 commit c2311fd6de7815187fc31fe79fe6c969bc8a0b4c bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 80ec82e3d2c1fab42eeb730aaa7985494a963d3f ]
Several ethtool functions leave heap uncleared (potentially) by drivers. This will leave the unused portion of heap unchanged and might copy the full contents back to userspace.
Signed-off-by: Austin Kim austindh.kim@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/ethtool/ioctl.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 488d64ccc122..83b7467bd5fd 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1421,7 +1421,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr, if (eeprom.offset + eeprom.len > total_len) return -EINVAL;
- data = kmalloc(PAGE_SIZE, GFP_USER); + data = kzalloc(PAGE_SIZE, GFP_USER); if (!data) return -ENOMEM;
@@ -1486,7 +1486,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr) if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) return -EINVAL;
- data = kmalloc(PAGE_SIZE, GFP_USER); + data = kzalloc(PAGE_SIZE, GFP_USER); if (!data) return -ENOMEM;
@@ -1774,7 +1774,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr) return -EFAULT;
test.len = test_len; - data = kmalloc_array(test_len, sizeof(u64), GFP_USER); + data = kcalloc(test_len, sizeof(u64), GFP_USER); if (!data) return -ENOMEM;
@@ -2290,7 +2290,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr) ret = ethtool_tunable_valid(&tuna); if (ret) return ret; - data = kmalloc(tuna.len, GFP_USER); + data = kzalloc(tuna.len, GFP_USER); if (!data) return -ENOMEM; ret = ops->get_tunable(dev, &tuna, data); @@ -2482,7 +2482,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr) ret = ethtool_phy_tunable_valid(&tuna); if (ret) return ret; - data = kmalloc(tuna.len, GFP_USER); + data = kzalloc(tuna.len, GFP_USER); if (!data) return -ENOMEM; if (phy_drv_tunable) {
From: Eric Dumazet edumazet@google.com
stable inclusion from stable-5.10.47 commit 08c389de6d53ce6055573e4edd4e2010d1789f05 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit dcd01eeac14486b56a790f5cce9b823440ba5b34 ]
Both functions are known to be racy when reading inet_num as we do not want to grab locks for the common case the socket has been bound already. The race is resolved in inet_autobind() by reading again inet_num under the socket lock.
syzbot reported: BUG: KCSAN: data-race in inet_send_prepare / udp_lib_get_port
write to 0xffff88812cba150e of 2 bytes by task 24135 on cpu 0: udp_lib_get_port+0x4b2/0xe20 net/ipv4/udp.c:308 udp_v6_get_port+0x5e/0x70 net/ipv6/udp.c:89 inet_autobind net/ipv4/af_inet.c:183 [inline] inet_send_prepare+0xd0/0x210 net/ipv4/af_inet.c:807 inet6_sendmsg+0x29/0x80 net/ipv6/af_inet6.c:639 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg net/socket.c:674 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350 ___sys_sendmsg net/socket.c:2404 [inline] __sys_sendmmsg+0x315/0x4b0 net/socket.c:2490 __do_sys_sendmmsg net/socket.c:2519 [inline] __se_sys_sendmmsg net/socket.c:2516 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2516 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae
read to 0xffff88812cba150e of 2 bytes by task 24132 on cpu 1: inet_send_prepare+0x21/0x210 net/ipv4/af_inet.c:806 inet6_sendmsg+0x29/0x80 net/ipv6/af_inet6.c:639 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg net/socket.c:674 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350 ___sys_sendmsg net/socket.c:2404 [inline] __sys_sendmmsg+0x315/0x4b0 net/socket.c:2490 __do_sys_sendmmsg net/socket.c:2519 [inline] __se_sys_sendmmsg net/socket.c:2516 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2516 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae
value changed: 0x0000 -> 0x9db4
Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 24132 Comm: syz-executor.2 Not tainted 5.13.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/ipv4/af_inet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index b7260c8cef2e..8267349afe23 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -572,7 +572,7 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, return err; }
- if (!inet_sk(sk)->inet_num && inet_autobind(sk)) + if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, uaddr, addr_len); } @@ -799,7 +799,7 @@ int inet_send_prepare(struct sock *sk) sock_rps_record_flow(sk);
/* We may need to bind the socket. */ - if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && + if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind && inet_autobind(sk)) return -EAGAIN;
From: Zheng Yongjun zhengyongjun3@huawei.com
stable inclusion from stable-5.10.47 commit 61b132f67c0d69ff367a8b52f42f0ececffc89e4 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 9d44fa3e50cc91691896934d106c86e4027e61ca ]
Function 'ping_queue_rcv_skb' not always return success, which will also return fail. If not check the wrong return value of it, lead to function `ping_rcv` return success.
Signed-off-by: Zheng Yongjun zhengyongjun3@huawei.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/ipv4/ping.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 248856b301c4..8ce8b7300b9d 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -952,6 +952,7 @@ bool ping_rcv(struct sk_buff *skb) struct sock *sk; struct net *net = dev_net(skb->dev); struct icmphdr *icmph = icmp_hdr(skb); + bool rc = false;
/* We assume the packet has already been checked by icmp_rcv */
@@ -966,14 +967,15 @@ bool ping_rcv(struct sk_buff *skb) struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
pr_debug("rcv on socket %p\n", sk); - if (skb2) - ping_queue_rcv_skb(sk, skb2); + if (skb2 && !ping_queue_rcv_skb(sk, skb2)) + rc = true; sock_put(sk); - return true; } - pr_debug("no socket, dropping\n");
- return false; + if (!rc) + pr_debug("no socket, dropping\n"); + + return rc; } EXPORT_SYMBOL_GPL(ping_rcv);
From: Eric Dumazet edumazet@google.com
stable inclusion from stable-5.10.47 commit 7293f63b7b620e888b6ab08127e91c579b825fb1 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit f13ef10059ccf5f4ed201cd050176df62ec25bb8 ]
sock_error() is known to be racy. The code avoids an atomic operation is sk_err is zero, and this field could be changed under us, this is fine.
Sysbot reported:
BUG: KCSAN: data-race in sock_alloc_send_pskb / unix_release_sock
write to 0xffff888131855630 of 4 bytes by task 9365 on cpu 1: unix_release_sock+0x2e9/0x6e0 net/unix/af_unix.c:550 unix_release+0x2f/0x50 net/unix/af_unix.c:859 __sock_release net/socket.c:599 [inline] sock_close+0x6c/0x150 net/socket.c:1258 __fput+0x25b/0x4e0 fs/file_table.c:280 ____fput+0x11/0x20 fs/file_table.c:313 task_work_run+0xae/0x130 kernel/task_work.c:164 tracehook_notify_resume include/linux/tracehook.h:189 [inline] exit_to_user_mode_loop kernel/entry/common.c:174 [inline] exit_to_user_mode_prepare+0x156/0x190 kernel/entry/common.c:208 __syscall_exit_to_user_mode_work kernel/entry/common.c:290 [inline] syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:301 do_syscall_64+0x56/0x90 arch/x86/entry/common.c:57 entry_SYSCALL_64_after_hwframe+0x44/0xae
read to 0xffff888131855630 of 4 bytes by task 9385 on cpu 0: sock_error include/net/sock.h:2269 [inline] sock_alloc_send_pskb+0xe4/0x4e0 net/core/sock.c:2336 unix_dgram_sendmsg+0x478/0x1610 net/unix/af_unix.c:1671 unix_seqpacket_sendmsg+0xc2/0x100 net/unix/af_unix.c:2055 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg net/socket.c:674 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350 __sys_sendmsg_sock+0x25/0x30 net/socket.c:2416 io_sendmsg fs/io_uring.c:4367 [inline] io_issue_sqe+0x231a/0x6750 fs/io_uring.c:6135 __io_queue_sqe+0xe9/0x360 fs/io_uring.c:6414 __io_req_task_submit fs/io_uring.c:2039 [inline] io_async_task_func+0x312/0x590 fs/io_uring.c:5074 __tctx_task_work fs/io_uring.c:1910 [inline] tctx_task_work+0x1d4/0x3d0 fs/io_uring.c:1924 task_work_run+0xae/0x130 kernel/task_work.c:164 tracehook_notify_signal include/linux/tracehook.h:212 [inline] handle_signal_work kernel/entry/common.c:145 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0xf8/0x190 kernel/entry/common.c:208 __syscall_exit_to_user_mode_work kernel/entry/common.c:290 [inline] syscall_exit_to_user_mode+0x20/0x40 kernel/entry/common.c:301 do_syscall_64+0x56/0x90 arch/x86/entry/common.c:57 entry_SYSCALL_64_after_hwframe+0x44/0xae
value changed: 0x00000000 -> 0x00000068
Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 9385 Comm: syz-executor.3 Not tainted 5.13.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- include/net/sock.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/include/net/sock.h b/include/net/sock.h index f68184b8c0aa..9199ab7ab2da 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2232,8 +2232,13 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk); static inline int sock_error(struct sock *sk) { int err; - if (likely(!sk->sk_err)) + + /* Avoid an atomic operation for the common case. + * This is racy since another cpu/thread can change sk_err under us. + */ + if (likely(data_race(!sk->sk_err))) return 0; + err = xchg(&sk->sk_err, 0); return -err; }
From: Eric Dumazet edumazet@google.com
stable inclusion from stable-5.10.47 commit 9707960ecfdc27d1d57c83ab47472f79f3a4c3f8 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit b71eaed8c04f72a919a9c44e83e4ee254e69e7f3 ]
UDP sendmsg() path can be lockless, it is possible for another thread to re-connect an change sk->sk_txhash under us.
There is no serious impact, but we can use READ_ONCE()/WRITE_ONCE() pair to document the race.
BUG: KCSAN: data-race in __ip4_datagram_connect / skb_set_owner_w
write to 0xffff88813397920c of 4 bytes by task 30997 on cpu 1: sk_set_txhash include/net/sock.h:1937 [inline] __ip4_datagram_connect+0x69e/0x710 net/ipv4/datagram.c:75 __ip6_datagram_connect+0x551/0x840 net/ipv6/datagram.c:189 ip6_datagram_connect+0x2a/0x40 net/ipv6/datagram.c:272 inet_dgram_connect+0xfd/0x180 net/ipv4/af_inet.c:580 __sys_connect_file net/socket.c:1837 [inline] __sys_connect+0x245/0x280 net/socket.c:1854 __do_sys_connect net/socket.c:1864 [inline] __se_sys_connect net/socket.c:1861 [inline] __x64_sys_connect+0x3d/0x50 net/socket.c:1861 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae
read to 0xffff88813397920c of 4 bytes by task 31039 on cpu 0: skb_set_hash_from_sk include/net/sock.h:2211 [inline] skb_set_owner_w+0x118/0x220 net/core/sock.c:2101 sock_alloc_send_pskb+0x452/0x4e0 net/core/sock.c:2359 sock_alloc_send_skb+0x2d/0x40 net/core/sock.c:2373 __ip6_append_data+0x1743/0x21a0 net/ipv6/ip6_output.c:1621 ip6_make_skb+0x258/0x420 net/ipv6/ip6_output.c:1983 udpv6_sendmsg+0x160a/0x16b0 net/ipv6/udp.c:1527 inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:642 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg net/socket.c:674 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350 ___sys_sendmsg net/socket.c:2404 [inline] __sys_sendmmsg+0x315/0x4b0 net/socket.c:2490 __do_sys_sendmmsg net/socket.c:2519 [inline] __se_sys_sendmmsg net/socket.c:2516 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2516 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae
value changed: 0xbca3c43d -> 0xfdb309e0
Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 31039 Comm: syz-executor.2 Not tainted 5.13.0-rc3-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- include/net/sock.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h index 9199ab7ab2da..3c7addf95150 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1900,7 +1900,8 @@ static inline u32 net_tx_rndhash(void)
static inline void sk_set_txhash(struct sock *sk) { - sk->sk_txhash = net_tx_rndhash(); + /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */ + WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); }
static inline bool sk_rethink_txhash(struct sock *sk) @@ -2172,9 +2173,12 @@ static inline void sock_poll_wait(struct file *filp, struct socket *sock,
static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) { - if (sk->sk_txhash) { + /* This pairs with WRITE_ONCE() in sk_set_txhash() */ + u32 txhash = READ_ONCE(sk->sk_txhash); + + if (txhash) { skb->l4_hash = 1; - skb->hash = sk->sk_txhash; + skb->hash = txhash; } }
From: Eric Dumazet edumazet@google.com
stable inclusion from stable-5.10.47 commit f57132a887ea4be3d7d813b5003d56716ba33448 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit d1b5bee4c8be01585033be9b3a8878789285285f ]
There is a known race in packet_sendmsg(), addressed in commit 32d3182cd2cd ("net/packet: fix race in tpacket_snd()")
Now we have data_race(), we can use it to avoid a future KCSAN warning, as syzbot loves stressing af_packet sockets :)
Signed-off-by: Eric Dumazet edumazet@google.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/packet/af_packet.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ddb68aa836f7..7892d7074847 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3033,10 +3033,13 @@ static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk);
- if (po->tx_ring.pg_vec) + /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. + * tpacket_snd() will redo the check safely. + */ + if (data_race(po->tx_ring.pg_vec)) return tpacket_snd(po, msg); - else - return packet_snd(sock, msg, len); + + return packet_snd(sock, msg, len); }
/*
From: Praneeth Bajjuri praneeth@ti.com
stable inclusion from stable-5.10.47 commit 47c07f919fabb6fc8b3c27ad985f24648f5fd6df bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit da9ef50f545f86ffe6ff786174d26500c4db737a ]
Current logic is performing hard reset and causing the programmed registers to be wiped out.
as per datasheet: https://www.ti.com/lit/ds/symlink/dp83867cr.pdf 8.6.26 Control Register (CTRL)
do SW_RESTART to perform a reset not including the registers, If performed when link is already present, it will drop the link and trigger re-auto negotiation.
Signed-off-by: Praneeth Bajjuri praneeth@ti.com Signed-off-by: Geet Modi geet.modi@ti.com Reviewed-by: Andrew Lunn andrew@lunn.ch Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/net/phy/dp83867.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 69d3eacc2b96..c716074fdef0 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -792,16 +792,12 @@ static int dp83867_phy_reset(struct phy_device *phydev) { int err;
- err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESET); + err = phy_write(phydev, DP83867_CTRL, DP83867_SW_RESTART); if (err < 0) return err;
usleep_range(10, 20);
- /* After reset FORCE_LINK_GOOD bit is set. Although the - * default value should be unset. Disable FORCE_LINK_GOOD - * for the phy to work properly. - */ return phy_modify(phydev, MII_DP83867_PHYCTRL, DP83867_PHYCR_FORCE_LINK_GOOD, 0); }
From: Khem Raj raj.khem@gmail.com
stable inclusion from stable-5.10.47 commit edcd7594ada9204744f7208176c690ba36d13b07 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 5d2388dbf84adebeb6d9742164be8d32728e4269 ]
When CONFIG_CMODEL_MEDLOW is used it ends up generating riscv_hi20_rela relocations in modules which are not resolved during runtime and following errors would be seen
[ 4.802714] virtio_input: target 00000000c1539090 can not be addressed by the 32-bit offset from PC = 39148b7b [ 4.854800] virtio_input: target 00000000c1539090 can not be addressed by the 32-bit offset from PC = 9774456d
Signed-off-by: Khem Raj raj.khem@gmail.com Signed-off-by: Palmer Dabbelt palmerdabbelt@google.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/riscv/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 525736007ac4..dc04f47714b4 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -14,7 +14,7 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y) LDFLAGS_vmlinux := --no-relax endif
-ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy) +ifeq ($(CONFIG_CMODEL_MEDLOW),y) KBUILD_CFLAGS_MODULE += -mcmodel=medany endif
From: Pavel Skripkin paskripkin@gmail.com
stable inclusion from stable-5.10.47 commit 18ed1789bbce54f34367877344e35d9bb877f8f4 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 58af3d3d54e87bfc1f936e16c04ade3369d34011 ]
Syzbot reported memory leak in tty_init_dev(). The problem was in unputted tty in ldisc_open()
static int ldisc_open(struct tty_struct *tty) { ... ser->tty = tty_kref_get(tty); ... result = register_netdevice(dev); if (result) { rtnl_unlock(); free_netdev(dev); return -ENODEV; } ... }
Ser pointer is netdev private_data, so after free_netdev() this pointer goes away with unputted tty reference. So, fix it by adding tty_kref_put() before freeing netdev.
Reported-and-tested-by: syzbot+f303e045423e617d2cad@syzkaller.appspotmail.com Signed-off-by: Pavel Skripkin paskripkin@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/net/caif/caif_serial.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index d025ea434933..39fbd0be179c 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -351,6 +351,7 @@ static int ldisc_open(struct tty_struct *tty) rtnl_lock(); result = register_netdevice(dev); if (result) { + tty_kref_put(tty); rtnl_unlock(); free_netdev(dev); return -ENODEV;
From: Eric Dumazet edumazet@google.com
stable inclusion from stable-5.10.47 commit da8b3aeff4ad7f8f777dd4b4d1929150849c9f46 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit c7d2ef5dd4b03ed0ee1d13bc0c55f9cf62d49bd6 ]
tpacket_snd(), packet_snd(), packet_getname() and packet_seq_show() can read po->num without holding a lock. This means other threads can change po->num at the same time.
KCSAN complained about this known fact [1] Add READ_ONCE()/WRITE_ONCE() to address the issue.
[1] BUG: KCSAN: data-race in packet_do_bind / packet_sendmsg
write to 0xffff888131a0dcc0 of 2 bytes by task 24714 on cpu 0: packet_do_bind+0x3ab/0x7e0 net/packet/af_packet.c:3181 packet_bind+0xc3/0xd0 net/packet/af_packet.c:3255 __sys_bind+0x200/0x290 net/socket.c:1637 __do_sys_bind net/socket.c:1648 [inline] __se_sys_bind net/socket.c:1646 [inline] __x64_sys_bind+0x3d/0x50 net/socket.c:1646 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae
read to 0xffff888131a0dcc0 of 2 bytes by task 24719 on cpu 1: packet_snd net/packet/af_packet.c:2899 [inline] packet_sendmsg+0x317/0x3570 net/packet/af_packet.c:3040 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg net/socket.c:674 [inline] ____sys_sendmsg+0x360/0x4d0 net/socket.c:2350 ___sys_sendmsg net/socket.c:2404 [inline] __sys_sendmsg+0x1ed/0x270 net/socket.c:2433 __do_sys_sendmsg net/socket.c:2442 [inline] __se_sys_sendmsg net/socket.c:2440 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2440 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae
value changed: 0x0000 -> 0x1200
Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 24719 Comm: syz-executor.5 Not tainted 5.13.0-rc4-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/packet/af_packet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7892d7074847..c4eb26f0f1a7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2682,7 +2682,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); - proto = po->num; + proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -2895,7 +2895,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); - proto = po->num; + proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@ -3170,7 +3170,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, /* prevents packet_notifier() from calling * register_prot_hook() */ - po->num = 0; + WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, true); rcu_read_lock(); dev_curr = po->prot_hook.dev; @@ -3180,7 +3180,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, }
BUG_ON(po->running); - po->num = proto; + WRITE_ONCE(po->num, proto); po->prot_hook.type = proto;
if (unlikely(unlisted)) { @@ -3525,7 +3525,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
sll->sll_family = AF_PACKET; sll->sll_ifindex = po->ifindex; - sll->sll_protocol = po->num; + sll->sll_protocol = READ_ONCE(po->num); sll->sll_pkttype = 0; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); @@ -4413,7 +4413,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, was_running = po->running; num = po->num; if (was_running) { - po->num = 0; + WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, false); } spin_unlock(&po->bind_lock); @@ -4448,7 +4448,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
spin_lock(&po->bind_lock); if (was_running) { - po->num = num; + WRITE_ONCE(po->num, num); register_prot_hook(sk); } spin_unlock(&po->bind_lock); @@ -4616,7 +4616,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) s, refcount_read(&s->sk_refcnt), s->sk_type, - ntohs(po->num), + ntohs(READ_ONCE(po->num)), po->ifindex, po->running, atomic_read(&s->sk_rmem_alloc),
From: Eric Dumazet edumazet@google.com
stable inclusion from stable-5.10.47 commit 196b22ef6cd1cdd87a85811d27ec8936fd71f346 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit e032f7c9c7cefffcfb79b9fc16c53011d2d9d11f ]
Like prior patch, we need to annotate lockless accesses to po->ifindex For instance, packet_getname() is reading po->ifindex (twice) while another thread is able to change po->ifindex.
KCSAN reported:
BUG: KCSAN: data-race in packet_do_bind / packet_getname
write to 0xffff888143ce3cbc of 4 bytes by task 25573 on cpu 1: packet_do_bind+0x420/0x7e0 net/packet/af_packet.c:3191 packet_bind+0xc3/0xd0 net/packet/af_packet.c:3255 __sys_bind+0x200/0x290 net/socket.c:1637 __do_sys_bind net/socket.c:1648 [inline] __se_sys_bind net/socket.c:1646 [inline] __x64_sys_bind+0x3d/0x50 net/socket.c:1646 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae
read to 0xffff888143ce3cbc of 4 bytes by task 25578 on cpu 0: packet_getname+0x5b/0x1a0 net/packet/af_packet.c:3525 __sys_getsockname+0x10e/0x1a0 net/socket.c:1887 __do_sys_getsockname net/socket.c:1902 [inline] __se_sys_getsockname net/socket.c:1899 [inline] __x64_sys_getsockname+0x3e/0x50 net/socket.c:1899 do_syscall_64+0x4a/0x90 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae
value changed: 0x00000000 -> 0x00000001
Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 25578 Comm: syz-executor.5 Not tainted 5.13.0-rc6-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Signed-off-by: Eric Dumazet edumazet@google.com Reported-by: syzbot syzkaller@googlegroups.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/packet/af_packet.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c4eb26f0f1a7..08144559eed5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3186,11 +3186,11 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, if (unlikely(unlisted)) { dev_put(dev); po->prot_hook.dev = NULL; - po->ifindex = -1; + WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { po->prot_hook.dev = dev; - po->ifindex = dev ? dev->ifindex : 0; + WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); packet_cached_dev_assign(po, dev); } } @@ -3504,7 +3504,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, uaddr->sa_family = AF_PACKET; memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); + dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); @@ -3519,16 +3519,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); + int ifindex;
if (peer) return -EOPNOTSUPP;
+ ifindex = READ_ONCE(po->ifindex); sll->sll_family = AF_PACKET; - sll->sll_ifindex = po->ifindex; + sll->sll_ifindex = ifindex; sll->sll_protocol = READ_ONCE(po->num); sll->sll_pkttype = 0; rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@ -4107,7 +4109,7 @@ static int packet_notifier(struct notifier_block *this, } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); - po->ifindex = -1; + WRITE_ONCE(po->ifindex, -1); if (po->prot_hook.dev) dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@ -4617,7 +4619,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) refcount_read(&s->sk_refcnt), s->sk_type, ntohs(READ_ONCE(po->num)), - po->ifindex, + READ_ONCE(po->ifindex), po->running, atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
From: Kees Cook keescook@chromium.org
stable inclusion from stable-5.10.47 commit a10856ea6066872a01a59007fa108026ef58c24d bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 99718abdc00e86e4f286dd836408e2834886c16e ]
In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally reading across neighboring array fields.
The memcpy() is copying the entire structure, not just the first array. Adjust the source argument so the compiler can do appropriate bounds checking.
Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/net/usb/r8152.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c92bb345e987..bfc3b2d920ee 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -6054,7 +6054,7 @@ static void rtl8152_get_strings(struct net_device *dev, u32 stringset, u8 *data) { switch (stringset) { case ETH_SS_STATS: - memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings)); + memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings)); break; } }
From: Kees Cook keescook@chromium.org
stable inclusion from stable-5.10.47 commit 992b105abf57d49d4b695318dd50fae19cd36a5c bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 224004fbb033600715dbd626bceec10bfd9c58bc ]
In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally reading across neighboring array fields.
The memcpy() is copying the entire structure, not just the first array. Adjust the source argument so the compiler can do appropriate bounds checking.
Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/net/ethernet/renesas/sh_eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 6d84266c03ca..5cab2d3c0023 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2287,7 +2287,7 @@ static void sh_eth_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { switch (stringset) { case ETH_SS_STATS: - memcpy(data, *sh_eth_gstrings_stats, + memcpy(data, sh_eth_gstrings_stats, sizeof(sh_eth_gstrings_stats)); break; }
From: Kees Cook keescook@chromium.org
stable inclusion from stable-5.10.47 commit e83e3c5d85a70f9f6b52ede2aa9ed55b57879656 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit da5ac772cfe2a03058b0accfac03fad60c46c24d ]
In preparation for FORTIFY_SOURCE performing compile-time and run-time field bounds checking for memcpy(), memmove(), and memset(), avoid intentionally reading across neighboring array fields.
The memcpy() is copying the entire structure, not just the first array. Adjust the source argument so the compiler can do appropriate bounds checking.
Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/net/ethernet/realtek/r8169_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index e6e7e54f20a9..9e826788ccb4 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1673,7 +1673,7 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data) { switch(stringset) { case ETH_SS_STATS: - memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings)); + memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings)); break; } }
From: Fuad Tabba tabba@google.com
stable inclusion from stable-5.10.47 commit 4658a8d3079137d8d407892bdc95b390a419c261 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit d8ac05ea13d789d5491a5920d70a05659015441d ]
KVM_CHECK_EXTENSION ioctl can return any negative value on error, and not necessarily -1. Change the assertion to reflect that.
Signed-off-by: Fuad Tabba tabba@google.com Message-Id: 20210615150443.1183365-1-tabba@google.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- tools/testing/selftests/kvm/lib/kvm_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 126c6727a6b0..49805fd16fdf 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -55,7 +55,7 @@ int kvm_check_cap(long cap) exit(KSFT_SKIP);
ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); - TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" + TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n" " rc: %i errno: %i", ret, errno);
close(kvm_fd);
From: Kees Cook keescook@chromium.org
stable inclusion from stable-5.10.47 commit ca0e1fefbb534cc86296d9e5d116b0bf49084112 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 1c200f832e14420fa770193f9871f4ce2df00d07 ]
The source (&dcbx_info->operational.params) and dest (&p_hwfn->p_dcbx_info->set.config.params) are both struct qed_dcbx_params (560 bytes), not struct qed_dcbx_admin_params (564 bytes), which is used as the memcpy() size.
However it seems that struct qed_dcbx_operational_params (dcbx_info->operational)'s layout matches struct qed_dcbx_admin_params (p_hwfn->p_dcbx_info->set.config)'s 4 byte difference (3 padding, 1 byte for "valid").
On the assumption that the size is wrong (rather than the source structure type), adjust the memcpy() size argument to be 4 bytes smaller and add a BUILD_BUG_ON() to validate any changes to the structure sizes.
Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 17d5b649eb36..e81dd34a3cac 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1266,9 +1266,11 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn, p_hwfn->p_dcbx_info->set.ver_num |= DCBX_CONFIG_VERSION_STATIC;
p_hwfn->p_dcbx_info->set.enabled = dcbx_info->operational.enabled; + BUILD_BUG_ON(sizeof(dcbx_info->operational.params) != + sizeof(p_hwfn->p_dcbx_info->set.config.params)); memcpy(&p_hwfn->p_dcbx_info->set.config.params, &dcbx_info->operational.params, - sizeof(struct qed_dcbx_admin_params)); + sizeof(p_hwfn->p_dcbx_info->set.config.params)); p_hwfn->p_dcbx_info->set.config.valid = true;
memcpy(params, &p_hwfn->p_dcbx_info->set, sizeof(struct qed_dcbx_set));
From: Johannes Berg johannes.berg@intel.com
stable inclusion from stable-5.10.47 commit 676a7cb1a96bfa8e8cb38a9fcdd69f3d91a0abda bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit bbc6f03ff26e7b71d6135a7b78ce40e7dee3d86a ]
Apparently we never clear these values, so they'll remain set since the setting of them is conditional. Clear the values in the relevant other cases.
Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Luca Coelho luciano.coelho@intel.com Link: https://lore.kernel.org/r/iwlwifi.20210618133832.316e32d136a9.I2a12e51814258... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/mac80211/mlme.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6d3220c66931..fbe26e912300 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4019,10 +4019,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (elems.mbssid_config_ie) bss_conf->profile_periodicity = elems.mbssid_config_ie->profile_periodicity; + else + bss_conf->profile_periodicity = 0;
if (elems.ext_capab_len >= 11 && (elems.ext_capab[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) bss_conf->ema_ap = true; + else + bss_conf->ema_ap = false;
/* continue assoc process */ ifmgd->assoc_data->timeout = jiffies; @@ -5749,12 +5753,16 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, beacon_ies->data, beacon_ies->len); if (elem && elem->datalen >= 3) sdata->vif.bss_conf.profile_periodicity = elem->data[2]; + else + sdata->vif.bss_conf.profile_periodicity = 0;
elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, beacon_ies->data, beacon_ies->len); if (elem && elem->datalen >= 11 && (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT)) sdata->vif.bss_conf.ema_ap = true; + else + sdata->vif.bss_conf.ema_ap = false; } else { assoc_data->timeout = jiffies; assoc_data->timeout_started = true;
From: Johannes Berg johannes.berg@intel.com
stable inclusion from stable-5.10.47 commit fb71d81ccd69c3d607c44a8562f7abf606ae74e8 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 652e8363bbc7d149fa194a5cbf30b1001c0274b0 ]
Various elements are parsed with a requirement to have an exact size, when really we should only check that they have the minimum size that we need. Check only that and therefore ignore any additional data that they might carry.
Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Luca Coelho luciano.coelho@intel.com Link: https://lore.kernel.org/r/iwlwifi.20210618133832.cd101f8040a4.Iadf0e9b37b100... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/mac80211/util.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d8f9fb0646a4..fbf56a203c0e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -954,7 +954,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
switch (elem->data[0]) { case WLAN_EID_EXT_HE_MU_EDCA: - if (len == sizeof(*elems->mu_edca_param_set)) { + if (len >= sizeof(*elems->mu_edca_param_set)) { elems->mu_edca_param_set = data; if (crc) *crc = crc32_be(*crc, (void *)elem, @@ -975,7 +975,7 @@ static void ieee80211_parse_extension_element(u32 *crc, } break; case WLAN_EID_EXT_UORA: - if (len == 1) + if (len >= 1) elems->uora_element = data; break; case WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME: @@ -983,7 +983,7 @@ static void ieee80211_parse_extension_element(u32 *crc, elems->max_channel_switch_time = data; break; case WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION: - if (len == sizeof(*elems->mbssid_config_ie)) + if (len >= sizeof(*elems->mbssid_config_ie)) elems->mbssid_config_ie = data; break; case WLAN_EID_EXT_HE_SPR: @@ -992,7 +992,7 @@ static void ieee80211_parse_extension_element(u32 *crc, elems->he_spr = data; break; case WLAN_EID_EXT_HE_6GHZ_CAPA: - if (len == sizeof(*elems->he_6ghz_capa)) + if (len >= sizeof(*elems->he_6ghz_capa)) elems->he_6ghz_capa = data; break; } @@ -1081,14 +1081,14 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
switch (id) { case WLAN_EID_LINK_ID: - if (elen + 2 != sizeof(struct ieee80211_tdls_lnkie)) { + if (elen + 2 < sizeof(struct ieee80211_tdls_lnkie)) { elem_parse_failed = true; break; } elems->lnk_id = (void *)(pos - 2); break; case WLAN_EID_CHAN_SWITCH_TIMING: - if (elen != sizeof(struct ieee80211_ch_switch_timing)) { + if (elen < sizeof(struct ieee80211_ch_switch_timing)) { elem_parse_failed = true; break; } @@ -1251,7 +1251,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->sec_chan_offs = (void *)pos; break; case WLAN_EID_CHAN_SWITCH_PARAM: - if (elen != + if (elen < sizeof(*elems->mesh_chansw_params_ie)) { elem_parse_failed = true; break; @@ -1260,7 +1260,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, break; case WLAN_EID_WIDE_BW_CHANNEL_SWITCH: if (!action || - elen != sizeof(*elems->wide_bw_chansw_ie)) { + elen < sizeof(*elems->wide_bw_chansw_ie)) { elem_parse_failed = true; break; } @@ -1279,7 +1279,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, ie = cfg80211_find_ie(WLAN_EID_WIDE_BW_CHANNEL_SWITCH, pos, elen); if (ie) { - if (ie[1] == sizeof(*elems->wide_bw_chansw_ie)) + if (ie[1] >= sizeof(*elems->wide_bw_chansw_ie)) elems->wide_bw_chansw_ie = (void *)(ie + 2); else @@ -1323,7 +1323,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->cisco_dtpc_elem = pos; break; case WLAN_EID_ADDBA_EXT: - if (elen != sizeof(struct ieee80211_addba_ext_ie)) { + if (elen < sizeof(struct ieee80211_addba_ext_ie)) { elem_parse_failed = true; break; } @@ -1349,7 +1349,7 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elem, elems); break; case WLAN_EID_S1G_CAPABILITIES: - if (elen == sizeof(*elems->s1g_capab)) + if (elen >= sizeof(*elems->s1g_capab)) elems->s1g_capab = (void *)pos; else elem_parse_failed = true;
From: Peter Zijlstra peterz@infradead.org
stable inclusion from stable-5.10.47 commit d91c50e6a67800bee69f681ee78c3b767e9a0c2e bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit fb780761e7bd9f2e94f5b9a296ead6b35b944206 ]
One should only use st_shndx when >SHN_UNDEF and <SHN_LORESERVE. When SHN_XINDEX, then use .symtab_shndx. Otherwise use 0.
This handles the case: st_shndx >= SHN_LORESERVE && st_shndx != SHN_XINDEX.
Link: https://lore.kernel.org/lkml/20210607023839.26387-1-mark-pk.tsai@mediatek.co... Link: https://lkml.kernel.org/r/20210616154126.2794-1-mark-pk.tsai@mediatek.com
Reported-by: Mark-PK Tsai mark-pk.tsai@mediatek.com Tested-by: Mark-PK Tsai mark-pk.tsai@mediatek.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org [handle endianness of sym->st_shndx] Signed-off-by: Mark-PK Tsai mark-pk.tsai@mediatek.com Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Sasha Levin sashal@kernel.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- scripts/recordmcount.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index f9b19524da11..1e9baa5c4fc6 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -192,15 +192,20 @@ static unsigned int get_symindex(Elf_Sym const *sym, Elf32_Word const *symtab, Elf32_Word const *symtab_shndx) { unsigned long offset; + unsigned short shndx = w2(sym->st_shndx); int index;
- if (sym->st_shndx != SHN_XINDEX) - return w2(sym->st_shndx); + if (shndx > SHN_UNDEF && shndx < SHN_LORESERVE) + return shndx;
- offset = (unsigned long)sym - (unsigned long)symtab; - index = offset / sizeof(*sym); + if (shndx == SHN_XINDEX) { + offset = (unsigned long)sym - (unsigned long)symtab; + index = offset / sizeof(*sym);
- return w(symtab_shndx[index]); + return w(symtab_shndx[index]); + } + + return 0; }
static unsigned int get_shnum(Elf_Ehdr const *ehdr, Elf_Shdr const *shdr0)
From: Mikel Rychliski mikel@mikelr.com
stable inclusion from stable-5.10.47 commit bafb6cdd4f7030621ed567d3de7f7e06da85eca5 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit cacf994a91d3a55c0c2f853d6429cd7b86113915 ]
Although the AMD RS690 chipset has 64-bit DMA support, BIOS implementations sometimes fail to configure the memory limit registers correctly.
The Acer F690GVM mainboard uses this chipset and a Marvell 88E8056 NIC. The sky2 driver programs the NIC to use 64-bit DMA, which will not work:
sky2 0000:02:00.0: error interrupt status=0x8 sky2 0000:02:00.0 eth0: tx timeout sky2 0000:02:00.0 eth0: transmit ring 0 .. 22 report=0 done=0
Other drivers required by this mainboard either don't support 64-bit DMA, or have it disabled using driver specific quirks. For example, the ahci driver has quirks to enable or disable 64-bit DMA depending on the BIOS version (see ahci_sb600_enable_64bit() in ahci.c). This ahci quirk matches against the SB600 SATA controller, but the real issue is almost certainly with the RS690 PCI host that it was commonly attached to.
To avoid this issue in all drivers with 64-bit DMA support, fix the configuration of the PCI host. If the kernel is aware of physical memory above 4GB, but the BIOS never configured the PCI host with this information, update the registers with our values.
[bhelgaas: drop PCI_DEVICE_ID_ATI_RS690 definition] Link: https://lore.kernel.org/r/20210611214823.4898-1-mikel@mikelr.com Signed-off-by: Mikel Rychliski mikel@mikelr.com Signed-off-by: Bjorn Helgaas bhelgaas@google.com Signed-off-by: Sasha Levin sashal@kernel.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/x86/pci/fixup.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+)
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 0a0e168be1cb..9b0e771302ce 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar); DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
+#define RS690_LOWER_TOP_OF_DRAM2 0x30 +#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1 +#define RS690_UPPER_TOP_OF_DRAM2 0x31 +#define RS690_HTIU_NB_INDEX 0xA8 +#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100 +#define RS690_HTIU_NB_DATA 0xAC + +/* + * Some BIOS implementations support RAM above 4GB, but do not configure the + * PCI host to respond to bus master accesses for these addresses. These + * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA + * works as expected for addresses below 4GB. + * + * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57) + * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf + */ +static void rs690_fix_64bit_dma(struct pci_dev *pdev) +{ + u32 val = 0; + phys_addr_t top_of_dram = __pa(high_memory - 1) + 1; + + if (top_of_dram <= (1ULL << 32)) + return; + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_LOWER_TOP_OF_DRAM2); + pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val); + + if (val) + return; + + pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram); + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32); + + pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX, + RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE); + pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, + top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma); + #endif
From: Esben Haabendal esben@geanix.com
stable inclusion from stable-5.10.47 commit 1c9cf96f5652ca3120190a991a04bf74cf5897e0 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 28d9fab458b16bcd83f9dd07ede3d585c3e1a69e ]
Add a couple of memory-barriers to ensure correct ordering of read/write access to TX BDs.
In xmit_done, we should ensure that reading the additional BD fields are only done after STS_CTRL_APP0_CMPLT bit is set.
When xmit_done marks the BD as free by setting APP0=0, we need to ensure that the other BD fields are reset first, so we avoid racing with the xmit path, which writes to the same fields.
Finally, making sure to read APP0 of next BD after the current BD, ensures that we see all available buffers.
Signed-off-by: Esben Haabendal esben@geanix.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/net/ethernet/xilinx/ll_temac_main.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 9c394fb407d6..13df2226d79b 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -774,12 +774,15 @@ static void temac_start_xmit_done(struct net_device *ndev) stat = be32_to_cpu(cur_p->app0);
while (stat & STS_CTRL_APP0_CMPLT) { + /* Make sure that the other fields are read after bd is + * released by dma + */ + rmb(); dma_unmap_single(ndev->dev.parent, be32_to_cpu(cur_p->phys), be32_to_cpu(cur_p->len), DMA_TO_DEVICE); skb = (struct sk_buff *)ptr_from_txbd(cur_p); if (skb) dev_consume_skb_irq(skb); - cur_p->app0 = 0; cur_p->app1 = 0; cur_p->app2 = 0; cur_p->app3 = 0; @@ -788,6 +791,12 @@ static void temac_start_xmit_done(struct net_device *ndev) ndev->stats.tx_packets++; ndev->stats.tx_bytes += be32_to_cpu(cur_p->len);
+ /* app0 must be visible last, as it is used to flag + * availability of the bd + */ + smp_mb(); + cur_p->app0 = 0; + lp->tx_bd_ci++; if (lp->tx_bd_ci >= lp->tx_bd_num) lp->tx_bd_ci = 0; @@ -814,6 +823,9 @@ static inline int temac_check_tx_bd_space(struct temac_local *lp, int num_frag) if (cur_p->app0) return NETDEV_TX_BUSY;
+ /* Make sure to read next bd app0 after this one */ + rmb(); + tail++; if (tail >= lp->tx_bd_num) tail = 0;
From: Esben Haabendal esben@geanix.com
stable inclusion from stable-5.10.47 commit f9e73b2967f6082376183d2850d30d9497cdbd62 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit f6396341194234e9b01cd7538bc2c6ac4501ab14 ]
As documented in Documentation/networking/driver.rst, the ndo_start_xmit method must not return NETDEV_TX_BUSY under any normal circumstances, and as recommended, we simply stop the tx queue in advance, when there is a risk that the next xmit would cause a NETDEV_TX_BUSY return.
Signed-off-by: Esben Haabendal esben@geanix.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/net/ethernet/xilinx/ll_temac_main.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 13df2226d79b..333d4451e1aa 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -942,6 +942,11 @@ temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) wmb(); lp->dma_out(lp, TX_TAILDESC_PTR, tail_p); /* DMA start */
+ if (temac_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) { + netdev_info(ndev, "%s -> netif_stop_queue\n", __func__); + netif_stop_queue(ndev); + } + return NETDEV_TX_OK; }
From: Kan Liang kan.liang@linux.intel.com
stable inclusion from stable-5.10.47 commit df654cd3d3001917ba18c0e5445a9ab8e9240f1e bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 61e76d53c39bb768ad264d379837cfc56b9e35b4 ]
Some platforms, e.g. Alder Lake, have hybrid architecture. In the same package, there may be more than one type of CPU. The PMU capabilities are different among different types of CPU. Perf will register a dedicated PMU for each type of CPU.
Add a 'pmu' variable in the struct cpu_hw_events to track the dedicated PMU of the current CPU.
Current x86_get_pmu() use the global 'pmu', which will be broken on a hybrid platform. Modify it to apply the 'pmu' of the specific CPU.
Initialize the per-CPU 'pmu' variable with the global 'pmu'. There is nothing changed for the non-hybrid platforms.
The is_x86_event() will be updated in the later patch ("perf/x86: Register hybrid PMUs") for hybrid platforms. For the non-hybrid platforms, nothing is changed here.
Suggested-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Kan Liang kan.liang@linux.intel.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Link: https://lkml.kernel.org/r/1618237865-33448-4-git-send-email-kan.liang@linux.... Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/x86/events/core.c | 17 +++++++++++++---- arch/x86/events/intel/core.c | 2 +- arch/x86/events/intel/ds.c | 4 ++-- arch/x86/events/intel/lbr.c | 9 +++++---- arch/x86/events/perf_event.h | 4 +++- 5 files changed, 24 insertions(+), 12 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index b7f8ed87bfbc..e6db1a1f22d7 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -45,9 +45,11 @@ #include "perf_event.h"
struct x86_pmu x86_pmu __read_mostly; +static struct pmu pmu;
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, + .pmu = &pmu, };
DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key); @@ -712,16 +714,23 @@ void x86_pmu_enable_all(int added) } }
-static struct pmu pmu; - static inline int is_x86_event(struct perf_event *event) { return event->pmu == &pmu; }
-struct pmu *x86_get_pmu(void) +struct pmu *x86_get_pmu(unsigned int cpu) { - return &pmu; + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + /* + * All CPUs of the hybrid type have been offline. + * The x86_get_pmu() should not be invoked. + */ + if (WARN_ON_ONCE(!cpuc->pmu)) + return &pmu; + + return cpuc->pmu; } /* * Event scheduler state: diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index ee659b5faf71..3b8b8eede1a8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4747,7 +4747,7 @@ static void update_tfa_sched(void *ignored) * and if so force schedule out for all event types all contexts */ if (test_bit(3, cpuc->active_mask)) - perf_pmu_resched(x86_get_pmu()); + perf_pmu_resched(x86_get_pmu(smp_processor_id())); }
static ssize_t show_sysctl_tfa(struct device *cdev, diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 31a7a6566d07..945d470f62d0 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2076,7 +2076,7 @@ void __init intel_ds_init(void) PERF_SAMPLE_TIME; x86_pmu.flags |= PMU_FL_PEBS_ALL; pebs_qual = "-baseline"; - x86_get_pmu()->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_EXTENDED_REGS; } else { /* Only basic record supported */ x86_pmu.large_pebs_flags &= @@ -2091,7 +2091,7 @@ void __init intel_ds_init(void)
if (x86_pmu.intel_cap.pebs_output_pt_available) { pr_cont("PEBS-via-PT, "); - x86_get_pmu()->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + x86_get_pmu(smp_processor_id())->capabilities |= PERF_PMU_CAP_AUX_OUTPUT; }
break; diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 29ec4fe48507..9c1a013d5682 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -699,7 +699,7 @@ void intel_pmu_lbr_add(struct perf_event *event)
void release_lbr_buffers(void) { - struct kmem_cache *kmem_cache = x86_get_pmu()->task_ctx_cache; + struct kmem_cache *kmem_cache; struct cpu_hw_events *cpuc; int cpu;
@@ -708,6 +708,7 @@ void release_lbr_buffers(void)
for_each_possible_cpu(cpu) { cpuc = per_cpu_ptr(&cpu_hw_events, cpu); + kmem_cache = x86_get_pmu(cpu)->task_ctx_cache; if (kmem_cache && cpuc->lbr_xsave) { kmem_cache_free(kmem_cache, cpuc->lbr_xsave); cpuc->lbr_xsave = NULL; @@ -1624,7 +1625,7 @@ void intel_pmu_lbr_init_hsw(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
- x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0); + x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
if (lbr_from_signext_quirk_needed()) static_branch_enable(&lbr_from_quirk_key); @@ -1644,7 +1645,7 @@ __init void intel_pmu_lbr_init_skl(void) x86_pmu.lbr_sel_mask = LBR_SEL_MASK; x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
- x86_get_pmu()->task_ctx_cache = create_lbr_kmem_cache(size, 0); + x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
/* * SW branch filter usage: @@ -1741,7 +1742,7 @@ static bool is_arch_lbr_xsave_available(void)
void __init intel_pmu_arch_lbr_init(void) { - struct pmu *pmu = x86_get_pmu(); + struct pmu *pmu = x86_get_pmu(smp_processor_id()); union cpuid28_eax eax; union cpuid28_ebx ebx; union cpuid28_ecx ecx; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index d4f2ea2d9a9e..f07d77cffb3c 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -326,6 +326,8 @@ struct cpu_hw_events { int n_pair; /* Large increment events */
void *kfree_on_online[X86_PERF_KFREE_MAX]; + + struct pmu *pmu; };
#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \ @@ -897,7 +899,7 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \ .event_str_ht = ht, \ }
-struct pmu *x86_get_pmu(void); +struct pmu *x86_get_pmu(unsigned int cpu); extern struct x86_pmu x86_pmu __read_mostly;
static __always_inline struct x86_perf_task_context_opt *task_context_opt(void *ctx)
From: Fabien Dessenne fabien.dessenne@foss.st.com
stable inclusion from stable-5.10.47 commit 018d03fcf77a7e6a37503397f871095f7fd909ec bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 67e2996f72c71ebe4ac2fcbcf77e54479bb7aa11 ]
Each GPIO bank supports a variable number of lines which is usually 16, but is less in some cases : this is specified by the last argument of the "gpio-ranges" bank node property. Report to the framework, the actual number of lines, so the libgpiod gpioinfo command lists the actually existing GPIO lines.
Fixes: 1dc9d289154b ("pinctrl: stm32: add possibility to use gpio-ranges to declare bank range") Signed-off-by: Fabien Dessenne fabien.dessenne@foss.st.com Link: https://lore.kernel.org/r/20210617144629.2557693-1-fabien.dessenne@foss.st.c... Signed-off-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/pinctrl/stm32/pinctrl-stm32.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 7d9bdedcd71b..3af4430543dc 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1229,7 +1229,7 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, struct device *dev = pctl->dev; struct resource res; int npins = STM32_GPIO_PINS_PER_BANK; - int bank_nr, err; + int bank_nr, err, i = 0;
if (!IS_ERR(bank->rstc)) reset_control_deassert(bank->rstc); @@ -1251,9 +1251,14 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl,
of_property_read_string(np, "st,bank-name", &bank->gpio_chip.label);
- if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, 0, &args)) { + if (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, i, &args)) { bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK; bank->gpio_chip.base = args.args[1]; + + npins = args.args[2]; + while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, + ++i, &args)) + npins += args.args[2]; } else { bank_nr = pctl->nbanks; bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK;
From: Heiner Kallweit hkallweit1@gmail.com
stable inclusion from stable-5.10.47 commit 0221a5a4db46bcfd797edd92036028db2879c06c bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 065b6211a87746e196b56759a70c7851418dd741 ]
As explained in [0] currently we may leave SMBHSTSTS_INUSE_STS set, thus potentially breaking ACPI/BIOS usage of the SMBUS device.
Seems patch [0] needs a little bit more of review effort, therefore I'd suggest to apply a part of it as quick win. Just clearing SMBHSTSTS_INUSE_STS when leaving i801_access() should fix the referenced issue and leaves more time for discussing a more sophisticated locking handling.
[0] https://www.spinics.net/lists/linux-i2c/msg51558.html
Fixes: 01590f361e94 ("i2c: i801: Instantiate SPD EEPROMs automatically") Suggested-by: Hector Martin marcan@marcan.st Signed-off-by: Heiner Kallweit hkallweit1@gmail.com Reviewed-by: Hector Martin marcan@marcan.st Reviewed-by: Jean Delvare jdelvare@suse.de Tested-by: Jean Delvare jdelvare@suse.de Signed-off-by: Wolfram Sang wsa@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/i2c/busses/i2c-i801.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index e42b87e96f74..eab6fd6b890e 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -974,6 +974,9 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr, }
out: + /* Unlock the SMBus device for use by BIOS/ACPI */ + outb_p(SMBHSTSTS_INUSE_STS, SMBHSTSTS(priv)); + pm_runtime_mark_last_busy(&priv->pci_dev->dev); pm_runtime_put_autosuspend(&priv->pci_dev->dev); mutex_unlock(&priv->acpi_lock);
From: Gabriel Knezek gabeknez@linux.microsoft.com
stable inclusion from stable-5.10.47 commit b9e6c20d4c9d337742d5944aec6cee1e2c277d8c bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit cb8f63b8cbf39845244f3ccae43bb7e63bd70543 ]
When userspace requests a GPIO v1 line info changed event, lineinfo_watch_read() populates and returns the gpioline_info_changed structure. It contains 5 words of padding at the end which are not initialized before being returned to userspace.
Zero the structure in gpio_v2_line_info_change_to_v1() before populating its contents.
Fixes: aad955842d1c ("gpiolib: cdev: support GPIO_V2_GET_LINEINFO_IOCTL and GPIO_V2_GET_LINEINFO_WATCH_IOCTL") Signed-off-by: Gabriel Knezek gabeknez@linux.microsoft.com Reviewed-by: Kent Gibson warthog618@gmail.com Signed-off-by: Bartosz Golaszewski bgolaszewski@baylibre.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/gpio/gpiolib-cdev.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index ade3ecf2ee49..2613881a66e6 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -1865,6 +1865,7 @@ static void gpio_v2_line_info_changed_to_v1( struct gpio_v2_line_info_changed *lic_v2, struct gpioline_info_changed *lic_v1) { + memset(lic_v1, 0, sizeof(*lic_v1)); gpio_v2_line_info_to_v1(&lic_v2->info, &lic_v1->info); lic_v1->timestamp = lic_v2->timestamp_ns; lic_v1->event_type = lic_v2->event_type;
From: Pavel Skripkin paskripkin@gmail.com
stable inclusion from stable-5.10.47 commit ab5bef9780386be0e54f98a40eeedb8295cd450b bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 8fd0c1b0647a6bda4067ee0cd61e8395954b6f28 ]
My local syzbot instance hit memory leak in nilfs2. The problem was in missing kobject_put() in nilfs_sysfs_delete_device_group().
kobject_del() does not call kobject_cleanup() for passed kobject and it leads to leaking duped kobject name if kobject_put() was not called.
Fail log:
BUG: memory leak unreferenced object 0xffff8880596171e0 (size 8): comm "syz-executor379", pid 8381, jiffies 4294980258 (age 21.100s) hex dump (first 8 bytes): 6c 6f 6f 70 30 00 00 00 loop0... backtrace: kstrdup+0x36/0x70 mm/util.c:60 kstrdup_const+0x53/0x80 mm/util.c:83 kvasprintf_const+0x108/0x190 lib/kasprintf.c:48 kobject_set_name_vargs+0x56/0x150 lib/kobject.c:289 kobject_add_varg lib/kobject.c:384 [inline] kobject_init_and_add+0xc9/0x160 lib/kobject.c:473 nilfs_sysfs_create_device_group+0x150/0x800 fs/nilfs2/sysfs.c:999 init_nilfs+0xe26/0x12b0 fs/nilfs2/the_nilfs.c:637
Link: https://lkml.kernel.org/r/20210612140559.20022-1-paskripkin@gmail.com Fixes: da7141fb78db ("nilfs2: add /sys/fs/nilfs2/<device> group") Signed-off-by: Pavel Skripkin paskripkin@gmail.com Acked-by: Ryusuke Konishi konishi.ryusuke@gmail.com Cc: Michael L. Semon mlsemon35@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- fs/nilfs2/sysfs.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 303d71430bdd..9c6c0e2e5880 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -1053,6 +1053,7 @@ void nilfs_sysfs_delete_device_group(struct the_nilfs *nilfs) nilfs_sysfs_delete_superblock_group(nilfs); nilfs_sysfs_delete_segctor_group(nilfs); kobject_del(&nilfs->ns_dev_kobj); + kobject_put(&nilfs->ns_dev_kobj); kfree(nilfs->ns_dev_subgroups); }
From: Heiko Carstens hca@linux.ibm.com
stable inclusion from stable-5.10.47 commit 5fd0c2cf7b11c23aebbbc512cfed08485f0a422a bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 67147e96a332b56c7206238162771d82467f86c0 upstream.
The CALL_ON_STACK macro is used to call a C function from inline assembly, and therefore must consider the C ABI, which says that only registers 6-13, and 15 are non-volatile (restored by the called function).
The inline assembly incorrectly marks all registers used to pass parameters to the called function as read-only input operands, instead of operands that are read and written to. This might result in register corruption depending on usage, compiler, and compile options.
Fix this by marking all operands used to pass parameters as read/write operands. To keep the code simple even register 6, if used, is marked as read-write operand.
Fixes: ff340d2472ec ("s390: add stack switch helper") Cc: stable@kernel.org # 4.20 Reviewed-by: Vasily Gorbik gor@linux.ibm.com Signed-off-by: Heiko Carstens hca@linux.ibm.com Signed-off-by: Vasily Gorbik gor@linux.ibm.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/s390/include/asm/stacktrace.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index ee056f4a4fa3..ee582896b6a3 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -90,12 +90,16 @@ struct stack_frame { CALL_ARGS_4(arg1, arg2, arg3, arg4); \ register unsigned long r4 asm("6") = (unsigned long)(arg5)
-#define CALL_FMT_0 "=&d" (r2) : -#define CALL_FMT_1 "+&d" (r2) : -#define CALL_FMT_2 CALL_FMT_1 "d" (r3), -#define CALL_FMT_3 CALL_FMT_2 "d" (r4), -#define CALL_FMT_4 CALL_FMT_3 "d" (r5), -#define CALL_FMT_5 CALL_FMT_4 "d" (r6), +/* + * To keep this simple mark register 2-6 as being changed (volatile) + * by the called function, even though register 6 is saved/nonvolatile. + */ +#define CALL_FMT_0 "=&d" (r2) +#define CALL_FMT_1 "+&d" (r2) +#define CALL_FMT_2 CALL_FMT_1, "+&d" (r3) +#define CALL_FMT_3 CALL_FMT_2, "+&d" (r4) +#define CALL_FMT_4 CALL_FMT_3, "+&d" (r5) +#define CALL_FMT_5 CALL_FMT_4, "+&d" (r6)
#define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory" #define CALL_CLOBBER_4 CALL_CLOBBER_5 @@ -117,7 +121,7 @@ struct stack_frame { " brasl 14,%[_fn]\n" \ " la 15,0(%[_prev])\n" \ : [_prev] "=&a" (prev), CALL_FMT_##nr \ - [_stack] "R" (stack), \ + : [_stack] "R" (stack), \ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \ [_frame] "d" (frame), \ [_fn] "X" (fn) : CALL_CLOBBER_##nr); \
From: Nicholas Piggin npiggin@gmail.com
stable inclusion from stable-5.10.47 commit dd8ed6c9bc2224c1ace5292d01089d3feb7ebbc3 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit f8be156be163a052a067306417cd0ff679068c97 upstream.
It's possible to create a region which maps valid but non-refcounted pages (e.g., tail pages of non-compound higher order allocations). These host pages can then be returned by gfn_to_page, gfn_to_pfn, etc., family of APIs, which take a reference to the page, which takes it from 0 to 1. When the reference is dropped, this will free the page incorrectly.
Fix this by only taking a reference on valid pages if it was non-zero, which indicates it is participating in normal refcounting (and can be released with put_page).
This addresses CVE-2021-22543.
Signed-off-by: Nicholas Piggin npiggin@gmail.com Tested-by: Paolo Bonzini pbonzini@redhat.com Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- virt/kvm/kvm_main.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7924ec1ac491..f5545dfb5091 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1891,6 +1891,13 @@ static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault) return true; }
+static int kvm_try_get_pfn(kvm_pfn_t pfn) +{ + if (kvm_is_reserved_pfn(pfn)) + return 1; + return get_page_unless_zero(pfn_to_page(pfn)); +} + static int hva_to_pfn_remapped(struct vm_area_struct *vma, unsigned long addr, bool *async, bool write_fault, bool *writable, @@ -1940,13 +1947,21 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, * Whoever called remap_pfn_range is also going to call e.g. * unmap_mapping_range before the underlying pages are freed, * causing a call to our MMU notifier. + * + * Certain IO or PFNMAP mappings can be backed with valid + * struct pages, but be allocated without refcounting e.g., + * tail pages of non-compound higher order allocations, which + * would then underflow the refcount when the caller does the + * required put_page. Don't allow those pages here. */ - kvm_get_pfn(pfn); + if (!kvm_try_get_pfn(pfn)) + r = -EFAULT;
out: pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; - return 0; + + return r; }
/*
From: Johan Hovold johan@kernel.org
stable inclusion from stable-5.10.47 commit de0af2651daac89dac3eab73bcd1bfd653ccd36f bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 4ca070ef0dd885616ef294d269a9bf8e3b258e1a upstream.
The direction of the pipe argument must match the request-type direction bit or control requests may fail depending on the host-controller-driver implementation.
Control transfers without a data stage are treated as OUT requests by the USB stack and should be using usb_sndctrlpipe(). Failing to do so will now trigger a warning.
Fix the OSIFI2C_SET_BIT_RATE and OSIFI2C_STOP requests which erroneously used the osif_usb_read() helper and set the IN direction bit.
Reported-by: syzbot+9d7dadd15b8819d73f41@syzkaller.appspotmail.com Fixes: 83e53a8f120f ("i2c: Add bus driver for for OSIF USB i2c device.") Cc: stable@vger.kernel.org # 3.14 Signed-off-by: Johan Hovold johan@kernel.org Signed-off-by: Wolfram Sang wsa@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/i2c/busses/i2c-robotfuzz-osif.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/i2c/busses/i2c-robotfuzz-osif.c b/drivers/i2c/busses/i2c-robotfuzz-osif.c index a39f7d092797..66dfa211e736 100644 --- a/drivers/i2c/busses/i2c-robotfuzz-osif.c +++ b/drivers/i2c/busses/i2c-robotfuzz-osif.c @@ -83,7 +83,7 @@ static int osif_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, } }
- ret = osif_usb_read(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); + ret = osif_usb_write(adapter, OSIFI2C_STOP, 0, 0, NULL, 0); if (ret) { dev_err(&adapter->dev, "failure sending STOP\n"); return -EREMOTEIO; @@ -153,7 +153,7 @@ static int osif_probe(struct usb_interface *interface, * Set bus frequency. The frequency is: * 120,000,000 / ( 16 + 2 * div * 4^prescale). * Using dev = 52, prescale = 0 give 100KHz */ - ret = osif_usb_read(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, + ret = osif_usb_write(&priv->adapter, OSIFI2C_SET_BIT_RATE, 52, 0, NULL, 0); if (ret) { dev_err(&interface->dev, "failure sending bit rate");
From: Jeff Layton jlayton@kernel.org
stable inclusion from stable-5.10.47 commit 02c303f3b9fbc5ad9c1f50f971304ef86b69f29b bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 27171ae6a0fdc75571e5bf3d0961631a1e4fb765 upstream.
...and add a lockdep assertion for it to ceph_fill_inode().
Cc: stable@vger.kernel.org # v5.7+ Fixes: 9a8d03ca2e2c3 ("ceph: attempt to do async create when possible") Signed-off-by: Jeff Layton jlayton@kernel.org Reviewed-by: Ilya Dryomov idryomov@gmail.com Signed-off-by: Ilya Dryomov idryomov@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- fs/ceph/file.c | 3 +++ fs/ceph/inode.c | 2 ++ 2 files changed, 5 insertions(+)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 209535d5b8d3..3d2e3dd4ee01 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -578,6 +578,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, struct ceph_inode_info *ci = ceph_inode(dir); struct inode *inode; struct timespec64 now; + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP };
@@ -615,8 +616,10 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
ceph_file_layout_to_legacy(lo, &in.layout);
+ down_read(&mdsc->snap_rwsem); ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, req->r_fmode, NULL); + up_read(&mdsc->snap_rwsem); if (ret) { dout("%s failed to fill inode: %d\n", __func__, ret); ceph_dir_clear_complete(dir); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 346fcdfcd3e9..57cd78e942c0 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -762,6 +762,8 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page, bool new_version = false; bool fill_inline = false;
+ lockdep_assert_held(&mdsc->snap_rwsem); + dout("%s %p ino %llx.%llx v %llu had %llu\n", __func__, inode, ceph_vinop(inode), le64_to_cpu(info->version), ci->i_version);
From: Petr Mladek pmladek@suse.com
stable inclusion from stable-5.10.47 commit bfe28af78a2021cfa93f26634a9bbd3a56577745 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 34b3d5344719d14fd2185b2d9459b3abcb8cf9d8 upstream.
Patch series "kthread_worker: Fix race between kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync()".
This patchset fixes the race between kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync() including proper return value handling.
This patch (of 2):
Simple code refactoring as a preparation step for fixing a race between kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync().
It does not modify the existing behavior.
Link: https://lkml.kernel.org/r/20210610133051.15337-2-pmladek@suse.com Signed-off-by: Petr Mladek pmladek@suse.com Cc: jenhaochen@google.com Cc: Martin Liu liumartin@google.com Cc: Minchan Kim minchan@google.com Cc: Nathan Chancellor nathan@kernel.org Cc: Nick Desaulniers ndesaulniers@google.com Cc: Oleg Nesterov oleg@redhat.com Cc: Tejun Heo tj@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- kernel/kthread.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-)
diff --git a/kernel/kthread.c b/kernel/kthread.c index 5edf7e19ab26..415417b76bfb 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1043,6 +1043,33 @@ void kthread_flush_work(struct kthread_work *work) } EXPORT_SYMBOL_GPL(kthread_flush_work);
+/* + * Make sure that the timer is neither set nor running and could + * not manipulate the work list_head any longer. + * + * The function is called under worker->lock. The lock is temporary + * released but the timer can't be set again in the meantime. + */ +static void kthread_cancel_delayed_work_timer(struct kthread_work *work, + unsigned long *flags) +{ + struct kthread_delayed_work *dwork = + container_of(work, struct kthread_delayed_work, work); + struct kthread_worker *worker = work->worker; + + /* + * del_timer_sync() must be called to make sure that the timer + * callback is not running. The lock must be temporary released + * to avoid a deadlock with the callback. In the meantime, + * any queuing is blocked by setting the canceling counter. + */ + work->canceling++; + raw_spin_unlock_irqrestore(&worker->lock, *flags); + del_timer_sync(&dwork->timer); + raw_spin_lock_irqsave(&worker->lock, *flags); + work->canceling--; +} + /* * This function removes the work from the worker queue. Also it makes sure * that it won't get queued later via the delayed work's timer. @@ -1057,23 +1084,8 @@ static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork, unsigned long *flags) { /* Try to cancel the timer if exists. */ - if (is_dwork) { - struct kthread_delayed_work *dwork = - container_of(work, struct kthread_delayed_work, work); - struct kthread_worker *worker = work->worker; - - /* - * del_timer_sync() must be called to make sure that the timer - * callback is not running. The lock must be temporary released - * to avoid a deadlock with the callback. In the meantime, - * any queuing is blocked by setting the canceling counter. - */ - work->canceling++; - raw_spin_unlock_irqrestore(&worker->lock, *flags); - del_timer_sync(&dwork->timer); - raw_spin_lock_irqsave(&worker->lock, *flags); - work->canceling--; - } + if (is_dwork) + kthread_cancel_delayed_work_timer(work, flags);
/* * Try to remove the work from a worker list. It might either
From: Petr Mladek pmladek@suse.com
stable inclusion from stable-5.10.47 commit 2b35a4eaaaae26f3f22bfb1a8b7ca1e994bae063 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 5fa54346caf67b4b1b10b1f390316ae466da4d53 upstream.
The system might hang with the following backtrace:
schedule+0x80/0x100 schedule_timeout+0x48/0x138 wait_for_common+0xa4/0x134 wait_for_completion+0x1c/0x2c kthread_flush_work+0x114/0x1cc kthread_cancel_work_sync.llvm.16514401384283632983+0xe8/0x144 kthread_cancel_delayed_work_sync+0x18/0x2c xxxx_pm_notify+0xb0/0xd8 blocking_notifier_call_chain_robust+0x80/0x194 pm_notifier_call_chain_robust+0x28/0x4c suspend_prepare+0x40/0x260 enter_state+0x80/0x3f4 pm_suspend+0x60/0xdc state_store+0x108/0x144 kobj_attr_store+0x38/0x88 sysfs_kf_write+0x64/0xc0 kernfs_fop_write_iter+0x108/0x1d0 vfs_write+0x2f4/0x368 ksys_write+0x7c/0xec
It is caused by the following race between kthread_mod_delayed_work() and kthread_cancel_delayed_work_sync():
CPU0 CPU1
Context: Thread A Context: Thread B
kthread_mod_delayed_work() spin_lock() __kthread_cancel_work() spin_unlock() del_timer_sync() kthread_cancel_delayed_work_sync() spin_lock() __kthread_cancel_work() spin_unlock() del_timer_sync() spin_lock()
work->canceling++ spin_unlock spin_lock() queue_delayed_work() // dwork is put into the worker->delayed_work_list
spin_unlock()
kthread_flush_work() // flush_work is put at the tail of the dwork
wait_for_completion()
Context: IRQ
kthread_delayed_work_timer_fn() spin_lock() list_del_init(&work->node); spin_unlock()
BANG: flush_work is not longer linked and will never get proceed.
The problem is that kthread_mod_delayed_work() checks work->canceling flag before canceling the timer.
A simple solution is to (re)check work->canceling after __kthread_cancel_work(). But then it is not clear what should be returned when __kthread_cancel_work() removed the work from the queue (list) and it can't queue it again with the new @delay.
The return value might be used for reference counting. The caller has to know whether a new work has been queued or an existing one was replaced.
The proper solution is that kthread_mod_delayed_work() will remove the work from the queue (list) _only_ when work->canceling is not set. The flag must be checked after the timer is stopped and the remaining operations can be done under worker->lock.
Note that kthread_mod_delayed_work() could remove the timer and then bail out. It is fine. The other canceling caller needs to cancel the timer as well. The important thing is that the queue (list) manipulation is done atomically under worker->lock.
Link: https://lkml.kernel.org/r/20210610133051.15337-3-pmladek@suse.com Fixes: 9a6b06c8d9a220860468a ("kthread: allow to modify delayed kthread work") Signed-off-by: Petr Mladek pmladek@suse.com Reported-by: Martin Liu liumartin@google.com Cc: jenhaochen@google.com Cc: Minchan Kim minchan@google.com Cc: Nathan Chancellor nathan@kernel.org Cc: Nick Desaulniers ndesaulniers@google.com Cc: Oleg Nesterov oleg@redhat.com Cc: Tejun Heo tj@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- kernel/kthread.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-)
diff --git a/kernel/kthread.c b/kernel/kthread.c index 415417b76bfb..36be4364b313 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1071,8 +1071,11 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work, }
/* - * This function removes the work from the worker queue. Also it makes sure - * that it won't get queued later via the delayed work's timer. + * This function removes the work from the worker queue. + * + * It is called under worker->lock. The caller must make sure that + * the timer used by delayed work is not running, e.g. by calling + * kthread_cancel_delayed_work_timer(). * * The work might still be in use when this function finishes. See the * current_work proceed by the worker. @@ -1080,13 +1083,8 @@ static void kthread_cancel_delayed_work_timer(struct kthread_work *work, * Return: %true if @work was pending and successfully canceled, * %false if @work was not pending */ -static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork, - unsigned long *flags) +static bool __kthread_cancel_work(struct kthread_work *work) { - /* Try to cancel the timer if exists. */ - if (is_dwork) - kthread_cancel_delayed_work_timer(work, flags); - /* * Try to remove the work from a worker list. It might either * be from worker->work_list or from worker->delayed_work_list. @@ -1139,11 +1137,23 @@ bool kthread_mod_delayed_work(struct kthread_worker *worker, /* Work must not be used with >1 worker, see kthread_queue_work() */ WARN_ON_ONCE(work->worker != worker);
- /* Do not fight with another command that is canceling this work. */ + /* + * Temporary cancel the work but do not fight with another command + * that is canceling the work as well. + * + * It is a bit tricky because of possible races with another + * mod_delayed_work() and cancel_delayed_work() callers. + * + * The timer must be canceled first because worker->lock is released + * when doing so. But the work can be removed from the queue (list) + * only when it can be queued again so that the return value can + * be used for reference counting. + */ + kthread_cancel_delayed_work_timer(work, &flags); if (work->canceling) goto out; + ret = __kthread_cancel_work(work);
- ret = __kthread_cancel_work(work, true, &flags); fast_queue: __kthread_queue_delayed_work(worker, dwork, delay); out: @@ -1165,7 +1175,10 @@ static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork) /* Work must not be used with >1 worker, see kthread_queue_work(). */ WARN_ON_ONCE(work->worker != worker);
- ret = __kthread_cancel_work(work, is_dwork, &flags); + if (is_dwork) + kthread_cancel_delayed_work_timer(work, &flags); + + ret = __kthread_cancel_work(work);
if (worker->current_work != work) goto out_fast;
From: Thomas Gleixner tglx@linutronix.de
stable inclusion from stable-5.10.47 commit 51d8011782ed751b5982914f18776be9eb9be062 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 9301982c424a003c0095bf157154a85bf5322bd0 upstream.
sanitize_restored_user_xstate() preserves the supervisor states only when the fx_only argument is zero, which allows unprivileged user space to put supervisor states back into init state.
Preserve them unconditionally.
[ bp: Fix a typo or two in the text. ]
Fixes: 5d6b6a6f9b5c ("x86/fpu/xstate: Update sanitize_restored_xstate() for supervisor xstates") Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Borislav Petkov bp@suse.de Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210618143444.438635017@linutronix.de Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/x86/kernel/fpu/signal.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index ec3ae3054792..b7b92cdf3add 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -221,28 +221,18 @@ sanitize_restored_user_xstate(union fpregs_state *state,
if (use_xsave()) { /* - * Note: we don't need to zero the reserved bits in the - * xstate_header here because we either didn't copy them at all, - * or we checked earlier that they aren't set. + * Clear all feature bits which are not set in + * user_xfeatures and clear all extended features + * for fx_only mode. */ + u64 mask = fx_only ? XFEATURE_MASK_FPSSE : user_xfeatures;
/* - * 'user_xfeatures' might have bits clear which are - * set in header->xfeatures. This represents features that - * were in init state prior to a signal delivery, and need - * to be reset back to the init state. Clear any user - * feature bits which are set in the kernel buffer to get - * them back to the init state. - * - * Supervisor state is unchanged by input from userspace. - * Ensure supervisor state bits stay set and supervisor - * state is not modified. + * Supervisor state has to be preserved. The sigframe + * restore can only modify user features, i.e. @mask + * cannot contain them. */ - if (fx_only) - header->xfeatures = XFEATURE_MASK_FPSSE; - else - header->xfeatures &= user_xfeatures | - xfeatures_mask_supervisor(); + header->xfeatures &= mask | xfeatures_mask_supervisor(); }
if (use_fxsr()) {
From: Thomas Gleixner tglx@linutronix.de
stable inclusion from stable-5.10.47 commit 130a1d76ee0b206f96d885cefca5282fd2b7c44a bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit f9dfb5e390fab2df9f7944bb91e7705aba14cd26 upstream.
The XSAVE init code initializes all enabled and supported components with XRSTOR(S) to init state. Then it XSAVEs the state of the components back into init_fpstate which is used in several places to fill in the init state of components.
This works correctly with XSAVE, but not with XSAVEOPT and XSAVES because those use the init optimization and skip writing state of components which are in init state. So init_fpstate.xsave still contains all zeroes after this operation.
There are two ways to solve that:
1) Use XSAVE unconditionally, but that requires to reshuffle the buffer when XSAVES is enabled because XSAVES uses compacted format.
2) Save the components which are known to have a non-zero init state by other means.
Looking deeper, #2 is the right thing to do because all components the kernel supports have all-zeroes init state except the legacy features (FP, SSE). Those cannot be hard coded because the states are not identical on all CPUs, but they can be saved with FXSAVE which avoids all conditionals.
Use FXSAVE to save the legacy FP/SSE components in init_fpstate along with a BUILD_BUG_ON() which reminds developers to validate that a newly added component has all zeroes init state. As a bonus remove the now unused copy_xregs_to_kernel_booting() crutch.
The XSAVE and reshuffle method can still be implemented in the unlikely case that components are added which have a non-zero init state and no other means to save them. For now, FXSAVE is just simple and good enough.
[ bp: Fix a typo or two in the text. ]
Fixes: 6bad06b76892 ("x86, xsave: Use xsaveopt in context-switch path when supported") Signed-off-by: Thomas Gleixner tglx@linutronix.de Signed-off-by: Borislav Petkov bp@suse.de Reviewed-by: Borislav Petkov bp@suse.de Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20210618143444.587311343@linutronix.de Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/x86/include/asm/fpu/internal.h | 30 ++++++--------------- arch/x86/kernel/fpu/xstate.c | 41 ++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 25 deletions(-)
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index fdee23ea4e17..16bf4d4a8159 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -204,6 +204,14 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); }
+static inline void fxsave(struct fxregs_state *fx) +{ + if (IS_ENABLED(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (*fx)); + else + asm volatile("fxsaveq %[fx]" : [fx] "=m" (*fx)); +} + /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" #define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" @@ -268,28 +276,6 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory")
-/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) -{ - u64 mask = xfeatures_mask_all; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - XSTATE_OP(XSAVES, xstate, lmask, hmask, err); - else - XSTATE_OP(XSAVE, xstate, lmask, hmask, err); - - /* We should never fault when copying to a kernel buffer: */ - WARN_ON_FPU(err); -} - /* * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 67f1a03b9b23..80dcf0417f30 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -440,6 +440,25 @@ static void __init print_xstate_offset_size(void) } }
+/* + * All supported features have either init state all zeros or are + * handled in setup_init_fpu() individually. This is an explicit + * feature list and does not use XFEATURE_MASK*SUPPORTED to catch + * newly added supported features at build time and make people + * actually look at the init state for the new feature. + */ +#define XFEATURES_INIT_FPSTATE_HANDLED \ + (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR | \ + XFEATURE_MASK_PASID) + /* * setup the xstate image representing the init state */ @@ -447,6 +466,10 @@ static void __init setup_init_fpu_buf(void) { static int on_boot_cpu __initdata = 1;
+ BUILD_BUG_ON((XFEATURE_MASK_USER_SUPPORTED | + XFEATURE_MASK_SUPERVISOR_SUPPORTED) != + XFEATURES_INIT_FPSTATE_HANDLED); + WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0;
@@ -466,10 +489,22 @@ static void __init setup_init_fpu_buf(void) copy_kernel_to_xregs_booting(&init_fpstate.xsave);
/* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. + * All components are now in init state. Read the state back so + * that init_fpstate contains all non-zero init state. This only + * works with XSAVE, but not with XSAVEOPT and XSAVES because + * those use the init optimization which skips writing data for + * components in init state. + * + * XSAVE could be used, but that would require to reshuffle the + * data when XSAVES is available because XSAVES uses xstate + * compaction. But doing so is a pointless exercise because most + * components have an all zeros init state except for the legacy + * ones (FP and SSE). Those can be saved with FXSAVE into the + * legacy area. Adding new features requires to ensure that init + * state is all zeroes or if not to add the necessary handling + * here. */ - copy_xregs_to_kernel_booting(&init_fpstate.xsave); + fxsave(&init_fpstate.fxsave); }
static int xfeature_uncompacted_offset(int xfeature_nr)
From: Alex Shi alex.shi@linux.alibaba.com
stable inclusion from stable-5.10.47 commit a0ad7ea018e7390ceaa8ef3e01cde0301b9ff6bb bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit a4055888629bc0467d12d912cd7c90acdf3d9b12 part ]
Add VM_WARN_ON_ONCE_PAGE() macro.
Link: https://lkml.kernel.org/r/1604283436-18880-3-git-send-email-alex.shi@linux.a... Signed-off-by: Alex Shi alex.shi@linux.alibaba.com Acked-by: Michal Hocko mhocko@suse.com Acked-by: Hugh Dickins hughd@google.com Acked-by: Johannes Weiner hannes@cmpxchg.org Cc: Vladimir Davydov vdavydov.dev@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Note on stable backport: original commit was titled mm/memcg: warning on !memcg after readahead page charged which included uses of this macro in mm/memcontrol.c: here omitted.
Signed-off-by: Hugh Dickins hughd@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- include/linux/mmdebug.h | 13 +++++++++++++ 1 file changed, 13 insertions(+)
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 2ad72d2c8cc5..5d0767cb424a 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -37,6 +37,18 @@ void dump_mm(const struct mm_struct *mm); BUG(); \ } \ } while (0) +#define VM_WARN_ON_ONCE_PAGE(cond, page) ({ \ + static bool __section(".data.once") __warned; \ + int __ret_warn_once = !!(cond); \ + \ + if (unlikely(__ret_warn_once && !__warned)) { \ + dump_page(page, "VM_WARN_ON_ONCE_PAGE(" __stringify(cond)")");\ + __warned = true; \ + WARN_ON(1); \ + } \ + unlikely(__ret_warn_once); \ +}) + #define VM_WARN_ON(cond) (void)WARN_ON(cond) #define VM_WARN_ON_ONCE(cond) (void)WARN_ON_ONCE(cond) #define VM_WARN_ONCE(cond, format...) (void)WARN_ONCE(cond, format) @@ -48,6 +60,7 @@ void dump_mm(const struct mm_struct *mm); #define VM_BUG_ON_MM(cond, mm) VM_BUG_ON(cond) #define VM_WARN_ON(cond) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ON_ONCE(cond) BUILD_BUG_ON_INVALID(cond) +#define VM_WARN_ON_ONCE_PAGE(cond, page) BUILD_BUG_ON_INVALID(cond) #define VM_WARN_ONCE(cond, format...) BUILD_BUG_ON_INVALID(cond) #define VM_WARN(cond, format...) BUILD_BUG_ON_INVALID(cond) #endif
From: Miaohe Lin linmiaohe@huawei.com
stable inclusion from stable-5.10.47 commit ff81af8259bbfd3969f2c0dd1e36ccd0b404dc66 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit e0af87ff7afcde2660be44302836d2d5618185af ]
Remove extra semicolon without any functional change intended.
Link: https://lkml.kernel.org/r/20210127093425.39640-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin linmiaohe@huawei.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/rmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/rmap.c b/mm/rmap.c index 08c56aaf72eb..1928ca98d56d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1769,7 +1769,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) static int page_not_mapped(struct page *page) { return !page_mapped(page); -}; +}
/** * try_to_munlock - try to munlock a page
From: Miaohe Lin linmiaohe@huawei.com
stable inclusion from stable-5.10.47 commit bfd90b56d7f6cb225d6c31d2620cecc1a75d6142 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit b7e188ec98b1644ff70a6d3624ea16aadc39f5e0 ]
page_mapcount_is_zero() calculates accurately how many mappings a hugepage has in order to check against 0 only. This is a waste of cpu time. We can do this via page_not_mapped() to save some possible atomic_read cycles. Remove the function page_mapcount_is_zero() as it's not used anymore and move page_not_mapped() above try_to_unmap() to avoid identifier undeclared compilation error.
Link: https://lkml.kernel.org/r/20210130084904.35307-1-linmiaohe@huawei.com Signed-off-by: Miaohe Lin linmiaohe@huawei.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/rmap.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c index 1928ca98d56d..13df44196177 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1722,9 +1722,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg) return vma_is_temporary_stack(vma); }
-static int page_mapcount_is_zero(struct page *page) +static int page_not_mapped(struct page *page) { - return !total_mapcount(page); + return !page_mapped(page); }
/** @@ -1742,7 +1742,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) struct rmap_walk_control rwc = { .rmap_one = try_to_unmap_one, .arg = (void *)flags, - .done = page_mapcount_is_zero, + .done = page_not_mapped, .anon_lock = page_lock_anon_vma_read, };
@@ -1766,11 +1766,6 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) return !page_mapcount(page) ? true : false; }
-static int page_not_mapped(struct page *page) -{ - return !page_mapped(page); -} - /** * try_to_munlock - try to munlock a page * @page: the page to be munlocked
From: Xu Yu xuyu@linux.alibaba.com
stable inclusion from stable-5.10.47 commit 32f954e961caf658bc7b5c83e7d213169595e15d bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit ffc90cbb2970ab88b66ea51dd580469eede57b67 upstream.
We notice that hung task happens in a corner but practical scenario when CONFIG_PREEMPT_NONE is enabled, as follows.
Process 0 Process 1 Process 2..Inf split_huge_page_to_list unmap_page split_huge_pmd_address __migration_entry_wait(head) __migration_entry_wait(tail) remap_page (roll back) remove_migration_ptes rmap_walk_anon cond_resched
Where __migration_entry_wait(tail) is occurred in kernel space, e.g., copy_to_user in fstat, which will immediately fault again without rescheduling, and thus occupy the cpu fully.
When there are too many processes performing __migration_entry_wait on tail page, remap_page will never be done after cond_resched.
This makes __migration_entry_wait operate on the compound head page, thus waits for remap_page to complete, whether the THP is split successfully or roll back.
Note that put_and_wait_on_page_locked helps to drop the page reference acquired with get_page_unless_zero, as soon as the page is on the wait queue, before actually waiting. So splitting the THP is only prevented for a brief interval.
Link: https://lkml.kernel.org/r/b9836c1dd522e903891760af9f0c86a2cce987eb.162314400... Fixes: ba98828088ad ("thp: add option to setup migration entries during PMD split") Suggested-by: Hugh Dickins hughd@google.com Signed-off-by: Gang Deng gavin.dg@linux.alibaba.com Signed-off-by: Xu Yu xuyu@linux.alibaba.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Acked-by: Hugh Dickins hughd@google.com Cc: Matthew Wilcox willy@infradead.org Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/migrate.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/mm/migrate.c b/mm/migrate.c index 6b6d142780be..8a57a09ad665 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -326,6 +326,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, goto out;
page = migration_entry_to_page(entry); + page = compound_head(page);
/* * Once page cache replacement of page migration started, page_count
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit a8f4ea1d38ac6ed0ada7a876f39f29a24e3ff070 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 99fa8a48203d62b3743d866fc48ef6abaee682be ]
Patch series "mm/thp: fix THP splitting unmap BUGs and related", v10.
Here is v2 batch of long-standing THP bug fixes that I had not got around to sending before, but prompted now by Wang Yugui's report https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/
Wang Yugui has tested a rollup of these fixes applied to 5.10.39, and they have done no harm, but have *not* fixed that issue: something more is needed and I have no idea of what.
This patch (of 7):
Stressing huge tmpfs page migration racing hole punch often crashed on the VM_BUG_ON(!pmd_present) in pmdp_huge_clear_flush(), with DEBUG_VM=y kernel; or shortly afterwards, on a bad dereference in __split_huge_pmd_locked() when DEBUG_VM=n. They forgot to allow for pmd migration entries in the non-anonymous case.
Full disclosure: those particular experiments were on a kernel with more relaxed mmap_lock and i_mmap_rwsem locking, and were not repeated on the vanilla kernel: it is conceivable that stricter locking happens to avoid those cases, or makes them less likely; but __split_huge_pmd_locked() already allowed for pmd migration entries when handling anonymous THPs, so this commit brings the shmem and file THP handling into line.
And while there: use old_pmd rather than _pmd, as in the following blocks; and make it clearer to the eye that the !vma_is_anonymous() block is self-contained, making an early return after accounting for unmapping.
Link: https://lkml.kernel.org/r/af88612-1473-2eaa-903-8d1a448b26@google.com Link: https://lkml.kernel.org/r/dd221a99-efb3-cd1d-6256-7e646af29314@google.com Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp") Signed-off-by: Hugh Dickins hughd@google.com Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Yang Shi shy828301@gmail.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: "Matthew Wilcox (Oracle)" willy@infradead.org Cc: Naoya Horiguchi naoya.horiguchi@nec.com Cc: Alistair Popple apopple@nvidia.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Zi Yan ziy@nvidia.com Cc: Miaohe Lin linmiaohe@huawei.com Cc: Minchan Kim minchan@kernel.org Cc: Jue Wang juew@google.com Cc: Peter Xu peterx@redhat.com Cc: Jan Kara jack@suse.cz Cc: Shakeel Butt shakeelb@google.com Cc: Oscar Salvador osalvador@suse.de Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Note on stable backport: this commit made intervening cleanups in pmdp_huge_clear_flush() redundant: here it's rediffed to skip them.
Signed-off-by: Hugh Dickins hughd@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/huge_memory.c | 27 ++++++++++++++++++--------- mm/pgtable-generic.c | 4 ++-- 2 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 378ae22e5640..8ada0f897390 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2032,7 +2032,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, count_vm_event(THP_SPLIT_PMD);
if (!vma_is_anonymous(vma)) { - _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); + old_pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd); /* * We are going to unmap this huge page. So * just go ahead and zap it @@ -2041,16 +2041,25 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, zap_deposited_table(mm, pmd); if (vma_is_special_huge(vma)) return; - page = pmd_page(_pmd); - if (!PageDirty(page) && pmd_dirty(_pmd)) - set_page_dirty(page); - if (!PageReferenced(page) && pmd_young(_pmd)) - SetPageReferenced(page); - page_remove_rmap(page, true); - put_page(page); + if (unlikely(is_pmd_migration_entry(old_pmd))) { + swp_entry_t entry; + + entry = pmd_to_swp_entry(old_pmd); + page = migration_entry_to_page(entry); + } else { + page = pmd_page(old_pmd); + if (!PageDirty(page) && pmd_dirty(old_pmd)) + set_page_dirty(page); + if (!PageReferenced(page) && pmd_young(old_pmd)) + SetPageReferenced(page); + page_remove_rmap(page, true); + put_page(page); + } add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); return; - } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { + } + + if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { /* * FIXME: Do we want to invalidate secondary mmu by calling * mmu_notifier_invalidate_range() see comments below inside diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 9578db83e312..4e640baf9794 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -135,8 +135,8 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && - !pmd_devmap(*pmdp)) || !pmd_present(*pmdp)); + VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) && + !pmd_devmap(*pmdp)); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd;
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 6527d8ef68c3ca3c455e38ae2a37cd7810caec73 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 3b77e8c8cde581dadab9a0f1543a347e24315f11 upstream.
Most callers of is_huge_zero_pmd() supply a pmd already verified present; but a few (notably zap_huge_pmd()) do not - it might be a pmd migration entry, in which the pfn is encoded differently from a present pmd: which might pass the is_huge_zero_pmd() test (though not on x86, since L1TF forced us to protect against that); or perhaps even crash in pmd_page() applied to a swap-like entry.
Make it safe by adding pmd_present() check into is_huge_zero_pmd() itself; and make it quicker by saving huge_zero_pfn, so that is_huge_zero_pmd() will not need to do that pmd_page() lookup each time.
__split_huge_pmd_locked() checked pmd_trans_huge() before: that worked, but is unnecessary now that is_huge_zero_pmd() checks present.
Link: https://lkml.kernel.org/r/21ea9ca-a1f5-8b90-5e88-95fb1c49bbfa@google.com Fixes: e71769ae5260 ("mm: enable thp migration for shmem thp") Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Reviewed-by: Yang Shi shy828301@gmail.com Cc: Alistair Popple apopple@nvidia.com Cc: Jan Kara jack@suse.cz Cc: Jue Wang juew@google.com Cc: "Matthew Wilcox (Oracle)" willy@infradead.org Cc: Miaohe Lin linmiaohe@huawei.com Cc: Minchan Kim minchan@kernel.org Cc: Naoya Horiguchi naoya.horiguchi@nec.com Cc: Oscar Salvador osalvador@suse.de Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Shakeel Butt shakeelb@google.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- include/linux/huge_mm.h | 8 +++++++- mm/huge_memory.c | 5 ++++- 2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 0365aa97f8e7..ff55be011739 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -297,6 +297,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *huge_zero_page; +extern unsigned long huge_zero_pfn;
static inline bool is_huge_zero_page(struct page *page) { @@ -305,7 +306,7 @@ static inline bool is_huge_zero_page(struct page *page)
static inline bool is_huge_zero_pmd(pmd_t pmd) { - return is_huge_zero_page(pmd_page(pmd)); + return READ_ONCE(huge_zero_pfn) == pmd_pfn(pmd) && pmd_present(pmd); }
static inline bool is_huge_zero_pud(pud_t pud) @@ -451,6 +452,11 @@ static inline bool is_huge_zero_page(struct page *page) return false; }
+static inline bool is_huge_zero_pmd(pmd_t pmd) +{ + return false; +} + static inline bool is_huge_zero_pud(pud_t pud) { return false; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 8ada0f897390..9d5518730ac2 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -61,6 +61,7 @@ static struct shrinker deferred_split_shrinker;
static atomic_t huge_zero_refcount; struct page *huge_zero_page __read_mostly; +unsigned long huge_zero_pfn __read_mostly = ~0UL;
bool transparent_hugepage_enabled(struct vm_area_struct *vma) { @@ -97,6 +98,7 @@ static struct page *get_huge_zero_page(void) __free_pages(zero_page, compound_order(zero_page)); goto retry; } + WRITE_ONCE(huge_zero_pfn, page_to_pfn(zero_page));
/* We take additional reference here. It will be put back by shrinker */ atomic_set(&huge_zero_refcount, 2); @@ -146,6 +148,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink, if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) { struct page *zero_page = xchg(&huge_zero_page, NULL); BUG_ON(zero_page == NULL); + WRITE_ONCE(huge_zero_pfn, ~0UL); __free_pages(zero_page, compound_order(zero_page)); return HPAGE_PMD_NR; } @@ -2059,7 +2062,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, return; }
- if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { + if (is_huge_zero_pmd(*pmd)) { /* * FIXME: Do we want to invalidate secondary mmu by calling * mmu_notifier_invalidate_range() see comments below inside
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 66be14a9260913d5700b95fb327fc2d3300809b7 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 732ed55823fc3ad998d43b86bf771887bcc5ec67 upstream.
Stressing huge tmpfs often crashed on unmap_page()'s VM_BUG_ON_PAGE (!unmap_success): with dump_page() showing mapcount:1, but then its raw struct page output showing _mapcount ffffffff i.e. mapcount 0.
And even if that particular VM_BUG_ON_PAGE(!unmap_success) is removed, it is immediately followed by a VM_BUG_ON_PAGE(compound_mapcount(head)), and further down an IS_ENABLED(CONFIG_DEBUG_VM) total_mapcount BUG(): all indicative of some mapcount difficulty in development here perhaps. But the !CONFIG_DEBUG_VM path handles the failures correctly and silently.
I believe the problem is that once a racing unmap has cleared pte or pmd, try_to_unmap_one() may skip taking the page table lock, and emerge from try_to_unmap() before the racing task has reached decrementing mapcount.
Instead of abandoning the unsafe VM_BUG_ON_PAGE(), and the ones that follow, use PVMW_SYNC in try_to_unmap_one() in this case: adding TTU_SYNC to the options, and passing that from unmap_page().
When CONFIG_DEBUG_VM, or for non-debug too? Consensus is to do the same for both: the slight overhead added should rarely matter, except perhaps if splitting sparsely-populated multiply-mapped shmem. Once confident that bugs are fixed, TTU_SYNC here can be removed, and the race tolerated.
Link: https://lkml.kernel.org/r/c1e95853-8bcd-d8fd-55fa-e7f2488e78f@google.com Fixes: fec89c109f3a ("thp: rewrite freeze_page()/unfreeze_page() with generic rmap walkers") Signed-off-by: Hugh Dickins hughd@google.com Cc: Alistair Popple apopple@nvidia.com Cc: Jan Kara jack@suse.cz Cc: Jue Wang juew@google.com Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: "Matthew Wilcox (Oracle)" willy@infradead.org Cc: Miaohe Lin linmiaohe@huawei.com Cc: Minchan Kim minchan@kernel.org Cc: Naoya Horiguchi naoya.horiguchi@nec.com Cc: Oscar Salvador osalvador@suse.de Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Shakeel Butt shakeelb@google.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- include/linux/rmap.h | 1 + mm/huge_memory.c | 2 +- mm/page_vma_mapped.c | 11 +++++++++++ mm/rmap.c | 17 ++++++++++++++++- 4 files changed, 29 insertions(+), 2 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h index def5c62c93b3..8d04e7deedc6 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -91,6 +91,7 @@ enum ttu_flags {
TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ + TTU_SYNC = 0x10, /* avoid racy checks with PVMW_SYNC */ TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9d5518730ac2..a22745d135da 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2343,7 +2343,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
static void unmap_page(struct page *page) { - enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | + enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; bool unmap_success;
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 5e77b269c330..3ba2bedc5794 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -211,6 +211,17 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) pvmw->ptl = NULL; } } else if (!pmd_present(pmde)) { + /* + * If PVMW_SYNC, take and drop THP pmd lock so that we + * cannot return prematurely, while zap_huge_pmd() has + * cleared *pmd but not decremented compound_mapcount(). + */ + if ((pvmw->flags & PVMW_SYNC) && + PageTransCompound(pvmw->page)) { + spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); + + spin_unlock(ptl); + } return false; } if (!map_pte(pvmw)) diff --git a/mm/rmap.c b/mm/rmap.c index 13df44196177..94000a114ce3 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1391,6 +1391,15 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, struct mmu_notifier_range range; enum ttu_flags flags = (enum ttu_flags)(long)arg;
+ /* + * When racing against e.g. zap_pte_range() on another cpu, + * in between its ptep_get_and_clear_full() and page_remove_rmap(), + * try_to_unmap() may return false when it is about to become true, + * if page table locking is skipped: use TTU_SYNC to wait for that. + */ + if (flags & TTU_SYNC) + pvmw.flags = PVMW_SYNC; + /* munlock has nothing to gain from examining un-locked vmas */ if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED)) return true; @@ -1763,7 +1772,13 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags) else rmap_walk(page, &rwc);
- return !page_mapcount(page) ? true : false; + /* + * When racing against e.g. zap_pte_range() on another cpu, + * in between its ptep_get_and_clear_full() and page_remove_rmap(), + * try_to_unmap() may return false when it is about to become true, + * if page table locking is skipped: use TTU_SYNC to wait for that. + */ + return !page_mapcount(page); }
/**
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 37ffe9f4d7ff60f9f8919d4c702c948dcbb200f3 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 494334e43c16d63b878536a26505397fce6ff3a2 upstream.
Running certain tests with a DEBUG_VM kernel would crash within hours, on the total_mapcount BUG() in split_huge_page_to_list(), while trying to free up some memory by punching a hole in a shmem huge page: split's try_to_unmap() was unable to find all the mappings of the page (which, on a !DEBUG_VM kernel, would then keep the huge page pinned in memory).
When that BUG() was changed to a WARN(), it would later crash on the VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma) in mm/internal.h:vma_address(), used by rmap_walk_file() for try_to_unmap().
vma_address() is usually correct, but there's a wraparound case when the vm_start address is unusually low, but vm_pgoff not so low: vma_address() chooses max(start, vma->vm_start), but that decides on the wrong address, because start has become almost ULONG_MAX.
Rewrite vma_address() to be more careful about vm_pgoff; move the VM_BUG_ON_VMA() out of it, returning -EFAULT for errors, so that it can be safely used from page_mapped_in_vma() and page_address_in_vma() too.
Add vma_address_end() to apply similar care to end address calculation, in page_vma_mapped_walk() and page_mkclean_one() and try_to_unmap_one(); though it raises a question of whether callers would do better to supply pvmw->end to page_vma_mapped_walk() - I chose not, for a smaller patch.
An irritation is that their apparent generality breaks down on KSM pages, which cannot be located by the page->index that page_to_pgoff() uses: as commit 4b0ece6fa016 ("mm: migrate: fix remove_migration_pte() for ksm pages") once discovered. I dithered over the best thing to do about that, and have ended up with a VM_BUG_ON_PAGE(PageKsm) in both vma_address() and vma_address_end(); though the only place in danger of using it on them was try_to_unmap_one().
Sidenote: vma_address() and vma_address_end() now use compound_nr() on a head page, instead of thp_size(): to make the right calculation on a hugetlbfs page, whether or not THPs are configured. try_to_unmap() is used on hugetlbfs pages, but perhaps the wrong calculation never mattered.
Link: https://lkml.kernel.org/r/caf1c1a3-7cfb-7f8f-1beb-ba816e932825@google.com Fixes: a8fa41ad2f6f ("mm, rmap: check all VMAs that PTE-mapped THP can be part of") Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Alistair Popple apopple@nvidia.com Cc: Jan Kara jack@suse.cz Cc: Jue Wang juew@google.com Cc: "Matthew Wilcox (Oracle)" willy@infradead.org Cc: Miaohe Lin linmiaohe@huawei.com Cc: Minchan Kim minchan@kernel.org Cc: Naoya Horiguchi naoya.horiguchi@nec.com Cc: Oscar Salvador osalvador@suse.de Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Shakeel Butt shakeelb@google.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/internal.h | 53 ++++++++++++++++++++++++++++++++------------ mm/page_vma_mapped.c | 16 +++++-------- mm/rmap.c | 16 ++++++------- 3 files changed, 53 insertions(+), 32 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h index b289a10d8274..eb39a9b93db3 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -379,27 +379,52 @@ static inline void mlock_migrate_page(struct page *newpage, struct page *page) extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
/* - * At what user virtual address is page expected in @vma? + * At what user virtual address is page expected in vma? + * Returns -EFAULT if all of the page is outside the range of vma. + * If page is a compound head, the entire compound page is considered. */ static inline unsigned long -__vma_address(struct page *page, struct vm_area_struct *vma) +vma_address(struct page *page, struct vm_area_struct *vma) { - pgoff_t pgoff = page_to_pgoff(page); - return vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + pgoff_t pgoff; + unsigned long address; + + VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ + pgoff = page_to_pgoff(page); + if (pgoff >= vma->vm_pgoff) { + address = vma->vm_start + + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + /* Check for address beyond vma (or wrapped through 0?) */ + if (address < vma->vm_start || address >= vma->vm_end) + address = -EFAULT; + } else if (PageHead(page) && + pgoff + compound_nr(page) - 1 >= vma->vm_pgoff) { + /* Test above avoids possibility of wrap to 0 on 32-bit */ + address = vma->vm_start; + } else { + address = -EFAULT; + } + return address; }
+/* + * Then at what user virtual address will none of the page be found in vma? + * Assumes that vma_address() already returned a good starting address. + * If page is a compound head, the entire compound page is considered. + */ static inline unsigned long -vma_address(struct page *page, struct vm_area_struct *vma) +vma_address_end(struct page *page, struct vm_area_struct *vma) { - unsigned long start, end; - - start = __vma_address(page, vma); - end = start + thp_size(page) - PAGE_SIZE; - - /* page should be within @vma mapping range */ - VM_BUG_ON_VMA(end < vma->vm_start || start >= vma->vm_end, vma); - - return max(start, vma->vm_start); + pgoff_t pgoff; + unsigned long address; + + VM_BUG_ON_PAGE(PageKsm(page), page); /* KSM page->index unusable */ + pgoff = page_to_pgoff(page) + compound_nr(page); + address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); + /* Check for address beyond vma (or wrapped through 0?) */ + if (address < vma->vm_start || address > vma->vm_end) + address = vma->vm_end; + return address; }
static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 3ba2bedc5794..a540af346f88 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -227,18 +227,18 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (!map_pte(pvmw)) goto next_pte; while (1) { + unsigned long end; + if (check_pte(pvmw)) return true; next_pte: /* Seek to next pte only makes sense for THP */ if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) return not_found(pvmw); + end = vma_address_end(pvmw->page, pvmw->vma); do { pvmw->address += PAGE_SIZE; - if (pvmw->address >= pvmw->vma->vm_end || - pvmw->address >= - __vma_address(pvmw->page, pvmw->vma) + - thp_size(pvmw->page)) + if (pvmw->address >= end) return not_found(pvmw); /* Did we cross page table boundary? */ if (pvmw->address % PMD_SIZE == 0) { @@ -276,14 +276,10 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) .vma = vma, .flags = PVMW_SYNC, }; - unsigned long start, end; - - start = __vma_address(page, vma); - end = start + thp_size(page) - PAGE_SIZE;
- if (unlikely(end < vma->vm_start || start >= vma->vm_end)) + pvmw.address = vma_address(page, vma); + if (pvmw.address == -EFAULT) return 0; - pvmw.address = max(start, vma->vm_start); if (!page_vma_mapped_walk(&pvmw)) return 0; page_vma_mapped_walk_done(&pvmw); diff --git a/mm/rmap.c b/mm/rmap.c index 94000a114ce3..be14784281be 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -700,7 +700,6 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags) */ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { - unsigned long address; if (PageAnon(page)) { struct anon_vma *page__anon_vma = page_anon_vma(page); /* @@ -715,10 +714,8 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) return -EFAULT; } else return -EFAULT; - address = __vma_address(page, vma); - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) - return -EFAULT; - return address; + + return vma_address(page, vma); }
pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) @@ -912,7 +909,7 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, */ mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE, 0, vma, vma->vm_mm, address, - min(vma->vm_end, address + page_size(page))); + vma_address_end(page, vma)); mmu_notifier_invalidate_range_start(&range);
while (page_vma_mapped_walk(&pvmw)) { @@ -1421,9 +1418,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * Note that the page can not be free in this function as call of * try_to_unmap() must hold a reference on the page. */ + range.end = PageKsm(page) ? + address + PAGE_SIZE : vma_address_end(page, vma); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, - address, - min(vma->vm_end, address + page_size(page))); + address, range.end); if (PageHuge(page)) { /* * If sharing is possible, start and end will be adjusted @@ -1875,6 +1873,7 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma);
+ VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) @@ -1929,6 +1928,7 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, pgoff_start, pgoff_end) { unsigned long address = vma_address(page, vma);
+ VM_BUG_ON_VMA(address == -EFAULT, vma); cond_resched();
if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
From: Jue Wang juew@google.com
stable inclusion from stable-5.10.47 commit 38cda6b5ab83dcb3e2bdc6c1e6474488e089e4a6 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 31657170deaf1d8d2f6a1955fbc6fa9d228be036 upstream.
Anon THP tails were already supported, but memory-failure may need to use page_address_in_vma() on file THP tails, which its page->mapping check did not permit: fix it.
hughd adds: no current usage is known to hit the issue, but this does fix a subtle trap in a general helper: best fixed in stable sooner than later.
Link: https://lkml.kernel.org/r/a0d9b53-bf5d-8bab-ac5-759dc61819c1@google.com Fixes: 800d8c63b2e9 ("shmem: add huge pages support") Signed-off-by: Jue Wang juew@google.com Signed-off-by: Hugh Dickins hughd@google.com Reviewed-by: Matthew Wilcox (Oracle) willy@infradead.org Reviewed-by: Yang Shi shy828301@gmail.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Alistair Popple apopple@nvidia.com Cc: Jan Kara jack@suse.cz Cc: Miaohe Lin linmiaohe@huawei.com Cc: Minchan Kim minchan@kernel.org Cc: Naoya Horiguchi naoya.horiguchi@nec.com Cc: Oscar Salvador osalvador@suse.de Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Shakeel Butt shakeelb@google.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/rmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c index be14784281be..9b57d8cf6294 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -709,11 +709,11 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) if (!vma->anon_vma || !page__anon_vma || vma->anon_vma->root != page__anon_vma->root) return -EFAULT; - } else if (page->mapping) { - if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) - return -EFAULT; - } else + } else if (!vma->vm_file) { + return -EFAULT; + } else if (vma->vm_file->f_mapping != compound_head(page)->mapping) { return -EFAULT; + }
return vma_address(page, vma); }
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 0010275ca243e6260893207d41843bb8dc3846e4 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 22061a1ffabdb9c3385de159c5db7aac3a4df1cc ]
There is a race between THP unmapping and truncation, when truncate sees pmd_none() and skips the entry, after munmap's zap_huge_pmd() cleared it, but before its page_remove_rmap() gets to decrement compound_mapcount: generating false "BUG: Bad page cache" reports that the page is still mapped when deleted. This commit fixes that, but not in the way I hoped.
The first attempt used try_to_unmap(page, TTU_SYNC|TTU_IGNORE_MLOCK) instead of unmap_mapping_range() in truncate_cleanup_page(): it has often been an annoyance that we usually call unmap_mapping_range() with no pages locked, but there apply it to a single locked page. try_to_unmap() looks more suitable for a single locked page.
However, try_to_unmap_one() contains a VM_BUG_ON_PAGE(!pvmw.pte,page): it is used to insert THP migration entries, but not used to unmap THPs. Copy zap_huge_pmd() and add THP handling now? Perhaps, but their TLB needs are different, I'm too ignorant of the DAX cases, and couldn't decide how far to go for anon+swap. Set that aside.
The second attempt took a different tack: make no change in truncate.c, but modify zap_huge_pmd() to insert an invalidated huge pmd instead of clearing it initially, then pmd_clear() between page_remove_rmap() and unlocking at the end. Nice. But powerpc blows that approach out of the water, with its serialize_against_pte_lookup(), and interesting pgtable usage. It would need serious help to get working on powerpc (with a minor optimization issue on s390 too). Set that aside.
Just add an "if (page_mapped(page)) synchronize_rcu();" or other such delay, after unmapping in truncate_cleanup_page()? Perhaps, but though that's likely to reduce or eliminate the number of incidents, it would give less assurance of whether we had identified the problem correctly.
This successful iteration introduces "unmap_mapping_page(page)" instead of try_to_unmap(), and goes the usual unmap_mapping_range_tree() route, with an addition to details. Then zap_pmd_range() watches for this case, and does spin_unlock(pmd_lock) if so - just like page_vma_mapped_walk() now does in the PVMW_SYNC case. Not pretty, but safe.
Note that unmap_mapping_page() is doing a VM_BUG_ON(!PageLocked) to assert its interface; but currently that's only used to make sure that page->mapping is stable, and zap_pmd_range() doesn't care if the page is locked or not. Along these lines, in invalidate_inode_pages2_range() move the initial unmap_mapping_range() out from under page lock, before then calling unmap_mapping_page() under page lock if still mapped.
Link: https://lkml.kernel.org/r/a2a4a148-cdd8-942c-4ef8-51b77f643dbe@google.com Fixes: fc127da085c2 ("truncate: handle file thp") Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Reviewed-by: Yang Shi shy828301@gmail.com Cc: Alistair Popple apopple@nvidia.com Cc: Jan Kara jack@suse.cz Cc: Jue Wang juew@google.com Cc: "Matthew Wilcox (Oracle)" willy@infradead.org Cc: Miaohe Lin linmiaohe@huawei.com Cc: Minchan Kim minchan@kernel.org Cc: Naoya Horiguchi naoya.horiguchi@nec.com Cc: Oscar Salvador osalvador@suse.de Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Shakeel Butt shakeelb@google.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Note on stable backport: fixed up call to truncate_cleanup_page() in truncate_inode_pages_range().
Signed-off-by: Hugh Dickins hughd@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com
Conflict: mm/truncate.c [Backport from mainline 22061a1ffabdb9c3385de159c5db7aac3a4df1cc] Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/mm.h | 3 +++ mm/memory.c | 41 +++++++++++++++++++++++++++++++++++++++++ mm/truncate.c | 43 +++++++++++++++++++------------------------ 3 files changed, 63 insertions(+), 24 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index eba4edf054ae..fb9d6c25d311 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1655,6 +1655,7 @@ struct zap_details { struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ + struct page *single_page; /* Locked page to be unmapped */ };
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, @@ -1702,6 +1703,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma, extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); +void unmap_mapping_page(struct page *page); void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows); void unmap_mapping_range(struct address_space *mapping, @@ -1722,6 +1724,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address, BUG(); return -EFAULT; } +static inline void unmap_mapping_page(struct page *page) { } static inline void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { } static inline void unmap_mapping_range(struct address_space *mapping, diff --git a/mm/memory.c b/mm/memory.c index d15b9ccfc108..e3e68d309aef 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1355,7 +1355,18 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, else if (zap_huge_pmd(tlb, vma, pmd, addr)) goto next; /* fall through */ + } else if (details && details->single_page && + PageTransCompound(details->single_page) && + next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { + spinlock_t *ptl = pmd_lock(tlb->mm, pmd); + /* + * Take and drop THP pmd lock so that we cannot return + * prematurely, while zap_huge_pmd() has cleared *pmd, + * but not yet decremented compound_mapcount(). + */ + spin_unlock(ptl); } + /* * Here there can be other concurrent MADV_DONTNEED or * trans huge page faults running, and if the pmd is @@ -3217,6 +3228,36 @@ static inline void unmap_mapping_range_tree(struct rb_root_cached *root, } }
+/** + * unmap_mapping_page() - Unmap single page from processes. + * @page: The locked page to be unmapped. + * + * Unmap this page from any userspace process which still has it mmaped. + * Typically, for efficiency, the range of nearby pages has already been + * unmapped by unmap_mapping_pages() or unmap_mapping_range(). But once + * truncation or invalidation holds the lock on a page, it may find that + * the page has been remapped again: and then uses unmap_mapping_page() + * to unmap it finally. + */ +void unmap_mapping_page(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct zap_details details = { }; + + VM_BUG_ON(!PageLocked(page)); + VM_BUG_ON(PageTail(page)); + + details.check_mapping = mapping; + details.first_index = page->index; + details.last_index = page->index + thp_nr_pages(page) - 1; + details.single_page = page; + + i_mmap_lock_write(mapping); + if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) + unmap_mapping_range_tree(&mapping->i_mmap, &details); + i_mmap_unlock_write(mapping); +} + /** * unmap_mapping_pages() - Unmap pages from processes. * @mapping: The address space containing pages to be unmapped. diff --git a/mm/truncate.c b/mm/truncate.c index f53925f0e34f..98d08f197766 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -167,13 +167,10 @@ void do_invalidatepage(struct page *page, unsigned int offset, * its lock, b) when a concurrent invalidate_mapping_pages got there first and * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. */ -static void -truncate_cleanup_page(struct address_space *mapping, struct page *page) +static void truncate_cleanup_page(struct page *page) { - if (page_mapped(page)) { - unsigned int nr = thp_nr_pages(page); - unmap_mapping_pages(mapping, page->index, nr, false); - } + if (page_mapped(page)) + unmap_mapping_page(page);
if (page_has_private(page)) do_invalidatepage(page, 0, thp_size(page)); @@ -218,7 +215,7 @@ int truncate_inode_page(struct address_space *mapping, struct page *page) if (page->mapping != mapping) return -EIO;
- truncate_cleanup_page(mapping, page); + truncate_cleanup_page(page); delete_from_page_cache(page); return 0; } @@ -325,7 +322,7 @@ void truncate_inode_pages_range(struct address_space *mapping, index = indices[pagevec_count(&pvec) - 1] + 1; truncate_exceptional_pvec_entries(mapping, &pvec, indices); for (i = 0; i < pagevec_count(&pvec); i++) - truncate_cleanup_page(mapping, pvec.pages[i]); + truncate_cleanup_page(pvec.pages[i]); delete_from_page_cache_batch(mapping, &pvec); for (i = 0; i < pagevec_count(&pvec); i++) unlock_page(pvec.pages[i]); @@ -633,6 +630,16 @@ int invalidate_inode_pages2_range(struct address_space *mapping, continue; }
+ if (!did_range_unmap && page_mapped(page)) { + /* + * If page is mapped, before taking its lock, + * zap the rest of the file in one hit. + */ + unmap_mapping_pages(mapping, index, + (1 + end - index), false); + did_range_unmap = 1; + } + lock_page(page); WARN_ON(page_to_index(page) != index); if (page->mapping != mapping) { @@ -640,23 +647,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping, continue; } wait_on_page_writeback(page); - if (page_mapped(page)) { - if (!did_range_unmap) { - /* - * Zap the rest of the file in one hit. - */ - unmap_mapping_pages(mapping, index, - (1 + end - index), false); - did_range_unmap = 1; - } else { - /* - * Just zap this page - */ - unmap_mapping_pages(mapping, index, - 1, false); - } - } + + if (page_mapped(page)) + unmap_mapping_page(page); BUG_ON(page_mapped(page)); + ret2 = do_launder_page(mapping, page); if (ret2 == 0) { if (!invalidate_complete_page2(mapping, page))
From: Yang Shi shy828301@gmail.com
stable inclusion from stable-5.10.47 commit 825c28052b4d15bd0e05d9a06f23c5bd17459735 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 504e070dc08f757bccaed6d05c0f53ecbfac8a23 ]
When debugging the bug reported by Wang Yugui [1], try_to_unmap() may fail, but the first VM_BUG_ON_PAGE() just checks page_mapcount() however it may miss the failure when head page is unmapped but other subpage is mapped. Then the second DEBUG_VM BUG() that check total mapcount would catch it. This may incur some confusion.
As this is not a fatal issue, so consolidate the two DEBUG_VM checks into one VM_WARN_ON_ONCE_PAGE().
[1] https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/
Link: https://lkml.kernel.org/r/d0f0db68-98b8-ebfb-16dc-f29df24cf012@google.com Signed-off-by: Yang Shi shy828301@gmail.com Reviewed-by: Zi Yan ziy@nvidia.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Signed-off-by: Hugh Dickins hughd@google.com Cc: Alistair Popple apopple@nvidia.com Cc: Jan Kara jack@suse.cz Cc: Jue Wang juew@google.com Cc: "Matthew Wilcox (Oracle)" willy@infradead.org Cc: Miaohe Lin linmiaohe@huawei.com Cc: Minchan Kim minchan@kernel.org Cc: Naoya Horiguchi naoya.horiguchi@nec.com Cc: Oscar Salvador osalvador@suse.de Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Shakeel Butt shakeelb@google.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Note on stable backport: fixed up variables in split_huge_page_to_list().
Signed-off-by: Hugh Dickins hughd@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com
Conflict: mm/huge_memory.c [Backport from mainline 504e070dc08f757bccaed6d05c0f53ecbfac8a23]
Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- mm/huge_memory.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a22745d135da..4894343a5e85 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2345,15 +2345,15 @@ static void unmap_page(struct page *page) { enum ttu_flags ttu_flags = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_RMAP_LOCKED | TTU_SPLIT_HUGE_PMD; - bool unmap_success;
VM_BUG_ON_PAGE(!PageHead(page), page);
if (PageAnon(page)) ttu_flags |= TTU_SPLIT_FREEZE;
- unmap_success = try_to_unmap(page, ttu_flags); - VM_BUG_ON_PAGE(!unmap_success, page); + try_to_unmap(page, ttu_flags); + + VM_WARN_ON_ONCE_PAGE(page_mapped(page), page); }
static void remap_page(struct page *page, unsigned int nr) @@ -2664,7 +2664,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) struct deferred_split *ds_queue = get_deferred_split_queue(head); struct anon_vma *anon_vma = NULL; struct address_space *mapping = NULL; - int count, mapcount, extra_pins, ret; + int extra_pins, ret; pgoff_t end;
VM_BUG_ON_PAGE(is_huge_zero_page(head), head); @@ -2723,7 +2723,6 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) }
unmap_page(head); - VM_BUG_ON_PAGE(compound_mapcount(head), head);
/* block interrupt reentry in xa_lock and spinlock */ local_irq_disable(); @@ -2741,9 +2740,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
/* Prevent deferred_split_scan() touching ->_refcount */ spin_lock(&ds_queue->split_queue_lock); - count = page_count(head); - mapcount = total_mapcount(head); - if (!mapcount && page_ref_freeze(head, 1 + extra_pins)) { + if (page_ref_freeze(head, 1 + extra_pins)) { if (!list_empty(page_deferred_list(head))) { ds_queue->split_queue_len--; list_del(page_deferred_list(head)); @@ -2759,16 +2756,9 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) __split_huge_page(page, list, end); ret = 0; } else { - if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) { - pr_alert("total_mapcount: %u, page_count(): %u\n", - mapcount, count); - if (PageTail(page)) - dump_page(head, NULL); - dump_page(page, "total_mapcount(head) > 0"); - BUG(); - } spin_unlock(&ds_queue->split_queue_lock); -fail: if (mapping) +fail: + if (mapping) xa_unlock(&mapping->i_pages); local_irq_enable(); remap_page(head, thp_nr_pages(head));
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 65febb41b4d653edb4300c538fda80f383db7e41 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit f003c03bd29e6f46fef1b9a8e8d636ac732286d5 upstream.
Patch series "mm: page_vma_mapped_walk() cleanup and THP fixes".
I've marked all of these for stable: many are merely cleanups, but I think they are much better before the main fix than after.
This patch (of 11):
page_vma_mapped_walk() cleanup: sometimes the local copy of pvwm->page was used, sometimes pvmw->page itself: use the local copy "page" throughout.
Link: https://lkml.kernel.org/r/589b358c-febc-c88e-d4c2-7834b37fa7bf@google.com Link: https://lkml.kernel.org/r/88e67645-f467-c279-bf5e-af4b5c6b13eb@google.com Signed-off-by: Hugh Dickins hughd@google.com Reviewed-by: Alistair Popple apopple@nvidia.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Reviewed-by: Peter Xu peterx@redhat.com Cc: Yang Shi shy828301@gmail.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Matthew Wilcox willy@infradead.org Cc: Ralph Campbell rcampbell@nvidia.com Cc: Zi Yan ziy@nvidia.com Cc: Will Deacon will@kernel.org Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_vma_mapped.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index a540af346f88..3cd41168e802 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -155,7 +155,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (pvmw->pte) goto next_pte;
- if (unlikely(PageHuge(pvmw->page))) { + if (unlikely(PageHuge(page))) { /* when pud is not present, pte will be NULL */ pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page)); if (!pvmw->pte) @@ -216,8 +216,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) * cannot return prematurely, while zap_huge_pmd() has * cleared *pmd but not decremented compound_mapcount(). */ - if ((pvmw->flags & PVMW_SYNC) && - PageTransCompound(pvmw->page)) { + if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) { spinlock_t *ptl = pmd_lock(mm, pvmw->pmd);
spin_unlock(ptl); @@ -233,9 +232,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) return true; next_pte: /* Seek to next pte only makes sense for THP */ - if (!PageTransHuge(pvmw->page) || PageHuge(pvmw->page)) + if (!PageTransHuge(page) || PageHuge(page)) return not_found(pvmw); - end = vma_address_end(pvmw->page, pvmw->vma); + end = vma_address_end(page, pvmw->vma); do { pvmw->address += PAGE_SIZE; if (pvmw->address >= end)
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 1cb0b9059f9ef46e113fd90ed50accbc877cc013 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 6d0fd5987657cb0c9756ce684e3a74c0f6351728 upstream.
page_vma_mapped_walk() cleanup: get the hugetlbfs PageHuge case out of the way at the start, so no need to worry about it later.
Link: https://lkml.kernel.org/r/e31a483c-6d73-a6bb-26c5-43c3b880a2@google.com Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Reviewed-by: Peter Xu peterx@redhat.com Cc: Alistair Popple apopple@nvidia.com Cc: "Kirill A. Shutemov" kirill.shutemov@linux.intel.com Cc: Matthew Wilcox willy@infradead.org Cc: Ralph Campbell rcampbell@nvidia.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Will Deacon will@kernel.org Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_vma_mapped.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 3cd41168e802..4f04472effb7 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -152,10 +152,11 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (pvmw->pmd && !pvmw->pte) return not_found(pvmw);
- if (pvmw->pte) - goto next_pte; - if (unlikely(PageHuge(page))) { + /* The only possible mapping was handled on last iteration */ + if (pvmw->pte) + return not_found(pvmw); + /* when pud is not present, pte will be NULL */ pvmw->pte = huge_pte_offset(mm, pvmw->address, page_size(page)); if (!pvmw->pte) @@ -167,6 +168,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) return not_found(pvmw); return true; } + + if (pvmw->pte) + goto next_pte; restart: pgd = pgd_offset(mm, pvmw->address); if (!pgd_present(*pgd)) @@ -232,7 +236,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) return true; next_pte: /* Seek to next pte only makes sense for THP */ - if (!PageTransHuge(page) || PageHuge(page)) + if (!PageTransHuge(page)) return not_found(pvmw); end = vma_address_end(page, pvmw->vma); do {
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 7b55a4bcfccf839d5b65fbfba82f8edcf1bef423 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 3306d3119ceacc43ea8b141a73e21fea68eec30c upstream.
page_vma_mapped_walk() cleanup: re-evaluate pmde after taking lock, then use it in subsequent tests, instead of repeatedly dereferencing pointer.
Link: https://lkml.kernel.org/r/53fbc9d-891e-46b2-cb4b-468c3b19238e@google.com Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Reviewed-by: Peter Xu peterx@redhat.com Cc: Alistair Popple apopple@nvidia.com Cc: Matthew Wilcox willy@infradead.org Cc: Ralph Campbell rcampbell@nvidia.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Will Deacon will@kernel.org Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_vma_mapped.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 4f04472effb7..821147eca0e1 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -190,18 +190,19 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) pmde = READ_ONCE(*pvmw->pmd); if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); - if (likely(pmd_trans_huge(*pvmw->pmd))) { + pmde = *pvmw->pmd; + if (likely(pmd_trans_huge(pmde))) { if (pvmw->flags & PVMW_MIGRATION) return not_found(pvmw); - if (pmd_page(*pvmw->pmd) != page) + if (pmd_page(pmde) != page) return not_found(pvmw); return true; - } else if (!pmd_present(*pvmw->pmd)) { + } else if (!pmd_present(pmde)) { if (thp_migration_supported()) { if (!(pvmw->flags & PVMW_MIGRATION)) return not_found(pvmw); - if (is_migration_entry(pmd_to_swp_entry(*pvmw->pmd))) { - swp_entry_t entry = pmd_to_swp_entry(*pvmw->pmd); + if (is_migration_entry(pmd_to_swp_entry(pmde))) { + swp_entry_t entry = pmd_to_swp_entry(pmde);
if (migration_entry_to_page(entry) != page) return not_found(pvmw);
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 8dc191ed9c5f7af7f6cd1ebea3f281db353fe442 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit e2e1d4076c77b3671cf8ce702535ae7dee3acf89 upstream.
page_vma_mapped_walk() cleanup: rearrange the !pmd_present() block to follow the same "return not_found, return not_found, return true" pattern as the block above it (note: returning not_found there is never premature, since existence or prior existence of huge pmd guarantees good alignment).
Link: https://lkml.kernel.org/r/378c8650-1488-2edf-9647-32a53cf2e21@google.com Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Reviewed-by: Peter Xu peterx@redhat.com Cc: Alistair Popple apopple@nvidia.com Cc: Matthew Wilcox willy@infradead.org Cc: Ralph Campbell rcampbell@nvidia.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Will Deacon will@kernel.org Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_vma_mapped.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 821147eca0e1..a9cd4e4b8d95 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -197,24 +197,22 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (pmd_page(pmde) != page) return not_found(pvmw); return true; - } else if (!pmd_present(pmde)) { - if (thp_migration_supported()) { - if (!(pvmw->flags & PVMW_MIGRATION)) - return not_found(pvmw); - if (is_migration_entry(pmd_to_swp_entry(pmde))) { - swp_entry_t entry = pmd_to_swp_entry(pmde); + } + if (!pmd_present(pmde)) { + swp_entry_t entry;
- if (migration_entry_to_page(entry) != page) - return not_found(pvmw); - return true; - } - } - return not_found(pvmw); - } else { - /* THP pmd was split under us: handle on pte level */ - spin_unlock(pvmw->ptl); - pvmw->ptl = NULL; + if (!thp_migration_supported() || + !(pvmw->flags & PVMW_MIGRATION)) + return not_found(pvmw); + entry = pmd_to_swp_entry(pmde); + if (!is_migration_entry(entry) || + migration_entry_to_page(entry) != page) + return not_found(pvmw); + return true; } + /* THP pmd was split under us: handle on pte level */ + spin_unlock(pvmw->ptl); + pvmw->ptl = NULL; } else if (!pmd_present(pmde)) { /* * If PVMW_SYNC, take and drop THP pmd lock so that we
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit e56bdb397663ede28465891026757376018d2c3d bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 448282487483d6fa5b2eeeafaa0acc681e544a9c upstream.
page_vma_mapped_walk() cleanup: adjust the test for crossing page table boundary - I believe pvmw->address is always page-aligned, but nothing else here assumed that; and remember to reset pvmw->pte to NULL after unmapping the page table, though I never saw any bug from that.
Link: https://lkml.kernel.org/r/799b3f9c-2a9e-dfef-5d89-26e9f76fd97@google.com Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Alistair Popple apopple@nvidia.com Cc: Matthew Wilcox willy@infradead.org Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Will Deacon will@kernel.org Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_vma_mapped.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index a9cd4e4b8d95..36b40fd23f94 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -243,16 +243,16 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (pvmw->address >= end) return not_found(pvmw); /* Did we cross page table boundary? */ - if (pvmw->address % PMD_SIZE == 0) { - pte_unmap(pvmw->pte); + if ((pvmw->address & (PMD_SIZE - PAGE_SIZE)) == 0) { if (pvmw->ptl) { spin_unlock(pvmw->ptl); pvmw->ptl = NULL; } + pte_unmap(pvmw->pte); + pvmw->pte = NULL; goto restart; - } else { - pvmw->pte++; } + pvmw->pte++; } while (pte_none(*pvmw->pte));
if (!pvmw->ptl) {
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 9f85dcaf1533a20d0ba8443a214af19a8a732f64 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit b3807a91aca7d21c05d5790612e49969117a72b9 upstream.
page_vma_mapped_walk() cleanup: add a level of indentation to much of the body, making no functional change in this commit, but reducing the later diff when this is all converted to a loop.
[hughd@google.com: : page_vma_mapped_walk(): add a level of indentation fix] Link: https://lkml.kernel.org/r/7f817555-3ce1-c785-e438-87d8efdcaf26@google.com
Link: https://lkml.kernel.org/r/efde211-f3e2-fe54-977-ef481419e7f3@google.com Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Alistair Popple apopple@nvidia.com Cc: Matthew Wilcox willy@infradead.org Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Will Deacon will@kernel.org Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_vma_mapped.c | 105 ++++++++++++++++++++++--------------------- 1 file changed, 55 insertions(+), 50 deletions(-)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 36b40fd23f94..3c3a7cb2de85 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -172,62 +172,67 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (pvmw->pte) goto next_pte; restart: - pgd = pgd_offset(mm, pvmw->address); - if (!pgd_present(*pgd)) - return false; - p4d = p4d_offset(pgd, pvmw->address); - if (!p4d_present(*p4d)) - return false; - pud = pud_offset(p4d, pvmw->address); - if (!pud_present(*pud)) - return false; - pvmw->pmd = pmd_offset(pud, pvmw->address); - /* - * Make sure the pmd value isn't cached in a register by the - * compiler and used as a stale value after we've observed a - * subsequent update. - */ - pmde = READ_ONCE(*pvmw->pmd); - if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { - pvmw->ptl = pmd_lock(mm, pvmw->pmd); - pmde = *pvmw->pmd; - if (likely(pmd_trans_huge(pmde))) { - if (pvmw->flags & PVMW_MIGRATION) - return not_found(pvmw); - if (pmd_page(pmde) != page) - return not_found(pvmw); - return true; - } - if (!pmd_present(pmde)) { - swp_entry_t entry; + { + pgd = pgd_offset(mm, pvmw->address); + if (!pgd_present(*pgd)) + return false; + p4d = p4d_offset(pgd, pvmw->address); + if (!p4d_present(*p4d)) + return false; + pud = pud_offset(p4d, pvmw->address); + if (!pud_present(*pud)) + return false;
- if (!thp_migration_supported() || - !(pvmw->flags & PVMW_MIGRATION)) - return not_found(pvmw); - entry = pmd_to_swp_entry(pmde); - if (!is_migration_entry(entry) || - migration_entry_to_page(entry) != page) - return not_found(pvmw); - return true; - } - /* THP pmd was split under us: handle on pte level */ - spin_unlock(pvmw->ptl); - pvmw->ptl = NULL; - } else if (!pmd_present(pmde)) { + pvmw->pmd = pmd_offset(pud, pvmw->address); /* - * If PVMW_SYNC, take and drop THP pmd lock so that we - * cannot return prematurely, while zap_huge_pmd() has - * cleared *pmd but not decremented compound_mapcount(). + * Make sure the pmd value isn't cached in a register by the + * compiler and used as a stale value after we've observed a + * subsequent update. */ - if ((pvmw->flags & PVMW_SYNC) && PageTransCompound(page)) { - spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); + pmde = READ_ONCE(*pvmw->pmd);
- spin_unlock(ptl); + if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { + pvmw->ptl = pmd_lock(mm, pvmw->pmd); + pmde = *pvmw->pmd; + if (likely(pmd_trans_huge(pmde))) { + if (pvmw->flags & PVMW_MIGRATION) + return not_found(pvmw); + if (pmd_page(pmde) != page) + return not_found(pvmw); + return true; + } + if (!pmd_present(pmde)) { + swp_entry_t entry; + + if (!thp_migration_supported() || + !(pvmw->flags & PVMW_MIGRATION)) + return not_found(pvmw); + entry = pmd_to_swp_entry(pmde); + if (!is_migration_entry(entry) || + migration_entry_to_page(entry) != page) + return not_found(pvmw); + return true; + } + /* THP pmd was split under us: handle on pte level */ + spin_unlock(pvmw->ptl); + pvmw->ptl = NULL; + } else if (!pmd_present(pmde)) { + /* + * If PVMW_SYNC, take and drop THP pmd lock so that we + * cannot return prematurely, while zap_huge_pmd() has + * cleared *pmd but not decremented compound_mapcount(). + */ + if ((pvmw->flags & PVMW_SYNC) && + PageTransCompound(page)) { + spinlock_t *ptl = pmd_lock(mm, pvmw->pmd); + + spin_unlock(ptl); + } + return false; } - return false; + if (!map_pte(pvmw)) + goto next_pte; } - if (!map_pte(pvmw)) - goto next_pte; while (1) { unsigned long end;
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit bf60fc2314b98fc57b8ef20a7a3a5642513ef6d1 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 474466301dfd8b39a10c01db740645f3f7ae9a28 upstream.
page_vma_mapped_walk() cleanup: add a label this_pte, matching next_pte, and use "goto this_pte", in place of the "while (1)" loop at the end.
Link: https://lkml.kernel.org/r/a52b234a-851-3616-2525-f42736e8934@google.com Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Alistair Popple apopple@nvidia.com Cc: Matthew Wilcox willy@infradead.org Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Will Deacon will@kernel.org Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_vma_mapped.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 3c3a7cb2de85..cb43affe6c76 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -143,6 +143,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) { struct mm_struct *mm = pvmw->vma->vm_mm; struct page *page = pvmw->page; + unsigned long end; pgd_t *pgd; p4d_t *p4d; pud_t *pud; @@ -232,10 +233,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) } if (!map_pte(pvmw)) goto next_pte; - } - while (1) { - unsigned long end; - +this_pte: if (check_pte(pvmw)) return true; next_pte: @@ -264,6 +262,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) pvmw->ptl = pte_lockptr(mm, pvmw->pmd); spin_lock(pvmw->ptl); } + goto this_pte; } }
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 90073aecc3ccabb7b8ee9fc968c6b9e3f03ebaed bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit a765c417d876cc635f628365ec9aa6f09470069a upstream.
page_vma_mapped_walk() cleanup: get THP's vma_address_end() at the start, rather than later at next_pte.
It's a little unnecessary overhead on the first call, but makes for a simpler loop in the following commit.
Link: https://lkml.kernel.org/r/4542b34d-862f-7cb4-bb22-e0df6ce830a2@google.com Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Alistair Popple apopple@nvidia.com Cc: Matthew Wilcox willy@infradead.org Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Will Deacon will@kernel.org Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_vma_mapped.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index cb43affe6c76..5df88f3b454c 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -170,6 +170,15 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) return true; }
+ /* + * Seek to next pte only makes sense for THP. + * But more important than that optimization, is to filter out + * any PageKsm page: whose page->index misleads vma_address() + * and vma_address_end() to disaster. + */ + end = PageTransCompound(page) ? + vma_address_end(page, pvmw->vma) : + pvmw->address + PAGE_SIZE; if (pvmw->pte) goto next_pte; restart: @@ -237,10 +246,6 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (check_pte(pvmw)) return true; next_pte: - /* Seek to next pte only makes sense for THP */ - if (!PageTransHuge(page)) - return not_found(pvmw); - end = vma_address_end(page, pvmw->vma); do { pvmw->address += PAGE_SIZE; if (pvmw->address >= end)
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 915c3a262c49fcd1caa7dff192289bdfdaa9438c bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit a9a7504d9beaf395481faa91e70e2fd08f7a3dde upstream.
Running certain tests with a DEBUG_VM kernel would crash within hours, on the total_mapcount BUG() in split_huge_page_to_list(), while trying to free up some memory by punching a hole in a shmem huge page: split's try_to_unmap() was unable to find all the mappings of the page (which, on a !DEBUG_VM kernel, would then keep the huge page pinned in memory).
Crash dumps showed two tail pages of a shmem huge page remained mapped by pte: ptes in a non-huge-aligned vma of a gVisor process, at the end of a long unmapped range; and no page table had yet been allocated for the head of the huge page to be mapped into.
Although designed to handle these odd misaligned huge-page-mapped-by-pte cases, page_vma_mapped_walk() falls short by returning false prematurely when !pmd_present or !pud_present or !p4d_present or !pgd_present: there are cases when a huge page may span the boundary, with ptes present in the next.
Restructure page_vma_mapped_walk() as a loop to continue in these cases, while keeping its layout much as before. Add a step_forward() helper to advance pvmw->address across those boundaries: originally I tried to use mm's standard p?d_addr_end() macros, but hit the same crash 512 times less often: because of the way redundant levels are folded together, but folded differently in different configurations, it was just too difficult to use them correctly; and step_forward() is simpler anyway.
Link: https://lkml.kernel.org/r/fedb8632-1798-de42-f39e-873551d5bc81@google.com Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()") Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Alistair Popple apopple@nvidia.com Cc: Matthew Wilcox willy@infradead.org Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Wang Yugui wangyugui@e16-tech.com Cc: Will Deacon will@kernel.org Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_vma_mapped.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 5df88f3b454c..2ad76a3d871d 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -115,6 +115,13 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) return pfn_is_match(pvmw->page, pfn); }
+static void step_forward(struct page_vma_mapped_walk *pvmw, unsigned long size) +{ + pvmw->address = (pvmw->address + size) & ~(size - 1); + if (!pvmw->address) + pvmw->address = ULONG_MAX; +} + /** * page_vma_mapped_walk - check if @pvmw->page is mapped in @pvmw->vma at * @pvmw->address @@ -182,16 +189,22 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) if (pvmw->pte) goto next_pte; restart: - { + do { pgd = pgd_offset(mm, pvmw->address); - if (!pgd_present(*pgd)) - return false; + if (!pgd_present(*pgd)) { + step_forward(pvmw, PGDIR_SIZE); + continue; + } p4d = p4d_offset(pgd, pvmw->address); - if (!p4d_present(*p4d)) - return false; + if (!p4d_present(*p4d)) { + step_forward(pvmw, P4D_SIZE); + continue; + } pud = pud_offset(p4d, pvmw->address); - if (!pud_present(*pud)) - return false; + if (!pud_present(*pud)) { + step_forward(pvmw, PUD_SIZE); + continue; + }
pvmw->pmd = pmd_offset(pud, pvmw->address); /* @@ -238,7 +251,8 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
spin_unlock(ptl); } - return false; + step_forward(pvmw, PMD_SIZE); + continue; } if (!map_pte(pvmw)) goto next_pte; @@ -268,7 +282,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) spin_lock(pvmw->ptl); } goto this_pte; - } + } while (pvmw->address < end); + + return false; }
/**
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit ab9d178167eae80b623baaedc1b5e3142fe480ce bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit a7a69d8ba88d8dcee7ef00e91d413a4bd003a814 upstream.
Aha! Shouldn't that quick scan over pte_none()s make sure that it holds ptlock in the PVMW_SYNC case? That too might have been responsible for BUGs or WARNs in split_huge_page_to_list() or its unmap_page(), though I've never seen any.
Link: https://lkml.kernel.org/r/1bdf384c-8137-a149-2a1e-475a4791c3c@google.com Link: https://lore.kernel.org/linux-mm/20210412180659.B9E3.409509F4@e16-tech.com/ Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()") Signed-off-by: Hugh Dickins hughd@google.com Acked-by: Kirill A. Shutemov kirill.shutemov@linux.intel.com Tested-by: Wang Yugui wangyugui@e16-tech.com Cc: Alistair Popple apopple@nvidia.com Cc: Matthew Wilcox willy@infradead.org Cc: Peter Xu peterx@redhat.com Cc: Ralph Campbell rcampbell@nvidia.com Cc: Will Deacon will@kernel.org Cc: Yang Shi shy828301@gmail.com Cc: Zi Yan ziy@nvidia.com Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_vma_mapped.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 2ad76a3d871d..610ebbee787c 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -275,6 +275,10 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) goto restart; } pvmw->pte++; + if ((pvmw->flags & PVMW_SYNC) && !pvmw->ptl) { + pvmw->ptl = pte_lockptr(mm, pvmw->pmd); + spin_lock(pvmw->ptl); + } } while (pte_none(*pvmw->pte));
if (!pvmw->ptl) {
From: Hugh Dickins hughd@google.com
stable inclusion from stable-5.10.47 commit 377a796e7a7102e543b4d1ccce473ed2df09f0d7 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit fe19bd3dae3d15d2fbfdb3de8839a6ea0fe94264 upstream.
If more than one futex is placed on a shmem huge page, it can happen that waking the second wakes the first instead, and leaves the second waiting: the key's shared.pgoff is wrong.
When 3.11 commit 13d60f4b6ab5 ("futex: Take hugepages into account when generating futex_key"), the only shared huge pages came from hugetlbfs, and the code added to deal with its exceptional page->index was put into hugetlb source. Then that was missed when 4.8 added shmem huge pages.
page_to_pgoff() is what others use for this nowadays: except that, as currently written, it gives the right answer on hugetlbfs head, but nonsense on hugetlbfs tails. Fix that by calling hugetlbfs-specific hugetlb_basepage_index() on PageHuge tails as well as on head.
Yes, it's unconventional to declare hugetlb_basepage_index() there in pagemap.h, rather than in hugetlb.h; but I do not expect anything but page_to_pgoff() ever to need it.
[akpm@linux-foundation.org: give hugetlb_basepage_index() prototype the correct scope]
Link: https://lkml.kernel.org/r/b17d946b-d09-326e-b42a-52884c36df32@google.com Fixes: 800d8c63b2e9 ("shmem: add huge pages support") Reported-by: Neel Natu neelnatu@google.com Signed-off-by: Hugh Dickins hughd@google.com Reviewed-by: Matthew Wilcox (Oracle) willy@infradead.org Acked-by: Thomas Gleixner tglx@linutronix.de Cc: "Kirill A. Shutemov" kirill.shutemov@linux.intel.com Cc: Zhang Yi wetpzy@gmail.com Cc: Mel Gorman mgorman@techsingularity.net Cc: Mike Kravetz mike.kravetz@oracle.com Cc: Ingo Molnar mingo@redhat.com Cc: Peter Zijlstra peterz@infradead.org Cc: Darren Hart dvhart@infradead.org Cc: Davidlohr Bueso dave@stgolabs.net Cc: stable@vger.kernel.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- include/linux/hugetlb.h | 16 ---------------- include/linux/pagemap.h | 13 +++++++------ kernel/futex.c | 3 +-- mm/hugetlb.c | 5 +---- 4 files changed, 9 insertions(+), 28 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index fd0e06d3a87d..85ae2d66925f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -734,17 +734,6 @@ static inline int hstate_index(struct hstate *h) return h - hstates; }
-pgoff_t __basepage_index(struct page *page); - -/* Return page->index in PAGE_SIZE units */ -static inline pgoff_t basepage_index(struct page *page) -{ - if (!PageCompound(page)) - return page->index; - - return __basepage_index(page); -} - extern int dissolve_free_huge_page(struct page *page); extern int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn); @@ -980,11 +969,6 @@ static inline int hstate_index(struct hstate *h) return 0; }
-static inline pgoff_t basepage_index(struct page *page) -{ - return page->index; -} - static inline int dissolve_free_huge_page(struct page *page) { return 0; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index cb6fc68141a7..0bfa9cce6589 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -506,7 +506,7 @@ static inline struct page *read_mapping_page(struct address_space *mapping, }
/* - * Get index of the page with in radix-tree + * Get index of the page within radix-tree (but not for hugetlb pages). * (TODO: remove once hugetlb pages will have ->index in PAGE_SIZE) */ static inline pgoff_t page_to_index(struct page *page) @@ -525,15 +525,16 @@ static inline pgoff_t page_to_index(struct page *page) return pgoff; }
+extern pgoff_t hugetlb_basepage_index(struct page *page); + /* - * Get the offset in PAGE_SIZE. - * (TODO: hugepage should have ->index in PAGE_SIZE) + * Get the offset in PAGE_SIZE (even for hugetlb pages). + * (TODO: hugetlb pages should have ->index in PAGE_SIZE) */ static inline pgoff_t page_to_pgoff(struct page *page) { - if (unlikely(PageHeadHuge(page))) - return page->index << compound_order(page); - + if (unlikely(PageHuge(page))) + return hugetlb_basepage_index(page); return page_to_index(page); }
diff --git a/kernel/futex.c b/kernel/futex.c index 3136aba17772..98a6e1b80bfe 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -35,7 +35,6 @@ #include <linux/jhash.h> #include <linux/pagemap.h> #include <linux/syscalls.h> -#include <linux/hugetlb.h> #include <linux/freezer.h> #include <linux/memblock.h> #include <linux/fault-inject.h> @@ -652,7 +651,7 @@ static int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->shared.i_seq = get_inode_sequence_number(inode); - key->shared.pgoff = basepage_index(tail); + key->shared.pgoff = page_to_pgoff(tail); rcu_read_unlock(); }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7e9ab6cf8729..19f83ef14ee4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1681,15 +1681,12 @@ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage) return NULL; }
-pgoff_t __basepage_index(struct page *page) +pgoff_t hugetlb_basepage_index(struct page *page) { struct page *page_head = compound_head(page); pgoff_t index = page_index(page_head); unsigned long compound_idx;
- if (!PageHuge(page_head)) - return page_index(page); - if (compound_order(page_head) >= MAX_ORDER) compound_idx = page_to_pfn(page) - page_to_pfn(page_head); else
From: Alper Gun alpergun@google.com
stable inclusion from stable-5.10.47 commit 7570a8b5dd493eebda5c0c826f3a87e2e6117791 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 934002cd660b035b926438244b4294e647507e13 upstream.
Send SEV_CMD_DECOMMISSION command to PSP firmware if ASID binding fails. If a failure happens after a successful LAUNCH_START command, a decommission command should be executed. Otherwise, guest context will be unfreed inside the AMD SP. After the firmware will not have memory to allocate more SEV guest context, LAUNCH_START command will begin to fail with SEV_RET_RESOURCE_LIMIT error.
The existing code calls decommission inside sev_unbind_asid, but it is not called if a failure happens before guest activation succeeds. If sev_bind_asid fails, decommission is never called. PSP firmware has a limit for the number of guests. If sev_asid_binding fails many times, PSP firmware will not have resources to create another guest context.
Cc: stable@vger.kernel.org Fixes: 59414c989220 ("KVM: SVM: Add support for KVM_SEV_LAUNCH_START command") Reported-by: Peter Gonda pgonda@google.com Signed-off-by: Alper Gun alpergun@google.com Reviewed-by: Marc Orr marcorr@google.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Message-Id: 20210610174604.2554090-1-alpergun@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/x86/kvm/svm/sev.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 16b10b9436dc..01547bdbfb06 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -130,9 +130,25 @@ static void sev_asid_free(int asid) mutex_unlock(&sev_bitmap_lock); }
-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +static void sev_decommission(unsigned int handle) { struct sev_data_decommission *decommission; + + if (!handle) + return; + + decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); + if (!decommission) + return; + + decommission->handle = handle; + sev_guest_decommission(decommission, NULL); + + kfree(decommission); +} + +static void sev_unbind_asid(struct kvm *kvm, unsigned int handle) +{ struct sev_data_deactivate *data;
if (!handle) @@ -152,15 +168,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
kfree(data);
- decommission = kzalloc(sizeof(*decommission), GFP_KERNEL); - if (!decommission) - return; - - /* decommission handle */ - decommission->handle = handle; - sev_guest_decommission(decommission, NULL); - - kfree(decommission); + sev_decommission(handle); }
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) @@ -288,8 +296,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Bind ASID to this guest */ ret = sev_bind_asid(kvm, start->handle, error); - if (ret) + if (ret) { + sev_decommission(start->handle); goto e_free_session; + }
/* return handle to userspace */ params.handle = start->handle;
From: Bumyong Lee bumyong.lee@samsung.com
stable inclusion from stable-5.10.47 commit e6108147dd91b94d1979b110f265710c254c99d5 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 5f89468e2f060031cd89fd4287298e0eaf246bf6 upstream.
in case of driver wants to sync part of ranges with offset, swiotlb_tbl_sync_single() copies from orig_addr base to tlb_addr with offset and ends up with data mismatch.
It was removed from "swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single", but said logic has to be added back in.
From Linus's email: "That commit which the removed the offset calculation entirely, because the old
(unsigned long)tlb_addr & (IO_TLB_SIZE - 1)
was wrong, but instead of removing it, I think it should have just fixed it to be
(tlb_addr - mem->start) & (IO_TLB_SIZE - 1);
instead. That way the slot offset always matches the slot index calculation."
(Unfortunatly that broke NVMe).
The use-case that drivers are hitting is as follow:
1. Get dma_addr_t from dma_map_single()
dma_addr_t tlb_addr = dma_map_single(dev, vaddr, vsize, DMA_TO_DEVICE);
|<---------------vsize------------->| +-----------------------------------+ | | original buffer +-----------------------------------+ vaddr
swiotlb_align_offset |<----->|<---------------vsize------------->| +-------+-----------------------------------+ | | | swiotlb buffer +-------+-----------------------------------+ tlb_addr
2. Do something 3. Sync dma_addr_t through dma_sync_single_for_device(..)
dma_sync_single_for_device(dev, tlb_addr + offset, size, DMA_TO_DEVICE);
Error case. Copy data to original buffer but it is from base addr (instead of base addr + offset) in original buffer:
swiotlb_align_offset |<----->|<- offset ->|<- size ->| +-------+-----------------------------------+ | | |##########| | swiotlb buffer +-------+-----------------------------------+ tlb_addr
|<- size ->| +-----------------------------------+ |##########| | original buffer +-----------------------------------+ vaddr
The fix is to copy the data to the original buffer and take into account the offset, like so:
swiotlb_align_offset |<----->|<- offset ->|<- size ->| +-------+-----------------------------------+ | | |##########| | swiotlb buffer +-------+-----------------------------------+ tlb_addr
|<- offset ->|<- size ->| +-----------------------------------+ | |##########| | original buffer +-----------------------------------+ vaddr
[One fix which was Linus's that made more sense to as it created a symmetry would break NVMe. The reason for that is the: unsigned int offset = (tlb_addr - mem->start) & (IO_TLB_SIZE - 1);
would come up with the proper offset, but it would lose the alignment (which this patch contains).]
Fixes: 16fc3cef33a0 ("swiotlb: don't modify orig_addr in swiotlb_tbl_sync_single") Signed-off-by: Bumyong Lee bumyong.lee@samsung.com Signed-off-by: Chanho Park chanho61.park@samsung.com Reviewed-by: Christoph Hellwig hch@lst.de Reported-by: Dominique MARTINET dominique.martinet@atmark-techno.com Reported-by: Horia Geantă horia.geanta@nxp.com Tested-by: Horia Geantă horia.geanta@nxp.com CC: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk konrad.wilk@oracle.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- kernel/dma/swiotlb.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 0f61b14b0099..0ed0e1f215c7 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -667,6 +667,9 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, if (orig_addr == INVALID_PHYS_ADDR) return;
+ orig_addr += (tlb_addr & (IO_TLB_SIZE - 1)) - + swiotlb_align_offset(hwdev, orig_addr); + switch (target) { case SYNC_FOR_CPU: if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
From: Jeff Layton jlayton@kernel.org
stable inclusion from stable-5.10.47 commit 0463b49e0239bb04f6ca5d7b697645e1122c30f8 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 827a746f405d25f79560c7868474aec5aee174e1 upstream.
It's not sufficient to skip reading when the pos is beyond the EOF. There may be data at the head of the page that we need to fill in before the write.
Add a new helper function that corrects and clarifies the logic of when we can skip reads, and have it only zero out the part of the page that won't have data copied in for the write.
Finally, don't set the page Uptodate after zeroing. It's not up to date since the write data won't have been copied in yet.
[DH made the following changes:
- Prefixed the new function with "netfs_".
- Don't call zero_user_segments() for a full-page write.
- Altered the beyond-last-page check to avoid a DIV instruction and got rid of then-redundant zero-length file check. ]
[ Note: this fix is commit 827a746f405d in mainline kernels. The original bug was in ceph, but got lifted into the fs/netfs library for v5.13. This backport should apply to stable kernels v5.10 though v5.12. ]
Fixes: e1b1240c1ff5f ("netfs: Add write_begin helper") Reported-by: Andrew W Elble aweits@rit.edu Signed-off-by: Jeff Layton jlayton@kernel.org Signed-off-by: David Howells dhowells@redhat.com Reviewed-by: Matthew Wilcox (Oracle) willy@infradead.org cc: ceph-devel@vger.kernel.org Link: https://lore.kernel.org/r/20210613233345.113565-1-jlayton@kernel.org/ Link: https://lore.kernel.org/r/162367683365.460125.4467036947364047314.stgit@wart... # v1 Link: https://lore.kernel.org/r/162391826758.1173366.11794946719301590013.stgit@wa... # v2 Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- fs/ceph/addr.c | 54 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 13 deletions(-)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 35c83f65475b..8b0507f69c15 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -1302,6 +1302,45 @@ ceph_find_incompatible(struct page *page) return NULL; }
+/** + * prep_noread_page - prep a page for writing without reading first + * @page: page being prepared + * @pos: starting position for the write + * @len: length of write + * + * In some cases, write_begin doesn't need to read at all: + * - full page write + * - file is currently zero-length + * - write that lies in a page that is completely beyond EOF + * - write that covers the the page from start to EOF or beyond it + * + * If any of these criteria are met, then zero out the unwritten parts + * of the page and return true. Otherwise, return false. + */ +static bool skip_page_read(struct page *page, loff_t pos, size_t len) +{ + struct inode *inode = page->mapping->host; + loff_t i_size = i_size_read(inode); + size_t offset = offset_in_page(pos); + + /* Full page write */ + if (offset == 0 && len >= PAGE_SIZE) + return true; + + /* pos beyond last page in the file */ + if (pos - offset >= i_size) + goto zero_out; + + /* write that covers the whole page from start to EOF or beyond it */ + if (offset == 0 && (pos + len) >= i_size) + goto zero_out; + + return false; +zero_out: + zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE); + return true; +} + /* * We are only allowed to write into/dirty the page if the page is * clean, or already dirty within the same snap context. @@ -1315,7 +1354,6 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, struct ceph_snap_context *snapc; struct page *page = NULL; pgoff_t index = pos >> PAGE_SHIFT; - int pos_in_page = pos & ~PAGE_MASK; int r = 0;
dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); @@ -1350,19 +1388,9 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, break; }
- /* - * In some cases we don't need to read at all: - * - full page write - * - write that lies completely beyond EOF - * - write that covers the the page from start to EOF or beyond it - */ - if ((pos_in_page == 0 && len == PAGE_SIZE) || - (pos >= i_size_read(inode)) || - (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) { - zero_user_segments(page, 0, pos_in_page, - pos_in_page + len, PAGE_SIZE); + /* No need to read in some cases */ + if (skip_page_read(page, pos, len)) break; - }
/* * We need to read it. If we get back -EINPROGRESS, then the page was
From: Daniel Vetter daniel.vetter@ffwll.ch
stable inclusion from stable-5.10.47 commit 0ba128fa68a4abfdfc8928f0ed67b5eb80962254 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit f54b3ca7ea1e5e02f481cf4ca54568e57bd66086 upstream.
This reverts commit 1815d9c86e3090477fbde066ff314a7e9721ee0f.
Unfortunately this inverts the locking hierarchy, so back to the drawing board. Full lockdep splat below:
====================================================== WARNING: possible circular locking dependency detected 5.13.0-rc7-CI-CI_DRM_10254+ #1 Not tainted ------------------------------------------------------ kms_frontbuffer/1087 is trying to acquire lock: ffff88810dcd01a8 (&dev->master_mutex){+.+.}-{3:3}, at: drm_is_current_master+0x1b/0x40 but task is already holding lock: ffff88810dcd0488 (&dev->mode_config.mutex){+.+.}-{3:3}, at: drm_mode_getconnector+0x1c6/0x4a0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&dev->mode_config.mutex){+.+.}-{3:3}: __mutex_lock+0xab/0x970 drm_client_modeset_probe+0x22e/0xca0 __drm_fb_helper_initial_config_and_unlock+0x42/0x540 intel_fbdev_initial_config+0xf/0x20 [i915] async_run_entry_fn+0x28/0x130 process_one_work+0x26d/0x5c0 worker_thread+0x37/0x380 kthread+0x144/0x170 ret_from_fork+0x1f/0x30 -> #1 (&client->modeset_mutex){+.+.}-{3:3}: __mutex_lock+0xab/0x970 drm_client_modeset_commit_locked+0x1c/0x180 drm_client_modeset_commit+0x1c/0x40 __drm_fb_helper_restore_fbdev_mode_unlocked+0x88/0xb0 drm_fb_helper_set_par+0x34/0x40 intel_fbdev_set_par+0x11/0x40 [i915] fbcon_init+0x270/0x4f0 visual_init+0xc6/0x130 do_bind_con_driver+0x1e5/0x2d0 do_take_over_console+0x10e/0x180 do_fbcon_takeover+0x53/0xb0 register_framebuffer+0x22d/0x310 __drm_fb_helper_initial_config_and_unlock+0x36c/0x540 intel_fbdev_initial_config+0xf/0x20 [i915] async_run_entry_fn+0x28/0x130 process_one_work+0x26d/0x5c0 worker_thread+0x37/0x380 kthread+0x144/0x170 ret_from_fork+0x1f/0x30 -> #0 (&dev->master_mutex){+.+.}-{3:3}: __lock_acquire+0x151e/0x2590 lock_acquire+0xd1/0x3d0 __mutex_lock+0xab/0x970 drm_is_current_master+0x1b/0x40 drm_mode_getconnector+0x37e/0x4a0 drm_ioctl_kernel+0xa8/0xf0 drm_ioctl+0x1e8/0x390 __x64_sys_ioctl+0x6a/0xa0 do_syscall_64+0x39/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae other info that might help us debug this: Chain exists of: &dev->master_mutex --> &client->modeset_mutex --> &dev->mode_config.mutex Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&dev->mode_config.mutex); lock(&client->modeset_mutex); lock(&dev->mode_config.mutex); lock(&dev->master_mutex);
Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/gpu/drm/drm_auth.c | 51 ++++++++++++++------------------------ 1 file changed, 19 insertions(+), 32 deletions(-)
diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c index 86d4b72e95cb..232abbba3686 100644 --- a/drivers/gpu/drm/drm_auth.c +++ b/drivers/gpu/drm/drm_auth.c @@ -61,35 +61,6 @@ * trusted clients. */
-static bool drm_is_current_master_locked(struct drm_file *fpriv) -{ - lockdep_assert_held_once(&fpriv->master->dev->master_mutex); - - return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master; -} - -/** - * drm_is_current_master - checks whether @priv is the current master - * @fpriv: DRM file private - * - * Checks whether @fpriv is current master on its device. This decides whether a - * client is allowed to run DRM_MASTER IOCTLs. - * - * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting - * - the current master is assumed to own the non-shareable display hardware. - */ -bool drm_is_current_master(struct drm_file *fpriv) -{ - bool ret; - - mutex_lock(&fpriv->master->dev->master_mutex); - ret = drm_is_current_master_locked(fpriv); - mutex_unlock(&fpriv->master->dev->master_mutex); - - return ret; -} -EXPORT_SYMBOL(drm_is_current_master); - int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_auth *auth = data; @@ -252,7 +223,7 @@ int drm_setmaster_ioctl(struct drm_device *dev, void *data, if (ret) goto out_unlock;
- if (drm_is_current_master_locked(file_priv)) + if (drm_is_current_master(file_priv)) goto out_unlock;
if (dev->master) { @@ -301,7 +272,7 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data, if (ret) goto out_unlock;
- if (!drm_is_current_master_locked(file_priv)) { + if (!drm_is_current_master(file_priv)) { ret = -EINVAL; goto out_unlock; } @@ -350,7 +321,7 @@ void drm_master_release(struct drm_file *file_priv) if (file_priv->magic) idr_remove(&file_priv->master->magic_map, file_priv->magic);
- if (!drm_is_current_master_locked(file_priv)) + if (!drm_is_current_master(file_priv)) goto out;
drm_legacy_lock_master_cleanup(dev, master); @@ -371,6 +342,22 @@ void drm_master_release(struct drm_file *file_priv) mutex_unlock(&dev->master_mutex); }
+/** + * drm_is_current_master - checks whether @priv is the current master + * @fpriv: DRM file private + * + * Checks whether @fpriv is current master on its device. This decides whether a + * client is allowed to run DRM_MASTER IOCTLs. + * + * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting + * - the current master is assumed to own the non-shareable display hardware. + */ +bool drm_is_current_master(struct drm_file *fpriv) +{ + return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master; +} +EXPORT_SYMBOL(drm_is_current_master); + /** * drm_master_get - reference a master pointer * @master: &struct drm_master
From: Eric Snowberg eric.snowberg@oracle.com
stable inclusion from stable-5.10.47 commit 45109066f686597116467a53eaf4330450702a96 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 56c5812623f95313f6a46fbf0beee7fa17c68bbf ]
This fixes CVE-2020-26541.
The Secure Boot Forbidden Signature Database, dbx, contains a list of now revoked signatures and keys previously approved to boot with UEFI Secure Boot enabled. The dbx is capable of containing any number of EFI_CERT_X509_SHA256_GUID, EFI_CERT_SHA256_GUID, and EFI_CERT_X509_GUID entries.
Currently when EFI_CERT_X509_GUID are contained in the dbx, the entries are skipped.
Add support for EFI_CERT_X509_GUID dbx entries. When a EFI_CERT_X509_GUID is found, it is added as an asymmetrical key to the .blacklist keyring. Anytime the .platform keyring is used, the keys in the .blacklist keyring are referenced, if a matching key is found, the key will be rejected.
[DH: Made the following changes: - Added to have a config option to enable the facility. This allows a Kconfig solution to make sure that pkcs7_validate_trust() is enabled.[1][2] - Moved the functions out from the middle of the blacklist functions. - Added kerneldoc comments.]
Signed-off-by: Eric Snowberg eric.snowberg@oracle.com Signed-off-by: David Howells dhowells@redhat.com Reviewed-by: Jarkko Sakkinen jarkko@kernel.org cc: Randy Dunlap rdunlap@infradead.org cc: Mickaël Salaün mic@digikod.net cc: Arnd Bergmann arnd@kernel.org cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/20200901165143.10295-1-eric.snowberg@oracle.com/ # rfc Link: https://lore.kernel.org/r/20200909172736.73003-1-eric.snowberg@oracle.com/ # v2 Link: https://lore.kernel.org/r/20200911182230.62266-1-eric.snowberg@oracle.com/ # v3 Link: https://lore.kernel.org/r/20200916004927.64276-1-eric.snowberg@oracle.com/ # v4 Link: https://lore.kernel.org/r/20210122181054.32635-2-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/161428672051.677100.11064981943343605138.stgit@war... Link: https://lore.kernel.org/r/161433310942.902181.4901864302675874242.stgit@wart... # v2 Link: https://lore.kernel.org/r/161529605075.163428.14625520893961300757.stgit@war... # v3 Link: https://lore.kernel.org/r/bc2c24e3-ed68-2521-0bf4-a1f6be4a895d@infradead.org... [1] Link: https://lore.kernel.org/r/20210225125638.1841436-1-arnd@kernel.org/ [2] Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- certs/Kconfig | 9 ++++ certs/blacklist.c | 43 +++++++++++++++++++ certs/blacklist.h | 2 + certs/system_keyring.c | 6 +++ include/keys/system_keyring.h | 15 +++++++ .../platform_certs/keyring_handler.c | 11 +++++ 6 files changed, 86 insertions(+)
diff --git a/certs/Kconfig b/certs/Kconfig index 71be583e55a4..5ecdf8807bee 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -83,6 +83,15 @@ config SYSTEM_BLACKLIST_HASH_LIST wrapper to incorporate the list into the kernel. Each <hash> should be a string of hex digits.
+config SYSTEM_REVOCATION_LIST + bool "Provide system-wide ring of revocation certificates" + depends on SYSTEM_BLACKLIST_KEYRING + depends on PKCS7_MESSAGE_PARSER=y + help + If set, this allows revocation certificates to be stored in the + blacklist keyring and implements a hook whereby a PKCS#7 message can + be checked to see if it matches such a certificate. + config PGP_PRELOAD_PUBLIC_KEYS bool "Preload PGP public keys" select PGP_PRELOAD diff --git a/certs/blacklist.c b/certs/blacklist.c index f1c434b04b5e..59b2f106b294 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -144,6 +144,49 @@ int is_binary_blacklisted(const u8 *hash, size_t hash_len) } EXPORT_SYMBOL_GPL(is_binary_blacklisted);
+#ifdef CONFIG_SYSTEM_REVOCATION_LIST +/** + * add_key_to_revocation_list - Add a revocation certificate to the blacklist + * @data: The data blob containing the certificate + * @size: The size of data blob + */ +int add_key_to_revocation_list(const char *data, size_t size) +{ + key_ref_t key; + + key = key_create_or_update(make_key_ref(blacklist_keyring, true), + "asymmetric", + NULL, + data, + size, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW), + KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_BUILT_IN); + + if (IS_ERR(key)) { + pr_err("Problem with revocation key (%ld)\n", PTR_ERR(key)); + return PTR_ERR(key); + } + + return 0; +} + +/** + * is_key_on_revocation_list - Determine if the key for a PKCS#7 message is revoked + * @pkcs7: The PKCS#7 message to check + */ +int is_key_on_revocation_list(struct pkcs7_message *pkcs7) +{ + int ret; + + ret = pkcs7_validate_trust(pkcs7, blacklist_keyring); + + if (ret == 0) + return -EKEYREJECTED; + + return -ENOKEY; +} +#endif + /* * Initialise the blacklist */ diff --git a/certs/blacklist.h b/certs/blacklist.h index 1efd6fa0dc60..51b320cf8574 100644 --- a/certs/blacklist.h +++ b/certs/blacklist.h @@ -1,3 +1,5 @@ #include <linux/kernel.h> +#include <linux/errno.h> +#include <crypto/pkcs7.h>
extern const char __initconst *const blacklist_hashes[]; diff --git a/certs/system_keyring.c b/certs/system_keyring.c index 40f35834cd10..eef193616faf 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -263,6 +263,12 @@ int verify_pkcs7_message_sig(const void *data, size_t len, pr_devel("PKCS#7 platform keyring is not available\n"); goto error; } + + ret = is_key_on_revocation_list(pkcs7); + if (ret != -ENOKEY) { + pr_devel("PKCS#7 platform key is on revocation list\n"); + goto error; + } } ret = pkcs7_validate_trust(pkcs7, trusted_keys); if (ret < 0) { diff --git a/include/keys/system_keyring.h b/include/keys/system_keyring.h index fb8b07daa9d1..875e002a4180 100644 --- a/include/keys/system_keyring.h +++ b/include/keys/system_keyring.h @@ -31,6 +31,7 @@ extern int restrict_link_by_builtin_and_secondary_trusted( #define restrict_link_by_builtin_and_secondary_trusted restrict_link_by_builtin_trusted #endif
+extern struct pkcs7_message *pkcs7; #ifdef CONFIG_SYSTEM_BLACKLIST_KEYRING extern int mark_hash_blacklisted(const char *hash); extern int is_hash_blacklisted(const u8 *hash, size_t hash_len, @@ -49,6 +50,20 @@ static inline int is_binary_blacklisted(const u8 *hash, size_t hash_len) } #endif
+#ifdef CONFIG_SYSTEM_REVOCATION_LIST +extern int add_key_to_revocation_list(const char *data, size_t size); +extern int is_key_on_revocation_list(struct pkcs7_message *pkcs7); +#else +static inline int add_key_to_revocation_list(const char *data, size_t size) +{ + return 0; +} +static inline int is_key_on_revocation_list(struct pkcs7_message *pkcs7) +{ + return -ENOKEY; +} +#endif + #ifdef CONFIG_IMA_BLACKLIST_KEYRING extern struct key *ima_blacklist_keyring;
diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c index c5ba695c10e3..5604bd57c990 100644 --- a/security/integrity/platform_certs/keyring_handler.c +++ b/security/integrity/platform_certs/keyring_handler.c @@ -55,6 +55,15 @@ static __init void uefi_blacklist_binary(const char *source, uefi_blacklist_hash(source, data, len, "bin:", 4); }
+/* + * Add an X509 cert to the revocation list. + */ +static __init void uefi_revocation_list_x509(const char *source, + const void *data, size_t len) +{ + add_key_to_revocation_list(data, len); +} + /* * Return the appropriate handler for particular signature list types found in * the UEFI db and MokListRT tables. @@ -76,5 +85,7 @@ __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type) return uefi_blacklist_x509_tbs; if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0) return uefi_blacklist_binary; + if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0) + return uefi_revocation_list_x509; return 0; }
From: Eric Snowberg eric.snowberg@oracle.com
stable inclusion from stable-5.10.47 commit 72d6f5d982f0e823eaa01b9439db23af85fb0ee0 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 2565ca7f5ec1a98d51eea8860c4ab923f1ca2c85 ]
Move functionality within load_system_certificate_list to a common function, so it can be reused in the future.
DH Changes: - Added inclusion of common.h to common.c (Eric [1]).
Signed-off-by: Eric Snowberg eric.snowberg@oracle.com Acked-by: Jarkko Sakkinen jarkko@kernel.org Signed-off-by: David Howells dhowells@redhat.com cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/EDA280F9-F72D-4181-93C7-CDBE95976FF7@oracle.com/ [1] Link: https://lore.kernel.org/r/20200930201508.35113-2-eric.snowberg@oracle.com/ Link: https://lore.kernel.org/r/20210122181054.32635-3-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/161428672825.677100.7545516389752262918.stgit@wart... Link: https://lore.kernel.org/r/161433311696.902181.3599366124784670368.stgit@wart... # v2 Link: https://lore.kernel.org/r/161529605850.163428.7786675680201528556.stgit@wart... # v3 Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- certs/Makefile | 2 +- certs/common.c | 57 ++++++++++++++++++++++++++++++++++++++++++ certs/common.h | 9 +++++++ certs/system_keyring.c | 49 +++--------------------------------- 4 files changed, 70 insertions(+), 47 deletions(-) create mode 100644 certs/common.c create mode 100644 certs/common.h
diff --git a/certs/Makefile b/certs/Makefile index 980ede56157a..2a7d30028858 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -3,7 +3,7 @@ # Makefile for the linux kernel signature checking certificates. #
-obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o +obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"") obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o diff --git a/certs/common.c b/certs/common.c new file mode 100644 index 000000000000..16a220887a53 --- /dev/null +++ b/certs/common.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/kernel.h> +#include <linux/key.h> +#include "common.h" + +int load_certificate_list(const u8 cert_list[], + const unsigned long list_size, + const struct key *keyring) +{ + key_ref_t key; + const u8 *p, *end; + size_t plen; + + p = cert_list; + end = p + list_size; + while (p < end) { + /* Each cert begins with an ASN.1 SEQUENCE tag and must be more + * than 256 bytes in size. + */ + if (end - p < 4) + goto dodgy_cert; + if (p[0] != 0x30 && + p[1] != 0x82) + goto dodgy_cert; + plen = (p[2] << 8) | p[3]; + plen += 4; + if (plen > end - p) + goto dodgy_cert; + + key = key_create_or_update(make_key_ref(keyring, 1), + "asymmetric", + NULL, + p, + plen, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); + if (IS_ERR(key)) { + pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", + PTR_ERR(key)); + } else { + pr_notice("Loaded X.509 cert '%s'\n", + key_ref_to_ptr(key)->description); + key_ref_put(key); + } + p += plen; + } + + return 0; + +dodgy_cert: + pr_err("Problem parsing in-kernel X.509 certificate list\n"); + return 0; +} diff --git a/certs/common.h b/certs/common.h new file mode 100644 index 000000000000..abdb5795936b --- /dev/null +++ b/certs/common.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _CERT_COMMON_H +#define _CERT_COMMON_H + +int load_certificate_list(const u8 cert_list[], const unsigned long list_size, + const struct key *keyring); + +#endif diff --git a/certs/system_keyring.c b/certs/system_keyring.c index eef193616faf..0eac6c368048 100644 --- a/certs/system_keyring.c +++ b/certs/system_keyring.c @@ -16,6 +16,7 @@ #include <keys/asymmetric-type.h> #include <keys/system_keyring.h> #include <crypto/pkcs7.h> +#include "common.h"
static struct key *builtin_trusted_keys; #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING @@ -137,54 +138,10 @@ device_initcall(system_trusted_keyring_init); */ static __init int load_system_certificate_list(void) { - key_ref_t key; - const u8 *p, *end; - size_t plen; - pr_notice("Loading compiled-in X.509 certificates\n");
- p = system_certificate_list; - end = p + system_certificate_list_size; - while (p < end) { - /* Each cert begins with an ASN.1 SEQUENCE tag and must be more - * than 256 bytes in size. - */ - if (end - p < 4) - goto dodgy_cert; - if (p[0] != 0x30 && - p[1] != 0x82) - goto dodgy_cert; - plen = (p[2] << 8) | p[3]; - plen += 4; - if (plen > end - p) - goto dodgy_cert; - - key = key_create_or_update(make_key_ref(builtin_trusted_keys, 1), - "asymmetric", - NULL, - p, - plen, - ((KEY_POS_ALL & ~KEY_POS_SETATTR) | - KEY_USR_VIEW | KEY_USR_READ), - KEY_ALLOC_NOT_IN_QUOTA | - KEY_ALLOC_BUILT_IN | - KEY_ALLOC_BYPASS_RESTRICTION); - if (IS_ERR(key)) { - pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", - PTR_ERR(key)); - } else { - pr_notice("Loaded X.509 cert '%s'\n", - key_ref_to_ptr(key)->description); - key_ref_put(key); - } - p += plen; - } - - return 0; - -dodgy_cert: - pr_err("Problem parsing in-kernel X.509 certificate list\n"); - return 0; + return load_certificate_list(system_certificate_list, system_certificate_list_size, + builtin_trusted_keys); } late_initcall(load_system_certificate_list);
From: Eric Snowberg eric.snowberg@oracle.com
stable inclusion from stable-5.10.47 commit c6ae6f89fc4f7820d0ce6e8c1340d660b358e791 bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit d1f044103dad70c1cec0a8f3abdf00834fec8b98 ]
Add a new Kconfig option called SYSTEM_REVOCATION_KEYS. If set, this option should be the filename of a PEM-formated file containing X.509 certificates to be included in the default blacklist keyring.
DH Changes: - Make the new Kconfig option depend on SYSTEM_REVOCATION_LIST. - Fix SYSTEM_REVOCATION_KEYS=n, but CONFIG_SYSTEM_REVOCATION_LIST=y[1][2]. - Use CONFIG_SYSTEM_REVOCATION_LIST for extract-cert[3]. - Use CONFIG_SYSTEM_REVOCATION_LIST for revocation_certificates.o[3].
Signed-off-by: Eric Snowberg eric.snowberg@oracle.com Acked-by: Jarkko Sakkinen jarkko@kernel.org Signed-off-by: David Howells dhowells@redhat.com cc: Randy Dunlap rdunlap@infradead.org cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/e1c15c74-82ce-3a69-44de-a33af9b320ea@infradead.org... [1] Link: https://lore.kernel.org/r/20210303034418.106762-1-eric.snowberg@oracle.com/ [2] Link: https://lore.kernel.org/r/20210304175030.184131-1-eric.snowberg@oracle.com/ [3] Link: https://lore.kernel.org/r/20200930201508.35113-3-eric.snowberg@oracle.com/ Link: https://lore.kernel.org/r/20210122181054.32635-4-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/161428673564.677100.4112098280028451629.stgit@wart... Link: https://lore.kernel.org/r/161433312452.902181.4146169951896577982.stgit@wart... # v2 Link: https://lore.kernel.org/r/161529606657.163428.3340689182456495390.stgit@wart... # v3 Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- certs/Kconfig | 8 ++++++++ certs/Makefile | 19 +++++++++++++++++-- certs/blacklist.c | 21 +++++++++++++++++++++ certs/revocation_certificates.S | 21 +++++++++++++++++++++ scripts/Makefile | 1 + 5 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 certs/revocation_certificates.S
diff --git a/certs/Kconfig b/certs/Kconfig index 5ecdf8807bee..84fc229b6205 100644 --- a/certs/Kconfig +++ b/certs/Kconfig @@ -92,6 +92,14 @@ config SYSTEM_REVOCATION_LIST blacklist keyring and implements a hook whereby a PKCS#7 message can be checked to see if it matches such a certificate.
+config SYSTEM_REVOCATION_KEYS + string "X.509 certificates to be preloaded into the system blacklist keyring" + depends on SYSTEM_REVOCATION_LIST + help + If set, this option should be the filename of a PEM-formatted file + containing X.509 certificates to be included in the default blacklist + keyring. + config PGP_PRELOAD_PUBLIC_KEYS bool "Preload PGP public keys" select PGP_PRELOAD diff --git a/certs/Makefile b/certs/Makefile index 2a7d30028858..77392d949415 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -4,7 +4,8 @@ #
obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o -obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o +obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o +obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),"") obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o else @@ -36,7 +37,7 @@ $(obj)/x509_certificate_list: scripts/extract-cert $(SYSTEM_TRUSTED_KEYS_SRCPREF $(call if_changed,extract_certs,$(SYSTEM_TRUSTED_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_TRUSTED_KEYS)) endif # CONFIG_SYSTEM_TRUSTED_KEYRING
-clean-files := x509_certificate_list .x509.list +clean-files := x509_certificate_list .x509.list x509_revocation_list
ifeq ($(CONFIG_MODULE_SIG),y) ############################################################################### @@ -111,3 +112,17 @@ targets += signing_key.x509 $(obj)/signing_key.x509: scripts/extract-cert $(X509_DEP) FORCE $(call if_changed,extract_certs,$(MODULE_SIG_KEY_SRCPREFIX)$(CONFIG_MODULE_SIG_KEY)) endif # CONFIG_MODULE_SIG + +ifeq ($(CONFIG_SYSTEM_REVOCATION_LIST),y) + +$(eval $(call config_filename,SYSTEM_REVOCATION_KEYS)) + +$(obj)/revocation_certificates.o: $(obj)/x509_revocation_list + +quiet_cmd_extract_certs = EXTRACT_CERTS $(patsubst "%",%,$(2)) + cmd_extract_certs = scripts/extract-cert $(2) $@ + +targets += x509_revocation_list +$(obj)/x509_revocation_list: scripts/extract-cert $(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(SYSTEM_REVOCATION_KEYS_FILENAME) FORCE + $(call if_changed,extract_certs,$(SYSTEM_REVOCATION_KEYS_SRCPREFIX)$(CONFIG_SYSTEM_REVOCATION_KEYS)) +endif diff --git a/certs/blacklist.c b/certs/blacklist.c index 59b2f106b294..c973de883cf0 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -16,9 +16,15 @@ #include <linux/seq_file.h> #include <keys/system_keyring.h> #include "blacklist.h" +#include "common.h"
static struct key *blacklist_keyring;
+#ifdef CONFIG_SYSTEM_REVOCATION_LIST +extern __initconst const u8 revocation_certificate_list[]; +extern __initconst const unsigned long revocation_certificate_list_size; +#endif + /* * The description must be a type prefix, a colon and then an even number of * hex digits. The hash is kept in the description. @@ -220,3 +226,18 @@ static int __init blacklist_init(void) * Must be initialised before we try and load the keys into the keyring. */ device_initcall(blacklist_init); + +#ifdef CONFIG_SYSTEM_REVOCATION_LIST +/* + * Load the compiled-in list of revocation X.509 certificates. + */ +static __init int load_revocation_certificate_list(void) +{ + if (revocation_certificate_list_size) + pr_notice("Loading compiled-in revocation X.509 certificates\n"); + + return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size, + blacklist_keyring); +} +late_initcall(load_revocation_certificate_list); +#endif diff --git a/certs/revocation_certificates.S b/certs/revocation_certificates.S new file mode 100644 index 000000000000..f21aae8a8f0e --- /dev/null +++ b/certs/revocation_certificates.S @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> +#include <linux/init.h> + + __INITRODATA + + .align 8 + .globl revocation_certificate_list +revocation_certificate_list: +__revocation_list_start: + .incbin "certs/x509_revocation_list" +__revocation_list_end: + + .align 8 + .globl revocation_certificate_list_size +revocation_certificate_list_size: +#ifdef CONFIG_64BIT + .quad __revocation_list_end - __revocation_list_start +#else + .long __revocation_list_end - __revocation_list_start +#endif diff --git a/scripts/Makefile b/scripts/Makefile index c36106bce80e..9adb6d247818 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -14,6 +14,7 @@ hostprogs-always-$(CONFIG_ASN1) += asn1_compiler hostprogs-always-$(CONFIG_MODULE_SIG_FORMAT) += sign-file hostprogs-always-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += extract-cert hostprogs-always-$(CONFIG_SYSTEM_EXTRA_CERTIFICATE) += insert-sys-cert +hostprogs-always-$(CONFIG_SYSTEM_REVOCATION_LIST) += extract-cert
HOSTCFLAGS_sorttable.o = -I$(srctree)/tools/include HOSTCFLAGS_asn1_compiler.o = -I$(srctree)/include
From: Eric Snowberg eric.snowberg@oracle.com
stable inclusion from stable-5.10.47 commit 1573d595e2395c4d2742d2217d86f6241ca47b9f bugzilla: 172973 https://gitee.com/openeuler/kernel/issues/I4DAKB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit ebd9c2ae369a45bdd9f8615484db09be58fc242b ]
During boot the Secure Boot Forbidden Signature Database, dbx, is loaded into the blacklist keyring. Systems booted with shim have an equivalent Forbidden Signature Database called mokx. Currently mokx is only used by shim and grub, the contents are ignored by the kernel.
Add the ability to load mokx into the blacklist keyring during boot.
Signed-off-by: Eric Snowberg eric.snowberg@oracle.com Suggested-by: James Bottomley James.Bottomley@HansenPartnership.com Signed-off-by: David Howells dhowells@redhat.com Reviewed-by: Jarkko Sakkinen jarkko@kernel.org cc: keyrings@vger.kernel.org Link: https://lore.kernel.org/r/c33c8e3839a41e9654f41cc92c7231104931b1d7.camel@Han... Link: https://lore.kernel.org/r/20210122181054.32635-5-eric.snowberg@oracle.com/ # v5 Link: https://lore.kernel.org/r/161428674320.677100.12637282414018170743.stgit@war... Link: https://lore.kernel.org/r/161433313205.902181.2502803393898221637.stgit@wart... # v2 Link: https://lore.kernel.org/r/161529607422.163428.13530426573612578854.stgit@war... # v3 Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- security/integrity/platform_certs/load_uefi.c | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index ee4b4c666854..f290f78c3f30 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -132,8 +132,9 @@ static int __init load_moklist_certs(void) static int __init load_uefi_certs(void) { efi_guid_t secure_var = EFI_IMAGE_SECURITY_DATABASE_GUID; - void *db = NULL, *dbx = NULL; - unsigned long dbsize = 0, dbxsize = 0; + efi_guid_t mok_var = EFI_SHIM_LOCK_GUID; + void *db = NULL, *dbx = NULL, *mokx = NULL; + unsigned long dbsize = 0, dbxsize = 0, mokxsize = 0; efi_status_t status; int rc = 0;
@@ -175,6 +176,21 @@ static int __init load_uefi_certs(void) kfree(dbx); }
+ mokx = get_cert_list(L"MokListXRT", &mok_var, &mokxsize, &status); + if (!mokx) { + if (status == EFI_NOT_FOUND) + pr_debug("mokx variable wasn't found\n"); + else + pr_info("Couldn't get mokx list\n"); + } else { + rc = parse_efi_signature_list("UEFI:MokListXRT", + mokx, mokxsize, + get_handler_for_dbx); + if (rc) + pr_err("Couldn't parse mokx signatures %d\n", rc); + kfree(mokx); + } + /* Load the MokListRT certs */ rc = load_moklist_certs();
From: "Martin K. Petersen" martin.petersen@oracle.com
hulk inclusion category: bugfix bugzilla: 46833 https://gitee.com/openeuler/kernel/issues/I4DCSN
-----------------------------------------------
Fix: https://gitee.com/src-openeuler/util-linux/issues/I28N07
Origin patch: https://patchwork.kernel.org/project/linux-scsi/patch/20190227041941. 1568-1-martin.petersen@oracle.com/
If partition table changed online, will record partition read-only flag.
Some devices come online in write protected state and switch to read-write once they are ready to process I/O requests. These devices broke with commit 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") because we had no way to distinguish between a user decision to set a block_device read-only and the actual hardware device being write-protected.
Because partitions are dropped and recreated on revalidate we are unable to persist any user-provided policy in hd_struct. Introduce a bitmap in struct gendisk to track the user configuration. This bitmap is updated when BLKROSET is called on a given disk or partition.
A helper function, get_user_ro(), is provided to determine whether the ioctl has forced read-only state for a given block device. This helper is used by set_disk_ro() and add_partition() to ensure that both existing and newly created partitions will get the correct state.
- If BLKROSET sets a whole disk device read-only, all partitions will now end up in a read-only state.
- If BLKROSET sets a given partition read-only, that partition will remain read-only post revalidate.
- Otherwise both the whole disk device and any partitions will reflect the write protect state of the underlying device.
Since nobody knows what "policy" means, rename the field to "read_only" for clarity.
Cc: Jeremy Cline jeremy@jcline.org Cc: Oleksii Kurochko olkuroch@cisco.com Cc: stable@vger.kernel.org # v4.16+ Reported-by: Oleksii Kurochko olkuroch@cisco.com Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201221 Fixes: 20bd1d026aac ("scsi: sd: Keep disk read-only when re-reading partition") Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Ye Bin yebin10@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- block/blk-core.c | 2 +- block/genhd.c | 34 ++++++++++++++++++++++++---------- block/ioctl.c | 4 ++++ block/partitions/core.c | 4 ++-- include/linux/genhd.h | 11 +++++++---- 5 files changed, 38 insertions(+), 17 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c index 02da3baf0bf7..913f9c06b3b1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -696,7 +696,7 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part) { const int op = bio_op(bio);
- if (part->policy && op_is_write(op)) { + if (part->read_only && op_is_write(op)) { char b[BDEVNAME_SIZE];
if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) diff --git a/block/genhd.c b/block/genhd.c index 8846cbdc75d4..5a7196286241 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1867,26 +1867,40 @@ static void set_disk_ro_uevent(struct gendisk *gd, int ro) kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); }
-void set_device_ro(struct block_device *bdev, int flag) +void set_device_ro(struct block_device *bdev, bool state) { - bdev->bd_part->policy = flag; + bdev->bd_part->read_only = state; }
EXPORT_SYMBOL(set_device_ro);
-void set_disk_ro(struct gendisk *disk, int flag) +bool get_user_ro(struct gendisk *disk, unsigned int partno) +{ + /* Is the user read-only bit set for the whole disk device? */ + if (test_bit(0, disk->user_ro_bitmap)) + return true; + + /* Is the user read-only bit set for this particular partition? */ + if (test_bit(partno, disk->user_ro_bitmap)) + return true; + + return false; +} + +void set_disk_ro(struct gendisk *disk, bool state) { struct disk_part_iter piter; struct hd_struct *part;
- if (disk->part0.policy != flag) { - set_disk_ro_uevent(disk, flag); - disk->part0.policy = flag; - } + if (disk->part0.read_only != state) + set_disk_ro_uevent(disk, state);
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0); while ((part = disk_part_iter_next(&piter))) - part->policy = flag; + if (get_user_ro(disk, part->partno)) + part->read_only = true; + else + part->read_only = state; disk_part_iter_exit(&piter); }
@@ -1896,7 +1910,7 @@ int bdev_read_only(struct block_device *bdev) { if (!bdev) return 0; - return bdev->bd_part->policy; + return bdev->bd_part->read_only; }
EXPORT_SYMBOL(bdev_read_only); diff --git a/block/ioctl.c b/block/ioctl.c index ed240e170e14..8171858dc8a9 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -405,6 +405,10 @@ static int blkdev_roset(struct block_device *bdev, fmode_t mode, return ret; if (get_user(n, (int __user *)arg)) return -EFAULT; + if (n) + set_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap); + else + clear_bit(bdev->bd_partno, bdev->bd_disk->user_ro_bitmap); set_device_ro(bdev, n); return 0; } diff --git a/block/partitions/core.c b/block/partitions/core.c index 758ecc3de802..1f031f074ffd 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -192,7 +192,7 @@ static ssize_t part_ro_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%d\n", p->policy ? 1 : 0); + return sprintf(buf, "%d\n", p->read_only ? 1 : 0); }
static ssize_t part_alignment_offset_show(struct device *dev, @@ -414,7 +414,7 @@ static struct hd_struct *add_partition(struct gendisk *disk, int partno, p->start_sect = start; p->nr_sects = len; p->partno = partno; - p->policy = get_disk_ro(disk); + p->read_only = get_disk_ro(disk) | test_bit(partno, disk->user_ro_bitmap);
if (info) { struct partition_meta_info *pinfo; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index f70458755828..9116e41f4cc9 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -67,7 +67,8 @@ struct hd_struct {
struct device __dev; struct kobject *holder_dir; - int policy, partno; + bool read_only; + int partno; struct partition_meta_info *info; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; @@ -185,6 +186,7 @@ struct gendisk { */ struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; + DECLARE_BITMAP(user_ro_bitmap, DISK_MAX_PARTS);
const struct block_device_operations *fops; struct request_queue *queue; @@ -304,12 +306,13 @@ extern void del_gendisk(struct gendisk *gp); extern struct gendisk *get_gendisk(dev_t dev, int *partno); extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
-extern void set_device_ro(struct block_device *bdev, int flag); -extern void set_disk_ro(struct gendisk *disk, int flag); +extern void set_device_ro(struct block_device *bdev, bool state); +extern void set_disk_ro(struct gendisk *disk, bool state); +extern bool get_user_ro(struct gendisk *disk, unsigned int partno);
static inline int get_disk_ro(struct gendisk *disk) { - return disk->part0.policy; + return disk->part0.read_only; }
extern void disk_block_events(struct gendisk *disk);
From: "GONG, Ruiqi" gongruiqi1@huawei.com
hulk inclusion category: bugfix bugzilla: 172971 https://gitee.com/openeuler/kernel/issues/I4DCXF
-------------------------------------------------
When ILP32 support is enabled, compilation on ARM64 would complain about an undeclared __NR_compat_syscalls, since commit 0b18db62881c ("arm64: ilp32: introduce syscall table for ILP32") changes the config that __NR_compat_syscalls depends on. Modify the config for SECCOMP_ARCH_COMPAT_NR accordingly.
Fixes: 9571a6a35fdc ("arm64: Enable seccomp architecture tracking") Signed-off-by: GONG, Ruiqi gongruiqi1@huawei.com Reviewed-by: Chen Jiahao chenjiahao16@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/arm64/include/asm/seccomp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/include/asm/seccomp.h b/arch/arm64/include/asm/seccomp.h index 22f74666e135..0be58ac682c4 100644 --- a/arch/arm64/include/asm/seccomp.h +++ b/arch/arm64/include/asm/seccomp.h @@ -52,10 +52,10 @@ static inline const int *get_compat_mode1_syscalls(void) #define SECCOMP_ARCH_NATIVE AUDIT_ARCH_AARCH64 #define SECCOMP_ARCH_NATIVE_NR NR_syscalls #define SECCOMP_ARCH_NATIVE_NAME "aarch64" -#ifdef CONFIG_COMPAT +#ifdef CONFIG_AARCH32_EL0 # define SECCOMP_ARCH_COMPAT AUDIT_ARCH_ARM # define SECCOMP_ARCH_COMPAT_NR __NR_compat_syscalls # define SECCOMP_ARCH_COMPAT_NAME "arm" -#endif +#endif /* CONFIG_AARCH32_EL0 */
#endif /* _ASM_SECCOMP_H */
From: ManYi Li limanyi@uniontech.com
stable inclusion from stable-5.10.48 commit f77f97238496aeab597d573aa1703441626da999 bugzilla: 173268 https://gitee.com/openeuler/kernel/issues/I4DD1K
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 7dd753ca59d6c8cc09aa1ed24f7657524803c7f3 ]
Handle a reported media event code of 3. This indicates that the media has been removed from the drive and user intervention is required to proceed. Return DISK_EVENT_EJECT_REQUEST in that case.
Link: https://lore.kernel.org/r/20210611094402.23884-1-limanyi@uniontech.com Signed-off-by: ManYi Li limanyi@uniontech.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/scsi/sr.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index fd4b582110b2..77961f058367 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -220,6 +220,8 @@ static unsigned int sr_get_events(struct scsi_device *sdev) return DISK_EVENT_EJECT_REQUEST; else if (med->media_event_code == 2) return DISK_EVENT_MEDIA_CHANGE; + else if (med->media_event_code == 3) + return DISK_EVENT_EJECT_REQUEST; return 0; }
From: Loic Poulain loic.poulain@linaro.org
stable inclusion from stable-5.10.48 commit d191c3d6ad330a686bfe41e923442594766cc619 bugzilla: 173268 https://gitee.com/openeuler/kernel/issues/I4DD1K
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 3093e6cca3ba7d47848068cb256c489675125181 ]
A disabled/masked interrupt marked as wakeup source must be re-enable and unmasked in order to be able to wake-up the host. That can be done by flaging the irqchip with IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND.
Note: It 'sometimes' works without that change, but only thanks to the lazy generic interrupt disabling (keeping interrupt unmasked).
Reported-by: Michal Koziel michal.koziel@emlogic.no Signed-off-by: Loic Poulain loic.poulain@linaro.org Reviewed-by: Linus Walleij linus.walleij@linaro.org Signed-off-by: Bartosz Golaszewski bgolaszewski@baylibre.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/gpio/gpio-mxc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpio/gpio-mxc.c b/drivers/gpio/gpio-mxc.c index 643f4c557ac2..ba6ed2a413f5 100644 --- a/drivers/gpio/gpio-mxc.c +++ b/drivers/gpio/gpio-mxc.c @@ -361,7 +361,7 @@ static int mxc_gpio_init_gc(struct mxc_gpio_port *port, int irq_base) ct->chip.irq_unmask = irq_gc_mask_set_bit; ct->chip.irq_set_type = gpio_set_irq_type; ct->chip.irq_set_wake = gpio_set_wake_irq; - ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND; + ct->chip.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_ENABLE_WAKEUP_ON_SUSPEND; ct->regs.ack = GPIO_ISR; ct->regs.mask = GPIO_IMR;
From: Christian König christian.koenig@amd.com
stable inclusion from stable-5.10.48 commit 45ca6df5df11da7f19c85591e093479d5c60262f bugzilla: 173268 https://gitee.com/openeuler/kernel/issues/I4DD1K
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit d330099115597bbc238d6758a4930e72b49ea9ba ]
AGP for example doesn't have a dma_address array.
Signed-off-by: Christian König christian.koenig@amd.com Acked-by: Alex Deucher alexander.deucher@amd.com Link: https://patchwork.freedesktop.org/patch/msgid/20210614110517.1624-1-christia... Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/gpu/drm/nouveau/nouveau_bo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 7daa12eec01b..b4946b595d86 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -590,7 +590,7 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm; int i;
- if (!ttm_dma) + if (!ttm_dma || !ttm_dma->dma_address) return;
/* Don't waste time looping if the object is coherent */ @@ -610,7 +610,7 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm; int i;
- if (!ttm_dma) + if (!ttm_dma || !ttm_dma->dma_address) return;
/* Don't waste time looping if the object is coherent */
From: Johannes Berg johannes.berg@intel.com
stable inclusion from stable-5.10.48 commit 348143a38012656f914cc7d3f016849e08a76d82 bugzilla: 173268 https://gitee.com/openeuler/kernel/issues/I4DD1K
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit c6414e1a2bd26b0071e2b9d6034621f705dfd4c0 ]
Both of these drivers use ioport_map(), so they need to depend on HAS_IOPORT_MAP. Otherwise, they cannot be built even with COMPILE_TEST on architectures without an ioport implementation, such as ARCH=um.
Reported-by: kernel test robot lkp@intel.com Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Bartosz Golaszewski bgolaszewski@baylibre.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/gpio/Kconfig | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 14751c7ccd1f..d1300fc003ed 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1337,6 +1337,7 @@ config GPIO_TPS68470 config GPIO_TQMX86 tristate "TQ-Systems QTMX86 GPIO" depends on MFD_TQMX86 || COMPILE_TEST + depends on HAS_IOPORT_MAP select GPIOLIB_IRQCHIP help This driver supports GPIO on the TQMX86 IO controller. @@ -1404,6 +1405,7 @@ menu "PCI GPIO expanders" config GPIO_AMD8111 tristate "AMD 8111 GPIO driver" depends on X86 || COMPILE_TEST + depends on HAS_IOPORT_MAP help The AMD 8111 south bridge contains 32 GPIO pins which can be used.
From: Mark Bloch mbloch@nvidia.com
stable inclusion from stable-5.10.48 commit 4ab869e0289dbab0aeeedea5e5c4536e13af47b2 bugzilla: 173268 https://gitee.com/openeuler/kernel/issues/I4DD1K
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit edc0b0bccc9c80d9a44d3002dcca94984b25e7cf upstream.
Allow creating FDB steering rules only when in switchdev mode.
The only software model where a userspace application can manipulate FDB entries is when it manages the eswitch. This is only possible in switchdev mode where we expose a single RDMA device with representors for all the vports that are connected to the eswitch.
Fixes: 52438be44112 ("RDMA/mlx5: Allow inserting a steering rule to the FDB") Link: https://lore.kernel.org/r/e928ae7c58d07f104716a2a8d730963d1bd01204.162305292... Reviewed-by: Maor Gottlieb maorg@nvidia.com Signed-off-by: Mark Bloch mbloch@nvidia.com Signed-off-by: Leon Romanovsky leonro@nvidia.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com [sudip: use old mlx5_eswitch_mode] Signed-off-by: Sudip Mukherjee sudipm.mukherjee@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/infiniband/hw/mlx5/fs.c | 7 +++++++ 1 file changed, 7 insertions(+)
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 13d50b178166..b3391ecedda7 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2136,6 +2136,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)( if (err) goto end;
+ if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB && + mlx5_eswitch_mode(dev->mdev->priv.eswitch) != + MLX5_ESWITCH_OFFLOADS) { + err = -EINVAL; + goto end; + } + uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0);
From: Sean Christopherson seanjc@google.com
stable inclusion from stable-5.10.48 commit 4dc96804286498f74beabcfb7603bb76d9905ad9 bugzilla: 173268 https://gitee.com/openeuler/kernel/issues/I4DD1K
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit f71a53d1180d5ecc346f0c6a23191d837fe2871b upstream.
Restore CR4.LA57 to the mmu_role to fix an amusing edge case with nested virtualization. When KVM (L0) is using TDP, CR4.LA57 is not reflected in mmu_role.base.level because that tracks the shadow root level, i.e. TDP level. Normally, this is not an issue because LA57 can't be toggled while long mode is active, i.e. the guest has to first disable paging, then toggle LA57, then re-enable paging, thus ensuring an MMU reinitialization.
But if L1 is crafty, it can load a new CR4 on VM-Exit and toggle LA57 without having to bounce through an unpaged section. L1 can also load a new CR3 on exit, i.e. it doesn't even need to play crazy paging games, a single entry PML5 is sufficient. Such shenanigans are only problematic if L0 and L1 use TDP, otherwise L1 and L2 share an MMU that gets reinitialized on nested VM-Enter/VM-Exit due to mmu_role.base.guest_mode.
Note, in the L2 case with nested TDP, even though L1 can switch between L2s with different LA57 settings, thus bypassing the paging requirement, in that case KVM's nested_mmu will track LA57 in base.level.
This reverts commit 8053f924cad30bf9f9a24e02b6c8ddfabf5202ea.
Fixes: 8053f924cad3 ("KVM: x86/mmu: Drop kvm_mmu_extended_role.cr4_la57 hack") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson seanjc@google.com Message-Id: 20210622175739.3610207-6-seanjc@google.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu/mmu.c | 1 + 2 files changed, 2 insertions(+)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 90c34de3fefc..d715ce29bfc3 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -296,6 +296,7 @@ union kvm_mmu_extended_role { unsigned int cr4_pke:1; unsigned int cr4_smap:1; unsigned int cr4_smep:1; + unsigned int cr4_la57:1; unsigned int maxphyaddr:6; }; }; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6b794344c02d..f2eeaf197294 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4442,6 +4442,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu) ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); ext.cr4_pse = !!is_pse(vcpu); ext.cr4_pke = !!kvm_read_cr4_bits(vcpu, X86_CR4_PKE); + ext.cr4_la57 = !!kvm_read_cr4_bits(vcpu, X86_CR4_LA57); ext.maxphyaddr = cpuid_maxphyaddr(vcpu);
ext.valid = 1;
From: Zhang Xiaoxu zhangxiaoxu5@huawei.com
mainline inclusion from mainline-v5.14 commit fcb170a9d825d7db4a3fb870b0300f5a40a8d096 category: bugfix bugzilla: 51898 https://gitee.com/openeuler/kernel/issues/I4DD74
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
The 'queue->nr' will wraparound from 0 to 255 when only current priority queue has tasks. This maybe lead a deadlock same as commit dfe1fe75e00e ("NFSv4: Fix deadlock between nfs4_evict_inode() and nfs4_opendata_get_inode()"):
Privileged delegreturn task is queued to privileged list because all the slots are assigned. When non-privileged task complete and release the slot, a non-privileged maybe picked out. It maybe allocate slot failed when the session on draining.
If the 'queue->nr' has wraparound to 255, and no enough slot to service it, then the privileged delegreturn will lost to wake up.
So we should avoid the wraparound on 'queue->nr'.
Reported-by: Hulk Robot hulkci@huawei.com Fixes: 5fcdfacc01f3 ("NFSv4: Return delegations synchronously in evict_inode") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@vger.kernel.org Signed-off-by: Zhang Xiaoxu zhangxiaoxu5@huawei.com Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: ChenXiaoSong chenxiaosong2@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/sunrpc/sched.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 39ed0e0afe6d..71993adb0762 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -595,7 +595,8 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q * Service a batch of tasks from a single owner. */ q = &queue->tasks[queue->priority]; - if (!list_empty(q) && --queue->nr) { + if (!list_empty(q) && queue->nr) { + queue->nr--; task = list_first_entry(q, struct rpc_task, u.tk_wait.list); goto out; }
From: Zhang Xiaoxu zhangxiaoxu5@huawei.com
mainline inclusion from mainline-v5.14 commit 5483b904bf336948826594610af4c9bbb0d9e3aa category: bugfix bugzilla: 51898 https://gitee.com/openeuler/kernel/issues/I4DD74
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
When find a task from wait queue to wake up, a non-privileged task may be found out, rather than the privileged. This maybe lead a deadlock same as commit dfe1fe75e00e ("NFSv4: Fix deadlock between nfs4_evict_inode() and nfs4_opendata_get_inode()"):
Privileged delegreturn task is queued to privileged list because all the slots are assigned. If there has no enough slot to wake up the non-privileged batch tasks(session less than 8 slot), then the privileged delegreturn task maybe lost waked up because the found out task can't get slot since the session is on draining.
So we should treate the privileged task as the emergency task, and execute it as for as we can.
Reported-by: Hulk Robot hulkci@huawei.com Fixes: 5fcdfacc01f3 ("NFSv4: Return delegations synchronously in evict_inode") Cc: stable@vger.kernel.org Signed-off-by: Zhang Xiaoxu zhangxiaoxu5@huawei.com Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: ChenXiaoSong chenxiaosong2@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/sunrpc/sched.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 71993adb0762..c045f63d11fa 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -591,6 +591,15 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q struct list_head *q; struct rpc_task *task;
+ /* + * Service the privileged queue. + */ + q = &queue->tasks[RPC_NR_PRIORITY - 1]; + if (queue->maxpriority > RPC_PRIORITY_PRIVILEGED && !list_empty(q)) { + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); + goto out; + } + /* * Service a batch of tasks from a single owner. */
From: Theodore Ts'o tytso@mit.edu
mainline inclusion from mainline-5.14 commit 61bb4a1c417e5b95d9edb4f887f131de32e419cb category: bugfix bugzilla: 173880 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-------------------------------------------------
After commit 618f003199c6 ("ext4: fix memory leak in ext4_fill_super"), after the file system is remounted read-only, there is a race where the kmmpd thread can exit, causing sbi->s_mmp_tsk to point at freed memory, which the call to ext4_stop_mmpd() can trip over.
Fix this by only allowing kmmpd() to exit when it is stopped via ext4_stop_mmpd().
Link: https://lore.kernel.org/r/20210707002433.3719773-1-tytso@mit.edu Reported-by: Ye Bin yebin10@huawei.com Bug-Report-Link: 20210629143603.2166962-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- fs/ext4/mmp.c | 31 +++++++++++++++---------------- fs/ext4/super.c | 6 +++++- 2 files changed, 20 insertions(+), 17 deletions(-)
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 6cb598b549ca..bc364c119af6 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -156,7 +156,12 @@ static int kmmpd(void *data) memcpy(mmp->mmp_nodename, init_utsname()->nodename, sizeof(mmp->mmp_nodename));
- while (!kthread_should_stop()) { + while (!kthread_should_stop() && !sb_rdonly(sb)) { + if (!ext4_has_feature_mmp(sb)) { + ext4_warning(sb, "kmmpd being stopped since MMP feature" + " has been disabled."); + goto wait_to_exit; + } if (++seq > EXT4_MMP_SEQ_MAX) seq = 1;
@@ -177,16 +182,6 @@ static int kmmpd(void *data) failed_writes++; }
- if (!(le32_to_cpu(es->s_feature_incompat) & - EXT4_FEATURE_INCOMPAT_MMP)) { - ext4_warning(sb, "kmmpd being stopped since MMP feature" - " has been disabled."); - goto exit_thread; - } - - if (sb_rdonly(sb)) - break; - diff = jiffies - last_update_time; if (diff < mmp_update_interval * HZ) schedule_timeout_interruptible(mmp_update_interval * @@ -207,7 +202,7 @@ static int kmmpd(void *data) ext4_error_err(sb, -retval, "error reading MMP data: %d", retval); - goto exit_thread; + goto wait_to_exit; }
mmp_check = (struct mmp_struct *)(bh_check->b_data); @@ -221,7 +216,7 @@ static int kmmpd(void *data) ext4_error_err(sb, EBUSY, "abort"); put_bh(bh_check); retval = -EBUSY; - goto exit_thread; + goto wait_to_exit; } put_bh(bh_check); } @@ -244,7 +239,13 @@ static int kmmpd(void *data)
retval = write_mmp_block(sb, bh);
-exit_thread: +wait_to_exit: + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (!kthread_should_stop()) + schedule(); + } + set_current_state(TASK_RUNNING); return retval; }
@@ -391,5 +392,3 @@ int ext4_multi_mount_protect(struct super_block *sb, brelse(bh); return 1; } - - diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 01147e5bafa7..c5c89f10a7f4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5952,7 +5952,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) */ ext4_mark_recovery_complete(sb, es); } - ext4_stop_mmpd(sbi); } else { /* Make sure we can mount this feature set readwrite */ if (ext4_has_feature_readonly(sb) || @@ -6066,6 +6065,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks) ext4_release_system_zone(sb);
+ if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) + ext4_stop_mmpd(sbi); + /* * Some options can be enabled by ext4 and/or by VFS mount flag * either way we need to make sure it matches in both *flags and @@ -6098,6 +6100,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(to_free[i]); #endif + if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) + ext4_stop_mmpd(sbi); kfree(orig_data); return err; }
From: "Gong, Sishuai" sishuai@purdue.edu
mainline inclusion from mainline-v5.13-rc1 commit 69e16d01d1de4f1249869de342915f608feb55d5 category: bugfix bugzilla: 105658 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
l2tp_tunnel_register() registers a tunnel without fully initializing its attribute. This can allow another kernel thread running l2tp_xmit_core() to access the uninitialized data and then cause a kernel NULL pointer dereference error, as shown below.
Thread 1 Thread 2 //l2tp_tunnel_register() list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); //pppol2tp_connect() tunnel = l2tp_tunnel_get(sock_net(sk), info.tunnel_id); // Fetch the new tunnel ... //l2tp_xmit_core() struct sock *sk = tunnel->sock; ... bh_lock_sock(sk); //Null pointer error happens tunnel->sock = sk;
Fix this bug by initializing tunnel->sock before adding the tunnel into l2tp_tunnel_list.
Reviewed-by: Cong Wang cong.wang@bytedance.com Signed-off-by: Sishuai Gong sishuai@purdue.edu Reported-by: Sishuai Gong sishuai@purdue.edu Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Huang Guobin huangguobin4@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/l2tp/l2tp_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 203890e378cb..8eb805ee18d4 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1478,11 +1478,15 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, tunnel->l2tp_net = net; pn = l2tp_pernet(net);
+ sk = sock->sk; + sock_hold(sk); + tunnel->sock = sk; + spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) { if (tunnel_walk->tunnel_id == tunnel->tunnel_id) { spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - + sock_put(sk); ret = -EEXIST; goto err_sock; } @@ -1490,10 +1494,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net, list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- sk = sock->sk; - sock_hold(sk); - tunnel->sock = sk; - if (tunnel->encap == L2TP_ENCAPTYPE_UDP) { struct udp_tunnel_sock_cfg udp_cfg = { .sk_user_data = tunnel,
From: Ido Schimmel idosch@nvidia.com
mainline inclusion from mainline-v5.13-rc1 commit 812fa71f0d967dea7616810f27e98135d410b27e category: bugfix bugzilla: 105860 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
Netfilter tries to reroute mangled packets as a different route might need to be used following the mangling. When this happens, netfilter does not populate the IP protocol, the source port and the destination port in the flow key. Therefore, FIB rules that match on these fields are ignored and packets can be misrouted.
Solve this by dissecting the outer flow and populating the flow key before rerouting the packet. Note that flow dissection only happens when FIB rules that match on these fields are installed, so in the common case there should not be a penalty.
Reported-by: Michal Soltys msoltyspl@yandex.pl Signed-off-by: Ido Schimmel idosch@nvidia.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Huang Guobin huangguobin4@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/ipv4/netfilter.c | 2 ++ net/ipv6/netfilter.c | 2 ++ 2 files changed, 4 insertions(+)
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 7c841037c533..aff707988e23 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -25,6 +25,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un __be32 saddr = iph->saddr; __u8 flags; struct net_device *dev = skb_dst(skb)->dev; + struct flow_keys flkeys; unsigned int hh_len;
sk = sk_to_full_sk(sk); @@ -48,6 +49,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un fl4.flowi4_oif = l3mdev_master_ifindex(dev); fl4.flowi4_mark = skb->mark; fl4.flowi4_flags = flags; + fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys); rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index ab9a279dd6d4..6ab710b5a1a8 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -24,6 +24,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff { const struct ipv6hdr *iph = ipv6_hdr(skb); struct sock *sk = sk_to_full_sk(sk_partial); + struct flow_keys flkeys; unsigned int hh_len; struct dst_entry *dst; int strict = (ipv6_addr_type(&iph->daddr) & @@ -38,6 +39,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff }; int err;
+ fib6_rules_early_flow_dissect(net, skb, &fl6, &flkeys); dst = ip6_route_output(net, sk, &fl6); err = dst->error; if (err) {
From: Huang Guobin huangguobin4@huawei.com
mainline inclusion from mainline-v5.13-rc1 commit 6c8774a94e6ad26f29ef103c8671f55c255c6201 category: bugfix bugzilla: 78605 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
User space could ask for very large hash tables, we need to make sure our size computations wont overflow.
nf_tables_newset() needs to double check the u64 size will fit into size_t field.
Fixes: 0ed6389c483d ("netfilter: nf_tables: rename set implementations") Signed-off-by: Eric Dumazet edumazet@google.com Signed-off-by: Pablo Neira Ayuso pablo@netfilter.org Signed-off-by: Huang Guobin huangguobin4@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/netfilter/nf_tables_api.c | 7 +++++-- net/netfilter/nft_set_hash.c | 10 +++++----- 2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e34d05cc5754..947d52cff582 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4115,6 +4115,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; + size_t alloc_size; char *name; u64 size; u64 timeout; @@ -4263,8 +4264,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, size = 0; if (ops->privsize != NULL) size = ops->privsize(nla, &desc); - - set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL); + alloc_size = sizeof(*set) + size + udlen; + if (alloc_size < size) + return -ENOMEM; + set = kvzalloc(alloc_size, GFP_KERNEL); if (!set) return -ENOMEM;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index d7083bcb20e8..858c8d4d659a 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -604,7 +604,7 @@ static u64 nft_hash_privsize(const struct nlattr * const nla[], const struct nft_set_desc *desc) { return sizeof(struct nft_hash) + - nft_hash_buckets(desc->size) * sizeof(struct hlist_head); + (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head); }
static int nft_hash_init(const struct nft_set *set, @@ -644,8 +644,8 @@ static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, return false;
est->size = sizeof(struct nft_hash) + - nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + - desc->size * sizeof(struct nft_hash_elem); + (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + + (u64)desc->size * sizeof(struct nft_hash_elem); est->lookup = NFT_SET_CLASS_O_1; est->space = NFT_SET_CLASS_O_N;
@@ -662,8 +662,8 @@ static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features return false;
est->size = sizeof(struct nft_hash) + - nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + - desc->size * sizeof(struct nft_hash_elem); + (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) + + (u64)desc->size * sizeof(struct nft_hash_elem); est->lookup = NFT_SET_CLASS_O_1; est->space = NFT_SET_CLASS_O_N;
From: Vlad Buslov vladbu@nvidia.com
mainline inclusion from mainline-v5.12-rc1 commit 6f1995523a0fac7dd4fc7d2f175604dd9f699338 category: bugfix bugzilla: 107379 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
The function fib6_walk_continue() cannot return a positive value when called from register_fib_notifier(), but ignoring causes static analyzer to generate warnings in users of register_fib_notifier() that try to convert returned error code to pointer with ERR_PTR(). Handle such case by explicitly checking for positive error values and converting them to -EINVAL in fib6_tables_dump().
Reported-by: Dan Carpenter dan.carpenter@oracle.com Suggested-by: Ido Schimmel idosch@nvidia.com Signed-off-by: Vlad Buslov vladbu@nvidia.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Huang Guobin huangguobin4@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/ipv6/ip6_fib.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1fb79dbde0cb..679699e953f1 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -499,7 +499,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb,
hlist_for_each_entry_rcu(tb, head, tb6_hlist) { err = fib6_table_dump(net, tb, w); - if (err < 0) + if (err) goto out; } } @@ -507,7 +507,8 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb, out: kfree(w);
- return err; + /* The tree traversal function should never return a positive value. */ + return err > 0 ? -EINVAL : err; }
static int fib6_dump_node(struct fib6_walker *w)
From: Jakub Kicinski kuba@kernel.org
mainline inclusion from mainline-v5.11-rc4 commit 766b0515d5bec4b780750773ed3009b148df8c0a category: bugfix bugzilla: 108062 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
If register_netdevice() fails at the very last stage - the notifier call - some subsystems may have already seen it and grabbed a reference. struct net_device can't be freed right away without calling netdev_wait_all_refs().
Now that we have a clean interface in form of dev->needs_free_netdev and lenient free_netdev() we can undo what commit 93ee31f14f6f ("[NET]: Fix free_netdev on register_netdev failure.") has done and complete the unregistration path by bringing the net_set_todo() call back.
After registration fails user is still expected to explicitly free the net_device, so make sure ->needs_free_netdev is cleared, otherwise rolling back the registration will cause the old double free for callers who release rtnl_lock before the free.
This also solves the problem of priv_destructor not being called on notifier error.
net_set_todo() will be moved back into unregister_netdevice_queue() in a follow up.
Reported-by: Hulk Robot hulkci@huawei.com Reported-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jakub Kicinski kuba@kernel.org Signed-off-by: Huang Guobin huangguobin4@huawei.com Reviewed-by: Yue Haibing yuehaibing@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- net/core/dev.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c index 39827a829eb1..d0a629b69298 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10095,17 +10095,11 @@ int register_netdevice(struct net_device *dev) ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); if (ret) { + /* Expect explicit free_netdev() on failure */ + dev->needs_free_netdev = false; rollback_registered(dev); - rcu_barrier(); - - dev->reg_state = NETREG_UNREGISTERED; - /* We should put the kobject that hold in - * netdev_unregister_kobject(), otherwise - * the net device cannot be freed when - * driver calls free_netdev(), because the - * kobject is being hold. - */ - kobject_put(&dev->dev.kobj); + net_set_todo(dev); + goto out; } /* * Prevent userspace races by waiting until the network
From: Cui GaoSheng cuigaosheng1@huawei.com
Offering: HULK hulk inclusion category: bugfix bugzilla: 172972 https://gitee.com/openeuler/kernel/issues/I4DDEL
-----------------------------------------------------------------
The value returned by get_region_number may exceed the usable memory positions indexed by bitmap, it will cause probabilistic boot failure in images which enabled kaslr.
Fixes: 156b9ca54d0d ("[Backport] ARM: decompressor: add KASLR support") Signed-off-by: Cui GaoSheng cuigaosheng1@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/arm/boot/compressed/kaslr.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/arch/arm/boot/compressed/kaslr.c b/arch/arm/boot/compressed/kaslr.c index 7cab9a670b94..ff81a2c31233 100644 --- a/arch/arm/boot/compressed/kaslr.c +++ b/arch/arm/boot/compressed/kaslr.c @@ -178,13 +178,18 @@ static u32 count_suitable_regions(const void *fdt, struct regions *regions, return ret; }
-static u32 get_region_number(u32 num, u32 *bitmap) +/* The caller ensures that num is within the range of regions.*/ +static u32 get_region_number(u32 num, u32 *bitmap, u32 size) { - u32 i; + u32 i, cnt = size * BITS_PER_BYTE * sizeof(u32); + + for (i = 0; i < cnt; i++) { + if (bitmap[i >> 5] & BIT(i & 0x1f)) + continue; + if (num-- == 0) + break; + }
- for (i = 0; num > 0; i++) - if (!(bitmap[i >> 5] & BIT(i & 0x1f))) - num--; return i; }
@@ -453,7 +458,7 @@ u32 kaslr_early_init(u32 *kaslr_offset, u32 image_base, u32 image_size, num = ((u16)seed * count) >> 16; puthex32(num);
- *kaslr_offset = get_region_number(num, bitmap) * SZ_2M; + *kaslr_offset = get_region_number(num, bitmap, sizeof(bitmap) / sizeof(u32)) * SZ_2M; puthex32(*kaslr_offset);
return *kaslr_offset;
From: Vijayanand Jitta vjitta@codeaurora.org
mainline inclusion from mainline-5.13-rc1 commit ad216c0316ad6391d90f4de0a7f59396b2925a06 category: bugfix bugzilla: 57119 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
------------------------------------------------- A potential use after free can occur in _vm_unmap_aliases where an already freed vmap_area could be accessed, Consider the following scenario:
Process 1 Process 2
__vm_unmap_aliases __vm_unmap_aliases purge_fragmented_blocks_allcpus rcu_read_lock() rcu_read_lock() list_del_rcu(&vb->free_list) list_for_each_entry_rcu(vb .. ) __purge_vmap_area_lazy kmem_cache_free(va) va_start = vb->va->va_start
Here Process 1 is in purge path and it does list_del_rcu on vmap_block and later frees the vmap_area, since Process 2 was holding the rcu lock at this time vmap_block will still be present in and Process 2 accesse it and thereby it tries to access vmap_area of that vmap_block which was already freed by Process 1 and this results in use after free.
Fix this by adding a check for vb->dirty before accessing vmap_area structure since vb->dirty will be set to VMAP_BBMAP_BITS in purge path checking for this will prevent the use after free.
Link: https://lkml.kernel.org/r/1616062105-23263-1-git-send-email-vjitta@codeauror... Signed-off-by: Vijayanand Jitta vjitta@codeaurora.org Reviewed-by: Uladzislau Rezki (Sony) urezki@gmail.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org (cherry picked from commit ad216c0316ad6391d90f4de0a7f59396b2925a06) Signed-off-by: Yue Zou zouyue3@huawei.com Reviewed-by: Chen Wandun chenwandun@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/vmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e05329d6d505..1b4838f6454d 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2039,7 +2039,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) rcu_read_lock(); list_for_each_entry_rcu(vb, &vbq->free, free_list) { spin_lock(&vb->lock); - if (vb->dirty) { + if (vb->dirty && vb->dirty != VMAP_BBMAP_BITS) { unsigned long va_start = vb->va->va_start; unsigned long s, e;
From: Liu Xiang liu.xiang@zlingsmart.com
mainline inclusion from mainline-v5.11-rc1 commit 0a4f3d1bb91cac4efdd780373638b6a1a4c24c51 category: bugfix bugzilla: 108349 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
On 64-bit machine, delta variable in hugetlb_acct_memory() may be larger than 0xffffffff, but gather_surplus_pages() can only use the low 32-bit value now. So we need to fix type of delta parameter and related local variables in gather_surplus_pages().
Link: https://lkml.kernel.org/r/1605793733-3573-1-git-send-email-liu.xiang@zlingsm... Reported-by: Ma Chenggong ma.chenggong@zlingsmart.com Signed-off-by: Liu Xiang liu.xiang@zlingsmart.com Signed-off-by: Pan Jiagen pan.jiagen@zlingsmart.com Reviewed-by: Mike Kravetz mike.kravetz@oracle.com Cc: Liu Xiang liuxiang_1999@126.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Tong Tiangen tongtiangen@huawei.com Reviewed-by: Chen Wandun chenwandun@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/hugetlb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 19f83ef14ee4..44b57cc363ff 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2070,13 +2070,14 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma, * Increase the hugetlb pool such that it can accommodate a reservation * of size 'delta'. */ -static int gather_surplus_pages(struct hstate *h, int delta) +static int gather_surplus_pages(struct hstate *h, long delta) __must_hold(&hugetlb_lock) { struct list_head surplus_list; struct page *page, *tmp; - int ret, i; - int needed, allocated; + int ret; + long i; + long needed, allocated; bool alloc_ok = true;
lockdep_assert_held(&hugetlb_lock);
From: Muchun Song songmuchun@bytedance.com
mainline inclusion from mainline-v5.11-rc1 commit 7ad69832f37e3cea8557db6df7c793905f1135e8 category: bugfix bugzilla: 108340 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
When we free a page whose order is very close to MAX_ORDER and greater than pageblock_order, it wastes some CPU cycles to increase max_order to MAX_ORDER one by one and check the pageblock migratetype of that page repeatedly especially when MAX_ORDER is much larger than pageblock_order.
We also should not be checking migratetype of buddy when "order == MAX_ORDER - 1" as the buddy pfn may be invalid, so adjust the condition. With the new check, we don't need the max_order check anymore, so we replace it.
Also adjust max_order initialization so that it's lower by one than previously, which makes the code hopefully more clear.
Link: https://lkml.kernel.org/r/20201204155109.55451-1-songmuchun@bytedance.com Fixes: d9dddbf55667 ("mm/page_alloc: prevent merging between isolated and other pageblocks") Signed-off-by: Muchun Song songmuchun@bytedance.com Acked-by: Vlastimil Babka vbabka@suse.cz Reviewed-by: Oscar Salvador osalvador@suse.de Reviewed-by: David Hildenbrand david@redhat.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Tong Tiangen tongtiangen@huawei.com Reviewed-by: Chen Wandun chenwandun@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_alloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a5adfda63ff5..6449170e8b53 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -998,7 +998,7 @@ static inline void __free_one_page(struct page *page, struct page *buddy; bool to_tail;
- max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1); + max_order = min_t(unsigned int, MAX_ORDER - 1, pageblock_order);
VM_BUG_ON(!zone_is_initialized(zone)); VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); @@ -1011,7 +1011,7 @@ static inline void __free_one_page(struct page *page, VM_BUG_ON_PAGE(bad_range(zone, page), page);
continue_merging: - while (order < max_order - 1) { + while (order < max_order) { if (compaction_capture(capc, page, order, migratetype)) { __mod_zone_freepage_state(zone, -(1 << order), migratetype); @@ -1037,7 +1037,7 @@ static inline void __free_one_page(struct page *page, pfn = combined_pfn; order++; } - if (max_order < MAX_ORDER) { + if (order < MAX_ORDER - 1) { /* If we are here, it means order is >= pageblock_order. * We want to prevent merge between freepages on isolate * pageblock and normal pageblock. Without this, pageblock @@ -1058,7 +1058,7 @@ static inline void __free_one_page(struct page *page, is_migrate_isolate(buddy_mt))) goto done_merging; } - max_order++; + max_order = order + 1; goto continue_merging; }
From: YueHaibing yuehaibing@huawei.com
mainline inclusion from mainline-v5.11-rc1 commit 42a44704367cd18d069c9855cb84090ff90ecd86 category: bugfix bugzilla: 108339 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
Fix smatch warning:
mm/zswap.c:425 zswap_cpu_comp_prepare() warn: passing zero to 'PTR_ERR'
crypto_alloc_comp() never return NULL, use IS_ERR instead of IS_ERR_OR_NULL to fix this.
Link: https://lkml.kernel.org/r/20201031055615.28080-1-yuehaibing@huawei.com Fixes: f1c54846ee45 ("zswap: dynamic pool creation") Signed-off-by: YueHaibing yuehaibing@huawei.com Reviewed-by: David Hildenbrand david@redhat.com Cc: Seth Jennings sjenning@redhat.com Cc: Dan Streetman ddstreet@ieee.org Cc: Vitaly Wool vitaly.wool@konsulko.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Tong Tiangen tongtiangen@huawei.com Reviewed-by: Chen Wandun chenwandun@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/zswap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/zswap.c b/mm/zswap.c index 4c6ce385c4e7..cf2798903445 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -421,7 +421,7 @@ static int zswap_cpu_comp_prepare(unsigned int cpu, struct hlist_node *node) return 0;
tfm = crypto_alloc_comp(pool->tfm_name, 0, 0); - if (IS_ERR_OR_NULL(tfm)) { + if (IS_ERR(tfm)) { pr_err("could not alloc crypto comp %s : %ld\n", pool->tfm_name, PTR_ERR(tfm)); return -ENOMEM;
From: Muchun Song songmuchun@bytedance.com
mainline inclusion from mainline-v5.11-rc1 commit 2484be0f88dc6c9670362d51f6a04f2da0626b50 category: bugfix bugzilla: 108337 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
A max order page has no buddy page and never merges to another order. So isolating and then freeing it is pointless.
Link: https://lkml.kernel.org/r/20201202122114.75316-1-songmuchun@bytedance.com Fixes: 3c605096d315 ("mm/page_alloc: restrict max order of merging on isolated pageblock") Signed-off-by: Muchun Song songmuchun@bytedance.com Reviewed-by: Andrew Morton akpm@linux-foundation.org Acked-by: Vlastimil Babka vbabka@suse.cz Reviewed-by: David Hildenbrand david@redhat.com Reviewed-by: Oscar Salvador osalvador@suse.de Cc: Joonsoo Kim iamjoonsoo.kim@lge.com Cc: Matthew Wilcox willy@infradead.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Tong Tiangen tongtiangen@huawei.com Reviewed-by: Chen Wandun chenwandun@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- mm/page_isolation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index abbf42214485..756d1542f2c8 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -89,7 +89,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) */ if (PageBuddy(page)) { order = buddy_order(page); - if (order >= pageblock_order) { + if (order >= pageblock_order && order < MAX_ORDER - 1) { pfn = page_to_pfn(page); buddy_pfn = __find_buddy_pfn(pfn, order); buddy = page + (buddy_pfn - pfn);
From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-v5.11-rc1 commit f7387170339afb473a0d95b7732f904346f9795e category: bugfix bugzilla: 172157 https://gitee.com/openeuler/kernel/issues/I4DDEL CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
BDIs get unregistered during device removal, and this WARN can be trivially triggered by hot-removing a NVMe device while running fsx It is otherwise harmless as we still hold a BDI reference, and the writeback has been shut down already.
Link: https://lore.kernel.org/r/20200928122613.434820-1-hch@lst.de Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Jan Kara jack@suse.cz Signed-off-by: Ye Bin yebin10@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- fs/fs-writeback.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 0d0f014b09ec..c68e9100afb5 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2298,10 +2298,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
wb = locked_inode_to_wb_and_lock_list(inode);
- WARN((wb->bdi->capabilities & BDI_CAP_WRITEBACK) && - !test_bit(WB_registered, &wb->state), - "bdi-%s not registered\n", bdi_dev_name(wb->bdi)); - inode->dirtied_when = jiffies; if (dirtytime) inode->dirtied_time_when = jiffies;
From: yangerkun yangerkun@huawei.com
Offering: HULK hulk inclusion category: performance bugzilla: 174005 https://gitee.com/openeuler/kernel/issues/I4DDEL
---------------------------
'commit 737eb78e82d5 ("block: Delay default elevator initialization")' delay elevator init to fix some problem for special device like SMR. Also, the commit add the logic to ensure no IO can happened while blk_mq_init_sched. However, blk_mq_freeze_queue/blk_mq_quiesce_queue will add RCU Grace period which can lead some overhead(about 36 loop device try to mount which each Grace period around 20ms).
For loop device, no io can happened while add_disk, so it's safe to skip this step. Add flag QUEUE_FLAG_NO_INIT_IO to identify this case.
Signed-off-by: yangerkun yangerkun@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- block/elevator.c | 14 ++++++++++---- drivers/block/loop.c | 5 +++++ include/linux/blkdev.h | 2 ++ 3 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/block/elevator.c b/block/elevator.c index 293c5c81397a..dc756d4c2678 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -662,6 +662,7 @@ void elevator_init_mq(struct request_queue *q) { struct elevator_type *e; int err; + bool no_init_io;
if (!elv_support_iosched(q)) return; @@ -678,13 +679,18 @@ void elevator_init_mq(struct request_queue *q) if (!e) return;
- blk_mq_freeze_queue(q); - blk_mq_quiesce_queue(q); + no_init_io = blk_queue_no_init_io(q); + if (!no_init_io) { + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + }
err = blk_mq_init_sched(q, e);
- blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q); + if (!no_init_io) { + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); + }
if (err) { pr_warn(""%s" elevator initialization failed, " diff --git a/drivers/block/loop.c b/drivers/block/loop.c index cd13c0eb3f63..31b1ef73b981 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2175,6 +2175,11 @@ static int loop_add(struct loop_device **l, int i) disk->private_data = lo; disk->queue = lo->lo_queue; sprintf(disk->disk_name, "loop%d", i); + /* + * There won't be io before add_disk, QUEUE_FLAG_NO_INIT_IO can help + * to save time while elevator_init_mq. + */ + blk_queue_flag_set(QUEUE_FLAG_NO_INIT_IO, lo->lo_queue); add_disk(disk); *l = lo; return lo->lo_number; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 542471b76f41..00e71019f4f6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -621,6 +621,7 @@ struct request_queue { #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ +#define QUEUE_FLAG_NO_INIT_IO 30 /* no IO can happen while elevator_init_mq */
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ @@ -667,6 +668,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags) #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) #define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags) +#define blk_queue_no_init_io(q) test_bit(QUEUE_FLAG_NO_INIT_IO, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q);
From: Zhihao Cheng chengzhihao1@huawei.com
mainline inclusion from mainline-v5.14 commit f4e3634a3b642225a530c292fdb1e8a4007507f5 category: bugfix bugzilla: 173879 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
UBIFS may occur some problems with concurrent xattr_{set|get} and listxattr operations, such as assertion failure, memory corruption, stale xattr value[1].
Fix it by importing a new rw-lock in @ubifs_inode to serilize write operations on xattr, concurrent read operations are still effective, just like ext4.
[1] https://lore.kernel.org/linux-mtd/20200630130438.141649-1-houtao1@huawei.com
Fixes: 1e51764a3c2ac05a23 ("UBIFS: add new flash file system") Cc: stable@vger.kernel.org # v2.6+ Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Reviewed-by: Sascha Hauer s.hauer@pengutronix.de Signed-off-by: Richard Weinberger richard@nod.at Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- fs/ubifs/super.c | 1 + fs/ubifs/ubifs.h | 2 ++ fs/ubifs/xattr.c | 44 +++++++++++++++++++++++++++++++++----------- 3 files changed, 36 insertions(+), 11 deletions(-)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index dacbb999ae34..cfd46753a685 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -275,6 +275,7 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb) memset((void *)ui + sizeof(struct inode), 0, sizeof(struct ubifs_inode) - sizeof(struct inode)); mutex_init(&ui->ui_mutex); + init_rwsem(&ui->xattr_sem); spin_lock_init(&ui->ui_lock); return &ui->vfs_inode; }; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 4ffd832e3b93..e7e48f3b179a 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -356,6 +356,7 @@ struct ubifs_gced_idx_leb { * @ui_mutex: serializes inode write-back with the rest of VFS operations, * serializes "clean <-> dirty" state changes, serializes bulk-read, * protects @dirty, @bulk_read, @ui_size, and @xattr_size + * @xattr_sem: serilizes write operations (remove|set|create) on xattr * @ui_lock: protects @synced_i_size * @synced_i_size: synchronized size of inode, i.e. the value of inode size * currently stored on the flash; used only for regular file @@ -409,6 +410,7 @@ struct ubifs_inode { unsigned int bulk_read:1; unsigned int compr_type:2; struct mutex ui_mutex; + struct rw_semaphore xattr_sem; spinlock_t ui_lock; loff_t synced_i_size; loff_t ui_size; diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index a0b9b349efe6..09280796fc61 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -285,6 +285,7 @@ int ubifs_xattr_set(struct inode *host, const char *name, const void *value, if (!xent) return -ENOMEM;
+ down_write(&ubifs_inode(host)->xattr_sem); /* * The extended attribute entries are stored in LNC, so multiple * look-ups do not involve reading the flash. @@ -319,6 +320,7 @@ int ubifs_xattr_set(struct inode *host, const char *name, const void *value, iput(inode);
out_free: + up_write(&ubifs_inode(host)->xattr_sem); kfree(xent); return err; } @@ -341,18 +343,19 @@ ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, if (!xent) return -ENOMEM;
+ down_read(&ubifs_inode(host)->xattr_sem); xent_key_init(c, &key, host->i_ino, &nm); err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); if (err) { if (err == -ENOENT) err = -ENODATA; - goto out_unlock; + goto out_cleanup; }
inode = iget_xattr(c, le64_to_cpu(xent->inum)); if (IS_ERR(inode)) { err = PTR_ERR(inode); - goto out_unlock; + goto out_cleanup; }
ui = ubifs_inode(inode); @@ -374,7 +377,8 @@ ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, out_iput: mutex_unlock(&ui->ui_mutex); iput(inode); -out_unlock: +out_cleanup: + up_read(&ubifs_inode(host)->xattr_sem); kfree(xent); return err; } @@ -406,16 +410,21 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) dbg_gen("ino %lu ('%pd'), buffer size %zd", host->i_ino, dentry, size);
+ down_read(&host_ui->xattr_sem); len = host_ui->xattr_names + host_ui->xattr_cnt; - if (!buffer) + if (!buffer) { /* * We should return the minimum buffer size which will fit a * null-terminated list of all the extended attribute names. */ - return len; + err = len; + goto out_err; + }
- if (len > size) - return -ERANGE; + if (len > size) { + err = -ERANGE; + goto out_err; + }
lowest_xent_key(c, &key, host->i_ino); while (1) { @@ -437,8 +446,9 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) pxent = xent; key_read(c, &xent->key, &key); } - kfree(pxent); + up_read(&host_ui->xattr_sem); + if (err != -ENOENT) { ubifs_err(c, "cannot find next direntry, error %d", err); return err; @@ -446,6 +456,10 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
ubifs_assert(c, written <= size); return written; + +out_err: + up_read(&host_ui->xattr_sem); + return err; }
static int remove_xattr(struct ubifs_info *c, struct inode *host, @@ -504,6 +518,7 @@ int ubifs_purge_xattrs(struct inode *host) ubifs_warn(c, "inode %lu has too many xattrs, doing a non-atomic deletion", host->i_ino);
+ down_write(&ubifs_inode(host)->xattr_sem); lowest_xent_key(c, &key, host->i_ino); while (1) { xent = ubifs_tnc_next_ent(c, &key, &nm); @@ -523,7 +538,7 @@ int ubifs_purge_xattrs(struct inode *host) ubifs_ro_mode(c, err); kfree(pxent); kfree(xent); - return err; + goto out_err; }
ubifs_assert(c, ubifs_inode(xino)->xattr); @@ -535,7 +550,7 @@ int ubifs_purge_xattrs(struct inode *host) kfree(xent); iput(xino); ubifs_err(c, "cannot remove xattr, error %d", err); - return err; + goto out_err; }
iput(xino); @@ -544,14 +559,19 @@ int ubifs_purge_xattrs(struct inode *host) pxent = xent; key_read(c, &xent->key, &key); } - kfree(pxent); + up_write(&ubifs_inode(host)->xattr_sem); + if (err != -ENOENT) { ubifs_err(c, "cannot find next direntry, error %d", err); return err; }
return 0; + +out_err: + up_write(&ubifs_inode(host)->xattr_sem); + return err; }
/** @@ -594,6 +614,7 @@ static int ubifs_xattr_remove(struct inode *host, const char *name) if (!xent) return -ENOMEM;
+ down_write(&ubifs_inode(host)->xattr_sem); xent_key_init(c, &key, host->i_ino, &nm); err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); if (err) { @@ -618,6 +639,7 @@ static int ubifs_xattr_remove(struct inode *host, const char *name) iput(inode);
out_free: + up_write(&ubifs_inode(host)->xattr_sem); kfree(xent); return err; }
From: Zhihao Cheng chengzhihao1@huawei.com
mainline inclusion from mainline-v5.14 commit 819f9ab430a4478ce519e5cc8ae4de438d8ad4ba category: bugfix bugzilla: 173879 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Since ubifs_xattr_get and ubifs_xattr_set cannot being executed parallelly after importing @host_ui->xattr_sem, now we can remove ui_mutex imported by commit ab92a20bce3b4c2 ("ubifs: make ubifs_[get|set]xattr atomic").
@xattr_size, @xattr_names and @xattr_cnt can't be out of protection by @host_ui->mutex yet, they are sill accesed in other places, such as pack_inode() called by ubifs_write_inode() triggered by page-writeback.
Signed-off-by: Zhihao Cheng chengzhihao1@huawei.com Signed-off-by: Richard Weinberger richard@nod.at Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- fs/ubifs/xattr.c | 4 ---- 1 file changed, 4 deletions(-)
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 09280796fc61..905eddf90345 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -208,13 +208,11 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, err = -ENOMEM; goto out_free; } - mutex_lock(&ui->ui_mutex); kfree(ui->data); ui->data = buf; inode->i_size = ui->ui_size = size; old_size = ui->data_len; ui->data_len = size; - mutex_unlock(&ui->ui_mutex);
mutex_lock(&host_ui->ui_mutex); host->i_ctime = current_time(host); @@ -362,7 +360,6 @@ ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, ubifs_assert(c, inode->i_size == ui->data_len); ubifs_assert(c, ubifs_inode(host)->xattr_size > ui->data_len);
- mutex_lock(&ui->ui_mutex); if (buf) { /* If @buf is %NULL we are supposed to return the length */ if (ui->data_len > size) { @@ -375,7 +372,6 @@ ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, err = ui->data_len;
out_iput: - mutex_unlock(&ui->ui_mutex); iput(inode); out_cleanup: up_read(&ubifs_inode(host)->xattr_sem);
From: Fabiano Rosas farosas@linux.ibm.com
stable inclusion from stable-5.10.49 commit d5737410d2ddbb63cf56e8a1b8028a8b19ec949c bugzilla: 174521 https://gitee.com/openeuler/kernel/issues/I4DGQS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 25edcc50d76c834479d11fcc7de46f3da4d95121 upstream.
The Facility Status and Control Register is a privileged SPR that defines the availability of some features in problem state. Since it can be written by the guest, we must restore it to the previous host value after guest exit.
This restoration is currently done by taking the value from current->thread.fscr, which in the P9 path is not enough anymore because the guest could context switch the QEMU thread, causing the guest-current value to be saved into the thread struct.
The above situation manifested when running a QEMU linked against a libc with System Call Vectored support, which causes scv instructions to be run by QEMU early during the guest boot (during SLOF), at which point the FSCR is 0 due to guest entry. After a few scv calls (1 to a couple hundred), the context switching happens and the QEMU thread runs with the guest value, resulting in a Facility Unavailable interrupt.
This patch saves and restores the host value of FSCR in the inner guest entry loop in a way independent of current->thread.fscr. The old way of doing it is still kept in place because it works for the old entry path.
Signed-off-by: Fabiano Rosas farosas@linux.ibm.com Signed-off-by: Paul Mackerras paulus@ozlabs.org Cc: Georgy Yakovlev gyakovlev@gentoo.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/powerpc/kvm/book3s_hv.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 280f7992ae99..965b702208d8 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3583,6 +3583,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long host_tidr = mfspr(SPRN_TIDR); unsigned long host_iamr = mfspr(SPRN_IAMR); unsigned long host_amr = mfspr(SPRN_AMR); + unsigned long host_fscr = mfspr(SPRN_FSCR); s64 dec; u64 tb; int trap, save_pmu; @@ -3726,6 +3727,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, if (host_amr != vcpu->arch.amr) mtspr(SPRN_AMR, host_amr);
+ if (host_fscr != vcpu->arch.fscr) + mtspr(SPRN_FSCR, host_fscr); + msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX); store_fp_state(&vcpu->arch.fp); #ifdef CONFIG_ALTIVEC
From: Laurent Pinchart laurent.pinchart@ideasonboard.com
stable inclusion from stable-5.10.49 commit 8148665cb7fece4acb899f2e2dbbff7ed0720e1d bugzilla: 174521 https://gitee.com/openeuler/kernel/issues/I4DGQS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 4ca052b4ea621d0002a5e5feace51f60ad5e6b23 upstream.
Some devices reference an output terminal as the source of extension units. This is incorrect, as output terminals only have an input pin, and thus can't be connected to any entity in the forward direction. The resulting topology would cause issues when registering the media controller graph. To avoid this problem, connect the extension unit to the source of the output terminal instead.
While at it, and while no device has been reported to be affected by this issue, also handle forward scans where two output terminals would be connected together, and skip the terminals found through such an invalid connection.
Reported-and-tested-by: John Nealy jnealy3@yahoo.com
Signed-off-by: Laurent Pinchart laurent.pinchart@ideasonboard.com Signed-off-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Mauro Carvalho Chehab mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/media/usb/uvc/uvc_driver.c | 32 ++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+)
diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 5ad528264135..282f3d2388cc 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1588,6 +1588,31 @@ static int uvc_scan_chain_forward(struct uvc_video_chain *chain, return -EINVAL; }
+ /* + * Some devices reference an output terminal as the + * source of extension units. This is incorrect, as + * output terminals only have an input pin, and thus + * can't be connected to any entity in the forward + * direction. The resulting topology would cause issues + * when registering the media controller graph. To + * avoid this problem, connect the extension unit to + * the source of the output terminal instead. + */ + if (UVC_ENTITY_IS_OTERM(entity)) { + struct uvc_entity *source; + + source = uvc_entity_by_id(chain->dev, + entity->baSourceID[0]); + if (!source) { + uvc_trace(UVC_TRACE_DESCR, + "Can't connect extension unit %u in chain\n", + forward->id); + break; + } + + forward->baSourceID[0] = source->id; + } + list_add_tail(&forward->chain, &chain->entities); if (uvc_trace_param & UVC_TRACE_PROBE) { if (!found) @@ -1608,6 +1633,13 @@ static int uvc_scan_chain_forward(struct uvc_video_chain *chain, return -EINVAL; }
+ if (UVC_ENTITY_IS_OTERM(entity)) { + uvc_trace(UVC_TRACE_DESCR, + "Unsupported connection between output terminals %u and %u\n", + entity->id, forward->id); + break; + } + list_add_tail(&forward->chain, &chain->entities); if (uvc_trace_param & UVC_TRACE_PROBE) { if (!found)
From: Sid Manning sidneym@codeaurora.org
stable inclusion from stable-5.10.49 commit 243f325ecc902c9936d1edb838454f4a4dd75034 bugzilla: 174521 https://gitee.com/openeuler/kernel/issues/I4DGQS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 788dcee0306e1bdbae1a76d1b3478bb899c5838e upstream.
Fix type-o in ptrace.c. Add missing include: asm/hexagon_vm.h Remove superfluous cast. Replace 'p3_0' with 'preds'.
Signed-off-by: Sid Manning sidneym@codeaurora.org Add -mlong-calls to build flags. Signed-off-by: Brian Cain bcain@codeaurora.org Tested-by: Nick Desaulniers ndesaulniers@google.com Reviewed-by: Nick Desaulniers ndesaulniers@google.com Cc: Guenter Roeck linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/hexagon/Makefile | 3 +++ arch/hexagon/include/asm/timex.h | 3 ++- arch/hexagon/kernel/ptrace.c | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/hexagon/Makefile b/arch/hexagon/Makefile index c168c6980d05..38264831905b 100644 --- a/arch/hexagon/Makefile +++ b/arch/hexagon/Makefile @@ -10,6 +10,9 @@ LDFLAGS_vmlinux += -G0 # Do not use single-byte enums; these will overflow. KBUILD_CFLAGS += -fno-short-enums
+# We must use long-calls: +KBUILD_CFLAGS += -mlong-calls + # Modules must use either long-calls, or use pic/plt. # Use long-calls for now, it's easier. And faster. # KBUILD_CFLAGS_MODULE += -fPIC diff --git a/arch/hexagon/include/asm/timex.h b/arch/hexagon/include/asm/timex.h index 78338d8ada83..8d4ec76fceb4 100644 --- a/arch/hexagon/include/asm/timex.h +++ b/arch/hexagon/include/asm/timex.h @@ -8,6 +8,7 @@
#include <asm-generic/timex.h> #include <asm/timer-regs.h> +#include <asm/hexagon_vm.h>
/* Using TCX0 as our clock. CLOCK_TICK_RATE scheduled to be removed. */ #define CLOCK_TICK_RATE TCX0_CLK_RATE @@ -16,7 +17,7 @@
static inline int read_current_timer(unsigned long *timer_val) { - *timer_val = (unsigned long) __vmgettime(); + *timer_val = __vmgettime(); return 0; }
diff --git a/arch/hexagon/kernel/ptrace.c b/arch/hexagon/kernel/ptrace.c index a5a89e944257..8975f9b4cedf 100644 --- a/arch/hexagon/kernel/ptrace.c +++ b/arch/hexagon/kernel/ptrace.c @@ -35,7 +35,7 @@ void user_disable_single_step(struct task_struct *child)
static int genregs_get(struct task_struct *target, const struct user_regset *regset, - srtuct membuf to) + struct membuf to) { struct pt_regs *regs = task_pt_regs(target);
@@ -54,7 +54,7 @@ static int genregs_get(struct task_struct *target, membuf_store(&to, regs->m0); membuf_store(&to, regs->m1); membuf_store(&to, regs->usr); - membuf_store(&to, regs->p3_0); + membuf_store(&to, regs->preds); membuf_store(&to, regs->gp); membuf_store(&to, regs->ugp); membuf_store(&to, pt_elr(regs)); // pc
From: Sid Manning sidneym@codeaurora.org
stable inclusion from stable-5.10.49 commit a7f51048c5a85501e41af8538c24af845f1679f6 bugzilla: 174521 https://gitee.com/openeuler/kernel/issues/I4DGQS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit f1f99adf05f2138ff2646d756d4674e302e8d02d upstream.
Add the compiler-rt builtins like memcpy to the hexagon kernel.
Signed-off-by: Sid Manning sidneym@codeaurora.org Add SYM_FUNC_START/END, ksyms exports Signed-off-by: Brian Cain bcain@codeaurora.org Cc: Guenter Roeck linux@roeck-us.net Tested-by: Nick Desaulniers ndesaulniers@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/hexagon/Makefile | 3 -- arch/hexagon/kernel/hexagon_ksyms.c | 8 +-- arch/hexagon/lib/Makefile | 3 +- arch/hexagon/lib/divsi3.S | 67 ++++++++++++++++++++++++ arch/hexagon/lib/memcpy_likely_aligned.S | 56 ++++++++++++++++++++ arch/hexagon/lib/modsi3.S | 46 ++++++++++++++++ arch/hexagon/lib/udivsi3.S | 38 ++++++++++++++ arch/hexagon/lib/umodsi3.S | 36 +++++++++++++ 8 files changed, 249 insertions(+), 8 deletions(-) create mode 100644 arch/hexagon/lib/divsi3.S create mode 100644 arch/hexagon/lib/memcpy_likely_aligned.S create mode 100644 arch/hexagon/lib/modsi3.S create mode 100644 arch/hexagon/lib/udivsi3.S create mode 100644 arch/hexagon/lib/umodsi3.S
diff --git a/arch/hexagon/Makefile b/arch/hexagon/Makefile index 38264831905b..74b644ea8a00 100644 --- a/arch/hexagon/Makefile +++ b/arch/hexagon/Makefile @@ -33,9 +33,6 @@ TIR_NAME := r19 KBUILD_CFLAGS += -ffixed-$(TIR_NAME) -DTHREADINFO_REG=$(TIR_NAME) -D__linux__ KBUILD_AFLAGS += -DTHREADINFO_REG=$(TIR_NAME)
-LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name 2>/dev/null) -libs-y += $(LIBGCC) - head-y := arch/hexagon/kernel/head.o
core-y += arch/hexagon/kernel/ \ diff --git a/arch/hexagon/kernel/hexagon_ksyms.c b/arch/hexagon/kernel/hexagon_ksyms.c index 6fb1aaab1c29..35545a7386a0 100644 --- a/arch/hexagon/kernel/hexagon_ksyms.c +++ b/arch/hexagon/kernel/hexagon_ksyms.c @@ -35,8 +35,8 @@ EXPORT_SYMBOL(_dflt_cache_att); DECLARE_EXPORT(__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes);
/* Additional functions */ -DECLARE_EXPORT(__divsi3); -DECLARE_EXPORT(__modsi3); -DECLARE_EXPORT(__udivsi3); -DECLARE_EXPORT(__umodsi3); +DECLARE_EXPORT(__hexagon_divsi3); +DECLARE_EXPORT(__hexagon_modsi3); +DECLARE_EXPORT(__hexagon_udivsi3); +DECLARE_EXPORT(__hexagon_umodsi3); DECLARE_EXPORT(csum_tcpudp_magic); diff --git a/arch/hexagon/lib/Makefile b/arch/hexagon/lib/Makefile index 54be529d17a2..a64641e89d5f 100644 --- a/arch/hexagon/lib/Makefile +++ b/arch/hexagon/lib/Makefile @@ -2,4 +2,5 @@ # # Makefile for hexagon-specific library files. # -obj-y = checksum.o io.o memcpy.o memset.o +obj-y = checksum.o io.o memcpy.o memset.o memcpy_likely_aligned.o \ + divsi3.o modsi3.o udivsi3.o umodsi3.o diff --git a/arch/hexagon/lib/divsi3.S b/arch/hexagon/lib/divsi3.S new file mode 100644 index 000000000000..783e09424c2c --- /dev/null +++ b/arch/hexagon/lib/divsi3.S @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include <linux/linkage.h> + +SYM_FUNC_START(__hexagon_divsi3) + { + p0 = cmp.gt(r0,#-1) + p1 = cmp.gt(r1,#-1) + r3:2 = vabsw(r1:0) + } + { + p3 = xor(p0,p1) + r4 = sub(r2,r3) + r6 = cl0(r2) + p0 = cmp.gtu(r3,r2) + } + { + r0 = mux(p3,#-1,#1) + r7 = cl0(r3) + p1 = cmp.gtu(r3,r4) + } + { + r0 = mux(p0,#0,r0) + p0 = or(p0,p1) + if (p0.new) jumpr:nt r31 + r6 = sub(r7,r6) + } + { + r7 = r6 + r5:4 = combine(#1,r3) + r6 = add(#1,lsr(r6,#1)) + p0 = cmp.gtu(r6,#4) + } + { + r5:4 = vaslw(r5:4,r7) + if (!p0) r6 = #3 + } + { + loop0(1f,r6) + r7:6 = vlsrw(r5:4,#1) + r1:0 = #0 + } + .falign +1: + { + r5:4 = vlsrw(r5:4,#2) + if (!p0.new) r0 = add(r0,r5) + if (!p0.new) r2 = sub(r2,r4) + p0 = cmp.gtu(r4,r2) + } + { + r7:6 = vlsrw(r7:6,#2) + if (!p0.new) r0 = add(r0,r7) + if (!p0.new) r2 = sub(r2,r6) + p0 = cmp.gtu(r6,r2) + }:endloop0 + { + if (!p0) r0 = add(r0,r7) + } + { + if (p3) r0 = sub(r1,r0) + jumpr r31 + } +SYM_FUNC_END(__hexagon_divsi3) diff --git a/arch/hexagon/lib/memcpy_likely_aligned.S b/arch/hexagon/lib/memcpy_likely_aligned.S new file mode 100644 index 000000000000..6a541fb90a54 --- /dev/null +++ b/arch/hexagon/lib/memcpy_likely_aligned.S @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include <linux/linkage.h> + +SYM_FUNC_START(__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes) + { + p0 = bitsclr(r1,#7) + p0 = bitsclr(r0,#7) + if (p0.new) r5:4 = memd(r1) + if (p0.new) r7:6 = memd(r1+#8) + } + { + if (!p0) jump:nt .Lmemcpy_call + if (p0) r9:8 = memd(r1+#16) + if (p0) r11:10 = memd(r1+#24) + p0 = cmp.gtu(r2,#64) + } + { + if (p0) jump:nt .Lmemcpy_call + if (!p0) memd(r0) = r5:4 + if (!p0) memd(r0+#8) = r7:6 + p0 = cmp.gtu(r2,#32) + } + { + p1 = cmp.gtu(r2,#40) + p2 = cmp.gtu(r2,#48) + if (p0) r13:12 = memd(r1+#32) + if (p1.new) r15:14 = memd(r1+#40) + } + { + memd(r0+#16) = r9:8 + memd(r0+#24) = r11:10 + } + { + if (p0) memd(r0+#32) = r13:12 + if (p1) memd(r0+#40) = r15:14 + if (!p2) jumpr:t r31 + } + { + p0 = cmp.gtu(r2,#56) + r5:4 = memd(r1+#48) + if (p0.new) r7:6 = memd(r1+#56) + } + { + memd(r0+#48) = r5:4 + if (p0) memd(r0+#56) = r7:6 + jumpr r31 + } + +.Lmemcpy_call: + jump memcpy + +SYM_FUNC_END(__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes) diff --git a/arch/hexagon/lib/modsi3.S b/arch/hexagon/lib/modsi3.S new file mode 100644 index 000000000000..9ea1c86efac2 --- /dev/null +++ b/arch/hexagon/lib/modsi3.S @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include <linux/linkage.h> + +SYM_FUNC_START(__hexagon_modsi3) + { + p2 = cmp.ge(r0,#0) + r2 = abs(r0) + r1 = abs(r1) + } + { + r3 = cl0(r2) + r4 = cl0(r1) + p0 = cmp.gtu(r1,r2) + } + { + r3 = sub(r4,r3) + if (p0) jumpr r31 + } + { + p1 = cmp.eq(r3,#0) + loop0(1f,r3) + r0 = r2 + r2 = lsl(r1,r3) + } + .falign +1: + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r2) + r2 = lsr(r2,#1) + if (p1) r1 = #0 + }:endloop0 + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r1) + if (p2) jumpr r31 + } + { + r0 = neg(r0) + jumpr r31 + } +SYM_FUNC_END(__hexagon_modsi3) diff --git a/arch/hexagon/lib/udivsi3.S b/arch/hexagon/lib/udivsi3.S new file mode 100644 index 000000000000..477f27b9311c --- /dev/null +++ b/arch/hexagon/lib/udivsi3.S @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include <linux/linkage.h> + +SYM_FUNC_START(__hexagon_udivsi3) + { + r2 = cl0(r0) + r3 = cl0(r1) + r5:4 = combine(#1,#0) + p0 = cmp.gtu(r1,r0) + } + { + r6 = sub(r3,r2) + r4 = r1 + r1:0 = combine(r0,r4) + if (p0) jumpr r31 + } + { + r3:2 = vlslw(r5:4,r6) + loop0(1f,r6) + } + .falign +1: + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r1 = sub(r1,r2) + if (!p0.new) r0 = add(r0,r3) + r3:2 = vlsrw(r3:2,#1) + }:endloop0 + { + p0 = cmp.gtu(r2,r1) + if (!p0.new) r0 = add(r0,r3) + jumpr r31 + } +SYM_FUNC_END(__hexagon_udivsi3) diff --git a/arch/hexagon/lib/umodsi3.S b/arch/hexagon/lib/umodsi3.S new file mode 100644 index 000000000000..280bf06a55e7 --- /dev/null +++ b/arch/hexagon/lib/umodsi3.S @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021, The Linux Foundation. All rights reserved. + */ + +#include <linux/linkage.h> + +SYM_FUNC_START(__hexagon_umodsi3) + { + r2 = cl0(r0) + r3 = cl0(r1) + p0 = cmp.gtu(r1,r0) + } + { + r2 = sub(r3,r2) + if (p0) jumpr r31 + } + { + loop0(1f,r2) + p1 = cmp.eq(r2,#0) + r2 = lsl(r1,r2) + } + .falign +1: + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r2) + r2 = lsr(r2,#1) + if (p1) r1 = #0 + }:endloop0 + { + p0 = cmp.gtu(r2,r0) + if (!p0.new) r0 = sub(r0,r1) + jumpr r31 + } +SYM_FUNC_END(__hexagon_umodsi3)
From: Sid Manning sidneym@codeaurora.org
stable inclusion from stable-5.10.49 commit a245f6842d212080a1dd95a6b99d7d4b0c005740 bugzilla: 174521 https://gitee.com/openeuler/kernel/issues/I4DGQS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 6fff7410f6befe5744d54f0418d65a6322998c09 upstream.
Cross-section jumps from .fixup section must be extended.
Signed-off-by: Sid Manning sidneym@codeaurora.org Signed-off-by: Brian Cain bcain@codeaurora.org Tested-by: Nick Desaulniers ndesaulniers@google.com Reviewed-by: Nick Desaulniers ndesaulniers@google.com Cc: Guenter Roeck linux@roeck-us.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/hexagon/include/asm/futex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h index 6b9c554aee78..9fb00a0ae89f 100644 --- a/arch/hexagon/include/asm/futex.h +++ b/arch/hexagon/include/asm/futex.h @@ -21,7 +21,7 @@ "3:\n" \ ".section .fixup,"ax"\n" \ "4: %1 = #%5;\n" \ - " jump 3b\n" \ + " jump ##3b\n" \ ".previous\n" \ ".section __ex_table,"a"\n" \ ".long 1b,4b,2b,4b\n" \ @@ -90,7 +90,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, "3:\n" ".section .fixup,"ax"\n" "4: %0 = #%6\n" - " jump 3b\n" + " jump ##3b\n" ".previous\n" ".section __ex_table,"a"\n" ".long 1b,4b,2b,4b\n"
From: Juergen Gross jgross@suse.com
stable inclusion from stable-5.10.49 commit 064b57a8da995ee0b97b505ae8ca478eff87c331 bugzilla: 174521 https://gitee.com/openeuler/kernel/issues/I4DGQS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 3de218ff39b9e3f0d453fe3154f12a174de44b25 upstream.
In order to avoid a race condition for user events when changing cpu affinity reset the active flag only when EOI-ing the event.
This is working fine as all user events are lateeoi events. Note that lateeoi_ack_mask_dynirq() is not modified as there is no explicit call to xen_irq_lateeoi() expected later.
Cc: stable@vger.kernel.org Reported-by: Julien Grall julien@xen.org Fixes: b6622798bc50b62 ("xen/events: avoid handling the same event on two cpus at the same time") Tested-by: Julien Grall julien@xen.org Signed-off-by: Juergen Gross jgross@suse.com Reviewed-by: Boris Ostrovsky boris.ostrvsky@oracle.com Link: https://lore.kernel.org/r/20210623130913.9405-1-jgross@suse.com Signed-off-by: Juergen Gross jgross@suse.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Signed-off-by: Chen Jun chenjun102@huawei.com Acked-by: Weilong Chen chenweilong@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/xen/events/events_base.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-)
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 29bec0720514..af0f6ad32522 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -583,6 +583,9 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) }
info->eoi_time = 0; + + /* is_active hasn't been reset yet, do it now. */ + smp_store_release(&info->is_active, 0); do_unmask(info, EVT_MASK_REASON_EOI_PENDING); }
@@ -1807,10 +1810,22 @@ static void lateeoi_ack_dynirq(struct irq_data *data) struct irq_info *info = info_for_irq(data->irq); evtchn_port_t evtchn = info ? info->evtchn : 0;
- if (VALID_EVTCHN(evtchn)) { - do_mask(info, EVT_MASK_REASON_EOI_PENDING); - ack_dynirq(data); - } + if (!VALID_EVTCHN(evtchn)) + return; + + do_mask(info, EVT_MASK_REASON_EOI_PENDING); + + if (unlikely(irqd_is_setaffinity_pending(data)) && + likely(!irqd_irq_disabled(data))) { + do_mask(info, EVT_MASK_REASON_TEMPORARY); + + clear_evtchn(evtchn); + + irq_move_masked_irq(data); + + do_unmask(info, EVT_MASK_REASON_TEMPORARY); + } else + clear_evtchn(evtchn); }
static void lateeoi_mask_ack_dynirq(struct irq_data *data)
From: Mingrui Ren jiladahe1997@gmail.com
mainline inclusion from mainline-v5.121-rc1 commit aef1b6a27970607721a618a0b990716ca8dbbf9 category: bugfix bugzilla: 108552 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-------------------------------------------------
As described in Documentation, poll_init() is called by kgdb to initialize hardware which supports both poll_put_char() and poll_get_char().
It's necessary to enable TXEN bit, otherwise, it will cause hardware fault and kernel panic when calling imx_poll_put_char().
Generally, if use /dev/ttymxc0 as kgdb console as well as system console, ttymxc0 is initialized early by system console which does enable TXEN bit.But when use /dev/ttymxc1 as kgbd console, ttymxc1 is only initialized by imx_poll_init() cannot enable the TXEN bit, which will cause kernel panic.
Signed-off-by: Mingrui Ren jiladahe1997@gmail.com Link: https://lore.kernel.org/r/20201202072543.151-1-972931182@qq.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Lu Jialin lujialin4@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- drivers/tty/serial/imx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index cacf7266a262..a66301b3b33d 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -1881,7 +1881,7 @@ static int imx_uart_poll_init(struct uart_port *port) ucr1 |= UCR1_UARTEN; ucr1 &= ~(UCR1_TRDYEN | UCR1_RTSDEN | UCR1_RRDYEN);
- ucr2 |= UCR2_RXEN; + ucr2 |= UCR2_RXEN | UCR2_TXEN; ucr2 &= ~UCR2_ATEN;
imx_uart_writel(sport, ucr1, UCR1);
From: Ye Bin yebin10@huawei.com
mainline inclusion from mainline-v5.14-rc1 commit 558d6450c7755aa005d89021204b6cdcae5e848f category: bugfix bugzilla: 172157 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-----------------------------------------------
If a writeback of the superblock fails with an I/O error, the buffer is marked not uptodate. However, this can cause a WARN_ON to trigger when we attempt to write superblock a second time. (Which might succeed this time, for cerrtain types of block devices such as iSCSI devices over a flaky network.)
Try to detect this case in flush_stashed_error_work(), and also change __ext4_handle_dirty_metadata() so we always set the uptodate flag, not just in the nojournal case.
Before this commit, this problem can be repliciated via:
1. dmsetup create dust1 --table '0 2097152 dust /dev/sdc 0 4096' 2. mount /dev/mapper/dust1 /home/test 3. dmsetup message dust1 0 addbadblock 0 10 4. cd /home/test 5. echo "XXXXXXX" > t
After a few seconds, we got following warning:
[ 80.654487] end_buffer_async_write: bh=0xffff88842f18bdd0 [ 80.656134] Buffer I/O error on dev dm-0, logical block 0, lost async page write [ 85.774450] EXT4-fs error (device dm-0): ext4_check_bdev_write_error:193: comm kworker/u16:8: Error while async write back metadata [ 91.415513] mark_buffer_dirty: bh=0xffff88842f18bdd0 [ 91.417038] ------------[ cut here ]------------ [ 91.418450] WARNING: CPU: 1 PID: 1944 at fs/buffer.c:1092 mark_buffer_dirty.cold+0x1c/0x5e [ 91.440322] Call Trace: [ 91.440652] __jbd2_journal_temp_unlink_buffer+0x135/0x220 [ 91.441354] __jbd2_journal_unfile_buffer+0x24/0x90 [ 91.441981] __jbd2_journal_refile_buffer+0x134/0x1d0 [ 91.442628] jbd2_journal_commit_transaction+0x249a/0x3240 [ 91.443336] ? put_prev_entity+0x2a/0x200 [ 91.443856] ? kjournald2+0x12e/0x510 [ 91.444324] kjournald2+0x12e/0x510 [ 91.444773] ? woken_wake_function+0x30/0x30 [ 91.445326] kthread+0x150/0x1b0 [ 91.445739] ? commit_timeout+0x20/0x20 [ 91.446258] ? kthread_flush_worker+0xb0/0xb0 [ 91.446818] ret_from_fork+0x1f/0x30 [ 91.447293] ---[ end trace 66f0b6bf3d1abade ]---
Signed-off-by: Ye Bin yebin10@huawei.com Link: https://lore.kernel.org/r/20210615090537.3423231-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Ye Bin yebin10@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- fs/ext4/ext4_jbd2.c | 2 +- fs/ext4/super.c | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index be799040a415..b96ecba91899 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -327,6 +327,7 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
set_buffer_meta(bh); set_buffer_prio(bh); + set_buffer_uptodate(bh); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); /* Errors can only happen due to aborted journal or a nasty bug */ @@ -355,7 +356,6 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, err); } } else { - set_buffer_uptodate(bh); if (inode) mark_buffer_dirty_inode(bh, inode); else diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c5c89f10a7f4..ed6c1e7bc653 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -704,15 +704,23 @@ static void flush_stashed_error_work(struct work_struct *work) * ext4 error handling code during handling of previous errors. */ if (!sb_rdonly(sbi->s_sb) && journal) { + struct buffer_head *sbh = sbi->s_sbh; handle = jbd2_journal_start(journal, 1); if (IS_ERR(handle)) goto write_directly; - if (jbd2_journal_get_write_access(handle, sbi->s_sbh)) { + if (jbd2_journal_get_write_access(handle, sbh)) { jbd2_journal_stop(handle); goto write_directly; } ext4_update_super(sbi->s_sb); - if (jbd2_journal_dirty_metadata(handle, sbi->s_sbh)) { + if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { + ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to " + "superblock detected"); + clear_buffer_write_io_error(sbh); + set_buffer_uptodate(sbh); + } + + if (jbd2_journal_dirty_metadata(handle, sbh)) { jbd2_journal_stop(handle); goto write_directly; }
From: Zhang Yi yi.zhang@huawei.com
mainline inclusion from mainline-5.14-rc1 commit b9a037b7f3c401d3c63e0423e56aef606b1ffaaf category: bugfix bugzilla: 51864 https://gitee.com/openeuler/kernel/issues/I4DDEL Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
In ext4_orphan_cleanup(), if ext4_truncate() failed to get a transaction handle, it didn't remove the inode from the in-core orphan list, which may probably trigger below error dump in ext4_destroy_inode() during the final iput() and could lead to memory corruption on the later orphan list changes.
EXT4-fs (sda): Inode 6291467 (00000000b8247c67): orphan list check failed! 00000000b8247c67: 0001f30a 00000004 00000000 00000023 ............#... 00000000e24cde71: 00000006 014082a3 00000000 00000000 ......@......... 0000000072c6a5ee: 00000000 00000000 00000000 00000000 ................ ...
This patch fix this by cleanup in-core orphan list manually if ext4_truncate() return error.
Cc: stable@kernel.org Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20210507071904.160808-1-yi.zhang@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- fs/ext4/super.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ed6c1e7bc653..15bc072dd011 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3091,8 +3091,15 @@ static void ext4_orphan_cleanup(struct super_block *sb, inode_lock(inode); truncate_inode_pages(inode->i_mapping, inode->i_size); ret = ext4_truncate(inode); - if (ret) + if (ret) { + /* + * We need to clean up the in-core orphan list + * manually if ext4_truncate() failed to get a + * transaction handle. + */ + ext4_orphan_del(NULL, inode); ext4_std_error(inode->i_sb, ret); + } inode_unlock(inode); nr_truncates++; } else {
From: Chen Jiahao chenjiahao16@huawei.com
hulk inclusion category: bugfix bugzilla: 174525 https://gitee.com/openeuler/kernel/issues/I4DDEL
--------
In arm64be_ilp32 platform, audit could not record log in some case, because syscall_get_arch() in arch/arm64 returns AUDIT_ARCH_AARCH64ILP32, which fits the arm32 platform. Audit will gets a fault arch in this situation and hence mismatch some syscall numbers in audit_match_perm().
This patch fixes it, and use the arch AUDIT_ARCH_AARCH64 which matches all syscall numbers in arm64be_ilp32 platform.
Fixes: 0fe4141ba63a ("[Backport] arm64: introduce AUDIT_ARCH_AARCH64ILP32 for ilp32") Signed-off-by: Chen Jiahao chenjiahao16@huawei.com Reviewed-by: Liao Chang liaochang1@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/arm64/include/asm/syscall.h | 3 --- include/uapi/linux/audit.h | 1 - 2 files changed, 4 deletions(-)
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 6e546ec398b3..579300bb03fe 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -95,9 +95,6 @@ static inline int syscall_get_arch(struct task_struct *task) if (is_a32_compat_thread(task_thread_info(task))) return AUDIT_ARCH_ARM;
- else if (is_ilp32_compat_task()) - return AUDIT_ARCH_AARCH64ILP32; - return AUDIT_ARCH_AARCH64; }
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index b585d043a44a..cd2d8279a5e4 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -381,7 +381,6 @@ enum { #define __AUDIT_ARCH_LE 0x40000000
#define AUDIT_ARCH_AARCH64 (EM_AARCH64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) -#define AUDIT_ARCH_AARCH64ILP32 (EM_AARCH64|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ALPHA (EM_ALPHA|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARCOMPACT (EM_ARCOMPACT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARCOMPACTBE (EM_ARCOMPACT)
From: Xiongfeng Wang wangxiongfeng2@huawei.com
hulk inclusion category: bugfix bugzilla: 173968 https://gitee.com/openeuler/kernel/issues/I4DDEL
-------------------------------------------------
When I enable CONFIG_DEBUG_PREEMPT and CONFIG_PREEMPT on X86, I got the following Call Trace:
[ 3.341853] BUG: using smp_processor_id() in preemptible [00000000] code: swapper/0/1 [ 3.344392] caller is debug_smp_processor_id+0x17/0x20 [ 3.344395] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 5.10.0+ #398 [ 3.344397] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.2-0-g5f4c7b1-prebuilt.qemu-project.org 04/01/2014 [ 3.344399] Call Trace: [ 3.344410] dump_stack+0x60/0x76 [ 3.344412] check_preemption_disabled+0xba/0xc0 [ 3.344415] debug_smp_processor_id+0x17/0x20 [ 3.344422] hardlockup_detector_event_create+0xf/0x60 [ 3.344427] hardlockup_detector_perf_init+0xf/0x41 [ 3.344430] watchdog_nmi_probe+0xe/0x10 [ 3.344432] lockup_detector_init+0x22/0x5b [ 3.344437] kernel_init_freeable+0x20c/0x245 [ 3.344439] ? rest_init+0xd0/0xd0 [ 3.344441] kernel_init+0xe/0x110 [ 3.344446] ret_from_fork+0x22/0x30
It is because sched_init_smp() set 'current->nr_cpus_allowed' to possible cpu number, and check_preemption_disabled() failed. This issue is introduced by commit a79050434b45, which move down lockup_detector_init() after do_basic_setup(). Fix it by moving lockup_detector_init() to its origin place when sdei_watchdog is disabled. There is no problem when sdei_watchdog is enabled because watchdog_nmi_probe() is overridden in 'arch/arm64/kernel/watchdog_sdei.c' in this case.
Fixes: a79050434b45 ("lockup_detector: init lockup detector after all the init_calls") Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Wei Li liwei391@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- arch/arm64/kernel/watchdog_sdei.c | 2 +- include/linux/nmi.h | 2 ++ init/main.c | 6 +++++- 3 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/kernel/watchdog_sdei.c b/arch/arm64/kernel/watchdog_sdei.c index aa980b090598..cdbe2ebe3d69 100644 --- a/arch/arm64/kernel/watchdog_sdei.c +++ b/arch/arm64/kernel/watchdog_sdei.c @@ -21,7 +21,7 @@ #define SDEI_NMI_WATCHDOG_HWIRQ 29
static int sdei_watchdog_event_num; -static bool disable_sdei_nmi_watchdog; +bool disable_sdei_nmi_watchdog; static bool sdei_watchdog_registered; static DEFINE_PER_CPU(ktime_t, last_check_time);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 662405794e4c..8c9b857e7f62 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -231,8 +231,10 @@ int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *);
#ifdef CONFIG_SDEI_WATCHDOG void sdei_watchdog_clear_eoi(void); +extern bool disable_sdei_nmi_watchdog; #else static inline void sdei_watchdog_clear_eoi(void) { } +#define disable_sdei_nmi_watchdog 1 #endif
#endif diff --git a/init/main.c b/init/main.c index f6fe37a744b7..ad414aa0a0eb 100644 --- a/init/main.c +++ b/init/main.c @@ -1518,6 +1518,8 @@ static noinline void __init kernel_init_freeable(void)
rcu_init_tasks_generic(); do_pre_smp_initcalls(); + if (disable_sdei_nmi_watchdog) + lockup_detector_init();
smp_init(); sched_init_smp(); @@ -1529,7 +1531,9 @@ static noinline void __init kernel_init_freeable(void)
do_basic_setup();
- lockup_detector_init(); + /* sdei_watchdog needs to be initialized after sdei_init */ + if (!disable_sdei_nmi_watchdog) + lockup_detector_init();
kunit_run_all_tests();
From: Chung-Chiang Cheng shepjeng@gmail.com
mainline inclusion from mainline-5.14 commit 3c252b087de08d3cb32468b54a158bd7ad0ae2f7 category: bugfix bugzilla: 171902 https://gitee.com/openeuler/kernel/issues/I4DDEL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-------------------------------------------------
When reading binary attributes in progress, buffer->bin_buffer is setup in configfs_read_bin_file() but never freed.
Fixes: 03607ace807b4 ("configfs: implement binary attributes") Signed-off-by: Chung-Chiang Cheng cccheng@synology.com [hch: move the vfree rather than duplicating it] Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Baokun Li libaokun1@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Chen Jun chenjun102@huawei.com --- fs/configfs/file.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/fs/configfs/file.c b/fs/configfs/file.c index da8351d1e455..4d0825213116 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -482,13 +482,13 @@ static int configfs_release_bin_file(struct inode *inode, struct file *file) buffer->bin_buffer_size); } up_read(&frag->frag_sem); - /* vfree on NULL is safe */ - vfree(buffer->bin_buffer); - buffer->bin_buffer = NULL; - buffer->bin_buffer_size = 0; - buffer->needs_read_fill = 1; }
+ vfree(buffer->bin_buffer); + buffer->bin_buffer = NULL; + buffer->bin_buffer_size = 0; + buffer->needs_read_fill = 1; + configfs_release(inode, file); return 0; }