From: Yan Zhao yan.y.zhao@intel.com
mainline inclusion from mainline-v5.6-rc7 commit c0560f51cf77472f4ed113539b0a02ca6cda7961 category: Common features bugzilla: 46841 CVE: NA
-------------------------------------------------
An external user calls vfio_group_get_external_user_from_dev() with a device pointer to get the VFIO group associated with this device. The VFIO group is checked to be viable and to have an IOMMU set. Then the container user counter is incremented and a reference to the VFIO group is held to prevent the VFIO group from being disposed of before the external user exits.
When the external user finishes using the VFIO group, it calls vfio_group_put_external_user() to release the VFIO group and decrement the container user counter.
Suggested-by: Alex Williamson alex.williamson@redhat.com Signed-off-by: Yan Zhao yan.y.zhao@intel.com Signed-off-by: Alex Williamson alex.williamson@redhat.com Signed-off-by: Xiaoyang Xu xuxiaoyang2@huawei.com Reviewed-by: Xiangyou Xie xiexiangyou@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/vfio/vfio.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 2 ++ 2 files changed, 40 insertions(+)
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 7a386fb30bf1..34a293318a39 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1759,6 +1759,44 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep) } EXPORT_SYMBOL_GPL(vfio_group_get_external_user);
+/** + * External user API, exported by symbols to be linked dynamically. + * The external user passes in a device pointer + * to verify that: + * - A VFIO group is assiciated with the device; + * - IOMMU is set for the group. + * If both checks passed, vfio_group_get_external_user_from_dev() + * increments the container user counter to prevent the VFIO group + * from disposal before external user exits and returns the pointer + * to the VFIO group. + * + * When the external user finishes using the VFIO group, it calls + * vfio_group_put_external_user() to release the VFIO group and + * decrement the container user counter. + * + * @dev [in] : device + * Return error PTR or pointer to VFIO group. + */ + +struct vfio_group *vfio_group_get_external_user_from_dev(struct device *dev) +{ + struct vfio_group *group; + int ret; + + group = vfio_group_get_from_dev(dev); + if (!group) + return ERR_PTR(-ENODEV); + + ret = vfio_group_add_container_user(group); + if (ret) { + vfio_group_put(group); + return ERR_PTR(ret); + } + + return group; +} +EXPORT_SYMBOL_GPL(vfio_group_get_external_user_from_dev); + void vfio_group_put_external_user(struct vfio_group *group) { vfio_group_try_dissolve_container(group); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 66741ab087c1..a1e8943b7be3 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -97,6 +97,8 @@ extern void vfio_unregister_iommu_driver( */ extern struct vfio_group *vfio_group_get_external_user(struct file *filep); extern void vfio_group_put_external_user(struct vfio_group *group); +extern struct vfio_group *vfio_group_get_external_user_from_dev(struct device + *dev); extern bool vfio_external_group_match_file(struct vfio_group *group, struct file *filep); extern int vfio_external_user_iommu_id(struct vfio_group *group);
From: Yan Zhao yan.y.zhao@intel.com
mainline inclusion from mainline-v5.6-rc7 commit 8d46c0cca5f4dc0538173d62cd36b1119b5105bc category: Common features bugzilla: 46841 CVE: NA
--------------------------------
vfio_dma_rw() reads/writes a range of user space memory pointed to by an IOVA into/from a kernel buffer without requiring the user space memory to be pinned.
TODO: mark the IOVAs to user space memory dirty if they are written in vfio_dma_rw().
Cc: Kevin Tian kevin.tian@intel.com Signed-off-by: Yan Zhao yan.y.zhao@intel.com Signed-off-by: Alex Williamson alex.williamson@redhat.com Conflicts: drivers/vfio/vfio_iommu_type1.c [xxy: add #include <linux/mmu_context.h> after #include <linux/ptrace.h>] Signed-off-by: Xiaoyang Xu xuxiaoyang2@huawei.com Reviewed-by: Xiangyou Xie xiexiangyou@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/vfio/vfio.c | 49 +++++++++++++++++++++ drivers/vfio/vfio_iommu_type1.c | 76 +++++++++++++++++++++++++++++++++ include/linux/vfio.h | 5 +++ 3 files changed, 130 insertions(+)
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 34a293318a39..3463a4f10422 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -2038,6 +2038,55 @@ int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage) } EXPORT_SYMBOL(vfio_unpin_pages);
+ +/* + * This interface allows the CPUs to perform some sort of virtual DMA on + * behalf of the device. + * + * CPUs read/write from/into a range of IOVAs pointing to user space memory + * into/from a kernel buffer. + * + * As the read/write of user space memory is conducted via the CPUs and is + * not a real device DMA, it is not necessary to pin the user space memory. + * + * The caller needs to call vfio_group_get_external_user() or + * vfio_group_get_external_user_from_dev() prior to calling this interface, + * so as to prevent the VFIO group from disposal in the middle of the call. + * But it can keep the reference to the VFIO group for several calls into + * this interface. + * After finishing using of the VFIO group, the caller needs to release the + * VFIO group by calling vfio_group_put_external_user(). + * + * @group [in] : VFIO group + * @user_iova [in] : base IOVA of a user space buffer + * @data [in] : pointer to kernel buffer + * @len [in] : kernel buffer length + * @write : indicate read or write + * Return error code on failure or 0 on success. 
+ */ +int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, + void *data, size_t len, bool write) +{ + struct vfio_container *container; + struct vfio_iommu_driver *driver; + int ret = 0; + + if (!group || !data || len <= 0) + return -EINVAL; + + container = group->container; + driver = container->iommu_driver; + + if (likely(driver && driver->ops->dma_rw)) + ret = driver->ops->dma_rw(container->iommu_data, + user_iova, data, len, write); + else + ret = -ENOTTY; + + return ret; +} +EXPORT_SYMBOL(vfio_dma_rw); + static int vfio_register_iommu_notifier(struct vfio_group *group, unsigned long *events, struct notifier_block *nb) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 8c5c99aad00d..1acf7cd1cf1c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -31,6 +31,7 @@ #include <linux/module.h> #include <linux/mm.h> #include <linux/ptrace.h> +#include <linux/mmu_context.h> #include <linux/rbtree.h> #include <linux/sched/signal.h> #include <linux/sched/mm.h> @@ -2328,6 +2329,80 @@ static int vfio_iommu_type1_unregister_notifier(void *iommu_data, return blocking_notifier_chain_unregister(&iommu->notifier, nb); }
+static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, + dma_addr_t user_iova, void *data, + size_t count, bool write, + size_t *copied) +{ + struct mm_struct *mm; + unsigned long vaddr; + struct vfio_dma *dma; + bool kthread = current->mm == NULL; + size_t offset; + + *copied = 0; + + dma = vfio_find_dma(iommu, user_iova, 1); + if (!dma) + return -EINVAL; + + if ((write && !(dma->prot & IOMMU_WRITE)) || + !(dma->prot & IOMMU_READ)) + return -EPERM; + + mm = get_task_mm(dma->task); + + if (!mm) + return -EPERM; + + if (kthread) + use_mm(mm); + else if (current->mm != mm) + goto out; + + offset = user_iova - dma->iova; + + if (count > dma->size - offset) + count = dma->size - offset; + + vaddr = dma->vaddr + offset; + + if (write) + *copied = __copy_to_user((void __user *)vaddr, data, + count) ? 0 : count; + else + *copied = __copy_from_user(data, (void __user *)vaddr, + count) ? 0 : count; + if (kthread) + unuse_mm(mm); +out: + mmput(mm); + return *copied ? 0 : -EFAULT; +} + +static int vfio_iommu_type1_dma_rw(void *iommu_data, dma_addr_t user_iova, + void *data, size_t count, bool write) +{ + struct vfio_iommu *iommu = iommu_data; + int ret = 0; + size_t done; + + mutex_lock(&iommu->lock); + while (count > 0) { + ret = vfio_iommu_type1_dma_rw_chunk(iommu, user_iova, data, + count, write, &done); + if (ret) + break; + + count -= done; + data += done; + user_iova += done; + } + + mutex_unlock(&iommu->lock); + return ret; +} + static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { .name = "vfio-iommu-type1", .owner = THIS_MODULE, @@ -2340,6 +2415,7 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { .unpin_pages = vfio_iommu_type1_unpin_pages, .register_notifier = vfio_iommu_type1_register_notifier, .unregister_notifier = vfio_iommu_type1_unregister_notifier, + .dma_rw = vfio_iommu_type1_dma_rw, };
static int __init vfio_iommu_type1_init(void) diff --git a/include/linux/vfio.h b/include/linux/vfio.h index a1e8943b7be3..449eb728304c 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -85,6 +85,8 @@ struct vfio_iommu_driver_ops { struct notifier_block *nb); int (*unregister_notifier)(void *iommu_data, struct notifier_block *nb); + int (*dma_rw)(void *iommu_data, dma_addr_t user_iova, + void *data, size_t count, bool write); };
extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); @@ -112,6 +114,9 @@ extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn, int npage);
+extern int vfio_dma_rw(struct vfio_group *group, dma_addr_t user_iova, + void *data, size_t len, bool write); + /* each type has independent events */ enum vfio_notify_type { VFIO_IOMMU_NOTIFY = 0,
From: Yan Zhao yan.y.zhao@intel.com
mainline inclusion from mainline-v5.7-rc4 commit 205323b8ceac57b0ac9d7dbc4d6fcdb18aa802ec category: Common features bugzilla: 46841 CVE: NA
--------------------------------
Instead of calling __copy_to/from_user(), use copy_to/from_user() to ensure the vaddr range is a valid user address range before accessing it.
Fixes: 8d46c0cca5f4 ("vfio: introduce vfio_dma_rw to read/write a range of IOVAs") Signed-off-by: Yan Zhao yan.y.zhao@intel.com Reported-by: Kees Cook keescook@chromium.org Reviewed-by: Kees Cook keescook@chromium.org Signed-off-by: Alex Williamson alex.williamson@redhat.com Signed-off-by: Xiaoyang Xu xuxiaoyang2@huawei.com Reviewed-by: Xiangyou Xie xiexiangyou@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/vfio/vfio_iommu_type1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 1acf7cd1cf1c..99e9ffa19193 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2368,10 +2368,10 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, vaddr = dma->vaddr + offset;
if (write) - *copied = __copy_to_user((void __user *)vaddr, data, + *copied = copy_to_user((void __user *)vaddr, data, count) ? 0 : count; else - *copied = __copy_from_user(data, (void __user *)vaddr, + *copied = copy_from_user(data, (void __user *)vaddr, count) ? 0 : count; if (kthread) unuse_mm(mm);
From: qinyu qinyu16@huawei.com
euleros inclusion category: bugfix bugzilla: NA
--------------------------------
After we turned on CONFIG_ARMV8_DEPRECATED, we found that hackbench test performance regressed:
for i in 20;do echo "--------pipe process num=$i----------" for j in $(seq 1 10000000);do ./hackbench -pipe $i process 1000 done done
here is the result: --------pipe process num=20---------- Running with 20*40 (== 800) tasks. Time: 1.739 Running with 20*40 (== 800) tasks. Time: 1.985 Running with 20*40 (== 800) tasks. Time: 1.088 Running with 20*40 (== 800) tasks. Time: 1.721 Running with 20*40 (== 800) tasks. Time: 1.455
the base line is: --------pipe process num=20---------- Running with 20*40 (== 800) tasks. Time: 0.272 Running with 20*40 (== 800) tasks. Time: 0.270 Running with 20*40 (== 800) tasks. Time: 0.303 Running with 20*40 (== 800) tasks. Time: 0.321 Running with 20*40 (== 800) tasks. Time: 0.304 Running with 20*40 (== 800) tasks. Time: 0.324
The reason is that the file "./arch/arm64/kernel/armv8_deprecated.c" adds a global variable, "static DEFINE_RAW_SPINLOCK(insn_emulation_lock);", once CONFIG_ARMV8_DEPRECATED is enabled. The variable is inserted into the ".bss" section, which shifts the variables that follow it so that they are no longer cacheline-aligned.
here is the layout arrangement: ffff0000814599d8 <arch_kgdb_ops>: ...
ffff000081459a28 <insn_emulation_lock>: <---------new inserted var ...
ffff000081459a30 <sea_info>: <--------- misaligned ...
So we add __cacheline_aligned to this newly inserted variable and place it in the ".data" section.
Signed-off-by: liqingqing liqingqing3@huawei.com Signed-off-by: qinyu qinyu16@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/armv8_deprecated.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c14b3a508c8a..3a36901b9402 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -61,7 +61,7 @@ struct insn_emulation {
static LIST_HEAD(insn_emulation); static int nr_insn_emulated __initdata; -static DEFINE_RAW_SPINLOCK(insn_emulation_lock); +static __cacheline_aligned DEFINE_RAW_SPINLOCK(insn_emulation_lock);
static void register_emulation_hooks(struct insn_emulation_ops *ops) {
From: Xu Qiang xuqiang36@huawei.com
ascend inclusion category: feature bugzilla: NA CVE: NA
-------------------------------------------------
Export console_flush_on_panic for bbox to use.
Signed-off-by: Xu Qiang xuqiang36@huawei.com Signed-off-by: Fang Lijun fanglijun3@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/printk/printk.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index bdc6cae8c83a..e25c3102b8f3 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2572,6 +2572,7 @@ void console_flush_on_panic(void) console_may_schedule = 0; console_unlock(); } +EXPORT_SYMBOL(console_flush_on_panic);
/* * Return the console tty driver structure and its associated index
From: yangerkun yangerkun@huawei.com
mainline inclusion from mainline-v5.10-rc2 commit d7dce9e08595 category: bugfix bugzilla: 45516 CVE: NA
-------------------------------------------------
ext4_ext_search_right() will read more extent blocks and call put_bh() after we get the information we need. However, ret_ex is left pointing at the extent inside the buffer that has just been released, which may cause a use-after-free once the pagecache has been freed. Fix it by copying the extent structure if needed.
Signed-off-by: yangerkun yangerkun@huawei.com Link: https://lore.kernel.org/r/20201028055617.2569255-1-yangerkun@huawei.com Signed-off-by: Theodore Ts'o tytso@mit.edu Cc: stable@kernel.org Signed-off-by: Zheng Liang zhengliang6@huawei.com Reviewed-by: zhangyi (F) yi.zhang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/ext4/extents.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 46af383809c5..232ba564c7f7 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1508,16 +1508,16 @@ static int ext4_ext_search_left(struct inode *inode, }
/* - * search the closest allocated block to the right for *logical - * and returns it at @logical + it's physical address at @phys - * if *logical is the largest allocated block, the function - * returns 0 at @phys - * return value contains 0 (success) or error code + * Search the closest allocated block to the right for *logical + * and returns it at @logical + it's physical address at @phys. + * If not exists, return 0 and @phys is set to 0. We will return + * 1 which means we found an allocated block and ret_ex is valid. + * Or return a (< 0) error code. */ static int ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path, ext4_lblk_t *logical, ext4_fsblk_t *phys, - struct ext4_extent **ret_ex) + struct ext4_extent *ret_ex) { struct buffer_head *bh = NULL; struct ext4_extent_header *eh; @@ -1611,10 +1611,11 @@ static int ext4_ext_search_right(struct inode *inode, found_extent: *logical = le32_to_cpu(ex->ee_block); *phys = ext4_ext_pblock(ex); - *ret_ex = ex; + if (ret_ex) + *ret_ex = *ex; if (bh) put_bh(bh); - return 0; + return 1; }
/* @@ -2957,8 +2958,8 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, */ lblk = ex_end + 1; err = ext4_ext_search_right(inode, path, &lblk, &pblk, - &ex); - if (err) + NULL); + if (err < 0) goto out; if (pblk) partial_cluster = @@ -4317,7 +4318,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags) { struct ext4_ext_path *path = NULL; - struct ext4_extent newex, *ex, *ex2; + struct ext4_extent newex, *ex, ex2; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_fsblk_t newblock = 0; int free_on_err = 0, err = 0, depth, ret; @@ -4450,15 +4451,14 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, if (err) goto out2; ar.lright = map->m_lblk; - ex2 = NULL; err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright, &ex2); - if (err) + if (err < 0) goto out2;
/* Check if the extent after searching to the right implies a * cluster we can use. */ - if ((sbi->s_cluster_ratio > 1) && ex2 && - get_implied_cluster_alloc(inode->i_sb, map, ex2, path)) { + if ((sbi->s_cluster_ratio > 1) && err && + get_implied_cluster_alloc(inode->i_sb, map, &ex2, path)) { ar.len = allocated = map->m_len; newblock = map->m_pblk; map_from_cluster = true;
From: Fang Lijun fanglijun3@huawei.com
ascend inclusion category: bugfix bugzilla: 46888 CVE: NA
--------------------------------------------------
The "area" is already removed and freed in __vmalloc_area_node() when it returns NULL, so we needn't call free_vm_area() to remove and free this area again.
Fixes: 59a57a82fb2a ("mm/vmalloc: Hugepage vmalloc mappings") Signed-off-by: Fang Lijun fanglijun3@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/vmalloc.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6bebb7b52448..37b476287114 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2707,7 +2707,6 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
fail: if (shift > PAGE_SHIFT) { - free_vm_area(area); shift = PAGE_SHIFT; align = real_align; size = real_size;
From: Xiongfeng Wang wangxiongfeng2@huawei.com
hulk inclusion category: bugfix bugzilla: 46894 CVE: NA
---------------------------
Fix the following compile error when CONFIG_HOTPLUG_CPU is disabled.
arch/arm64/kernel/setup.c: In function 'arch_unregister_cpu': arch/arm64/kernel/setup.c:455:2: error: implicit declaration of function 'unregister_cpu' [-Werror=implicit-function-declaration] unregister_cpu(cpu); ^~~~~~~~~~~~~~
Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/setup.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index aa499eb2a923..8bb5ae3ac293 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -439,6 +439,8 @@ static int __init register_kernel_offset_dumper(void) } __initcall(register_kernel_offset_dumper);
+#ifdef CONFIG_HOTPLUG_CPU + int arch_register_cpu(int num) { struct cpu *cpu = &per_cpu(cpu_data.cpu, num); @@ -455,3 +457,5 @@ void arch_unregister_cpu(int num) unregister_cpu(cpu); } EXPORT_SYMBOL(arch_unregister_cpu); + +#endif
From: Yufen Yu yuyufen@huawei.com
hulk inclusion category: bugfix bugzilla: 30109 CVE: NA ---------------------------
Fix a compile error in bdi_get_dev_name() on some other architectures.
include/linux/backing-dev.h:511:27: note: previous implicit declaration of 'dev_name' was here 511 | strlcpy(dname, rcu_dev ? dev_name(&rcu_dev->dev) : "(unknown)", len); | ^~~~~~~~
Fixes: 4bafd511afa9 ("bdi: get device name under rcu protect") Signed-off-by: Yufen Yu yuyufen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/backing-dev.h | 1 + 1 file changed, 1 insertion(+)
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 947b046ff588..ec3ca0b197dd 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -17,6 +17,7 @@ #include <linux/blk-cgroup.h> #include <linux/backing-dev-defs.h> #include <linux/slab.h> +#include <linux/device.h>
#define BDI_DEV_NAME_LEN 32
From: Hanjun Guo guohanjun@huawei.com
mainline inclusion from mainline-v5.6-rc1 commit 3c23b83a88d0 category: bugfix bugzilla: 28747 CVE: NA
-------------------------------------------------
The IORT specification [0] (Section 3, table 4, page 9) defines the 'Number of IDs' as 'The number of IDs in the range minus one'.
However, the IORT ID mapping function iort_id_map() treats the 'Number of IDs' field as if it were the full IDs mapping count, with the following check in place to detect out of boundary input IDs:
InputID >= Input base + Number of IDs
This check is flawed in that it considers the 'Number of IDs' field as the full number of IDs mapping and disregards the 'minus one' from the IDs count.
The correct check in iort_id_map() should be implemented as:
InputID > Input base + Number of IDs
This implements the specification correctly, but unfortunately it breaks existing firmware that erroneously sets the 'Number of IDs' to the full IDs mapping count rather than the IDs mapping count minus one.
e.g.
PCI hostbridge mapping entry 1: Input base: 0x1000 ID Count: 0x100 Output base: 0x1000 Output reference: 0xC4 //ITS reference
PCI hostbridge mapping entry 2: Input base: 0x1100 ID Count: 0x100 Output base: 0x2000 Output reference: 0xD4 //ITS reference
Here there are two mapping entries, and the second entry's Input base equals the first entry's Input base + ID count. So, with the correct InputID check in place in iort_id_map(), the kernel would map InputID 0x1100 to ITS 0xC4 rather than to 0xD4 as would be expected.
Therefore, to keep supporting existing flawed firmwares, introduce a workaround that instructs the kernel to use the old InputID range check logic in iort_id_map(), so that we can support both firmwares written with the flawed 'Number of IDs' logic and the correct one as defined in the specifications.
[0]: http://infocenter.arm.com/help/topic/com.arm.doc.den0049d/DEN0049D_IO_Remapp...
Reported-by: Pankaj Bansal pankaj.bansal@nxp.com Link: https://lore.kernel.org/linux-acpi/20191215203303.29811-1-pankaj.bansal@nxp.... Signed-off-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Lorenzo Pieralisi lorenzo.pieralisi@arm.com Cc: Pankaj Bansal pankaj.bansal@nxp.com Cc: Will Deacon will@kernel.org Cc: Sudeep Holla sudeep.holla@arm.com Cc: Catalin Marinas catalin.marinas@arm.com Cc: Robin Murphy robin.murphy@arm.com Signed-off-by: Will Deacon will@kernel.org
Conflicts: drivers/acpi/arm64/iort.c [wangxiongfeng: fix a small conflict in acpi_iort_init()] Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/acpi/arm64/iort.c | 57 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-)
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index ed9be22a6913..d596303a00ac 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -307,6 +307,59 @@ static acpi_status iort_match_node_callback(struct acpi_iort_node *node, return status; }
+struct iort_workaround_oem_info { + char oem_id[ACPI_OEM_ID_SIZE + 1]; + char oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1]; + u32 oem_revision; +}; + +static bool apply_id_count_workaround; + +static struct iort_workaround_oem_info wa_info[] __initdata = { + { + .oem_id = "HISI ", + .oem_table_id = "HIP07 ", + .oem_revision = 0, + }, { + .oem_id = "HISI ", + .oem_table_id = "HIP08 ", + .oem_revision = 0, + } +}; + +static void __init +iort_check_id_count_workaround(struct acpi_table_header *tbl) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(wa_info); i++) { + if (!memcmp(wa_info[i].oem_id, tbl->oem_id, ACPI_OEM_ID_SIZE) && + !memcmp(wa_info[i].oem_table_id, tbl->oem_table_id, ACPI_OEM_TABLE_ID_SIZE) && + wa_info[i].oem_revision == tbl->oem_revision) { + apply_id_count_workaround = true; + pr_warn(FW_BUG "ID count for ID mapping entry is wrong, applying workaround\n"); + break; + } + } +} + +static inline u32 iort_get_map_max(struct acpi_iort_id_mapping *map) +{ + u32 map_max = map->input_base + map->id_count; + + /* + * The IORT specification revision D (Section 3, table 4, page 9) says + * Number of IDs = The number of IDs in the range minus one, but the + * IORT code ignored the "minus one", and some firmware did that too, + * so apply a workaround here to keep compatible with both the spec + * compliant and non-spec compliant firmwares. + */ + if (apply_id_count_workaround) + map_max--; + + return map_max; +} + static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in, u32 *rid_out) { @@ -323,8 +376,7 @@ static int iort_id_map(struct acpi_iort_id_mapping *map, u8 type, u32 rid_in, return -ENXIO; }
- if (rid_in < map->input_base || - (rid_in >= map->input_base + map->id_count)) + if (rid_in < map->input_base || rid_in > iort_get_map_max(map)) return -ENXIO;
*rid_out = map->output_base + (rid_in - map->input_base); @@ -1677,5 +1729,6 @@ void __init acpi_iort_init(void) if (ascend_platform_detected(iort_table)) ascend_enable_all_features();
+ iort_check_id_count_workaround(iort_table); iort_init_platform_devices(); }