From: Jean-Philippe Brucker jean-philippe.brucker@arm.com
ascend inclusion category: feature bugzilla: 14369 CVE: NA
--------------
commit https://patchwork.ozlabs.org/patch/822424/
Add bind and unbind operations to the IOMMU API. Device drivers can use them to share process page tables with their device. iommu_process_bind_group is provided for VFIO's convenience, as it needs to provide a coherent interface on containers. Device drivers will most likely want to use iommu_process_bind_device, which doesn't bind the whole group.
PASIDs are de facto shared between all devices in a group (because of hardware weaknesses), but we don't do anything about it at the API level. Making bind_device call bind_group is probably the wrong way around, because it requires more work on our side for no benefit. We'd have to replay all binds each time a device is hotplugged into a group. But when a device is hotplugged into a group, the device driver will have to do a bind before using its PASID anyway and we can reject inconsistencies at that point.
Concurrent calls to iommu_process_bind_device for the same process are not supported at the moment (they'll race on process_alloc, which will only succeed for the first one; the others will have to retry the bind). I also don't support calling bind() on a dying process; I'm not sure whether that matters.
Signed-off-by: Jean-Philippe Brucker jean-philippe.brucker@arm.com Signed-off-by: Fang Lijun fanglijun3@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/iommu/iommu-process.c | 445 +++++++++++++++++++++++++++++++++- drivers/iommu/iommu.c | 64 +++++ include/linux/iommu.h | 53 ++++ 3 files changed, 558 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/iommu-process.c b/drivers/iommu/iommu-process.c index 626503b83354..483821a9e93a 100644 --- a/drivers/iommu/iommu-process.c +++ b/drivers/iommu/iommu-process.c @@ -21,7 +21,9 @@
#include <linux/idr.h> #include <linux/iommu.h> +#include <linux/mmu_notifier.h> #include <linux/slab.h> +#include <linux/sched/mm.h> #include <linux/spinlock.h>
/* Link between a domain and a process */ @@ -50,21 +52,144 @@ static DEFINE_IDR(iommu_process_idr); */ static DEFINE_SPINLOCK(iommu_process_lock);
-static void iommu_process_release(struct kref *kref) +static struct mmu_notifier_ops iommu_process_mmu_notfier; + +/* + * Allocate an iommu_process structure for the given task. + * + * Ideally we shouldn't need the domain parameter, since iommu_process is + * system-wide, but we use it to retrieve the driver's allocation ops and a + * PASID range. + */ +static struct iommu_process * +iommu_process_alloc(struct iommu_domain *domain, struct task_struct *task) +{ + int err; + int pasid; + struct iommu_process *process; + + if (WARN_ON(!domain->ops->process_alloc || !domain->ops->process_free)) + return ERR_PTR(-ENODEV); + + process = domain->ops->process_alloc(task); + if (IS_ERR(process)) + return process; + if (!process) + return ERR_PTR(-ENOMEM); + + process->pid = get_task_pid(task, PIDTYPE_PID); + process->mm = get_task_mm(task); + process->notifier.ops = &iommu_process_mmu_notfier; + process->release = domain->ops->process_free; + INIT_LIST_HEAD(&process->domains); + + if (!process->pid) { + err = -EINVAL; + goto err_free_process; + } + + if (!process->mm) { + err = -EINVAL; + goto err_put_pid; + } + + idr_preload(GFP_KERNEL); + spin_lock(&iommu_process_lock); + pasid = idr_alloc_cyclic(&iommu_process_idr, process, domain->min_pasid, + domain->max_pasid + 1, GFP_ATOMIC); + process->pasid = pasid; + spin_unlock(&iommu_process_lock); + idr_preload_end(); + + if (pasid < 0) { + err = pasid; + goto err_put_mm; + } + + err = mmu_notifier_register(&process->notifier, process->mm); + if (err) + goto err_free_pasid; + + /* + * Now that the MMU notifier is valid, we can allow users to grab this + * process by setting a valid refcount. Before that it was accessible in + * the IDR but invalid. + * + * Users of the process structure obtain it with inc_not_zero, which + * provides a control dependency to ensure that they don't modify the + * structure if they didn't acquire the ref. 
So I think we need a write + * barrier here to pair with that control dependency (XXX probably + * nonsense.) + */ + smp_wmb(); + kref_init(&process->kref); + + /* A mm_count reference is kept by the notifier */ + mmput(process->mm); + + return process; + +err_free_pasid: + /* + * Even if the process is accessible from the IDR at this point, kref is + * 0 so no user could get a reference to it. Free it manually. + */ + spin_lock(&iommu_process_lock); + idr_remove(&iommu_process_idr, process->pasid); + spin_unlock(&iommu_process_lock); + +err_put_mm: + mmput(process->mm); + +err_put_pid: + put_pid(process->pid); + +err_free_process: + domain->ops->process_free(process); + + return ERR_PTR(err); +} + +static void iommu_process_free(struct rcu_head *rcu) { struct iommu_process *process; void (*release)(struct iommu_process *);
+ process = container_of(rcu, struct iommu_process, rcu); + release = process->release; + + release(process); +} + +static void iommu_process_release(struct kref *kref) +{ + struct iommu_process *process; + assert_spin_locked(&iommu_process_lock);
process = container_of(kref, struct iommu_process, kref); - release = process->release; - WARN_ON(!list_empty(&process->domains));
idr_remove(&iommu_process_idr, process->pasid); put_pid(process->pid); - release(process); + + /* + * If we're being released from process exit, the notifier callback + * ->release has already been called. Otherwise we don't need to go + * through there, the process isn't attached to anything anymore. Hence + * no_release. + */ + mmu_notifier_unregister_no_release(&process->notifier, process->mm); + + /* + * We can't free the structure here, because ->release might be + * attempting to grab it concurrently. And in the other case, if the + * structure is being released from within ->release, then + * __mmu_notifier_release expects to still have a valid mn when + * returning. So free the structure when it's safe, after the RCU grace + * period elapsed. + */ + mmu_notifier_call_srcu(&process->rcu, iommu_process_free); }
/* @@ -123,6 +248,318 @@ struct iommu_process *iommu_process_find(int pasid) } EXPORT_SYMBOL_GPL(iommu_process_find);
+static int iommu_process_attach(struct iommu_domain *domain, struct device *dev, + struct iommu_process *process) +{ + int err; + int pasid = process->pasid; + struct iommu_context *context; + + if (WARN_ON(!domain->ops->process_attach || !domain->ops->process_detach || + !domain->ops->process_exit || !domain->ops->process_invalidate)) + return -ENODEV; + + if (pasid > domain->max_pasid || pasid < domain->min_pasid) + return -ENOSPC; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + context->process = process; + context->domain = domain; + refcount_set(&context->ref, 1); + + spin_lock(&iommu_process_lock); + err = domain->ops->process_attach(domain, dev, process, true); + if (err) { + kfree(context); + spin_unlock(&iommu_process_lock); + return err; + } + + list_add(&context->process_head, &process->domains); + list_add(&context->domain_head, &domain->processes); + spin_unlock(&iommu_process_lock); + + return 0; +} + +static void iommu_context_free(struct iommu_context *context) +{ + assert_spin_locked(&iommu_process_lock); + + if (WARN_ON(!context->process || !context->domain)) + return; + + list_del(&context->process_head); + list_del(&context->domain_head); + iommu_process_put_locked(context->process); + + kfree(context); +} + +/* Attach an existing context to the device */ +static int iommu_process_attach_locked(struct iommu_context *context, + struct device *dev) +{ + assert_spin_locked(&iommu_process_lock); + + refcount_inc(&context->ref); + return context->domain->ops->process_attach(context->domain, dev, + context->process, false); +} + +/* Detach device from context and release it if necessary */ +static void iommu_process_detach_locked(struct iommu_context *context, + struct device *dev) +{ + bool last = false; + struct iommu_domain *domain = context->domain; + + assert_spin_locked(&iommu_process_lock); + + if (refcount_dec_and_test(&context->ref)) + last = true; + + domain->ops->process_detach(domain, dev, 
context->process, last); + + if (last) + iommu_context_free(context); +} + +/* + * Called when the process exits. Might race with unbind or any other function + * dropping the last reference to the process. As the mmu notifier doesn't hold + * any reference to the process when calling ->release, try to take a reference. + */ +static void iommu_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) +{ + struct iommu_context *context, *next; + struct iommu_process *process = container_of(mn, struct iommu_process, notifier); + + /* + * If the process is exiting then domains are still attached to the + * process. A few things need to be done before it is safe to release + * + * 1) Tell the IOMMU driver to stop using this PASID (and forward the + * message to attached device drivers. It can then clear the PASID + * table and invalidate relevant TLBs. + * + * 2) Drop all references to this process, by freeing the contexts. + */ + spin_lock(&iommu_process_lock); + if (!iommu_process_get_locked(process)) { + /* Someone's already taking care of it. */ + spin_unlock(&iommu_process_lock); + return; + } + + list_for_each_entry_safe(context, next, &process->domains, process_head) { + context->domain->ops->process_exit(context->domain, process); + iommu_context_free(context); + } + spin_unlock(&iommu_process_lock); + + /* + * We're now reasonably certain that no more fault is being handled for + * this process, since we just flushed them all out of the fault queue. + * Release the last reference to free the process. 
+ */ + iommu_process_put(process); +} + +static void iommu_notifier_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct iommu_context *context; + struct iommu_process *process = container_of(mn, struct iommu_process, notifier); + + spin_lock(&iommu_process_lock); + list_for_each_entry(context, &process->domains, process_head) { + context->domain->ops->process_invalidate(context->domain, + process, start, end - start); + } + spin_unlock(&iommu_process_lock); +} + +static int iommu_notifier_clear_flush_young(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + iommu_notifier_invalidate_range(mn, mm, start, end); + return 0; +} + +static void iommu_notifier_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long address, pte_t pte) +{ + iommu_notifier_invalidate_range(mn, mm, address, address + PAGE_SIZE); +} + +static struct mmu_notifier_ops iommu_process_mmu_notfier = { + .release = iommu_notifier_release, + .clear_flush_young = iommu_notifier_clear_flush_young, + .change_pte = iommu_notifier_change_pte, + .invalidate_range = iommu_notifier_invalidate_range, +}; + +/** + * iommu_process_bind_device - Bind a process address space to a device + * @dev: the device + * @task: the process to bind + * @pasid: valid address where the PASID will be stored + * @flags: bond properties (IOMMU_PROCESS_BIND_*) + * + * Create a bond between device and task, allowing the device to access the + * process address space using the returned PASID. + * + * On success, 0 is returned and @pasid contains a valid ID. Otherwise, an error + * is returned. 
+ */ +int iommu_process_bind_device(struct device *dev, struct task_struct *task, + int *pasid, int flags) +{ + int err, i; + int nesting; + struct pid *pid; + struct iommu_domain *domain; + struct iommu_process *process; + struct iommu_context *cur_context; + struct iommu_context *context = NULL; + + domain = iommu_get_domain_for_dev(dev); + if (WARN_ON(!domain)) + return -EINVAL; + + if (!iommu_domain_get_attr(domain, DOMAIN_ATTR_NESTING, &nesting) && + nesting) + return -EINVAL; + + pid = get_task_pid(task, PIDTYPE_PID); + if (!pid) + return -EINVAL; + + /* If an iommu_process already exists, use it */ + spin_lock(&iommu_process_lock); + idr_for_each_entry(&iommu_process_idr, process, i) { + if (process->pid != pid) + continue; + + if (!iommu_process_get_locked(process)) { + /* Process is defunct, create a new one */ + process = NULL; + break; + } + + /* Great, is it also bound to this domain? */ + list_for_each_entry(cur_context, &process->domains, + process_head) { + if (cur_context->domain != domain) + continue; + + context = cur_context; + *pasid = process->pasid; + + /* Splendid, tell the driver and increase the ref */ + err = iommu_process_attach_locked(context, dev); + if (err) + iommu_process_put_locked(process); + + break; + } + break; + } + spin_unlock(&iommu_process_lock); + put_pid(pid); + + if (context) + return err; + + if (!process) { + process = iommu_process_alloc(domain, task); + if (IS_ERR(process)) + return PTR_ERR(process); + } + + err = iommu_process_attach(domain, dev, process); + if (err) { + iommu_process_put(process); + return err; + } + + *pasid = process->pasid; + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_process_bind_device); + +/** + * iommu_process_unbind_device - Remove a bond created with + * iommu_process_bind_device. 
+ * + * @dev: the device + * @pasid: the pasid returned by bind + */ +int iommu_process_unbind_device(struct device *dev, int pasid) +{ + struct iommu_domain *domain; + struct iommu_process *process; + struct iommu_context *cur_context; + struct iommu_context *context = NULL; + + domain = iommu_get_domain_for_dev(dev); + if (WARN_ON(!domain)) + return -EINVAL; + + spin_lock(&iommu_process_lock); + process = idr_find(&iommu_process_idr, pasid); + if (!process) { + spin_unlock(&iommu_process_lock); + return -ESRCH; + } + + list_for_each_entry(cur_context, &process->domains, process_head) { + if (cur_context->domain == domain) { + context = cur_context; + break; + } + } + + if (context) + iommu_process_detach_locked(context, dev); + spin_unlock(&iommu_process_lock); + + return context ? 0 : -ESRCH; +} +EXPORT_SYMBOL_GPL(iommu_process_unbind_device); + +/* + * __iommu_process_unbind_dev_all - Detach all processes attached to this + * device. + * + * When detaching @device from @domain, IOMMU drivers have to use this function. + */ +void __iommu_process_unbind_dev_all(struct iommu_domain *domain, struct device *dev) +{ + struct iommu_context *context, *next; + + /* Ask device driver to stop using all PASIDs */ + spin_lock(&iommu_process_lock); + if (domain->process_exit) { + list_for_each_entry(context, &domain->processes, domain_head) + domain->process_exit(domain, dev, + context->process->pasid, + domain->process_exit_token); + } + + list_for_each_entry_safe(context, next, &domain->processes, domain_head) + iommu_process_detach_locked(context, dev); + spin_unlock(&iommu_process_lock); +} +EXPORT_SYMBOL_GPL(__iommu_process_unbind_dev_all); + /** * iommu_set_process_exit_handler() - set a callback for stopping the use of * PASID in a device. 
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index dac8aeab16c9..831c5065f7f8 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1852,6 +1852,70 @@ void iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_detach_group);
+/* + * iommu_process_bind_group - Share process address space with all devices in + * the group. + * @group: the iommu group + * @task: the process to bind + * @pasid: valid address where the PASID will be stored + * @flags: bond properties (IOMMU_PROCESS_BIND_*) + * + * Create a bond between group and process, allowing devices in the group to + * access the process address space using @pasid. + * + * On success, 0 is returned and @pasid contains a valid ID. Otherwise, an error + * is returned. + */ +int iommu_process_bind_group(struct iommu_group *group, + struct task_struct *task, int *pasid, int flags) +{ + struct group_device *device; + int ret = -ENODEV; + + if (!pasid) + return -EINVAL; + + if (!group->domain) + return -EINVAL; + + mutex_lock(&group->mutex); + list_for_each_entry(device, &group->devices, list) { + ret = iommu_process_bind_device(device->dev, task, pasid, + flags); + if (ret) + break; + } + + if (ret) { + list_for_each_entry_continue_reverse(device, &group->devices, list) + iommu_process_unbind_device(device->dev, *pasid); + } + mutex_unlock(&group->mutex); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_process_bind_group); + +/** + * iommu_process_unbind_group - Remove a bond created with + * iommu_process_bind_group + * + * @group: the group + * @pasid: the pasid returned by bind + */ +int iommu_process_unbind_group(struct iommu_group *group, int pasid) +{ + struct group_device *device; + + mutex_lock(&group->mutex); + list_for_each_entry(device, &group->devices, list) + iommu_process_unbind_device(device->dev, pasid); + mutex_unlock(&group->mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(iommu_process_unbind_group); + phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { if (unlikely(domain->ops->iova_to_phys == NULL)) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index dc78957b5544..0af49bd0a6b1 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -284,6 +284,11 @@ struct iommu_sva_param { * 
@domain_free: free iommu domain * @attach_dev: attach device to an iommu domain * @detach_dev: detach device from an iommu domain + * @process_alloc: allocate iommu process + * @process_free: free iommu process + * @process_attach: attach iommu process to a domain + * @process_detach: detach iommu process from a domain. Remove PASID entry and + * flush associated TLB entries. * @process_invalidate: Invalidate a range of mappings for a process. * @process_exit: A process is exiting. Stop using the PASID, remove PASID entry * and flush associated TLB entries. @@ -330,6 +335,12 @@ struct iommu_ops {
int (*attach_dev)(struct iommu_domain *domain, struct device *dev); void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + struct iommu_process *(*process_alloc)(struct task_struct *task); + void (*process_free)(struct iommu_process *process); + int (*process_attach)(struct iommu_domain *domain, struct device *dev, + struct iommu_process *process, bool first); + void (*process_detach)(struct iommu_domain *domain, struct device *dev, + struct iommu_process *process, bool last); void (*process_invalidate)(struct iommu_domain *domain, struct iommu_process *process, unsigned long iova, size_t size); @@ -703,6 +714,10 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, u32 *ids, int num_ids); const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode); +extern int iommu_process_bind_group(struct iommu_group *group, + struct task_struct *task, int *pasid, + int flags); +extern int iommu_process_unbind_group(struct iommu_group *group, int pasid);
extern int iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, int *pasid, unsigned long flags, void *drvdata); @@ -1059,6 +1074,19 @@ static inline int iommu_sva_unbind_device(struct device *dev, int pasid) return -ENODEV; }
+static inline int iommu_process_bind_group(struct iommu_group *group, + struct task_struct *task, int *pasid, + int flags) +{ + return -ENODEV; +} + +static inline int iommu_process_unbind_group(struct iommu_group *group, + int pasid) +{ + return -ENODEV; +} + #endif /* CONFIG_IOMMU_API */
#ifdef CONFIG_IOMMU_SVA @@ -1165,6 +1193,13 @@ extern void iommu_set_process_exit_handler(struct device *dev, extern struct iommu_process *iommu_process_find(int pasid); extern void iommu_process_put(struct iommu_process *process);
+extern int iommu_process_bind_device(struct device *dev, + struct task_struct *task, int *pasid, + int flags); +extern int iommu_process_unbind_device(struct device *dev, int pasid); +extern void __iommu_process_unbind_dev_all(struct iommu_domain *domain, + struct device *dev); + #else /* CONFIG_IOMMU_PROCESS */ static inline void iommu_set_process_exit_handler(struct device *dev, iommu_process_exit_handler_t cb, @@ -1180,6 +1215,24 @@ static inline struct iommu_process *iommu_process_find(int pasid) static inline void iommu_process_put(struct iommu_process *process) { } + +static inline int iommu_process_bind_device(struct device *dev, + struct task_struct *task, + int *pasid, int flags) +{ + return -ENODEV; +} + +static inline int iommu_process_unbind_device(struct device *dev, int pasid) +{ + return -ENODEV; +} + +static inline void __iommu_process_unbind_dev_all(struct iommu_domain *domain, + struct device *dev) +{ +} + #endif /* CONFIG_IOMMU_PROCESS */
#endif /* __LINUX_IOMMU_H */