From: Jason Gunthorpe jgg@nvidia.com
stable inclusion from stable-v5.10.137 commit a2fbf4acd28069d4922c33e4b5b0d58fc38073f8 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I60PLB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 0bfc6a4ea63c2adac71a824397ef48f28dbc5e47 ]
This makes the struct vfio_device part of the public interface so it can be used with container_of and so forth, as is typical for a Linux subystem.
This is the first step to bring some type-safety to the vfio interface by allowing the replacement of 'void *' and 'struct device *' inputs with a simple and clear 'struct vfio_device *'
For now the self-allocating vfio_add_group_dev() interface is kept so each user can be updated as a separate patch.
The expected usage pattern is
driver core probe() function: my_device = kzalloc(sizeof(*mydevice)); vfio_init_group_dev(&my_device->vdev, dev, ops, mydevice); /* other driver specific prep */ vfio_register_group_dev(&my_device->vdev); dev_set_drvdata(dev, my_device);
driver core remove() function: my_device = dev_get_drvdata(dev); vfio_unregister_group_dev(&my_device->vdev); /* other driver specific tear down */ kfree(my_device);
Allowing the driver to be able to use the drvdata and vfio_device to go to/from its own data.
The pattern also makes it clear that vfio_register_group_dev() must be last in the sequence, as once it is called the core code can immediately start calling ops. The init/register gap is provided to allow for the driver to do setup before ops can be called and thus avoid races.
Reviewed-by: Christoph Hellwig hch@lst.de Reviewed-by: Liu Yi L yi.l.liu@intel.com Reviewed-by: Cornelia Huck cohuck@redhat.com Reviewed-by: Max Gurtovoy mgurtovoy@nvidia.com Reviewed-by: Kevin Tian kevin.tian@intel.com Reviewed-by: Eric Auger eric.auger@redhat.com Signed-off-by: Jason Gunthorpe jgg@nvidia.com Message-Id: 3-v3-225de1400dfc+4e074-vfio1_jgg@nvidia.com Signed-off-by: Alex Williamson alex.williamson@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Reviewed-by: Wei Li liwei391@huawei.com --- Documentation/driver-api/vfio.rst | 31 ++++---- drivers/vfio/vfio.c | 125 ++++++++++++++++-------------- include/linux/vfio.h | 16 ++++ 3 files changed, 99 insertions(+), 73 deletions(-)
diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst index 14e41324237d..b57a96d20d3b 100644 --- a/Documentation/driver-api/vfio.rst +++ b/Documentation/driver-api/vfio.rst @@ -326,18 +326,23 @@ VFIO bus driver API
VFIO bus drivers, such as vfio-pci make use of only a few interfaces into VFIO core. When devices are bound and unbound to the driver, -the driver should call vfio_add_group_dev() and vfio_del_group_dev() -respectively:: - - extern int vfio_add_group_dev(struct device *dev, - const struct vfio_device_ops *ops, - void *device_data); - - extern void *vfio_del_group_dev(struct device *dev); - -vfio_add_group_dev() indicates to the core to begin tracking the -iommu_group of the specified dev and register the dev as owned by -a VFIO bus driver. The driver provides an ops structure for callbacks +the driver should call vfio_register_group_dev() and +vfio_unregister_group_dev() respectively:: + + void vfio_init_group_dev(struct vfio_device *device, + struct device *dev, + const struct vfio_device_ops *ops, + void *device_data); + int vfio_register_group_dev(struct vfio_device *device); + void vfio_unregister_group_dev(struct vfio_device *device); + +The driver should embed the vfio_device in its own structure and call +vfio_init_group_dev() to pre-configure it before going to registration. +vfio_register_group_dev() indicates to the core to begin tracking the +iommu_group of the specified dev and register the dev as owned by a VFIO bus +driver. Once vfio_register_group_dev() returns it is possible for userspace to +start accessing the driver, thus the driver should ensure it is completely +ready before calling it. The driver provides an ops structure for callbacks similar to a file operations structure::
struct vfio_device_ops { @@ -353,7 +358,7 @@ similar to a file operations structure:: };
Each function is passed the device_data that was originally registered -in the vfio_add_group_dev() call above. This allows the bus driver +in the vfio_register_group_dev() call above. This allows the bus driver an easy place to store its opaque, private data. The open/release callbacks are issued when a new file descriptor is created for a device (via VFIO_GROUP_GET_DEVICE_FD). The ioctl interface provides diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index b5fa2ae3116d..f886f2db8153 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -89,16 +89,6 @@ struct vfio_group { struct blocking_notifier_head notifier; };
-struct vfio_device { - refcount_t refcount; - struct completion comp; - struct device *dev; - const struct vfio_device_ops *ops; - struct vfio_group *group; - struct list_head group_next; - void *device_data; -}; - #ifdef CONFIG_VFIO_NOIOMMU static bool noiommu __read_mostly; module_param_named(enable_unsafe_noiommu_mode, @@ -532,35 +522,6 @@ static struct vfio_group *vfio_group_get_from_dev(struct device *dev) /** * Device objects - create, release, get, put, search */ -static -struct vfio_device *vfio_group_create_device(struct vfio_group *group, - struct device *dev, - const struct vfio_device_ops *ops, - void *device_data) -{ - struct vfio_device *device; - - device = kzalloc(sizeof(*device), GFP_KERNEL); - if (!device) - return ERR_PTR(-ENOMEM); - - refcount_set(&device->refcount, 1); - init_completion(&device->comp); - device->dev = dev; - /* Our reference on group is moved to the device */ - device->group = group; - device->ops = ops; - device->device_data = device_data; - dev_set_drvdata(dev, device); - - mutex_lock(&group->device_lock); - list_add(&device->group_next, &group->device_list); - group->dev_counter++; - mutex_unlock(&group->device_lock); - - return device; -} - /* Device reference always implies a group reference */ void vfio_device_put(struct vfio_device *device) { @@ -779,14 +740,23 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb, /** * VFIO driver API */ -int vfio_add_group_dev(struct device *dev, - const struct vfio_device_ops *ops, void *device_data) +void vfio_init_group_dev(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops, void *device_data) +{ + init_completion(&device->comp); + device->dev = dev; + device->ops = ops; + device->device_data = device_data; +} +EXPORT_SYMBOL_GPL(vfio_init_group_dev); + +int vfio_register_group_dev(struct vfio_device *device) { + struct vfio_device *existing_device; struct iommu_group *iommu_group; struct vfio_group *group; - struct vfio_device *device;
- iommu_group = iommu_group_get(dev); + iommu_group = iommu_group_get(device->dev); if (!iommu_group) return -EINVAL;
@@ -805,21 +775,50 @@ int vfio_add_group_dev(struct device *dev, iommu_group_put(iommu_group); }
- device = vfio_group_get_device(group, dev); - if (device) { - dev_WARN(dev, "Device already exists on group %d\n", + existing_device = vfio_group_get_device(group, device->dev); + if (existing_device) { + dev_WARN(device->dev, "Device already exists on group %d\n", iommu_group_id(iommu_group)); - vfio_device_put(device); + vfio_device_put(existing_device); vfio_group_put(group); return -EBUSY; }
- device = vfio_group_create_device(group, dev, ops, device_data); - if (IS_ERR(device)) { - vfio_group_put(group); - return PTR_ERR(device); - } + /* Our reference on group is moved to the device */ + device->group = group; + + /* Refcounting can't start until the driver calls register */ + refcount_set(&device->refcount, 1); + + mutex_lock(&group->device_lock); + list_add(&device->group_next, &group->device_list); + group->dev_counter++; + mutex_unlock(&group->device_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(vfio_register_group_dev); + +int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, + void *device_data) +{ + struct vfio_device *device; + int ret; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + vfio_init_group_dev(device, dev, ops, device_data); + ret = vfio_register_group_dev(device); + if (ret) + goto err_kfree; + dev_set_drvdata(dev, device); return 0; + +err_kfree: + kfree(device); + return ret; } EXPORT_SYMBOL_GPL(vfio_add_group_dev);
@@ -887,11 +886,9 @@ EXPORT_SYMBOL_GPL(vfio_device_data); /* * Decrement the device reference count and wait for the device to be * removed. Open file descriptors for the device... */ -void *vfio_del_group_dev(struct device *dev) +void vfio_unregister_group_dev(struct vfio_device *device) { - struct vfio_device *device = dev_get_drvdata(dev); struct vfio_group *group = device->group; - void *device_data = device->device_data; struct vfio_unbound_dev *unbound; unsigned int i = 0; bool interrupted = false; @@ -908,7 +905,7 @@ void *vfio_del_group_dev(struct device *dev) */ unbound = kzalloc(sizeof(*unbound), GFP_KERNEL); if (unbound) { - unbound->dev = dev; + unbound->dev = device->dev; mutex_lock(&group->unbound_lock); list_add(&unbound->unbound_next, &group->unbound_list); mutex_unlock(&group->unbound_lock); @@ -919,7 +916,7 @@ void *vfio_del_group_dev(struct device *dev) rc = try_wait_for_completion(&device->comp); while (rc <= 0) { if (device->ops->request) - device->ops->request(device_data, i++); + device->ops->request(device->device_data, i++);
if (interrupted) { rc = wait_for_completion_timeout(&device->comp, @@ -929,7 +926,7 @@ void *vfio_del_group_dev(struct device *dev) &device->comp, HZ * 10); if (rc < 0) { interrupted = true; - dev_warn(dev, + dev_warn(device->dev, "Device is currently in use, task" " "%s" (%d) " "blocked until device is released", @@ -960,11 +957,19 @@ void *vfio_del_group_dev(struct device *dev) if (list_empty(&group->device_list)) wait_event(group->container_q, !group->container);
- /* Matches the get in vfio_group_create_device() */ + /* Matches the get in vfio_register_group_dev() */ vfio_group_put(group); +} +EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); + +void *vfio_del_group_dev(struct device *dev) +{ + struct vfio_device *device = dev_get_drvdata(dev); + void *device_data = device->device_data; + + vfio_unregister_group_dev(device); dev_set_drvdata(dev, NULL); kfree(device); - return device_data; } EXPORT_SYMBOL_GPL(vfio_del_group_dev); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index c3819fadf7c8..25e33c7a2f6b 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -15,6 +15,18 @@ #include <linux/poll.h> #include <uapi/linux/vfio.h>
+struct vfio_device { + struct device *dev; + const struct vfio_device_ops *ops; + struct vfio_group *group; + + /* Members below here are private, not for driver use */ + refcount_t refcount; + struct completion comp; + struct list_head group_next; + void *device_data; +}; + /** * struct vfio_device_ops - VFIO bus driver device callbacks * @@ -48,11 +60,15 @@ struct vfio_device_ops { extern struct iommu_group *vfio_iommu_group_get(struct device *dev); extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev);
+void vfio_init_group_dev(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops, void *device_data); +int vfio_register_group_dev(struct vfio_device *device); extern int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, void *device_data);
extern void *vfio_del_group_dev(struct device *dev); +void vfio_unregister_group_dev(struct vfio_device *device); extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); extern void vfio_device_put(struct vfio_device *device); extern void *vfio_device_data(struct vfio_device *device);