From: jiangdongxu jiangdongxu1@huawei.com
Backport vDPA migration support patches merged on branch OLK-5.10
V2: fix compile errors in the vduse and snet_vdpa vdpa drivers, which are new in kernel 6.6
jiangdongxu (12): vdpa: add log operations vhost-vdpa: add uAPI for logging vdpa: add device state operations vhost-vdpa: add uAPI for device buffer vdpa: add vdpa device migration status ops vhost-vdpa: add uAPI for device migration status vhost: add VHOST feature VHOST_BACKEND_F_BYTEMAPLOG vhost-vdpa: Allow transparent MSI IOV vhost-vdpa: fix msi irq request err vhost-vdpa: allow set feature VHOST_F_LOG_ALL when been negotiated. vhost-vdpa: add reset state params to indicate reset level vdpa: add vmstate header file
drivers/vdpa/ifcvf/ifcvf_main.c | 2 +- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- drivers/vdpa/solidrun/snet_main.c | 2 +- drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- drivers/vdpa/vdpa_user/vduse_dev.c | 2 +- drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- drivers/vhost/vdpa.c | 232 ++++++++++++++++++++++++++--- drivers/virtio/virtio_vdpa.c | 2 +- include/linux/vdpa.h | 56 ++++++- include/linux/vdpa_vmstate.h | 182 ++++++++++++++++++++++ include/uapi/linux/vhost.h | 12 ++ include/uapi/linux/vhost_types.h | 19 +++ 12 files changed, 485 insertions(+), 30 deletions(-) create mode 100644 include/linux/vdpa_vmstate.h
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
Several new interfaces are introduced to allow a vdpa device to log guest memory during live migration and return the log to the VMM.
The set_log_base interface is used to set the base address of the buffer that stores the bitmaps.
The set_log_size interface is used to set the size of the buffer that stores the bitmaps.
The log_sync interface is used to copy the bitmaps from kernel space to the VMM's user space.
These operations are optional. If a driver does not implement one of them, the corresponding request fails with EOPNOTSUPP.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- include/linux/vdpa.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 0e652026b776..d2c322a6e4ae 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -330,6 +330,15 @@ struct vdpa_map_file { * @unbind_mm: Unbind the device from the address space * bound using the bind_mm callback. (optional) * @vdev: vdpa device + * @set_log_base Set base address for logging. (optional) + * @vdev: vdpa device + * @base: base address + * @set_log_size Set buffer size for logging. (optional) + * @vdev: vdpa device + * @size: logging buffer size + * @log_sync Synchronize logging buffer from kernel space to + * user space. (optional) + * @vdev: vdpa device * @free: Free resources that belongs to vDPA (optional) * @vdev: vdpa device */ @@ -400,6 +409,11 @@ struct vdpa_config_ops { int (*bind_mm)(struct vdpa_device *vdev, struct mm_struct *mm); void (*unbind_mm)(struct vdpa_device *vdev);
+ /* Log ops */ + int (*set_log_base)(struct vdpa_device *vdev, uint64_t base); + int (*set_log_size)(struct vdpa_device *vdev, uint64_t size); + int (*log_sync)(struct vdpa_device *vdev); + /* Free device resources */ void (*free)(struct vdpa_device *vdev); };
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
These new ioctls add support for setting the bitmap configuration, such as the base address and buffer size, from userspace.
When setting up migration, the VMM calls VHOST_SET_LOG_BASE and VHOST_SET_LOG_SIZE to set the address and size of the buffer used for storing bitmaps.
When the VMM then starts live migration, it enables logging on the vhost device by setting the feature VHOST_F_LOG_ALL.
During each live migration iteration, the VMM gets dirty page information from the vhost device by calling VHOST_LOG_SYNC.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- drivers/vhost/vdpa.c | 49 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/vhost.h | 4 ++++ 2 files changed, 53 insertions(+)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index fb590e346e43..425850d1c2c7 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -567,6 +567,47 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v) return ops->resume(vdpa); }
+static long vhost_vdpa_set_log_base(struct vhost_vdpa *v, u64 __user *argp) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + u64 log; + + if (!ops->set_log_base) + return -EOPNOTSUPP; + + if (copy_from_user(&log, argp, sizeof(uint64_t))) + return -EFAULT; + + return ops->set_log_base(vdpa, log); +} + +static long vhost_vdpa_set_log_size(struct vhost_vdpa *v, u64 __user *sizep) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + u64 log_size; + + if (!ops->set_log_size) + return -EOPNOTSUPP; + + if (copy_from_user(&log_size, sizep, sizeof(log_size))) + return -EFAULT; + + return ops->set_log_size(vdpa, log_size); +} + +static long vhost_vdpa_log_sync(struct vhost_vdpa *v) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + + if (!ops->log_sync) + return -EOPNOTSUPP; + + return ops->log_sync(vdpa); +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -741,6 +782,14 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, r = -EFAULT; break; case VHOST_SET_LOG_BASE: + r = vhost_vdpa_set_log_base(v, argp); + break; + case VHOST_SET_LOG_SIZE: + r = vhost_vdpa_set_log_size(v, argp); + break; + case VHOST_LOG_SYNC: + r = vhost_vdpa_log_sync(v); + break; case VHOST_SET_LOG_FD: r = -ENOIOCTLCMD; break; diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index f5c48b61ab62..ce9d187432d1 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -43,6 +43,10 @@ * The bit is set using an atomic 32 bit operation. */ /* Set base address for logging. 
*/ #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) +/* Set buffer size for logging */ +#define VHOST_SET_LOG_SIZE _IOW(VHOST_VIRTIO, 0x05, __u64) +/* Synchronize logging buffer from kernel space to user space */ +#define VHOST_LOG_SYNC _IO(VHOST_VIRTIO, 0x06) /* Specify an eventfd file descriptor to signal on log write. */ #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) /* By default, a device gets one vhost_worker that its virtqueues share. This
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
Introduce several interfaces to allow a vdpa device to save/load its device status when the guest machine is suspended and resumed.
The get_dev_buffer_size interface is used to get the buffer size of vdpa device status.
The get_dev_buffer interface is used to get the device buffer from vdpa device, and VMM can save it.
The set_dev_buffer interface is used to set the device status from userspace.
These operations are optional. If they are not implemented, return EOPNOTSUPP.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- include/linux/vdpa.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+)
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index d2c322a6e4ae..fffd6cd366bf 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -339,6 +339,19 @@ struct vdpa_map_file { * @log_sync Synchronize logging buffer from kernel space to * user space. (optional) * @vdev: vdpa device + * @get_dev_buffer_size Get device state buffer size. (optional) + * @vdev: vdpa device + * Return device status buffer size of vdpa device. + * @get_dev_buffer Get device state buffer. (optional) + * @vdev: vdpa device + * @offset: offset of dest for saving device state. + * @dest: userspace address for saving device state. + * @len: device state buffer length. + * @set_dev_buffer Set device state buffer. (opetional) + * @vdev: vdpa device + * @offset: offset of src addr of device state. + * @src: userspace addr of device state + * @len: device state buffer length. * @free: Free resources that belongs to vDPA (optional) * @vdev: vdpa device */ @@ -414,6 +427,13 @@ struct vdpa_config_ops { int (*set_log_size)(struct vdpa_device *vdev, uint64_t size); int (*log_sync)(struct vdpa_device *vdev);
+ /* device state ops */ + uint32_t (*get_dev_buffer_size)(struct vdpa_device *vdpa); + int (*get_dev_buffer)(struct vdpa_device *vdev, unsigned int offset, + void __user *dest, unsigned int len); + int (*set_dev_buffer)(struct vdpa_device *vdev, unsigned int offset, + const void __user *src, unsigned int len); + /* Free device resources */ void (*free)(struct vdpa_device *vdev); };
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
These new ioctls add support for saving/loading device status from userspace.
When a vhost-vdpa device starts migration, the VMM needs to call these ioctls to save/load the device status of vhost-vdpa devices, if these ops are implemented.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- drivers/vhost/vdpa.c | 71 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/vhost.h | 5 +++ 2 files changed, 76 insertions(+)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 425850d1c2c7..84765b4ec33c 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -567,6 +567,68 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v) return ops->resume(vdpa); }
+static int vhost_vdpa_get_dev_buffer_size(struct vhost_vdpa *v, + uint32_t __user *argp) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + uint32_t size; + + if (!ops->get_dev_buffer_size) + return -EOPNOTSUPP; + + size = ops->get_dev_buffer_size(vdpa); + + if (copy_to_user(argp, &size, sizeof(size))) + return -EFAULT; + + return 0; +} + +static int vhost_vdpa_get_dev_buffer(struct vhost_vdpa *v, + struct vhost_vdpa_config __user *c) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + struct vhost_vdpa_config config; + int ret; + unsigned long size = offsetof(struct vhost_vdpa_config, buf); + + if (copy_from_user(&config, c, size)) + return -EFAULT; + + if (!ops->get_dev_buffer) + return -EOPNOTSUPP; + + down_read(&vdpa->cf_lock); + ret = ops->get_dev_buffer(vdpa, config.off, c->buf, config.len); + up_read(&vdpa->cf_lock); + + return ret; +} + +static int vhost_vdpa_set_dev_buffer(struct vhost_vdpa *v, + struct vhost_vdpa_config __user *c) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + struct vhost_vdpa_config config; + int ret; + unsigned long size = offsetof(struct vhost_vdpa_config, buf); + + if (copy_from_user(&config, c, size)) + return -EFAULT; + + if (!ops->set_dev_buffer) + return -EOPNOTSUPP; + + down_write(&vdpa->cf_lock); + ret = ops->set_dev_buffer(vdpa, config.off, c->buf, config.len); + up_write(&vdpa->cf_lock); + + return ret; +} + static long vhost_vdpa_set_log_base(struct vhost_vdpa *v, u64 __user *argp) { struct vdpa_device *vdpa = v->vdpa; @@ -821,6 +883,15 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_RESUME: r = vhost_vdpa_resume(v); break; + case VHOST_GET_DEV_BUFFER_SIZE: + r = vhost_vdpa_get_dev_buffer_size(v, argp); + break; + case VHOST_GET_DEV_BUFFER: + r = vhost_vdpa_get_dev_buffer(v, argp); + break; + case VHOST_SET_DEV_BUFFER: + r = vhost_vdpa_set_dev_buffer(v, argp); + 
break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index ce9d187432d1..7307f01cd2fa 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -223,4 +223,9 @@ */ #define VHOST_VDPA_RESUME _IO(VHOST_VIRTIO, 0x7E)
+/* set and get device buffer */ +#define VHOST_GET_DEV_BUFFER _IOR(VHOST_VIRTIO, 0xb0, struct vhost_vdpa_config) +#define VHOST_SET_DEV_BUFFER _IOW(VHOST_VIRTIO, 0xb1, struct vhost_vdpa_config) +#define VHOST_GET_DEV_BUFFER_SIZE _IOR(VHOST_VIRTIO, 0xb3, __u32) + #endif
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
Introduce a new interface to set the vdpa device migration status, such as migration start/stop, pre_start/pre_stop, etc.
Some vdpa devices need to do some work in different migration states. As not all vdpa devices need this, the interface is optional.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- include/linux/vdpa.h | 6 ++++++ 1 file changed, 6 insertions(+)
diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index fffd6cd366bf..1ed264ec1ebb 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -352,6 +352,9 @@ struct vdpa_map_file { * @offset: offset of src addr of device state. * @src: userspace addr of device state * @len: device state buffer length. + * @set_mig_state Set device migration status. (optional) + * @vdev: vdpa device + * @status: migration status * @free: Free resources that belongs to vDPA (optional) * @vdev: vdpa device */ @@ -434,6 +437,9 @@ struct vdpa_config_ops { int (*set_dev_buffer)(struct vdpa_device *vdev, unsigned int offset, const void __user *src, unsigned int len);
+ /* device mig state ops */ + int (*set_mig_state)(struct vdpa_device *v, u8 state); + /* Free device resources */ void (*free)(struct vdpa_device *vdev); };
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
This new ioctl adds support for setting the vhost-vdpa device migration state.
During migration, there are several migration states, such as start/stop, pre_start/pre_stop, post_start/post_stop, cancel, etc. Some hardware needs to do something at these stages, so introduce a new ioctl to implement it.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- drivers/vhost/vdpa.c | 18 ++++++++++++++++++ include/uapi/linux/vhost.h | 3 +++ include/uapi/linux/vhost_types.h | 16 ++++++++++++++++ 3 files changed, 37 insertions(+)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 84765b4ec33c..251d176329d9 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -629,6 +629,21 @@ static int vhost_vdpa_set_dev_buffer(struct vhost_vdpa *v, return ret; }
+static int vhost_vdpa_set_mig_state(struct vhost_vdpa *v, u8 __user *c) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + u8 state; + + if (!ops->set_mig_state) + return -EOPNOTSUPP; + + if (get_user(state, c)) + return -EFAULT; + + return ops->set_mig_state(vdpa, state); +} + static long vhost_vdpa_set_log_base(struct vhost_vdpa *v, u64 __user *argp) { struct vdpa_device *vdpa = v->vdpa; @@ -892,6 +907,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_SET_DEV_BUFFER: r = vhost_vdpa_set_dev_buffer(v, argp); break; + case VHOST_VDPA_SET_MIG_STATE: + r = vhost_vdpa_set_mig_state(v, argp); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 7307f01cd2fa..a54098c7b0bc 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -228,4 +228,7 @@ #define VHOST_SET_DEV_BUFFER _IOW(VHOST_VIRTIO, 0xb1, struct vhost_vdpa_config) #define VHOST_GET_DEV_BUFFER_SIZE _IOR(VHOST_VIRTIO, 0xb3, __u32)
+/* set device migtration state */ +#define VHOST_VDPA_SET_MIG_STATE _IOW(VHOST_VIRTIO, 0xb2, __u8) + #endif diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 2d827d22cd99..13c44c124a48 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -163,6 +163,22 @@ struct vhost_vdpa_iova_range { __u64 last; };
+/* vhost vdpa device migration statue */ +enum { + VHOST_VDPA_DEVICE_START, + VHOST_VDPA_DEVICE_STOP, + VHOST_VDPA_DEVICE_PRE_START, + VHOST_VDPA_DEVICE_PRE_STOP, + VHOST_VDPA_DEVICE_CANCEL, + VHOST_VDPA_DEVICE_POST_START, + VHOST_VDPA_DEVICE_START_ASYNC, + VHOST_VDPA_DEVICE_STOP_ASYNC, + VHOST_VDPA_DEVICE_PRE_START_ASYNC, + VHOST_VDPA_DEVICE_QUERY_OP_STATE, + VHOST_VDPA_DEVICE_MSIX_MASK, + VHOST_VDPA_DEVICE_MSIX_UNMASK, +}; + /* Feature bits */ /* Log all write descriptors. Can be changed while device is active. */ #define VHOST_F_LOG_ALL 26
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
Introduce a new feature bit, VHOST_BACKEND_F_BYTEMAPLOG, for negotiating the type of dirty page log.
As some hardware only supports a bytemap for logging, introduce a new feature bit. When the vhost device starts, the dirty page type used for logging is negotiated.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- drivers/vhost/vdpa.c | 3 ++- include/uapi/linux/vhost_types.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 251d176329d9..9af834fc2a47 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -810,7 +810,8 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, if (features & ~(VHOST_VDPA_BACKEND_FEATURES | BIT_ULL(VHOST_BACKEND_F_SUSPEND) | BIT_ULL(VHOST_BACKEND_F_RESUME) | - BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK))) + BIT_ULL(VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK) | + BIT_ULL(VHOST_BACKEND_F_BYTEMAPLOG))) return -EOPNOTSUPP; if ((features & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) && !vhost_vdpa_can_suspend(v)) diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h index 13c44c124a48..c7f25b0f7645 100644 --- a/include/uapi/linux/vhost_types.h +++ b/include/uapi/linux/vhost_types.h @@ -202,4 +202,7 @@ enum { */ #define VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK 0x6
+/* Device can use bytemap to deal log */ +#define VHOST_BACKEND_F_BYTEMAPLOG 0x3f + #endif
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
When attaching dma_dev to the iommu domain, check the device's reserved regions and test whether the IOMMU translates MSI transactions. If yes, we initialize an IOVA allocator through the iommu_get_msi_cookie API. This will allow the MSI IOVAs to be transparently allocated on MSI controller's compose().
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- drivers/vhost/vdpa.c | 59 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 9af834fc2a47..b4e24d9fe7fa 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -49,6 +49,7 @@ struct vhost_vdpa { struct completion completion; struct vdpa_device *vdpa; struct hlist_head as[VHOST_VDPA_IOTLB_BUCKETS]; + struct vhost_iotlb resv_iotlb; struct device dev; struct cdev cdev; atomic_t opened; @@ -1254,6 +1255,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, msg->iova + msg->size - 1 > v->range.last) return -EINVAL;
+ if (vhost_iotlb_itree_first(&v->resv_iotlb, msg->iova, + msg->iova + msg->size - 1)) + return -EINVAL; + if (vhost_iotlb_itree_first(iotlb, msg->iova, msg->iova + msg->size - 1)) return -EEXIST; @@ -1342,6 +1347,46 @@ static ssize_t vhost_vdpa_chr_write_iter(struct kiocb *iocb, return vhost_chr_write_iter(dev, from); }
+static int vhost_vdpa_resv_iommu_region(struct iommu_domain *domain, struct device *dma_dev, + struct vhost_iotlb *resv_iotlb) +{ + struct list_head dev_resv_regions; + phys_addr_t resv_msi_base = 0; + struct iommu_resv_region *region; + int ret = 0; + bool with_sw_msi = false; + bool with_hw_msi = false; + + INIT_LIST_HEAD(&dev_resv_regions); + iommu_get_resv_regions(dma_dev, &dev_resv_regions); + + list_for_each_entry(region, &dev_resv_regions, list) { + ret = vhost_iotlb_add_range_ctx(resv_iotlb, region->start, + region->start + region->length - 1, + 0, 0, NULL); + if (ret) { + vhost_iotlb_reset(resv_iotlb); + break; + } + + if (region->type == IOMMU_RESV_MSI) + with_hw_msi = true; + + if (region->type == IOMMU_RESV_SW_MSI) { + resv_msi_base = region->start; + with_sw_msi = true; + } + + } + + if (!ret && !with_hw_msi && with_sw_msi) + ret = iommu_get_msi_cookie(domain, resv_msi_base); + + iommu_put_resv_regions(dma_dev, &dev_resv_regions); + + return ret; +} + static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; @@ -1370,11 +1415,16 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
ret = iommu_attach_device(v->domain, dma_dev); if (ret) - goto err_attach; + goto err_alloc_domain;
- return 0; + ret = vhost_vdpa_resv_iommu_region(v->domain, dma_dev, &v->resv_iotlb); + if (ret) + goto err_attach_device;
-err_attach: + return 0; +err_attach_device: + iommu_detach_device(v->domain, dma_dev); +err_alloc_domain: iommu_domain_free(v->domain); v->domain = NULL; return ret; @@ -1497,6 +1547,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_vdpa_unbind_mm(v); vhost_vdpa_config_put(v); vhost_vdpa_cleanup(v); + vhost_iotlb_reset(&v->resv_iotlb); mutex_unlock(&d->mutex);
atomic_dec(&v->opened); @@ -1629,6 +1680,8 @@ static int vhost_vdpa_probe(struct vdpa_device *vdpa) goto err; }
+ vhost_iotlb_init(&v->resv_iotlb, 0, 0); + r = dev_set_name(&v->dev, "vhost-vdpa-%u", minor); if (r) goto err;
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
When we call vhost_vdpa_reset, the vdpa device pci driver may request an irq. At this time, we have not initialized the msi iova for the device, which may cause an error. Call vhost_vdpa_alloc_domain first to avoid this scenario.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- drivers/vhost/vdpa.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index b4e24d9fe7fa..0ad78ea7baea 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1489,6 +1489,9 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) opened = atomic_cmpxchg(&v->opened, 0, 1); if (opened) return -EBUSY; + r = vhost_vdpa_alloc_domain(v); + if (r) + return r;
nvqs = v->nvqs; r = vhost_vdpa_reset(v); @@ -1509,19 +1512,14 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg);
- r = vhost_vdpa_alloc_domain(v); - if (r) - goto err_alloc_domain; - vhost_vdpa_set_iova_range(v);
filep->private_data = v;
return 0;
-err_alloc_domain: - vhost_vdpa_cleanup(v); err: + vhost_vdpa_free_domain(v); atomic_dec(&v->opened); return r; } @@ -1547,7 +1545,6 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_vdpa_unbind_mm(v); vhost_vdpa_config_put(v); vhost_vdpa_cleanup(v); - vhost_iotlb_reset(&v->resv_iotlb); mutex_unlock(&d->mutex);
atomic_dec(&v->opened);
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
It's not allowed to change the features of a vhost-vdpa device after they have been negotiated. But log start/end is allowed. Add an exception for the feature VHOST_F_LOG_ALL.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- drivers/vhost/vdpa.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 0ad78ea7baea..5f59072fe307 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -424,16 +424,19 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep) u64 features; int i;
+ if (copy_from_user(&features, featurep, sizeof(features))) + return -EFAULT; + + actual_features = ops->get_driver_features(vdpa); + /* * It's not allowed to change the features after they have - * been negotiated. + * been negotiated. But log start/end is allowed. */ - if (ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK) + if ((ops->get_status(vdpa) & VIRTIO_CONFIG_S_FEATURES_OK) && + (features & ~(BIT_ULL(VHOST_F_LOG_ALL))) != actual_features) return -EBUSY;
- if (copy_from_user(&features, featurep, sizeof(features))) - return -EFAULT; - if (vdpa_set_features(vdpa, features)) return -EINVAL;
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
When vdpa hardware is used, some hardware initialization may be required. Currently, qemu connects to vdpa devices through the vhost framework. Since it is the vhost device that qemu opens, the vdpa device is not aware of the open, which may cause the hardware status to be incorrect.
Add a state parameter to the vdpa reset interface, which distinguishes the reset performed when the vhost-vdpa device is opened, the reset performed when it is closed, and the virtio reset issued by the virtual machine.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- drivers/vdpa/ifcvf/ifcvf_main.c | 2 +- drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +- drivers/vdpa/solidrun/snet_main.c | 2 +- drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- drivers/vdpa/vdpa_user/vduse_dev.c | 2 +- drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +- drivers/vhost/vdpa.c | 10 +++++----- drivers/virtio/virtio_vdpa.c | 2 +- include/linux/vdpa.h | 16 +++++++++++++--- 9 files changed, 25 insertions(+), 15 deletions(-)
diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index e98fa8100f3c..ec72505bbe1a 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -434,7 +434,7 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) ifcvf_set_status(vf, status); }
-static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev) +static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev, int state) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); u8 status = ifcvf_get_status(vf); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 946488b8989f..b79f14b5b970 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2856,7 +2856,7 @@ static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev) mvdev->group2asid[i] = 0; }
-static int mlx5_vdpa_reset(struct vdpa_device *vdev) +static int mlx5_vdpa_reset(struct vdpa_device *vdev, int state) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); diff --git a/drivers/vdpa/solidrun/snet_main.c b/drivers/vdpa/solidrun/snet_main.c index 99428a04068d..c0ddeb9ded11 100644 --- a/drivers/vdpa/solidrun/snet_main.c +++ b/drivers/vdpa/solidrun/snet_main.c @@ -245,7 +245,7 @@ static int snet_reset_dev(struct snet *snet) return 0; }
-static int snet_reset(struct vdpa_device *vdev) +static int snet_reset(struct vdpa_device *vdev, int state) { struct snet *snet = vdpa_to_snet(vdev);
diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 76d41058add9..c2ec1bc83170 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -480,7 +480,7 @@ static void vdpasim_set_status(struct vdpa_device *vdpa, u8 status) mutex_unlock(&vdpasim->mutex); }
-static int vdpasim_reset(struct vdpa_device *vdpa) +static int vdpasim_reset(struct vdpa_device *vdpa, int state) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index df7869537ef1..f0c48f9efe4e 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -704,7 +704,7 @@ static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset, /* Now we only support read-only configuration space */ }
-static int vduse_vdpa_reset(struct vdpa_device *vdpa) +static int vduse_vdpa_reset(struct vdpa_device *vdpa, int state) { struct vduse_dev *dev = vdpa_to_vduse(vdpa); int ret = vduse_dev_set_status(dev, 0); diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c index 281287fae89f..0e16dd08968b 100644 --- a/drivers/vdpa/virtio_pci/vp_vdpa.c +++ b/drivers/vdpa/virtio_pci/vp_vdpa.c @@ -222,7 +222,7 @@ static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status) vp_modern_set_status(mdev, status); }
-static int vp_vdpa_reset(struct vdpa_device *vdpa) +static int vp_vdpa_reset(struct vdpa_device *vdpa, int state) { struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa); struct virtio_pci_modern_device *mdev = vp_vdpa_to_mdev(vp_vdpa); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 5f59072fe307..7bef364935b2 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -211,13 +211,13 @@ static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) irq_bypass_unregister_producer(&vq->call_ctx.producer); }
-static int vhost_vdpa_reset(struct vhost_vdpa *v) +static int vhost_vdpa_reset(struct vhost_vdpa *v, int state) { struct vdpa_device *vdpa = v->vdpa;
v->in_batch = 0;
- return vdpa_reset(vdpa); + return vdpa_reset(vdpa, state); }
static long vhost_vdpa_bind_mm(struct vhost_vdpa *v) @@ -296,7 +296,7 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) vhost_vdpa_unsetup_vq_irq(v, i);
if (status == 0) { - ret = vdpa_reset(vdpa); + ret = vdpa_reset(vdpa, VDPA_DEV_RESET_VIRTIO); if (ret) return ret; } else @@ -1497,7 +1497,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) return r;
nvqs = v->nvqs; - r = vhost_vdpa_reset(v); + r = vhost_vdpa_reset(v, VDPA_DEV_RESET_OPEN); if (r) goto err;
@@ -1543,7 +1543,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) mutex_lock(&d->mutex); filep->private_data = NULL; vhost_vdpa_clean_irq(v); - vhost_vdpa_reset(v); + vhost_vdpa_reset(v, VDPA_DEV_RESET_CLOSE); vhost_dev_stop(&v->vdev); vhost_vdpa_unbind_mm(v); vhost_vdpa_config_put(v); diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 06ce6d8c2e00..13f69bfb9c98 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -100,7 +100,7 @@ static void virtio_vdpa_reset(struct virtio_device *vdev) { struct vdpa_device *vdpa = vd_get_vdpa(vdev);
- vdpa_reset(vdpa); + vdpa_reset(vdpa, VDPA_DEV_RESET_VIRTIO); }
static bool virtio_vdpa_notify(struct virtqueue *vq) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 1ed264ec1ebb..3120a1a600dd 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -129,6 +129,12 @@ struct vdpa_map_file { u64 offset; };
+enum vdpa_reset_state { + VDPA_DEV_RESET_VIRTIO = 0, + VDPA_DEV_RESET_OPEN = 1, + VDPA_DEV_RESET_CLOSE = 2, +}; + /** * struct vdpa_config_ops - operations for configuring a vDPA device. * Note: vDPA device drivers are required to implement all of the @@ -241,6 +247,10 @@ struct vdpa_map_file { * @status: virtio device status * @reset: Reset device * @vdev: vdpa device + * @state: state for reset + * VDPA_DEV_RESET_VIRTIO for virtio reset + * VDPA_DEV_RESET_OPEN for vhost-vdpa device open + * VDPA_DEV_RESET_CLOSE for vhost-vdpa device close * Returns integer: success (0) or error (< 0) * @suspend: Suspend the device (optional) * @vdev: vdpa device @@ -397,7 +407,7 @@ struct vdpa_config_ops { u32 (*get_vendor_id)(struct vdpa_device *vdev); u8 (*get_status)(struct vdpa_device *vdev); void (*set_status)(struct vdpa_device *vdev, u8 status); - int (*reset)(struct vdpa_device *vdev); + int (*reset)(struct vdpa_device *vdev, int state); int (*suspend)(struct vdpa_device *vdev); int (*resume)(struct vdpa_device *vdev); size_t (*get_config_size)(struct vdpa_device *vdev); @@ -525,14 +535,14 @@ static inline struct device *vdpa_get_dma_dev(struct vdpa_device *vdev) return vdev->dma_dev; }
-static inline int vdpa_reset(struct vdpa_device *vdev) +static inline int vdpa_reset(struct vdpa_device *vdev, int state) { const struct vdpa_config_ops *ops = vdev->config; int ret;
down_write(&vdev->cf_lock); vdev->features_valid = false; - ret = ops->reset(vdev); + ret = ops->reset(vdev, state); up_write(&vdev->cf_lock); return ret; }
From: jiangdongxu jiangdongxu1@huawei.com
virt inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I86ITO
----------------------------------------------------------------------
Add the vdpa_vmstate.h header file, which defines the structures that describe vdpa device state, to enable live migration between vdpa devices from different vendors.
Signed-off-by: jiangdongxu jiangdongxu1@huawei.com --- include/linux/vdpa_vmstate.h | 182 +++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 include/linux/vdpa_vmstate.h
diff --git a/include/linux/vdpa_vmstate.h b/include/linux/vdpa_vmstate.h new file mode 100644 index 000000000000..0e577a9605f1 --- /dev/null +++ b/include/linux/vdpa_vmstate.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2023-2023. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * Description: vdpa vmstate header file + * Author: jiangdongxu + * Create: 2023-12-3 + * Note: + * History: 2023-12-3: Create file + */ + +#ifndef VDPA_VMSTATE_H +#define VDPA_VMSTATE_H + +#include <linux/virtio_net.h> +#include <linux/virtio_scsi.h> + + +#define VIRTIO_MIG_STATE_TYPE_DEVICE 0 +#define VIRTIO_MIG_STATE_TYPE_VQ 1 +#define VIRTIO_MIG_STATE_TYPE_CONFIG 2 +#define VIRTIO_MIG_STATE_TYPE_FEATURE 3 +#define VIRTIO_MIG_STATE_TYPE_PLATFORM 4 +#define VIRTIO_MIG_STATE_TYPE_VENDOR 255 + +#define VIRTIO_MIG_DEVICE_T_COMMON 0 +#define VIRTIO_MIG_DEVICE_T_NET 1 +#define VIRTIO_MIG_DEVICE_T_BLK 2 +#define VIRTIO_MIG_DEVICE_T_SCSI 8 + +#define VIRTIO_MIG_CONFIG_T_NET 1 +#define VIRTIO_MIG_CONFIG_T_BLK 2 +#define VIRTIO_MIG_CONFIG_T_SCSI 8 + +#define MAC_LEN 6 + +struct virtio_mig_state_header { + le32 type; + le32 len; +}; + +struct virtio_mig_dev_common_data { + le32 vendor_id; + le32 device_id; + le32 device_features_l; + le32 device_features_h; + le32 driver_features_l; + le32 driver_features_h; + le32 status; + le32 generation; + le32 msix_en; +}; + +struct virtio_mig_dev_common_state { + struct virtio_mig_state_header hdr; + struct virtio_mig_dev_common_data data; +}; + +struct 
virtio_mig_vq_split_state { + le16 avail_index; + le16 used_index; +}; +struct virtio_mig_vq_packed_state { + le16 avail_wrapped : 1; + le16 avail_index : 15; + le16 used_wrapped : 1; + le16 used_index : 15; +}; + +struct virtio_mig_per_vq_data { + le32 qsize; + + u8 qenabled; + le16 msix_vector; + + le32 desc_l; + le32 desc_h; + le32 avail_l; + le32 avail_h; + le32 used_l; + le32 used_h; + union { + struct virtio_mig_vq_split_state split; + struct virtio_mig_vq_packed_state packed; + } version; +}; + +/* vq state */ +struct virtio_mig_vq_state { + struct virtio_mig_state_header hdr; + + le16 msix_config; + le16 valid_queues; + + le16 num_queues; + + struct virtio_mig_per_vq_data vq_state[]; +}; + +/* config space */ +struct virtio_mig_config_state { + struct virtio_mig_state_header hdr; + union { + struct virtio_net_config net; + struct virtio_blk_config blk; + struct virtio_scsi_config scsi; + } dev; +}; + +struct virtio_mig_cfg_blk_features { + +}; + +struct virtio_mig_cfg_scsi_features { + +}; + +struct virtio_mig_cfg_net_ctrl_guest_offloads { + struct virtio_mig_state_header hdr; + le64 offloads; + le64 reserved; +}; + +struct virtio_mig_cfg_net_ctrl_mq_vq_pairs { + struct virtio_mig_state_header hdr; + le16 cur_virtqueue_pairs; +}; + +struct virtio_mig_cfg_net_ctrl_mac_table { + struct virtio_mig_state_header hdr; + le16 num_unicast; + /* TODO: need to be implemented later */ + // u8 unicast_macs[][6]; + le16 num_multicast; + /* TODO: need to be implemented later */ + // u8 multicast_macs[][6]; +}; + +struct virtio_mig_cfg_net_ctrl_vlan { + struct virtio_mig_state_header hdr; + le32 vlans[128]; +}; + +struct virtio_mig_cfg_net_data { + le32 nfeatures; + struct virtio_mig_cfg_net_ctrl_guest_offloads offloads; + struct virtio_mig_cfg_net_ctrl_mq_vq_pairs mq_pairs; + struct virtio_mig_cfg_net_ctrl_mac_table mac_table; + struct virtio_mig_cfg_net_ctrl_vlan vlan_table; +}; + +struct virtio_mig_cfg_net_features { + struct virtio_mig_state_header hdr; + struct 
virtio_mig_cfg_net_data data; +}; + +/* feature */ +struct virtio_mig_feat_state { + union { + struct virtio_mig_cfg_net_features net; + struct virtio_mig_cfg_blk_features blk; + struct virtio_mig_cfg_scsi_features scsi; + }; +}; + +struct vdpa_mig_state { + struct virtio_mig_dev_common_state dev_state; + struct virtio_mig_config_state cfg_state; + struct virtio_mig_feat_state feat_state; + struct virtio_mig_vq_state vq_state; +}; + +#endif /* VDPA_VMSTATE_H */
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3802 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...
Feedback: The patch(es) that you sent to the kernel@openeuler.org mailing list have been successfully converted to a pull request! Pull request link: https://gitee.com/openeuler/kernel/pulls/3802 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...