From: Rong Wang <w_angrong@163.com>
kunpeng inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5CO9A CVE: NA
---------------------------------
As for pass-through devices, the hypervisor can't control the status of the device and can't track dirty memory DMA'd from the device during migration. The goal of this framework is to work together with hardware to accomplish the tasks above.
qemu
  | status control and dirty memory report
vfio
  | ops to hardware
hardware
Signed-off-by: Rong Wang <w_angrong@163.com> Signed-off-by: HuHua Li <18245010845@163.com> Signed-off-by: Ripeng Qiu <965412048@qq.com> --- drivers/vfio/pci/Makefile | 2 +- drivers/vfio/pci/vfio_pci.c | 54 +++ drivers/vfio/pci/vfio_pci_migration.c | 755 ++++++++++++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci_private.h | 14 +- drivers/vfio/vfio.c | 411 +++++++++++++++++- include/linux/vfio_pci_migration.h | 136 ++++++ 6 files changed, 1367 insertions(+), 5 deletions(-) create mode 100644 drivers/vfio/pci/vfio_pci_migration.c create mode 100644 include/linux/vfio_pci_migration.h
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 76d8ec0..80a777d 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -1,5 +1,5 @@
-vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o +vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio_pci_migration.o vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 51b791c..59d8280 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -30,6 +30,7 @@ #include <linux/vgaarb.h> #include <linux/nospec.h> #include <linux/sched/mm.h> +#include <linux/vfio_pci_migration.h>
#include "vfio_pci_private.h"
@@ -296,6 +297,14 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
vfio_pci_probe_mmaps(vdev);
+ if (vfio_dev_migration_is_supported(pdev)) { + ret = vfio_pci_migration_init(vdev); + if (ret) { + dev_warn(&vdev->pdev->dev, "Failed to init vfio_pci_migration\n"); + vfio_pci_disable(vdev); + return ret; + } + } return 0; }
@@ -392,6 +401,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) out: pci_disable_device(pdev);
+ vfio_pci_migration_exit(vdev); vfio_pci_try_bus_reset(vdev);
if (!disable_idle_d3) @@ -642,6 +652,41 @@ struct vfio_devices { int max_index; };
+static long vfio_pci_handle_log_buf_ctl(struct vfio_pci_device *vdev, + const unsigned long arg) +{ + struct vfio_log_buf_ctl *log_buf_ctl = NULL; + struct vfio_log_buf_info *log_buf_info = NULL; + struct vf_migration_log_info migration_log_info; + long ret = 0; + + log_buf_ctl = (struct vfio_log_buf_ctl *)arg; + log_buf_info = (struct vfio_log_buf_info *)log_buf_ctl->data; + + switch (log_buf_ctl->flags) { + case VFIO_DEVICE_LOG_BUF_FLAG_START: + migration_log_info.dom_uuid = log_buf_info->uuid; + migration_log_info.buffer_size = + log_buf_info->buffer_size; + migration_log_info.sge_num = log_buf_info->addrs_size; + migration_log_info.sge_len = log_buf_info->frag_size; + migration_log_info.sgevec = log_buf_info->sgevec; + ret = vfio_pci_device_log_start(vdev, + &migration_log_info); + break; + case VFIO_DEVICE_LOG_BUF_FLAG_STOP: + ret = vfio_pci_device_log_stop(vdev, + log_buf_info->uuid); + break; + case VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY: + ret = vfio_pci_device_log_status_query(vdev); + break; + default: + ret = -EINVAL; + break; + } + return ret; +} static long vfio_pci_ioctl(void *device_data, unsigned int cmd, unsigned long arg) { @@ -1142,6 +1187,8 @@ static long vfio_pci_ioctl(void *device_data,
return vfio_pci_ioeventfd(vdev, ioeventfd.offset, ioeventfd.data, count, ioeventfd.fd); + } else if (cmd == VFIO_DEVICE_LOG_BUF_CTL) { + return vfio_pci_handle_log_buf_ctl(vdev, arg); }
return -ENOTTY; @@ -1566,6 +1613,9 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_power_state(pdev, PCI_D3hot); }
+ if (vfio_dev_migration_is_supported(pdev)) + ret = vfio_pci_device_init(pdev); + return ret; }
@@ -1591,6 +1641,10 @@ static void vfio_pci_remove(struct pci_dev *pdev)
if (!disable_idle_d3) pci_set_power_state(pdev, PCI_D0); + + if (vfio_dev_migration_is_supported(pdev)) { + vfio_pci_device_uninit(pdev); + } }
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, diff --git a/drivers/vfio/pci/vfio_pci_migration.c b/drivers/vfio/pci/vfio_pci_migration.c new file mode 100644 index 0000000..f69cd13 --- /dev/null +++ b/drivers/vfio/pci/vfio_pci_migration.c @@ -0,0 +1,755 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Huawei Technologies Co., Ltd. All rights reserved. + */ + +#include <linux/module.h> +#include <linux/io.h> +#include <linux/pci.h> +#include <linux/uaccess.h> +#include <linux/vfio.h> +#include <linux/vfio_pci_migration.h> + +#include "vfio_pci_private.h" + +static LIST_HEAD(vfio_pci_mig_drivers_list); +static DEFINE_MUTEX(vfio_pci_mig_drivers_mutex); + +static void vfio_pci_add_mig_drv(struct vfio_pci_vendor_mig_driver *mig_drv) +{ + mutex_lock(&vfio_pci_mig_drivers_mutex); + atomic_set(&mig_drv->count, 1); + list_add_tail(&mig_drv->list, &vfio_pci_mig_drivers_list); + mutex_unlock(&vfio_pci_mig_drivers_mutex); +} + +static void vfio_pci_remove_mig_drv(struct vfio_pci_vendor_mig_driver *mig_drv) +{ + mutex_lock(&vfio_pci_mig_drivers_mutex); + list_del(&mig_drv->list); + mutex_unlock(&vfio_pci_mig_drivers_mutex); +} + +static struct vfio_pci_vendor_mig_driver * + vfio_pci_find_mig_drv(struct pci_dev *pdev, struct module *module) +{ + struct vfio_pci_vendor_mig_driver *mig_drv = NULL; + + mutex_lock(&vfio_pci_mig_drivers_mutex); + list_for_each_entry(mig_drv, &vfio_pci_mig_drivers_list, list) { + if (mig_drv->owner == module) { + if (mig_drv->bus_num == pdev->bus->number) + goto out; + } + } + mig_drv = NULL; +out: + mutex_unlock(&vfio_pci_mig_drivers_mutex); + return mig_drv; +} + +static struct vfio_pci_vendor_mig_driver * + vfio_pci_get_mig_driver(struct pci_dev *pdev) +{ + struct vfio_pci_vendor_mig_driver *mig_drv = NULL; + struct pci_dev *pf_dev = pci_physfn(pdev); + + mutex_lock(&vfio_pci_mig_drivers_mutex); + list_for_each_entry(mig_drv, &vfio_pci_mig_drivers_list, list) { + if (mig_drv->bus_num == 
pf_dev->bus->number) + goto out; + } + mig_drv = NULL; +out: + mutex_unlock(&vfio_pci_mig_drivers_mutex); + return mig_drv; +} + +bool vfio_dev_migration_is_supported(struct pci_dev *pdev) +{ + struct vfio_pci_vendor_mig_driver *mig_driver = NULL; + + mig_driver = vfio_pci_get_mig_driver(pdev); + if (!mig_driver || !mig_driver->dev_mig_ops) { + dev_warn(&pdev->dev, "unable to find a mig_drv module\n"); + return false; + } + + return true; +} + +int vfio_pci_device_log_start(struct vfio_pci_device *vdev, + struct vf_migration_log_info *log_info) +{ + struct vfio_pci_vendor_mig_driver *mig_driver; + + mig_driver = vfio_pci_get_mig_driver(vdev->pdev); + if (!mig_driver || !mig_driver->dev_mig_ops) { + dev_err(&vdev->pdev->dev, "unable to find a mig_drv module\n"); + return -EFAULT; + } + + if (!mig_driver->dev_mig_ops->log_start || + (mig_driver->dev_mig_ops->log_start(vdev->pdev, + log_info) != 0)) { + dev_err(&vdev->pdev->dev, "failed to set log start\n"); + return -EFAULT; + } + + return 0; +} + +int vfio_pci_device_log_stop(struct vfio_pci_device *vdev, uint32_t uuid) +{ + struct vfio_pci_vendor_mig_driver *mig_driver; + + mig_driver = vfio_pci_get_mig_driver(vdev->pdev); + if (!mig_driver || !mig_driver->dev_mig_ops) { + dev_err(&vdev->pdev->dev, "unable to find a mig_drv module\n"); + return -EFAULT; + } + + if (!mig_driver->dev_mig_ops->log_stop || + (mig_driver->dev_mig_ops->log_stop(vdev->pdev, uuid) != 0)) { + dev_err(&vdev->pdev->dev, "failed to set log stop\n"); + return -EFAULT; + } + + return 0; +} + +int vfio_pci_device_log_status_query(struct vfio_pci_device *vdev) +{ + struct vfio_pci_vendor_mig_driver *mig_driver; + + mig_driver = vfio_pci_get_mig_driver(vdev->pdev); + if (!mig_driver || !mig_driver->dev_mig_ops) { + dev_err(&vdev->pdev->dev, "unable to find a mig_drv module\n"); + return -EFAULT; + } + + if (!mig_driver->dev_mig_ops->get_log_status || + (mig_driver->dev_mig_ops->get_log_status(vdev->pdev) != 0)) { + dev_err(&vdev->pdev->dev, "failed 
to get log status\n"); + return -EFAULT; + } + + return 0; +} + +int vfio_pci_device_init(struct pci_dev *pdev) +{ + struct vfio_pci_vendor_mig_driver *mig_drv; + + mig_drv = vfio_pci_get_mig_driver(pdev); + if (!mig_drv || !mig_drv->dev_mig_ops) { + dev_err(&pdev->dev, "unable to find a mig_drv module\n"); + return -EFAULT; + } + + if (mig_drv->dev_mig_ops->init) + return mig_drv->dev_mig_ops->init(pdev); + + return -EFAULT; +} + +void vfio_pci_device_uninit(struct pci_dev *pdev) +{ + struct vfio_pci_vendor_mig_driver *mig_drv; + + mig_drv = vfio_pci_get_mig_driver(pdev); + if (!mig_drv || !mig_drv->dev_mig_ops) { + dev_err(&pdev->dev, "unable to find a mig_drv module\n"); + return; + } + + if (mig_drv->dev_mig_ops->uninit) + mig_drv->dev_mig_ops->uninit(pdev); +} + +static void vfio_pci_device_release(struct pci_dev *pdev, + struct vfio_pci_vendor_mig_driver *mig_drv) +{ + if (mig_drv->dev_mig_ops->release) + mig_drv->dev_mig_ops->release(pdev); +} + +static int vfio_pci_device_get_info(struct pci_dev *pdev, + struct vfio_device_migration_info *mig_info, + struct vfio_pci_vendor_mig_driver *mig_drv) +{ + if (mig_drv->dev_mig_ops->get_info) + return mig_drv->dev_mig_ops->get_info(pdev, mig_info); + return -EFAULT; +} + +static int vfio_pci_device_enable(struct pci_dev *pdev, + struct vfio_pci_vendor_mig_driver *mig_drv) +{ + if (!mig_drv->dev_mig_ops->enable || + (mig_drv->dev_mig_ops->enable(pdev) != 0)) { + return -EINVAL; + } + + return 0; +} + +static int vfio_pci_device_disable(struct pci_dev *pdev, + struct vfio_pci_vendor_mig_driver *mig_drv) +{ + if (!mig_drv->dev_mig_ops->disable || + (mig_drv->dev_mig_ops->disable(pdev) != 0)) + return -EINVAL; + + return 0; +} + +static int vfio_pci_device_pre_enable(struct pci_dev *pdev, + struct vfio_pci_vendor_mig_driver *mig_drv) +{ + if (!mig_drv->dev_mig_ops->pre_enable || + (mig_drv->dev_mig_ops->pre_enable(pdev) != 0)) + return -EINVAL; + + return 0; +} + +static int vfio_pci_device_state_save(struct pci_dev 
*pdev, + struct vfio_pci_migration_data *data) +{ + struct vfio_device_migration_info *mig_info = data->mig_ctl; + struct vfio_pci_vendor_mig_driver *mig_drv = data->mig_driver; + void *base = (void *)mig_info; + int ret = 0; + + if ((mig_info->device_state & VFIO_DEVICE_STATE_RUNNING) != 0) { + ret = vfio_pci_device_disable(pdev, mig_drv); + if (ret) { + dev_err(&pdev->dev, "failed to stop VF function!\n"); + return ret; + } + mig_info->device_state &= ~VFIO_DEVICE_STATE_RUNNING; + } + + if (mig_drv->dev_mig_ops && mig_drv->dev_mig_ops->save) { + ret = mig_drv->dev_mig_ops->save(pdev, base, + mig_info->data_offset, data->state_size); + if (ret) { + dev_err(&pdev->dev, "failed to save device state!\n"); + return -EINVAL; + } + } else { + return -EFAULT; + } + + mig_info->data_size = data->state_size; + mig_info->pending_bytes = mig_info->data_size; + return ret; +} + +static int vfio_pci_device_state_restore(struct vfio_pci_migration_data *data) +{ + struct vfio_device_migration_info *mig_info = data->mig_ctl; + struct vfio_pci_vendor_mig_driver *mig_drv = data->mig_driver; + struct pci_dev *pdev = data->vf_dev; + void *base = (void *)mig_info; + int ret; + + if (mig_drv->dev_mig_ops && mig_drv->dev_mig_ops->restore) { + ret = mig_drv->dev_mig_ops->restore(pdev, base, + mig_info->data_offset, mig_info->data_size); + if (ret) { + dev_err(&pdev->dev, "failed to restore device state!\n"); + return -EINVAL; + } + return 0; + } + + return -EFAULT; +} + +static int vfio_pci_set_device_state(struct vfio_pci_migration_data *data, + u32 state) +{ + struct vfio_device_migration_info *mig_ctl = data->mig_ctl; + struct vfio_pci_vendor_mig_driver *mig_drv = data->mig_driver; + struct pci_dev *pdev = data->vf_dev; + int ret = 0; + + if (state == mig_ctl->device_state) + return 0; + + if (!mig_drv->dev_mig_ops) + return -EINVAL; + + switch (state) { + case VFIO_DEVICE_STATE_RUNNING: + if (!(mig_ctl->device_state & + VFIO_DEVICE_STATE_RUNNING)) + ret = vfio_pci_device_enable(pdev, 
mig_drv); + break; + case VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING: + /* + * (pre-copy) - device should start logging data. + */ + ret = 0; + break; + case VFIO_DEVICE_STATE_SAVING: + /* stop the vf function, save state */ + ret = vfio_pci_device_state_save(pdev, data); + break; + case VFIO_DEVICE_STATE_STOP: + if (mig_ctl->device_state & VFIO_DEVICE_STATE_RUNNING) + ret = vfio_pci_device_disable(pdev, mig_drv); + break; + case VFIO_DEVICE_STATE_RESUMING: + ret = vfio_pci_device_pre_enable(pdev, mig_drv); + break; + default: + ret = -EFAULT; + break; + } + + if (ret) + return ret; + + mig_ctl->device_state = state; + return 0; +} + +static ssize_t vfio_pci_handle_mig_dev_state( + struct vfio_pci_migration_data *data, + char __user *buf, size_t count, bool iswrite) +{ + struct vfio_device_migration_info *mig_ctl = data->mig_ctl; + u32 device_state; + int ret; + + if (count != sizeof(device_state)) + return -EINVAL; + + if (iswrite) { + if (copy_from_user(&device_state, buf, count)) + return -EFAULT; + + ret = vfio_pci_set_device_state(data, device_state); + if (ret) + return ret; + } else { + if (copy_to_user(buf, &mig_ctl->device_state, count)) + return -EFAULT; + } + + return count; +} + +static ssize_t vfio_pci_handle_mig_pending_bytes( + struct vfio_device_migration_info *mig_info, + char __user *buf, size_t count, bool iswrite) +{ + u64 pending_bytes; + + if (count != sizeof(pending_bytes) || iswrite) + return -EINVAL; + + if (mig_info->device_state == + (VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING)) { + /* In pre-copy state we have no data to return for now, + * return 0 pending bytes + */ + pending_bytes = 0; + } else { + pending_bytes = mig_info->pending_bytes; + } + + if (copy_to_user(buf, &pending_bytes, count)) + return -EFAULT; + + return count; +} + +static ssize_t vfio_pci_handle_mig_data_offset( + struct vfio_device_migration_info *mig_info, + char __user *buf, size_t count, bool iswrite) +{ + u64 data_offset = 
mig_info->data_offset; + + if (count != sizeof(data_offset) || iswrite) + return -EINVAL; + + if (copy_to_user(buf, &data_offset, count)) + return -EFAULT; + + return count; +} + +static ssize_t vfio_pci_handle_mig_data_size( + struct vfio_device_migration_info *mig_info, + char __user *buf, size_t count, bool iswrite) +{ + u64 data_size; + + if (count != sizeof(data_size)) + return -EINVAL; + + if (iswrite) { + /* data_size is writable only during resuming state */ + if (mig_info->device_state != VFIO_DEVICE_STATE_RESUMING) + return -EINVAL; + + if (copy_from_user(&data_size, buf, sizeof(data_size))) + return -EFAULT; + + mig_info->data_size = data_size; + } else { + if (mig_info->device_state != VFIO_DEVICE_STATE_SAVING) + return -EINVAL; + + if (copy_to_user(buf, &mig_info->data_size, + sizeof(data_size))) + return -EFAULT; + } + + return count; +} + +static ssize_t vfio_pci_handle_mig_dev_cmd(struct vfio_pci_migration_data *data, + char __user *buf, size_t count, bool iswrite) +{ + struct vfio_pci_vendor_mig_driver *mig_drv = data->mig_driver; + struct pci_dev *pdev = data->vf_dev; + u32 device_cmd; + int ret = -EFAULT; + + if (count != sizeof(device_cmd) || !iswrite || !mig_drv->dev_mig_ops) + return -EINVAL; + + if (copy_from_user(&device_cmd, buf, count)) + return -EFAULT; + + switch (device_cmd) { + case VFIO_DEVICE_MIGRATION_CANCEL: + if (mig_drv->dev_mig_ops->cancel) + ret = mig_drv->dev_mig_ops->cancel(pdev); + break; + default: + dev_err(&pdev->dev, "cmd is invaild\n"); + return -EINVAL; + } + + if (ret != 0) + return ret; + + return count; +} + +static ssize_t vfio_pci_handle_mig_drv_version( + struct vfio_device_migration_info *mig_info, + char __user *buf, size_t count, bool iswrite) +{ + u32 version_id = mig_info->version_id; + + if (count != sizeof(version_id) || iswrite) + return -EINVAL; + + if (copy_to_user(buf, &version_id, count)) + return -EFAULT; + + return count; +} + +static ssize_t vfio_pci_handle_mig_data_rw( + struct 
vfio_pci_migration_data *data, + char __user *buf, size_t count, u64 pos, bool iswrite) +{ + struct vfio_device_migration_info *mig_ctl = data->mig_ctl; + void *data_addr = data->vf_data; + + if (count == 0) { + dev_err(&data->vf_dev->dev, "qemu operation data size error!\n"); + return -EINVAL; + } + + data_addr += pos - mig_ctl->data_offset; + if (iswrite) { + if (copy_from_user(data_addr, buf, count)) + return -EFAULT; + + mig_ctl->pending_bytes += count; + if (mig_ctl->pending_bytes > data->state_size) + return -EINVAL; + } else { + if (copy_to_user(buf, data_addr, count)) + return -EFAULT; + + if (mig_ctl->pending_bytes < count) + return -EINVAL; + + mig_ctl->pending_bytes -= count; + } + + return count; +} + +static ssize_t vfio_pci_dev_migrn_rw(struct vfio_pci_device *vdev, + char __user *buf, size_t count, loff_t *ppos, bool iswrite) +{ + unsigned int index = + VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; + struct vfio_pci_migration_data *data = + (struct vfio_pci_migration_data *)vdev->region[index].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + struct vfio_device_migration_info *mig_ctl = data->mig_ctl; + int ret; + + if (pos >= vdev->region[index].size) + return -EINVAL; + + count = min(count, (size_t)(vdev->region[index].size - pos)); + if (pos >= VFIO_MIGRATION_REGION_DATA_OFFSET) + return vfio_pci_handle_mig_data_rw(data, + buf, count, pos, iswrite); + + switch (pos) { + case VFIO_DEVICE_MIGRATION_OFFSET(device_state): + ret = vfio_pci_handle_mig_dev_state(data, + buf, count, iswrite); + break; + case VFIO_DEVICE_MIGRATION_OFFSET(pending_bytes): + ret = vfio_pci_handle_mig_pending_bytes(mig_ctl, + buf, count, iswrite); + break; + case VFIO_DEVICE_MIGRATION_OFFSET(data_offset): + ret = vfio_pci_handle_mig_data_offset(mig_ctl, + buf, count, iswrite); + break; + case VFIO_DEVICE_MIGRATION_OFFSET(data_size): + ret = vfio_pci_handle_mig_data_size(mig_ctl, + buf, count, iswrite); + break; + case VFIO_DEVICE_MIGRATION_OFFSET(device_cmd): + 
ret = vfio_pci_handle_mig_dev_cmd(data, + buf, count, iswrite); + break; + case VFIO_DEVICE_MIGRATION_OFFSET(version_id): + ret = vfio_pci_handle_mig_drv_version(mig_ctl, + buf, count, iswrite); + break; + default: + dev_err(&vdev->pdev->dev, "invalid pos offset\n"); + ret = -EFAULT; + break; + } + + if (mig_ctl->device_state == VFIO_DEVICE_STATE_RESUMING && + mig_ctl->pending_bytes == data->state_size && + mig_ctl->data_size == data->state_size) { + if (vfio_pci_device_state_restore(data) != 0) { + dev_err(&vdev->pdev->dev, "Failed to restore device state!\n"); + return -EFAULT; + } + mig_ctl->pending_bytes = 0; + mig_ctl->data_size = 0; + } + + return ret; +} + +static void vfio_pci_dev_migrn_release(struct vfio_pci_device *vdev, + struct vfio_pci_region *region) +{ + struct vfio_pci_migration_data *data = region->data; + + if (data) { + kfree(data->mig_ctl); + kfree(data); + } +} + +static const struct vfio_pci_regops vfio_pci_migration_regops = { + .rw = vfio_pci_dev_migrn_rw, + .release = vfio_pci_dev_migrn_release, +}; + +static int vfio_pci_migration_info_init(struct pci_dev *pdev, + struct vfio_device_migration_info *mig_info, + struct vfio_pci_vendor_mig_driver *mig_drv) +{ + int ret; + + ret = vfio_pci_device_get_info(pdev, mig_info, mig_drv); + if (ret) { + dev_err(&pdev->dev, "failed to get device info\n"); + return ret; + } + + if (mig_info->data_size > VFIO_MIGRATION_BUFFER_MAX_SIZE) { + dev_err(&pdev->dev, "mig_info->data_size %llu is invalid\n", + mig_info->data_size); + return -EINVAL; + } + + mig_info->data_offset = VFIO_MIGRATION_REGION_DATA_OFFSET; + return ret; +} + +static int vfio_device_mig_data_init(struct vfio_pci_device *vdev, + struct vfio_pci_migration_data *data) +{ + struct vfio_device_migration_info *mig_ctl; + u64 mig_offset; + int ret; + + mig_ctl = kzalloc(sizeof(*mig_ctl), GFP_KERNEL); + if (!mig_ctl) + return -ENOMEM; + + ret = vfio_pci_migration_info_init(vdev->pdev, mig_ctl, + data->mig_driver); + if (ret) { + 
dev_err(&vdev->pdev->dev, "get device info error!\n"); + goto err; + } + + mig_offset = sizeof(struct vfio_device_migration_info); + data->state_size = mig_ctl->data_size; + data->mig_ctl = krealloc(mig_ctl, mig_offset + data->state_size, + GFP_KERNEL); + if (!data->mig_ctl) { + ret = -ENOMEM; + goto err; + } + + data->vf_data = (void *)((char *)data->mig_ctl + mig_offset); + memset(data->vf_data, 0, data->state_size); + data->mig_ctl->data_size = 0; + + ret = vfio_pci_register_dev_region(vdev, VFIO_REGION_TYPE_MIGRATION, + VFIO_REGION_SUBTYPE_MIGRATION, + &vfio_pci_migration_regops, mig_offset + data->state_size, + VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE, data); + if (ret) { + kfree(data->mig_ctl); + return ret; + } + + return 0; +err: + kfree(mig_ctl); + return ret; +} + +int vfio_pci_migration_init(struct vfio_pci_device *vdev) +{ + struct vfio_pci_vendor_mig_driver *mig_driver = NULL; + struct vfio_pci_migration_data *data = NULL; + struct pci_dev *pdev = vdev->pdev; + int ret; + + mig_driver = vfio_pci_get_mig_driver(pdev); + if (!mig_driver || !mig_driver->dev_mig_ops) { + dev_err(&pdev->dev, "unable to find a mig_driver module\n"); + return -EINVAL; + } + + if (!try_module_get(mig_driver->owner)) { + pr_err("module %s is not live\n", mig_driver->owner->name); + return -ENODEV; + } + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) { + module_put(mig_driver->owner); + return -ENOMEM; + } + + data->mig_driver = mig_driver; + data->vf_dev = pdev; + + ret = vfio_device_mig_data_init(vdev, data); + if (ret) { + dev_err(&pdev->dev, "failed to init vfio device migration data!\n"); + goto err; + } + + return ret; +err: + kfree(data); + module_put(mig_driver->owner); + return ret; +} + +void vfio_pci_migration_exit(struct vfio_pci_device *vdev) +{ + struct vfio_pci_vendor_mig_driver *mig_driver = NULL; + + mig_driver = vfio_pci_get_mig_driver(vdev->pdev); + if (!mig_driver || !mig_driver->dev_mig_ops) { + dev_warn(&vdev->pdev->dev, 
"mig_driver is not found\n"); + return; + } + + if (module_refcount(mig_driver->owner) > 0) { + vfio_pci_device_release(vdev->pdev, mig_driver); + module_put(mig_driver->owner); + } +} + +int vfio_pci_register_migration_ops(struct vfio_device_migration_ops *ops, + struct module *mod, struct pci_dev *pdev) +{ + struct vfio_pci_vendor_mig_driver *mig_driver = NULL; + + if (!ops || !mod || !pdev) + return -EINVAL; + + mig_driver = vfio_pci_find_mig_drv(pdev, mod); + if (mig_driver) { + pr_info("%s migration ops has already been registered\n", + mod->name); + atomic_add(1, &mig_driver->count); + return 0; + } + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mig_driver = kzalloc(sizeof(*mig_driver), GFP_KERNEL); + if (!mig_driver) { + module_put(THIS_MODULE); + return -ENOMEM; + } + + mig_driver->pdev = pdev; + mig_driver->bus_num = pdev->bus->number; + mig_driver->owner = mod; + mig_driver->dev_mig_ops = ops; + + vfio_pci_add_mig_drv(mig_driver); + + return 0; +} +EXPORT_SYMBOL_GPL(vfio_pci_register_migration_ops); + +void vfio_pci_unregister_migration_ops(struct module *mod, struct pci_dev *pdev) +{ + struct vfio_pci_vendor_mig_driver *mig_driver = NULL; + + if (!mod || !pdev) + return; + + mig_driver = vfio_pci_find_mig_drv(pdev, mod); + if (!mig_driver) { + pr_err("mig_driver is not found\n"); + return; + } + + if (atomic_sub_and_test(1, &mig_driver->count)) { + vfio_pci_remove_mig_drv(mig_driver); + kfree(mig_driver); + module_put(THIS_MODULE); + pr_info("%s succeed to unregister migration ops\n", + THIS_MODULE->name); + } +} +EXPORT_SYMBOL_GPL(vfio_pci_unregister_migration_ops); diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 17d2bae..03af269 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -15,6 +15,7 @@ #include <linux/pci.h> #include <linux/irqbypass.h> #include <linux/types.h> +#include <linux/vfio_pci_migration.h>
#ifndef VFIO_PCI_PRIVATE_H #define VFIO_PCI_PRIVATE_H @@ -55,7 +56,7 @@ struct vfio_pci_irq_ctx { struct vfio_pci_region;
struct vfio_pci_regops { - size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf, + ssize_t (*rw)(struct vfio_pci_device *vdev, char __user *buf, size_t count, loff_t *ppos, bool iswrite); void (*release)(struct vfio_pci_device *vdev, struct vfio_pci_region *region); @@ -173,4 +174,15 @@ static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev) return -ENODEV; } #endif + +extern bool vfio_dev_migration_is_supported(struct pci_dev *pdev); +extern int vfio_pci_migration_init(struct vfio_pci_device *vdev); +extern void vfio_pci_migration_exit(struct vfio_pci_device *vdev); +extern int vfio_pci_device_log_start(struct vfio_pci_device *vdev, + struct vf_migration_log_info *log_info); +extern int vfio_pci_device_log_stop(struct vfio_pci_device *vdev, + uint32_t uuid); +extern int vfio_pci_device_log_status_query(struct vfio_pci_device *vdev); +extern int vfio_pci_device_init(struct pci_dev *pdev); +extern void vfio_pci_device_uninit(struct pci_dev *pdev); #endif /* VFIO_PCI_PRIVATE_H */ diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 7a386fb..35f2a29 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -33,6 +33,7 @@ #include <linux/string.h> #include <linux/uaccess.h> #include <linux/vfio.h> +#include <linux/vfio_pci_migration.h> #include <linux/wait.h> #include <linux/sched/signal.h>
@@ -40,6 +41,9 @@ #define DRIVER_AUTHOR "Alex Williamson alex.williamson@redhat.com" #define DRIVER_DESC "VFIO - User Level meta-driver"
+#define LOG_BUF_FRAG_SIZE (2 * 1024 * 1024) // fix to 2M +#define LOG_BUF_MAX_ADDRS_SIZE 128 // max vm ram size is 1T + static struct vfio { struct class *class; struct list_head iommu_drivers_list; @@ -57,6 +61,14 @@ struct vfio_iommu_driver { struct list_head vfio_next; };
+struct vfio_log_buf { + struct vfio_log_buf_info info; + int fd; + int buffer_state; + int device_state; + unsigned long *cpu_addrs; +}; + struct vfio_container { struct kref kref; struct list_head group_list; @@ -64,6 +76,7 @@ struct vfio_container { struct vfio_iommu_driver *iommu_driver; void *iommu_data; bool noiommu; + struct vfio_log_buf log_buf; };
struct vfio_unbound_dev { @@ -1158,8 +1171,398 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container, return ret; }
+static long vfio_dispatch_cmd_to_devices(const struct vfio_container *container, + unsigned int cmd, unsigned long arg) +{ + struct vfio_group *group = NULL; + struct vfio_device *device = NULL; + long ret = -ENXIO; + + list_for_each_entry(group, &container->group_list, container_next) { + list_for_each_entry(device, &group->device_list, group_next) { + ret = device->ops->ioctl(device->device_data, cmd, arg); + if (ret) { + pr_err("dispatch cmd to devices failed\n"); + return ret; + } + } + } + return ret; +} + +static long vfio_log_buf_start(struct vfio_container *container) +{ + struct vfio_log_buf_ctl log_buf_ctl; + long ret; + + log_buf_ctl.argsz = sizeof(struct vfio_log_buf_info); + log_buf_ctl.flags = VFIO_DEVICE_LOG_BUF_FLAG_START; + log_buf_ctl.data = (void *)&container->log_buf.info; + ret = vfio_dispatch_cmd_to_devices(container, VFIO_DEVICE_LOG_BUF_CTL, + (unsigned long)&log_buf_ctl); + if (ret) + return ret; + + container->log_buf.device_state = 1; + return 0; +} + +static long vfio_log_buf_stop(struct vfio_container *container) +{ + struct vfio_log_buf_ctl log_buf_ctl; + long ret; + + if (container->log_buf.device_state == 0) { + pr_warn("device already stopped\n"); + return 0; + } + + log_buf_ctl.argsz = sizeof(struct vfio_log_buf_info); + log_buf_ctl.flags = VFIO_DEVICE_LOG_BUF_FLAG_STOP; + log_buf_ctl.data = (void *)&container->log_buf.info; + ret = vfio_dispatch_cmd_to_devices(container, VFIO_DEVICE_LOG_BUF_CTL, + (unsigned long)&log_buf_ctl); + if (ret) + return ret; + + container->log_buf.device_state = 0; + return 0; +} + +static long vfio_log_buf_query(struct vfio_container *container) +{ + struct vfio_log_buf_ctl log_buf_ctl; + + log_buf_ctl.argsz = sizeof(struct vfio_log_buf_info); + log_buf_ctl.flags = VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY; + log_buf_ctl.data = (void *)&container->log_buf.info; + + return vfio_dispatch_cmd_to_devices(container, + VFIO_DEVICE_LOG_BUF_CTL, (unsigned long)&log_buf_ctl); +} + +static int 
vfio_log_buf_fops_mmap(struct file *filep,
+		struct vm_area_struct *vma)
+{
+	struct vfio_container *container = filep->private_data;
+	struct vfio_log_buf *log_buf = &container->log_buf;
+	struct vfio_group *group = NULL;
+	struct vfio_device *device = NULL;
+	struct device *dev = NULL;
+	unsigned long pgs_per_frag;
+	unsigned long frag_index;
+
+	if (!log_buf->cpu_addrs) {
+		pr_err("mmap before setup, please setup log buf first\n");
+		return -EINVAL;
+	}
+
+	/* fragments are mapped whole, so they must be page-sized multiples */
+	if (log_buf->info.frag_size < PAGE_SIZE ||
+	    !IS_ALIGNED(log_buf->info.frag_size, PAGE_SIZE)) {
+		pr_err("mmap frag size should not less than page size!\n");
+		return -EINVAL;
+	}
+
+	pgs_per_frag = log_buf->info.frag_size >> PAGE_SHIFT;
+
+	/* the mapping must start exactly on a fragment boundary */
+	if (vma->vm_pgoff % pgs_per_frag) {
+		pr_err("mmap offset not aligned to frag boundary!\n");
+		return -EINVAL;
+	}
+
+	frag_index = vma->vm_pgoff / pgs_per_frag;
+	if (frag_index >= log_buf->info.addrs_size) {
+		pr_err("mmap offset out of range!\n");
+		return -EINVAL;
+	}
+
+	if (vma->vm_end - vma->vm_start != log_buf->info.frag_size) {
+		pr_err("mmap size error, should be aligned with frag size!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * The buffers were allocated against the first device in the
+	 * container (see vfio_get_dev()); look it up the same way here.
+	 * NOTE(review): group_list is walked without container->group_lock,
+	 * matching vfio_get_dev() — confirm callers serialize against
+	 * group add/remove.
+	 */
+	list_for_each_entry(group, &container->group_list, container_next) {
+		list_for_each_entry(device, &group->device_list, group_next) {
+			dev = device->dev;
+			break;
+		}
+		if (dev)
+			break;
+	}
+	if (!dev)
+		return -ENODEV;
+
+	/*
+	 * Memory from dma_alloc_coherent() may live outside the kernel
+	 * linear mapping, so virt_to_phys()+remap_pfn_range() is not a
+	 * valid way to expose it; dma_mmap_coherent() is the portable
+	 * interface for mapping coherent DMA memory to user space.
+	 */
+	vma->vm_pgoff = 0;
+	return dma_mmap_coherent(dev, vma,
+			(void *)log_buf->cpu_addrs[frag_index],
+			log_buf->info.sgevec[frag_index].addr,
+			log_buf->info.frag_size);
+}
+
+/* Return the struct device of the first device in the container, or NULL. */
+static struct device *vfio_get_dev(struct vfio_container *container)
+{
+	struct vfio_group *group = NULL;
+	struct vfio_device *device = NULL;
+
+	list_for_each_entry(group, &container->group_list, container_next) {
+		list_for_each_entry(device, &group->device_list, group_next)
+			return device->dev;
+	}
+	return NULL;
+}
+
+/*
+ * Free every fragment that was successfully allocated.  Safe to call on a
+ * partially set-up log_buf: it skips NULL slots, and a DMA address of zero
+ * is intentionally NOT used as a "not allocated" marker (zero can be a
+ * valid dma_addr_t) — the CPU pointer alone decides.
+ */
+static void vfio_log_buf_release_dma(struct device *dev,
+		struct vfio_log_buf *log_buf)
+{
+	int i;
+
+	if (!log_buf->cpu_addrs || !log_buf->info.sgevec)
+		return;
+
+	for (i = 0; i < log_buf->info.addrs_size; i++) {
+		if (log_buf->cpu_addrs[i] == 0)
+			continue;
+		dma_free_coherent(dev, log_buf->info.frag_size,
+				(void *)log_buf->cpu_addrs[i],
+				log_buf->info.sgevec[i].addr);
+		log_buf->cpu_addrs[i] = 0;
+		log_buf->info.sgevec[i].addr = 0;
+	}
+}
+
+/*
+ * Allocate one DMA-coherent fragment per slot.  On failure the caller must
+ * release the fragments allocated so far via vfio_log_buf_release_dma();
+ * for that to work the caller has to publish info.addrs_size/frag_size in
+ * log_buf->info BEFORE calling this function.
+ */
+static long vfio_log_buf_alloc_dma(struct vfio_log_buf_info *info,
+		struct vfio_log_buf *log_buf, struct device *dev)
+{
+	int i;
+
+	for (i = 0; i < info->addrs_size; i++) {
+		log_buf->cpu_addrs[i] = (unsigned long)dma_alloc_coherent(dev,
+				info->frag_size, &log_buf->info.sgevec[i].addr,
+				GFP_KERNEL);
+		/*
+		 * Only the CPU pointer indicates failure; a DMA address of
+		 * zero may be perfectly valid on some platforms.
+		 */
+		if (log_buf->cpu_addrs[i] == 0)
+			return -ENOMEM;
+		log_buf->info.sgevec[i].len = info->frag_size;
+	}
+	return 0;
+}
+
+/* Allocate the per-fragment bookkeeping arrays (sge vector + CPU addrs). */
+static long vfio_log_buf_alloc_addrs(struct vfio_log_buf_info *info,
+		struct vfio_log_buf *log_buf)
+{
+	log_buf->info.sgevec = kcalloc(info->addrs_size,
+			sizeof(struct vfio_log_buf_sge), GFP_KERNEL);
+	if (!log_buf->info.sgevec)
+		return -ENOMEM;
+
+	log_buf->cpu_addrs = kcalloc(info->addrs_size,
+			sizeof(unsigned long), GFP_KERNEL);
+	if (!log_buf->cpu_addrs) {
+		kfree(log_buf->info.sgevec);
+		log_buf->info.sgevec = NULL;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/* Sanity-check the user-supplied geometry before allocating anything. */
+static long vfio_log_buf_info_valid(struct vfio_log_buf_info *info)
+{
+	if (info->addrs_size > LOG_BUF_MAX_ADDRS_SIZE ||
+	    info->addrs_size == 0) {
+		pr_err("can`t support vm ram size larger than 1T or equal to 0\n");
+		return -EINVAL;
+	}
+	if (info->frag_size != LOG_BUF_FRAG_SIZE) {
+		pr_err("only support %d frag size\n", LOG_BUF_FRAG_SIZE);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * VFIO_DEVICE_LOG_BUF_FLAG_SETUP: validate the user geometry, allocate the
+ * bookkeeping arrays and the DMA-coherent fragments.  Idempotent: a second
+ * call while already set up is a no-op returning 0.
+ */
+static long vfio_log_buf_setup(struct vfio_container *container,
+		unsigned long data)
+{
+	struct vfio_log_buf_info info;
+	struct vfio_log_buf *log_buf = &container->log_buf;
+	struct device *dev = NULL;
+	long ret;
+
+	if (log_buf->info.sgevec) {
+		pr_warn("log buf already setup\n");
+		return 0;
+	}
+
+	if (copy_from_user(&info, (void __user *)data,
+			sizeof(struct vfio_log_buf_info)))
+		return -EFAULT;
+
+	ret = vfio_log_buf_info_valid(&info);
+	if (ret)
+		return ret;
+
+	ret = vfio_log_buf_alloc_addrs(&info, log_buf);
+	if (ret)
+		return ret;
+
+	dev = vfio_get_dev(container);
+	if (!dev) {
+		pr_err("can`t get dev\n");
+		ret = -ENODEV;
+		goto err_free_addrs;
+	}
+
+	/*
+	 * Publish the geometry before allocating: the error path below and
+	 * any later release iterate over info.addrs_size, which would still
+	 * be zero here otherwise — leaking every fragment allocated before
+	 * the failure.
+	 */
+	log_buf->info.uuid = info.uuid;
+	log_buf->info.buffer_size = info.buffer_size;
+	log_buf->info.frag_size = info.frag_size;
+	log_buf->info.addrs_size = info.addrs_size;
+
+	ret = vfio_log_buf_alloc_dma(&info, log_buf, dev);
+	if (ret)
+		goto err_free_dma;
+
+	log_buf->buffer_state = 1;
+	return 0;
+
+err_free_dma:
+	vfio_log_buf_release_dma(dev, log_buf);
+err_free_addrs:
+	kfree(log_buf->cpu_addrs);
+	log_buf->cpu_addrs = NULL;
+	kfree(log_buf->info.sgevec);
+	memset(&log_buf->info, 0, sizeof(log_buf->info));
+	return ret;
+}
+
+/* Free the DMA fragments and bookkeeping arrays; no-op if never set up. */
+static long vfio_log_buf_release_buffer(struct vfio_container *container)
+{
+	struct vfio_log_buf *log_buf = &container->log_buf;
+	struct device *dev = NULL;
+
+	if (log_buf->buffer_state == 0) {
+		pr_warn("buffer already released\n");
+		return 0;
+	}
+
+	dev = vfio_get_dev(container);
+	if (!dev) {
+		pr_err("can`t get dev\n");
+		return -ENODEV;
+	}
+
+	vfio_log_buf_release_dma(dev, log_buf);
+
+	kfree(log_buf->cpu_addrs);
+	log_buf->cpu_addrs = NULL;
+
+	kfree(log_buf->info.sgevec);
+	log_buf->info.sgevec = NULL;
+
+	log_buf->buffer_state = 0;
+	return 0;
+}
+
+/* ->release for the log-buf fd: stop logging, free buffers, drop the
+ * container reference taken in vfio_get_log_buf_fd(). */
+static int vfio_log_buf_release(struct inode *inode, struct file *filep)
+{
+	struct vfio_container *container = filep->private_data;
+
+	vfio_log_buf_stop(container);
+	vfio_log_buf_release_buffer(container);
+	memset(&container->log_buf, 0, sizeof(struct vfio_log_buf));
+	vfio_container_put(container);
+	return 0;
+}
+
+/*
+ * Dispatch VFIO_LOG_BUF_CTL sub-commands.  Although the flags are defined
+ * as bits, they are treated as mutually exclusive commands here.
+ */
+static long vfio_ioctl_handle_log_buf_ctl(struct vfio_container *container,
+		unsigned long arg)
+{
+	struct vfio_log_buf_ctl log_buf_ctl;
+	long ret = 0;
+
+	if (copy_from_user(&log_buf_ctl, (void __user *)arg,
+			sizeof(struct vfio_log_buf_ctl)))
+		return -EFAULT;
+
+	switch (log_buf_ctl.flags) {
+	case VFIO_DEVICE_LOG_BUF_FLAG_SETUP:
+		ret = vfio_log_buf_setup(container,
+				(unsigned long)log_buf_ctl.data);
+		break;
+	case VFIO_DEVICE_LOG_BUF_FLAG_RELEASE:
+		ret = vfio_log_buf_release_buffer(container);
+		break;
+	case VFIO_DEVICE_LOG_BUF_FLAG_START:
+		ret = vfio_log_buf_start(container);
+		break;
+	case VFIO_DEVICE_LOG_BUF_FLAG_STOP:
+		ret = vfio_log_buf_stop(container);
+		break;
+	case VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY:
+		ret = vfio_log_buf_query(container);
+		break;
+	default:
+		pr_err("log buf control flag incorrect\n");
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
+
+static long vfio_log_buf_fops_unl_ioctl(struct file *filep,
+		unsigned int cmd, unsigned long arg)
+{
+	struct vfio_container *container = filep->private_data;
+	long ret = -EINVAL;
+
+	switch (cmd) {
+	case VFIO_LOG_BUF_CTL:
+		ret = vfio_ioctl_handle_log_buf_ctl(container, arg);
+		break;
+	default:
+		pr_err("log buf control cmd incorrect\n");
+		break;
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long vfio_log_buf_fops_compat_ioctl(struct file *filep,
+		unsigned int cmd, unsigned long arg)
+{
+	arg = (unsigned long)compat_ptr(arg);
+	return vfio_log_buf_fops_unl_ioctl(filep, cmd, arg);
+}
+#endif /* CONFIG_COMPAT */
+
+static const struct file_operations vfio_log_buf_fops = {
+	.owner		= THIS_MODULE,
+	.mmap		= vfio_log_buf_fops_mmap,
+	.unlocked_ioctl	= vfio_log_buf_fops_unl_ioctl,
+	.release	= vfio_log_buf_release,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= vfio_log_buf_fops_compat_ioctl,
+#endif
+};
+
+/*
+ * Create (or return the cached) anon-inode fd for log-buf control.  The
+ * file stores a pointer to the container, so it takes its own container
+ * reference, dropped in vfio_log_buf_release().
+ * NOTE(review): fd 0 is indistinguishable from "not created" with the
+ * `> 0` check; harmless in practice since fd 0 is normally stdin, but
+ * worth confirming.
+ */
+static int vfio_get_log_buf_fd(struct vfio_container *container,
+		unsigned long arg)
+{
+	struct file *filep = NULL;
+	int ret;
+
+	if (container->log_buf.fd > 0)
+		return container->log_buf.fd;
+
+	ret = get_unused_fd_flags(O_CLOEXEC);
+	if (ret < 0) {
+		pr_err("get_unused_fd_flags get fd failed\n");
+		return ret;
+	}
+
+	vfio_container_get(container);
+	filep = anon_inode_getfile("[vfio-log-buf]", &vfio_log_buf_fops,
+			container, O_RDWR);
+	if (IS_ERR(filep)) {
+		pr_err("anon_inode_getfile failed\n");
+		vfio_container_put(container);
+		put_unused_fd(ret);
+		return PTR_ERR(filep);
+	}
+
+	filep->f_mode |= (FMODE_READ | FMODE_WRITE | FMODE_LSEEK);
+
+	fd_install(ret, filep);
+
+	container->log_buf.fd = ret;
+	return ret;
+}
+
 static long vfio_fops_unl_ioctl(struct file *filep,
 				unsigned int cmd, unsigned long arg)
 {
	struct vfio_container *container = filep->private_data;
	struct vfio_iommu_driver *driver;
@@ -1179,6 +1582,9 @@ static long vfio_fops_unl_ioctl(struct file *filep,
	case VFIO_SET_IOMMU:
		ret = vfio_ioctl_set_iommu(container, arg);
		break;
+	case VFIO_GET_LOG_BUF_FD:
+		ret = vfio_get_log_buf_fd(container, arg);
+		break;
	default:
		driver = container->iommu_driver;
		data = container->iommu_data;
@@ -1210,6 +1616,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
	INIT_LIST_HEAD(&container->group_list);
	init_rwsem(&container->group_lock);
	kref_init(&container->kref);
+	memset(&container->log_buf, 0, sizeof(struct vfio_log_buf));

	filep->private_data = container;

@@ -1219,9 +1626,9 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
 static int vfio_fops_release(struct inode *inode, struct file *filep)
 {
	struct vfio_container *container = filep->private_data;

	filep->private_data = NULL;

	vfio_container_put(container);

	return 0;
diff --git a/include/linux/vfio_pci_migration.h b/include/linux/vfio_pci_migration.h
new file mode 100644
index 0000000..464ffb4
--- /dev/null
+++ b/include/linux/vfio_pci_migration.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022 Huawei Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef VFIO_PCI_MIGRATION_H
+#define VFIO_PCI_MIGRATION_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+
+#define VFIO_REGION_TYPE_MIGRATION		(3)
+/* sub-types for VFIO_REGION_TYPE_MIGRATION */
+#define VFIO_REGION_SUBTYPE_MIGRATION		(1)
+
+#define VFIO_MIGRATION_BUFFER_MAX_SIZE	SZ_256K
+#define VFIO_MIGRATION_REGION_DATA_OFFSET \
+	(sizeof(struct vfio_device_migration_info))
+#define VFIO_DEVICE_MIGRATION_OFFSET(x) \
+	offsetof(struct vfio_device_migration_info, x)
+
+/* Header of the migration region; mirrors the upstream uapi layout. */
+struct vfio_device_migration_info {
+	__u32 device_state;		/* VFIO device state */
+#define VFIO_DEVICE_STATE_STOP		(0)
+#define VFIO_DEVICE_STATE_RUNNING	(1 << 0)
+#define VFIO_DEVICE_STATE_SAVING	(1 << 1)
+#define VFIO_DEVICE_STATE_RESUMING	(1 << 2)
+#define VFIO_DEVICE_STATE_MASK		(VFIO_DEVICE_STATE_RUNNING | \
+					 VFIO_DEVICE_STATE_SAVING |  \
+					 VFIO_DEVICE_STATE_RESUMING)
+	__u32 reserved;
+
+	__u32 device_cmd;
+	__u32 version_id;
+
+	__u64 pending_bytes;
+	__u64 data_offset;
+	__u64 data_size;
+};
+
+enum {
+	VFIO_DEVICE_STOP = 0xffff0001,
+	VFIO_DEVICE_CONTINUE,
+	VFIO_DEVICE_MIGRATION_CANCEL,
+};
+
+struct vfio_log_buf_sge {
+	__u64 len;
+	__u64 addr;
+};
+
+/*
+ * Geometry of the dirty-log buffer.  sgevec is filled by the kernel only;
+ * the value copied in from user space must never be trusted or followed.
+ */
+struct vfio_log_buf_info {
+	__u32 uuid;
+	__u64 buffer_size;
+	__u64 addrs_size;
+	__u64 frag_size;
+	struct vfio_log_buf_sge *sgevec;
+};
+
+struct vfio_log_buf_ctl {
+	__u32 argsz;
+	__u32 flags;
+#define VFIO_DEVICE_LOG_BUF_FLAG_SETUP		(1 << 0)
+#define VFIO_DEVICE_LOG_BUF_FLAG_RELEASE	(1 << 1)
+#define VFIO_DEVICE_LOG_BUF_FLAG_START		(1 << 2)
+#define VFIO_DEVICE_LOG_BUF_FLAG_STOP		(1 << 3)
+#define VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY	(1 << 4)
+	/*
+	 * User pointer carried as a fixed-width __u64 rather than void *,
+	 * so the ioctl struct has the same size and layout for 32-bit and
+	 * 64-bit user space (a bare pointer would break the compat path).
+	 */
+	__u64 data;
+};
+#define VFIO_LOG_BUF_CTL	_IO(VFIO_TYPE, VFIO_BASE + 21)
+#define VFIO_GET_LOG_BUF_FD	_IO(VFIO_TYPE, VFIO_BASE + 22)
+#define VFIO_DEVICE_LOG_BUF_CTL	_IO(VFIO_TYPE, VFIO_BASE + 23)
+
+/* Per-VF dirty-log description handed to the vendor driver. */
+struct vf_migration_log_info {
+	__u32 dom_uuid;
+	__u64 buffer_size;
+	__u64 sge_len;
+	__u64 sge_num;
+	struct vfio_log_buf_sge *sgevec;
+};
+
+struct vfio_device_migration_ops {
+	/* Get device information */
+	int (*get_info)(struct pci_dev *pdev,
+			struct vfio_device_migration_info *info);
+	/* Enable a vf device */
+	int (*enable)(struct pci_dev *pdev);
+	/* Disable a vf device */
+	int (*disable)(struct pci_dev *pdev);
+	/* Save a vf device */
+	int (*save)(struct pci_dev *pdev, void *base,
+		    uint64_t off, uint64_t count);
+	/* Resuming a vf device */
+	int (*restore)(struct pci_dev *pdev, void *base,
+		       uint64_t off, uint64_t count);
+	/* Log start a vf device */
+	int (*log_start)(struct pci_dev *pdev,
+			 struct vf_migration_log_info *log_info);
+	/* Log stop a vf device */
+	int (*log_stop)(struct pci_dev *pdev, uint32_t uuid);
+	/* Get vf device log status */
+	int (*get_log_status)(struct pci_dev *pdev);
+	/* Pre enable a vf device(load_setup, before restore a vf) */
+	int (*pre_enable)(struct pci_dev *pdev);
+	/* Cancel a vf device when live migration failed (rollback) */
+	int (*cancel)(struct pci_dev *pdev);
+	/* Init a vf device */
+	int (*init)(struct pci_dev *pdev);
+	/* Uninit a vf device */
+	void (*uninit)(struct pci_dev *pdev);
+	/* Release a vf device */
+	void (*release)(struct pci_dev *pdev);
+};
+
+/* One registered vendor migration driver, keyed by PF bus number. */
+struct vfio_pci_vendor_mig_driver {
+	struct pci_dev *pdev;
+	unsigned char bus_num;
+	struct vfio_device_migration_ops *dev_mig_ops;
+	struct module *owner;
+	atomic_t count;
+	struct list_head list;
+};
+
+/* Per-VF migration state kept by vfio-pci. */
+struct vfio_pci_migration_data {
+	u64 state_size;
+	struct pci_dev *vf_dev;
+	struct vfio_pci_vendor_mig_driver *mig_driver;
+	struct vfio_device_migration_info *mig_ctl;
+	void *vf_data;
+};
+
+int vfio_pci_register_migration_ops(struct vfio_device_migration_ops *ops,
+		struct module *mod, struct pci_dev *pdev);
+void vfio_pci_unregister_migration_ops(struct module *mod,
+		struct pci_dev *pdev);
+
+#endif /* VFIO_PCI_MIGRATION_H */