Kernel
Threads by month
- ----- 2026 -----
- January
- ----- 2025 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
June 2022
- 18 participants
- 65 discussions
From: Rong Wang <w_angrong(a)163.com>
kunpeng inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5CO9A
CVE: NA
---------------------------------
For pass-through devices, the hypervisor can't control the status of
the device, and can't track memory dirtied by device DMA, during
migration.
The goal of this framework is to cooperate with the hardware to
accomplish the tasks above.
qemu
|status control and dirty memory report
vfio
|ops to hardware
hardware
Signed-off-by: Rong Wang <w_angrong(a)163.com>
Signed-off-by: HuHua Li <18245010845(a)163.com>
Signed-off-by: Ripeng Qiu <965412048(a)qq.com>
---
drivers/vfio/pci/Makefile | 2 +-
drivers/vfio/pci/vfio_pci.c | 54 +++
drivers/vfio/pci/vfio_pci_migration.c | 755 ++++++++++++++++++++++++++++++++++
drivers/vfio/pci/vfio_pci_private.h | 14 +-
drivers/vfio/vfio.c | 411 +++++++++++++++++-
include/linux/vfio_pci_migration.h | 136 ++++++
6 files changed, 1367 insertions(+), 5 deletions(-)
create mode 100644 drivers/vfio/pci/vfio_pci_migration.c
create mode 100644 include/linux/vfio_pci_migration.h
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 76d8ec0..80a777d 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -1,5 +1,5 @@
-vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
+vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio_pci_migration.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 51b791c..59d8280 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -30,6 +30,7 @@
#include <linux/vgaarb.h>
#include <linux/nospec.h>
#include <linux/sched/mm.h>
+#include <linux/vfio_pci_migration.h>
#include "vfio_pci_private.h"
@@ -296,6 +297,14 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
vfio_pci_probe_mmaps(vdev);
+ if (vfio_dev_migration_is_supported(pdev)) {
+ ret = vfio_pci_migration_init(vdev);
+ if (ret) {
+ dev_warn(&vdev->pdev->dev, "Failed to init vfio_pci_migration\n");
+ vfio_pci_disable(vdev);
+ return ret;
+ }
+ }
return 0;
}
@@ -392,6 +401,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
out:
pci_disable_device(pdev);
+ vfio_pci_migration_exit(vdev);
vfio_pci_try_bus_reset(vdev);
if (!disable_idle_d3)
@@ -642,6 +652,41 @@ struct vfio_devices {
int max_index;
};
+/*
+ * Dispatch a VFIO_DEVICE_LOG_BUF_CTL request for one device.
+ *
+ * @vdev: device the request is addressed to.
+ * @arg:  address of a struct vfio_log_buf_ctl; it is dereferenced directly,
+ *        without copy_from_user().
+ *
+ * The action is selected by the control block's flags: START forwards the
+ * buffer description to vfio_pci_device_log_start(), STOP calls
+ * vfio_pci_device_log_stop() with the uuid, and STATUS_QUERY calls
+ * vfio_pci_device_log_status_query().  Any other value yields -EINVAL.
+ *
+ * NOTE(review): vfio_pci_ioctl() passes its own @arg straight through to this
+ * helper; confirm that a user-space ioctl can never reach here with a
+ * user-controlled address.
+ */
+static long vfio_pci_handle_log_buf_ctl(struct vfio_pci_device *vdev,
+	const unsigned long arg)
+{
+	struct vfio_log_buf_ctl *ctl = (struct vfio_log_buf_ctl *)arg;
+	struct vfio_log_buf_info *info = (struct vfio_log_buf_info *)ctl->data;
+	struct vf_migration_log_info log_info;
+
+	switch (ctl->flags) {
+	case VFIO_DEVICE_LOG_BUF_FLAG_START:
+		log_info.dom_uuid = info->uuid;
+		log_info.buffer_size = info->buffer_size;
+		log_info.sge_num = info->addrs_size;
+		log_info.sge_len = info->frag_size;
+		log_info.sgevec = info->sgevec;
+		return vfio_pci_device_log_start(vdev, &log_info);
+	case VFIO_DEVICE_LOG_BUF_FLAG_STOP:
+		return vfio_pci_device_log_stop(vdev, info->uuid);
+	case VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY:
+		return vfio_pci_device_log_status_query(vdev);
+	default:
+		return -EINVAL;
+	}
+}
static long vfio_pci_ioctl(void *device_data,
unsigned int cmd, unsigned long arg)
{
@@ -1142,6 +1187,8 @@ static long vfio_pci_ioctl(void *device_data,
return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
ioeventfd.data, count, ioeventfd.fd);
+ } else if (cmd == VFIO_DEVICE_LOG_BUF_CTL) {
+ return vfio_pci_handle_log_buf_ctl(vdev, arg);
}
return -ENOTTY;
@@ -1566,6 +1613,9 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_power_state(pdev, PCI_D3hot);
}
+ if (vfio_dev_migration_is_supported(pdev))
+ ret = vfio_pci_device_init(pdev);
+
return ret;
}
@@ -1591,6 +1641,10 @@ static void vfio_pci_remove(struct pci_dev *pdev)
if (!disable_idle_d3)
pci_set_power_state(pdev, PCI_D0);
+
+ if (vfio_dev_migration_is_supported(pdev)) {
+ vfio_pci_device_uninit(pdev);
+ }
}
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
diff --git a/drivers/vfio/pci/vfio_pci_migration.c b/drivers/vfio/pci/vfio_pci_migration.c
new file mode 100644
index 0000000..f69cd13
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_migration.c
@@ -0,0 +1,755 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Huawei Technologies Co., Ltd. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <linux/vfio_pci_migration.h>
+
+#include "vfio_pci_private.h"
+
+static LIST_HEAD(vfio_pci_mig_drivers_list);
+static DEFINE_MUTEX(vfio_pci_mig_drivers_mutex);
+
+/*
+ * Append @mig_drv to vfio_pci_mig_drivers_list while holding
+ * vfio_pci_mig_drivers_mutex, with its registration count set to 1.
+ */
+static void vfio_pci_add_mig_drv(struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ mutex_lock(&vfio_pci_mig_drivers_mutex);
+ atomic_set(&mig_drv->count, 1);
+ list_add_tail(&mig_drv->list, &vfio_pci_mig_drivers_list);
+ mutex_unlock(&vfio_pci_mig_drivers_mutex);
+}
+
+/*
+ * Unlink @mig_drv from vfio_pci_mig_drivers_list while holding
+ * vfio_pci_mig_drivers_mutex.  The entry itself is not freed here.
+ */
+static void vfio_pci_remove_mig_drv(struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ mutex_lock(&vfio_pci_mig_drivers_mutex);
+ list_del(&mig_drv->list);
+ mutex_unlock(&vfio_pci_mig_drivers_mutex);
+}
+
+/*
+ * Look up the migration driver that @module registered for @pdev's bus.
+ *
+ * Walks vfio_pci_mig_drivers_list under vfio_pci_mig_drivers_mutex and returns
+ * the first entry whose owner is @module and whose bus_num equals
+ * pdev->bus->number, or NULL if there is none.  The mutex is dropped before
+ * returning, so the result is not protected against concurrent removal.
+ */
+static struct vfio_pci_vendor_mig_driver *
+	vfio_pci_find_mig_drv(struct pci_dev *pdev, struct module *module)
+{
+	struct vfio_pci_vendor_mig_driver *entry;
+	struct vfio_pci_vendor_mig_driver *found = NULL;
+
+	mutex_lock(&vfio_pci_mig_drivers_mutex);
+	list_for_each_entry(entry, &vfio_pci_mig_drivers_list, list) {
+		if (entry->owner != module)
+			continue;
+		if (entry->bus_num != pdev->bus->number)
+			continue;
+		found = entry;
+		break;
+	}
+	mutex_unlock(&vfio_pci_mig_drivers_mutex);
+
+	return found;
+}
+
+/*
+ * Find the migration driver responsible for @pdev.
+ *
+ * The match is made on the bus number of pci_physfn(@pdev): the first list
+ * entry whose bus_num equals it is returned, or NULL if none matches.  The
+ * list is walked under vfio_pci_mig_drivers_mutex, which is released before
+ * returning.
+ */
+static struct vfio_pci_vendor_mig_driver *
+	vfio_pci_get_mig_driver(struct pci_dev *pdev)
+{
+	struct pci_dev *pf_dev = pci_physfn(pdev);
+	struct vfio_pci_vendor_mig_driver *entry;
+	struct vfio_pci_vendor_mig_driver *found = NULL;
+
+	mutex_lock(&vfio_pci_mig_drivers_mutex);
+	list_for_each_entry(entry, &vfio_pci_mig_drivers_list, list) {
+		if (entry->bus_num == pf_dev->bus->number) {
+			found = entry;
+			break;
+		}
+	}
+	mutex_unlock(&vfio_pci_mig_drivers_mutex);
+
+	return found;
+}
+
+/*
+ * Report whether a migration driver with an ops table is registered for @pdev.
+ * Emits a warning on the device and returns false when none is found.
+ */
+bool vfio_dev_migration_is_supported(struct pci_dev *pdev)
+{
+	struct vfio_pci_vendor_mig_driver *drv = vfio_pci_get_mig_driver(pdev);
+
+	if (drv && drv->dev_mig_ops)
+		return true;
+
+	dev_warn(&pdev->dev, "unable to find a mig_drv module\n");
+	return false;
+}
+
+/*
+ * Ask the vendor migration driver of @vdev to start logging, as described by
+ * @log_info.
+ *
+ * Returns 0 on success.  Returns -EFAULT when no driver/ops table is found,
+ * when the log_start hook is missing, or when the hook reports failure.
+ */
+int vfio_pci_device_log_start(struct vfio_pci_device *vdev,
+	struct vf_migration_log_info *log_info)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_vendor_mig_driver *drv = vfio_pci_get_mig_driver(pdev);
+
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&pdev->dev, "unable to find a mig_drv module\n");
+		return -EFAULT;
+	}
+
+	if (drv->dev_mig_ops->log_start &&
+	    drv->dev_mig_ops->log_start(pdev, log_info) == 0)
+		return 0;
+
+	dev_err(&pdev->dev, "failed to set log start\n");
+	return -EFAULT;
+}
+
+/*
+ * Ask the vendor migration driver of @vdev to stop logging for @uuid.
+ *
+ * Returns 0 on success.  Returns -EFAULT when no driver/ops table is found,
+ * when the log_stop hook is missing, or when the hook reports failure.
+ */
+int vfio_pci_device_log_stop(struct vfio_pci_device *vdev, uint32_t uuid)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_vendor_mig_driver *drv = vfio_pci_get_mig_driver(pdev);
+
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&pdev->dev, "unable to find a mig_drv module\n");
+		return -EFAULT;
+	}
+
+	if (drv->dev_mig_ops->log_stop &&
+	    drv->dev_mig_ops->log_stop(pdev, uuid) == 0)
+		return 0;
+
+	dev_err(&pdev->dev, "failed to set log stop\n");
+	return -EFAULT;
+}
+
+/*
+ * Query the logging status of @vdev through the vendor migration driver.
+ *
+ * Returns 0 when the get_log_status hook returns 0.  Returns -EFAULT when no
+ * driver/ops table is found, when the hook is missing, or when it returns a
+ * non-zero value.
+ */
+int vfio_pci_device_log_status_query(struct vfio_pci_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_vendor_mig_driver *drv = vfio_pci_get_mig_driver(pdev);
+
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&pdev->dev, "unable to find a mig_drv module\n");
+		return -EFAULT;
+	}
+
+	if (drv->dev_mig_ops->get_log_status &&
+	    drv->dev_mig_ops->get_log_status(pdev) == 0)
+		return 0;
+
+	dev_err(&pdev->dev, "failed to get log status\n");
+	return -EFAULT;
+}
+
+/*
+ * Run the vendor driver's init hook for @pdev.
+ *
+ * Returns the hook's result, or -EFAULT when no driver/ops table is registered
+ * for the device or the ops table has no init hook.
+ */
+int vfio_pci_device_init(struct pci_dev *pdev)
+{
+	struct vfio_pci_vendor_mig_driver *drv = vfio_pci_get_mig_driver(pdev);
+
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&pdev->dev, "unable to find a mig_drv module\n");
+		return -EFAULT;
+	}
+
+	if (!drv->dev_mig_ops->init)
+		return -EFAULT;
+
+	return drv->dev_mig_ops->init(pdev);
+}
+
+/*
+ * Run the vendor driver's uninit hook for @pdev, if one is provided.
+ * Logs an error and does nothing else when no driver/ops table is found.
+ */
+void vfio_pci_device_uninit(struct pci_dev *pdev)
+{
+	struct vfio_pci_vendor_mig_driver *drv = vfio_pci_get_mig_driver(pdev);
+
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&pdev->dev, "unable to find a mig_drv module\n");
+		return;
+	}
+
+	if (!drv->dev_mig_ops->uninit)
+		return;
+
+	drv->dev_mig_ops->uninit(pdev);
+}
+
+/*
+ * Call the vendor driver's optional release hook for @pdev.
+ * mig_drv->dev_mig_ops is dereferenced without a NULL check, so the caller
+ * must have verified it.
+ */
+static void vfio_pci_device_release(struct pci_dev *pdev,
+ struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ if (mig_drv->dev_mig_ops->release)
+ mig_drv->dev_mig_ops->release(pdev);
+}
+
+/*
+ * Fill @mig_info through the vendor driver's get_info hook.
+ * Returns the hook's result, or -EFAULT when the hook is not provided.
+ */
+static int vfio_pci_device_get_info(struct pci_dev *pdev,
+	struct vfio_device_migration_info *mig_info,
+	struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	if (!mig_drv->dev_mig_ops->get_info)
+		return -EFAULT;
+
+	return mig_drv->dev_mig_ops->get_info(pdev, mig_info);
+}
+
+/*
+ * Invoke the vendor driver's enable hook for @pdev.
+ * Returns 0 on success, -EINVAL when the hook is missing or fails.
+ */
+static int vfio_pci_device_enable(struct pci_dev *pdev,
+	struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	if (!mig_drv->dev_mig_ops->enable)
+		return -EINVAL;
+
+	if (mig_drv->dev_mig_ops->enable(pdev) != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Invoke the vendor driver's disable hook for @pdev.
+ * Returns 0 on success, -EINVAL when the hook is missing or fails.
+ */
+static int vfio_pci_device_disable(struct pci_dev *pdev,
+	struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	if (!mig_drv->dev_mig_ops->disable)
+		return -EINVAL;
+
+	if (mig_drv->dev_mig_ops->disable(pdev) != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Invoke the vendor driver's pre_enable hook for @pdev.
+ * Returns 0 on success, -EINVAL when the hook is missing or fails.
+ */
+static int vfio_pci_device_pre_enable(struct pci_dev *pdev,
+	struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	if (!mig_drv->dev_mig_ops->pre_enable)
+		return -EINVAL;
+
+	if (mig_drv->dev_mig_ops->pre_enable(pdev) != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Stop the device if it is still running, then have the vendor driver save
+ * its state into the migration region.
+ *
+ * If VFIO_DEVICE_STATE_RUNNING is set in the current device_state, the device
+ * is disabled first and that bit is cleared.  The save hook then receives the
+ * region base (the migration info structure), the data offset and the state
+ * size.  On success data_size and pending_bytes are both set to state_size.
+ *
+ * Returns 0 on success, the disable error if stopping fails, -EINVAL if the
+ * save hook fails, and -EFAULT if there is no ops table or no save hook.
+ */
+static int vfio_pci_device_state_save(struct pci_dev *pdev,
+	struct vfio_pci_migration_data *data)
+{
+	struct vfio_device_migration_info *info = data->mig_ctl;
+	struct vfio_pci_vendor_mig_driver *drv = data->mig_driver;
+
+	if (info->device_state & VFIO_DEVICE_STATE_RUNNING) {
+		int err = vfio_pci_device_disable(pdev, drv);
+
+		if (err) {
+			dev_err(&pdev->dev, "failed to stop VF function!\n");
+			return err;
+		}
+		info->device_state &= ~VFIO_DEVICE_STATE_RUNNING;
+	}
+
+	if (!drv->dev_mig_ops || !drv->dev_mig_ops->save)
+		return -EFAULT;
+
+	if (drv->dev_mig_ops->save(pdev, (void *)info, info->data_offset,
+				   data->state_size)) {
+		dev_err(&pdev->dev, "failed to save device state!\n");
+		return -EINVAL;
+	}
+
+	info->data_size = data->state_size;
+	info->pending_bytes = info->data_size;
+	return 0;
+}
+
+/*
+ * Hand the state held in the migration region to the vendor driver's restore
+ * hook, passing the region base, the data offset and the current data_size.
+ *
+ * Returns 0 on success, -EINVAL if the hook fails, and -EFAULT if there is no
+ * ops table or no restore hook.
+ */
+static int vfio_pci_device_state_restore(struct vfio_pci_migration_data *data)
+{
+	struct vfio_device_migration_info *info = data->mig_ctl;
+	struct vfio_pci_vendor_mig_driver *drv = data->mig_driver;
+	struct pci_dev *pdev = data->vf_dev;
+
+	if (!drv->dev_mig_ops || !drv->dev_mig_ops->restore)
+		return -EFAULT;
+
+	if (drv->dev_mig_ops->restore(pdev, (void *)info, info->data_offset,
+				      info->data_size)) {
+		dev_err(&pdev->dev, "failed to restore device state!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Move the device to migration state @state.
+ *
+ * A request for the state the device is already in succeeds immediately.
+ * Otherwise the vendor hook matching the requested state is invoked and, only
+ * if it succeeds, mig_ctl->device_state is updated to @state.
+ *
+ * Returns 0 on success, -EINVAL when the driver has no ops table, -EFAULT for
+ * an unrecognised @state, or the error of the helper that was called.
+ */
+static int vfio_pci_set_device_state(struct vfio_pci_migration_data *data,
+ u32 state)
+{
+ struct vfio_device_migration_info *mig_ctl = data->mig_ctl;
+ struct vfio_pci_vendor_mig_driver *mig_drv = data->mig_driver;
+ struct pci_dev *pdev = data->vf_dev;
+ int ret = 0;
+
+ /* Nothing to do when the requested state is already current. */
+ if (state == mig_ctl->device_state)
+ return 0;
+
+ if (!mig_drv->dev_mig_ops)
+ return -EINVAL;
+
+ switch (state) {
+ case VFIO_DEVICE_STATE_RUNNING:
+ /* Only enable the device if it is not running already. */
+ if (!(mig_ctl->device_state &
+ VFIO_DEVICE_STATE_RUNNING))
+ ret = vfio_pci_device_enable(pdev, mig_drv);
+ break;
+ case VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING:
+ /*
+ * (pre-copy) - device should start logging data.
+ */
+ ret = 0;
+ break;
+ case VFIO_DEVICE_STATE_SAVING:
+ /* stop the vf function, save state */
+ ret = vfio_pci_device_state_save(pdev, data);
+ break;
+ case VFIO_DEVICE_STATE_STOP:
+ /* Only disable the device if it is currently running. */
+ if (mig_ctl->device_state & VFIO_DEVICE_STATE_RUNNING)
+ ret = vfio_pci_device_disable(pdev, mig_drv);
+ break;
+ case VFIO_DEVICE_STATE_RESUMING:
+ ret = vfio_pci_device_pre_enable(pdev, mig_drv);
+ break;
+ default:
+ ret = -EFAULT;
+ break;
+ }
+
+ if (ret)
+ return ret;
+
+ /* Record the new state only after the transition succeeded. */
+ mig_ctl->device_state = state;
+ return 0;
+}
+
+/*
+ * Read or write the device_state field of the migration region.
+ *
+ * @count must equal the size of the 32-bit state.  A read copies the current
+ * state to @buf; a write fetches the new state from @buf and applies it via
+ * vfio_pci_set_device_state().
+ *
+ * Returns @count on success, -EINVAL for a wrong size, -EFAULT when the user
+ * copy fails, or the error from the state transition.
+ */
+static ssize_t vfio_pci_handle_mig_dev_state(
+	struct vfio_pci_migration_data *data,
+	char __user *buf, size_t count, bool iswrite)
+{
+	u32 new_state;
+	int err;
+
+	if (count != sizeof(new_state))
+		return -EINVAL;
+
+	if (!iswrite) {
+		if (copy_to_user(buf, &data->mig_ctl->device_state, count))
+			return -EFAULT;
+		return count;
+	}
+
+	if (copy_from_user(&new_state, buf, count))
+		return -EFAULT;
+
+	err = vfio_pci_set_device_state(data, new_state);
+	if (err)
+		return err;
+
+	return count;
+}
+
+/*
+ * Read the pending_bytes field of the migration region (read-only).
+ *
+ * While the device is in the SAVING | RUNNING (pre-copy) state, 0 is reported
+ * instead of the stored value.
+ *
+ * Returns @count on success, -EINVAL for a write or a wrong size, and -EFAULT
+ * when the copy to user space fails.
+ */
+static ssize_t vfio_pci_handle_mig_pending_bytes(
+	struct vfio_device_migration_info *mig_info,
+	char __user *buf, size_t count, bool iswrite)
+{
+	const u32 precopy = VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING;
+	u64 pending;
+
+	if (iswrite || count != sizeof(pending))
+		return -EINVAL;
+
+	/* Pre-copy has no data to hand out yet, so report nothing pending. */
+	if (mig_info->device_state == precopy)
+		pending = 0;
+	else
+		pending = mig_info->pending_bytes;
+
+	if (copy_to_user(buf, &pending, count))
+		return -EFAULT;
+
+	return count;
+}
+
+/*
+ * Read the data_offset field of the migration region (read-only).
+ *
+ * Returns @count on success, -EINVAL for a write or a wrong size, and -EFAULT
+ * when the copy to user space fails.
+ */
+static ssize_t vfio_pci_handle_mig_data_offset(
+	struct vfio_device_migration_info *mig_info,
+	char __user *buf, size_t count, bool iswrite)
+{
+	u64 offset;
+
+	if (iswrite || count != sizeof(offset))
+		return -EINVAL;
+
+	offset = mig_info->data_offset;
+	if (copy_to_user(buf, &offset, count))
+		return -EFAULT;
+
+	return count;
+}
+
+/*
+ * Read or write the data_size field of the migration region.
+ *
+ * Writes are accepted only in the RESUMING state and reads only in the SAVING
+ * state; any other state yields -EINVAL.
+ *
+ * Returns @count on success, -EINVAL for a wrong size or state, and -EFAULT
+ * when the user copy fails.
+ */
+static ssize_t vfio_pci_handle_mig_data_size(
+	struct vfio_device_migration_info *mig_info,
+	char __user *buf, size_t count, bool iswrite)
+{
+	u64 value;
+
+	if (count != sizeof(value))
+		return -EINVAL;
+
+	if (!iswrite) {
+		if (mig_info->device_state != VFIO_DEVICE_STATE_SAVING)
+			return -EINVAL;
+
+		if (copy_to_user(buf, &mig_info->data_size, sizeof(value)))
+			return -EFAULT;
+
+		return count;
+	}
+
+	/* data_size is writable only during resuming state */
+	if (mig_info->device_state != VFIO_DEVICE_STATE_RESUMING)
+		return -EINVAL;
+
+	if (copy_from_user(&value, buf, sizeof(value)))
+		return -EFAULT;
+
+	mig_info->data_size = value;
+	return count;
+}
+
+/*
+ * Handle a write to the device_cmd field of the migration region (write-only).
+ *
+ * The only recognised command is VFIO_DEVICE_MIGRATION_CANCEL, which is passed
+ * to the vendor driver's cancel hook.
+ *
+ * Returns @count on success; -EINVAL for a read, a wrong size, a missing ops
+ * table or an unknown command; -EFAULT when the user copy fails or the cancel
+ * hook is not provided; otherwise the non-zero result of the cancel hook.
+ */
+static ssize_t vfio_pci_handle_mig_dev_cmd(struct vfio_pci_migration_data *data,
+	char __user *buf, size_t count, bool iswrite)
+{
+	struct vfio_pci_vendor_mig_driver *drv = data->mig_driver;
+	struct pci_dev *pdev = data->vf_dev;
+	int err = -EFAULT;
+	u32 cmd;
+
+	if (count != sizeof(cmd) || !iswrite || !drv->dev_mig_ops)
+		return -EINVAL;
+
+	if (copy_from_user(&cmd, buf, count))
+		return -EFAULT;
+
+	if (cmd != VFIO_DEVICE_MIGRATION_CANCEL) {
+		dev_err(&pdev->dev, "cmd is invaild\n");
+		return -EINVAL;
+	}
+
+	if (drv->dev_mig_ops->cancel)
+		err = drv->dev_mig_ops->cancel(pdev);
+
+	if (err != 0)
+		return err;
+
+	return count;
+}
+
+/*
+ * Read the version_id field of the migration region (read-only).
+ *
+ * Returns @count on success, -EINVAL for a write or a wrong size, and -EFAULT
+ * when the copy to user space fails.
+ */
+static ssize_t vfio_pci_handle_mig_drv_version(
+	struct vfio_device_migration_info *mig_info,
+	char __user *buf, size_t count, bool iswrite)
+{
+	u32 version;
+
+	if (iswrite || count != sizeof(version))
+		return -EINVAL;
+
+	version = mig_info->version_id;
+	if (copy_to_user(buf, &version, count))
+		return -EFAULT;
+
+	return count;
+}
+
+/*
+ * Transfer device state between user space and the data area of the
+ * migration region.
+ *
+ * @data:    per-device migration data (info header plus state buffer).
+ * @buf:     user buffer to copy from (write) or to (read).
+ * @count:   number of bytes to transfer; must be non-zero.
+ * @pos:     offset inside the region; the offset into the state buffer is
+ *           @pos - mig_ctl->data_offset.
+ * @iswrite: true for a write (user -> state buffer).
+ *
+ * pending_bytes grows by @count on a write and shrinks by @count on a read.
+ * All limits are validated before any byte is copied, so a rejected request
+ * leaves both the state buffer and pending_bytes untouched:
+ *   - the access must lie entirely inside the state_size-byte buffer;
+ *   - a write must not push pending_bytes beyond state_size;
+ *   - a read must not consume more than pending_bytes.
+ *
+ * Returns @count on success, -EINVAL when a limit is violated or @count is 0,
+ * and -EFAULT when the user copy fails.
+ */
+static ssize_t vfio_pci_handle_mig_data_rw(
+	struct vfio_pci_migration_data *data,
+	char __user *buf, size_t count, u64 pos, bool iswrite)
+{
+	struct vfio_device_migration_info *mig_ctl = data->mig_ctl;
+	char *data_addr;
+	u64 offset;
+
+	if (count == 0) {
+		dev_err(&data->vf_dev->dev, "qemu operation data size error!\n");
+		return -EINVAL;
+	}
+
+	if (pos < mig_ctl->data_offset)
+		return -EINVAL;
+
+	/* Keep the access inside the state buffer (overflow-safe form). */
+	offset = pos - mig_ctl->data_offset;
+	if (offset > data->state_size || count > data->state_size - offset)
+		return -EINVAL;
+
+	data_addr = (char *)data->vf_data + offset;
+	if (iswrite) {
+		/* Reject before copying so a bad write changes nothing. */
+		if (mig_ctl->pending_bytes > data->state_size ||
+		    count > data->state_size - mig_ctl->pending_bytes)
+			return -EINVAL;
+
+		if (copy_from_user(data_addr, buf, count))
+			return -EFAULT;
+
+		mig_ctl->pending_bytes += count;
+	} else {
+		if (mig_ctl->pending_bytes < count)
+			return -EINVAL;
+
+		if (copy_to_user(buf, data_addr, count))
+			return -EFAULT;
+
+		mig_ctl->pending_bytes -= count;
+	}
+
+	return count;
+}
+
+/*
+ * Read/write handler of the migration device region.
+ *
+ * The region offset taken from *ppos selects what is accessed:
+ *   - offsets at or beyond VFIO_MIGRATION_REGION_DATA_OFFSET address the
+ *     state data and are forwarded to vfio_pci_handle_mig_data_rw(), whose
+ *     result is returned directly;
+ *   - offsets matching a field of struct vfio_device_migration_info are
+ *     forwarded to the handler of that field;
+ *   - any other offset fails with -EFAULT.
+ *
+ * After a control-field access, if the device is RESUMING and both
+ * pending_bytes and data_size equal state_size, the collected state is handed
+ * to the vendor driver and both counters are reset to 0.
+ *
+ * Returns the handler's result, -EINVAL for an offset outside the region, or
+ * -EFAULT when the restore fails.
+ */
+static ssize_t vfio_pci_dev_migrn_rw(struct vfio_pci_device *vdev,
+ char __user *buf, size_t count, loff_t *ppos, bool iswrite)
+{
+ /* Device-specific regions are indexed after the fixed PCI regions. */
+ unsigned int index =
+ VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+ struct vfio_pci_migration_data *data =
+ (struct vfio_pci_migration_data *)vdev->region[index].data;
+ loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+ struct vfio_device_migration_info *mig_ctl = data->mig_ctl;
+ int ret;
+
+ if (pos >= vdev->region[index].size)
+ return -EINVAL;
+
+ /* Never let the access run past the end of the region. */
+ count = min(count, (size_t)(vdev->region[index].size - pos));
+ if (pos >= VFIO_MIGRATION_REGION_DATA_OFFSET)
+ return vfio_pci_handle_mig_data_rw(data,
+ buf, count, pos, iswrite);
+
+ /* Control fields must be accessed exactly at their own offset. */
+ switch (pos) {
+ case VFIO_DEVICE_MIGRATION_OFFSET(device_state):
+ ret = vfio_pci_handle_mig_dev_state(data,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(pending_bytes):
+ ret = vfio_pci_handle_mig_pending_bytes(mig_ctl,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(data_offset):
+ ret = vfio_pci_handle_mig_data_offset(mig_ctl,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(data_size):
+ ret = vfio_pci_handle_mig_data_size(mig_ctl,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(device_cmd):
+ ret = vfio_pci_handle_mig_dev_cmd(data,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(version_id):
+ ret = vfio_pci_handle_mig_drv_version(mig_ctl,
+ buf, count, iswrite);
+ break;
+ default:
+ dev_err(&vdev->pdev->dev, "invalid pos offset\n");
+ ret = -EFAULT;
+ break;
+ }
+
+ /*
+ * The whole state has been written and its size announced: restore it.
+ * This check runs even when the handler above returned an error.
+ */
+ if (mig_ctl->device_state == VFIO_DEVICE_STATE_RESUMING &&
+ mig_ctl->pending_bytes == data->state_size &&
+ mig_ctl->data_size == data->state_size) {
+ if (vfio_pci_device_state_restore(data) != 0) {
+ dev_err(&vdev->pdev->dev, "Failed to restore device state!\n");
+ return -EFAULT;
+ }
+ mig_ctl->pending_bytes = 0;
+ mig_ctl->data_size = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * Region release callback: free the migration control buffer and the
+ * per-device migration data attached to @region, if any.
+ */
+static void vfio_pci_dev_migrn_release(struct vfio_pci_device *vdev,
+	struct vfio_pci_region *region)
+{
+	struct vfio_pci_migration_data *mig_data = region->data;
+
+	if (!mig_data)
+		return;
+
+	kfree(mig_data->mig_ctl);
+	kfree(mig_data);
+}
+
+/* Callbacks installed on the migration device region when it is registered. */
+static const struct vfio_pci_regops vfio_pci_migration_regops = {
+ .rw = vfio_pci_dev_migrn_rw,
+ .release = vfio_pci_dev_migrn_release,
+};
+
+/*
+ * Populate @mig_info from the vendor driver and fix up the data offset.
+ *
+ * The driver-reported data_size is rejected with -EINVAL when it exceeds
+ * VFIO_MIGRATION_BUFFER_MAX_SIZE.  On success data_offset is set to
+ * VFIO_MIGRATION_REGION_DATA_OFFSET and 0 is returned; a get_info failure is
+ * propagated unchanged.
+ */
+static int vfio_pci_migration_info_init(struct pci_dev *pdev,
+	struct vfio_device_migration_info *mig_info,
+	struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	int err = vfio_pci_device_get_info(pdev, mig_info, mig_drv);
+
+	if (err) {
+		dev_err(&pdev->dev, "failed to get device info\n");
+		return err;
+	}
+
+	if (mig_info->data_size > VFIO_MIGRATION_BUFFER_MAX_SIZE) {
+		dev_err(&pdev->dev, "mig_info->data_size %llu is invalid\n",
+			mig_info->data_size);
+		return -EINVAL;
+	}
+
+	mig_info->data_offset = VFIO_MIGRATION_REGION_DATA_OFFSET;
+	return 0;
+}
+
+/*
+ * Allocate the migration control/state buffer for @vdev and register it as a
+ * VFIO_REGION_TYPE_MIGRATION device region.
+ *
+ * The buffer is one allocation: a struct vfio_device_migration_info header
+ * followed by state_size bytes of device state (data->vf_data points at the
+ * latter).  state_size is the data_size reported by the vendor driver.
+ *
+ * Returns 0 on success.  On failure the buffer is freed and a negative errno
+ * is returned; @data itself is left for the caller to free.
+ */
+static int vfio_device_mig_data_init(struct vfio_pci_device *vdev,
+ struct vfio_pci_migration_data *data)
+{
+ struct vfio_device_migration_info *mig_ctl;
+ u64 mig_offset;
+ int ret;
+
+ /* Start with the header only; the state size is not known yet. */
+ mig_ctl = kzalloc(sizeof(*mig_ctl), GFP_KERNEL);
+ if (!mig_ctl)
+ return -ENOMEM;
+
+ ret = vfio_pci_migration_info_init(vdev->pdev, mig_ctl,
+ data->mig_driver);
+ if (ret) {
+ dev_err(&vdev->pdev->dev, "get device info error!\n");
+ goto err;
+ }
+
+ mig_offset = sizeof(struct vfio_device_migration_info);
+ data->state_size = mig_ctl->data_size;
+ /* Grow the allocation to hold header + state; mig_ctl stays valid on failure. */
+ data->mig_ctl = krealloc(mig_ctl, mig_offset + data->state_size,
+ GFP_KERNEL);
+ if (!data->mig_ctl) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ data->vf_data = (void *)((char *)data->mig_ctl + mig_offset);
+ /* The state area is cleared explicitly after the krealloc(). */
+ memset(data->vf_data, 0, data->state_size);
+ /* data_size was only used to size the buffer; no state is stored yet. */
+ data->mig_ctl->data_size = 0;
+
+ ret = vfio_pci_register_dev_region(vdev, VFIO_REGION_TYPE_MIGRATION,
+ VFIO_REGION_SUBTYPE_MIGRATION,
+ &vfio_pci_migration_regops, mig_offset + data->state_size,
+ VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE, data);
+ if (ret) {
+ kfree(data->mig_ctl);
+ return ret;
+ }
+
+ return 0;
+err:
+ /* Reached only while mig_ctl is still the live allocation. */
+ kfree(mig_ctl);
+ return ret;
+}
+
+/*
+ * Set up migration support for @vdev.
+ *
+ * Looks up the vendor migration driver, takes a reference on its owner
+ * module, allocates the per-device migration data and registers the migration
+ * region through vfio_device_mig_data_init().
+ *
+ * Returns 0 on success; -EINVAL when no driver/ops table is found, -ENODEV
+ * when the owner module cannot be pinned, -ENOMEM on allocation failure, or
+ * the error from vfio_device_mig_data_init().  On every failure after the
+ * module reference was taken, that reference is dropped again.
+ */
+int vfio_pci_migration_init(struct vfio_pci_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_vendor_mig_driver *drv = vfio_pci_get_mig_driver(pdev);
+	struct vfio_pci_migration_data *mig_data;
+	int err;
+
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&pdev->dev, "unable to find a mig_driver module\n");
+		return -EINVAL;
+	}
+
+	if (!try_module_get(drv->owner)) {
+		pr_err("module %s is not live\n", drv->owner->name);
+		return -ENODEV;
+	}
+
+	mig_data = kzalloc(sizeof(*mig_data), GFP_KERNEL);
+	if (!mig_data) {
+		err = -ENOMEM;
+		goto put_owner;
+	}
+
+	mig_data->mig_driver = drv;
+	mig_data->vf_dev = pdev;
+
+	err = vfio_device_mig_data_init(vdev, mig_data);
+	if (!err)
+		return 0;
+
+	dev_err(&pdev->dev, "failed to init vfio device migration data!\n");
+	kfree(mig_data);
+put_owner:
+	module_put(drv->owner);
+	return err;
+}
+
+/*
+ * Tear down migration support for @vdev: call the vendor driver's release
+ * hook and drop one reference on the driver's owner module.
+ *
+ * Nothing is done (beyond a warning) when no driver/ops table is found.
+ *
+ * NOTE(review): the reference is dropped whenever module_refcount(owner) is
+ * positive; that does not establish that this device took a reference in
+ * vfio_pci_migration_init().  Confirm the get/put pairing stays balanced,
+ * e.g. when init failed and already released its reference.
+ */
+void vfio_pci_migration_exit(struct vfio_pci_device *vdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+
+ mig_driver = vfio_pci_get_mig_driver(vdev->pdev);
+ if (!mig_driver || !mig_driver->dev_mig_ops) {
+ dev_warn(&vdev->pdev->dev, "mig_driver is not found\n");
+ return;
+ }
+
+ if (module_refcount(mig_driver->owner) > 0) {
+ vfio_pci_device_release(vdev->pdev, mig_driver);
+ module_put(mig_driver->owner);
+ }
+}
+
+/*
+ * Register vendor migration callbacks for the bus @pdev sits on.
+ *
+ * @ops:  vendor callback table.
+ * @mod:  module providing @ops; stored as the entry's owner.
+ * @pdev: device whose bus number identifies the entry.
+ *
+ * If @mod already has an entry for this bus, only its registration count is
+ * incremented and @ops is not stored.  Otherwise a new entry is allocated,
+ * a reference on THIS_MODULE is taken, and the entry is added to the global
+ * list with a count of 1.
+ *
+ * Returns 0 on success, -EINVAL for a NULL argument, -ENODEV when the
+ * reference on THIS_MODULE cannot be taken, and -ENOMEM on allocation failure.
+ *
+ * NOTE(review): the lookup and the insertion take vfio_pci_mig_drivers_mutex
+ * separately, so two concurrent first-time registrations are not serialised
+ * against each other; confirm callers cannot race here.
+ */
+int vfio_pci_register_migration_ops(struct vfio_device_migration_ops *ops,
+ struct module *mod, struct pci_dev *pdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+
+ if (!ops || !mod || !pdev)
+ return -EINVAL;
+
+ mig_driver = vfio_pci_find_mig_drv(pdev, mod);
+ if (mig_driver) {
+ pr_info("%s migration ops has already been registered\n",
+ mod->name);
+ atomic_add(1, &mig_driver->count);
+ return 0;
+ }
+
+ /* Take a reference on this module for as long as the entry exists. */
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ mig_driver = kzalloc(sizeof(*mig_driver), GFP_KERNEL);
+ if (!mig_driver) {
+ module_put(THIS_MODULE);
+ return -ENOMEM;
+ }
+
+ mig_driver->pdev = pdev;
+ mig_driver->bus_num = pdev->bus->number;
+ mig_driver->owner = mod;
+ mig_driver->dev_mig_ops = ops;
+
+ vfio_pci_add_mig_drv(mig_driver);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_register_migration_ops);
+
+/*
+ * Drop one registration of @mod's migration callbacks for @pdev's bus.
+ *
+ * @mod:  module that registered the callbacks.
+ * @pdev: device whose bus number identifies the entry.
+ *
+ * The entry's count is decremented; when it reaches zero the entry is removed
+ * from the global list and freed, and the reference on THIS_MODULE taken at
+ * registration is released.  NULL arguments and unknown entries are ignored
+ * (the latter with an error message).
+ *
+ * The success message names @mod, matching the register path.  THIS_MODULE
+ * must not be dereferenced for that purpose: it is NULL when this code is
+ * built into the kernel.
+ */
+void vfio_pci_unregister_migration_ops(struct module *mod, struct pci_dev *pdev)
+{
+	struct vfio_pci_vendor_mig_driver *mig_driver;
+
+	if (!mod || !pdev)
+		return;
+
+	mig_driver = vfio_pci_find_mig_drv(pdev, mod);
+	if (!mig_driver) {
+		pr_err("mig_driver is not found\n");
+		return;
+	}
+
+	/* Other registrations of the same entry are still outstanding. */
+	if (!atomic_sub_and_test(1, &mig_driver->count))
+		return;
+
+	vfio_pci_remove_mig_drv(mig_driver);
+	kfree(mig_driver);
+	module_put(THIS_MODULE);
+	pr_info("%s succeed to unregister migration ops\n", mod->name);
+}
+EXPORT_SYMBOL_GPL(vfio_pci_unregister_migration_ops);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 17d2bae..03af269 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -15,6 +15,7 @@
#include <linux/pci.h>
#include <linux/irqbypass.h>
#include <linux/types.h>
+#include <linux/vfio_pci_migration.h>
#ifndef VFIO_PCI_PRIVATE_H
#define VFIO_PCI_PRIVATE_H
@@ -55,7 +56,7 @@ struct vfio_pci_irq_ctx {
struct vfio_pci_region;
struct vfio_pci_regops {
- size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
+ ssize_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite);
void (*release)(struct vfio_pci_device *vdev,
struct vfio_pci_region *region);
@@ -173,4 +174,15 @@ static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
return -ENODEV;
}
#endif
+
+extern bool vfio_dev_migration_is_supported(struct pci_dev *pdev);
+extern int vfio_pci_migration_init(struct vfio_pci_device *vdev);
+extern void vfio_pci_migration_exit(struct vfio_pci_device *vdev);
+extern int vfio_pci_device_log_start(struct vfio_pci_device *vdev,
+ struct vf_migration_log_info *log_info);
+extern int vfio_pci_device_log_stop(struct vfio_pci_device *vdev,
+ uint32_t uuid);
+extern int vfio_pci_device_log_status_query(struct vfio_pci_device *vdev);
+extern int vfio_pci_device_init(struct pci_dev *pdev);
+extern void vfio_pci_device_uninit(struct pci_dev *pdev);
#endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 7a386fb..35f2a29 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
+#include <linux/vfio_pci_migration.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
@@ -40,6 +41,9 @@
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson(a)redhat.com>"
#define DRIVER_DESC "VFIO - User Level meta-driver"
+#define LOG_BUF_FRAG_SIZE (2 * 1024 * 1024) // fix to 2M
+#define LOG_BUF_MAX_ADDRS_SIZE 128 // max vm ram size is 1T
+
static struct vfio {
struct class *class;
struct list_head iommu_drivers_list;
@@ -57,6 +61,14 @@ struct vfio_iommu_driver {
struct list_head vfio_next;
};
+/* Per-container log-buffer bookkeeping (embedded in struct vfio_container). */
+struct vfio_log_buf {
+ /* Buffer description handed to devices as the LOG_BUF_CTL payload. */
+ struct vfio_log_buf_info info;
+ int fd;
+ int buffer_state;
+ /* Set to 1 once devices were told to start logging, 0 after stop. */
+ int device_state;
+ unsigned long *cpu_addrs;
+};
+
struct vfio_container {
struct kref kref;
struct list_head group_list;
@@ -64,6 +76,7 @@ struct vfio_container {
struct vfio_iommu_driver *iommu_driver;
void *iommu_data;
bool noiommu;
+ struct vfio_log_buf log_buf;
};
struct vfio_unbound_dev {
@@ -1158,8 +1171,398 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
return ret;
}
+/*
+ * Forward one ioctl to every device of every group attached to @container.
+ *
+ * Stops at the first device whose ioctl returns non-zero and returns that
+ * value. Returns 0 when every device accepted the command, or -ENXIO when
+ * the container holds no device at all.
+ *
+ * NOTE(review): both lists are walked without taking any lock in this
+ * function - confirm that callers serialize against group/device add and
+ * remove.
+ */
+static long vfio_dispatch_cmd_to_devices(const struct vfio_container *container,
+ unsigned int cmd, unsigned long arg)
+{
+ struct vfio_group *group = NULL;
+ struct vfio_device *device = NULL;
+ /* stays -ENXIO if no device is ever visited */
+ long ret = -ENXIO;
+
+ list_for_each_entry(group, &container->group_list, container_next) {
+ list_for_each_entry(device, &group->device_list, group_next) {
+ ret = device->ops->ioctl(device->device_data, cmd, arg);
+ if (ret) {
+ /* devices already visited are not rolled back */
+ pr_err("dispatch cmd to devices failed\n");
+ return ret;
+ }
+ }
+ }
+ return ret;
+}
+
+/*
+ * Ask every device in the container to start logging into the container's
+ * log buffer. device_state becomes 1 only when all devices accepted the
+ * request; otherwise the first device error is returned.
+ */
+static long vfio_log_buf_start(struct vfio_container *container)
+{
+	struct vfio_log_buf *buf = &container->log_buf;
+	/* the control block lives on the kernel stack and points at buf->info */
+	struct vfio_log_buf_ctl ctl = {
+		.argsz = sizeof(struct vfio_log_buf_info),
+		.flags = VFIO_DEVICE_LOG_BUF_FLAG_START,
+		.data = (void *)&buf->info,
+	};
+	long err;
+
+	err = vfio_dispatch_cmd_to_devices(container, VFIO_DEVICE_LOG_BUF_CTL,
+					   (unsigned long)&ctl);
+	if (!err)
+		buf->device_state = 1;
+
+	return err;
+}
+
+/*
+ * Ask every device in the container to stop logging. A container whose
+ * device_state is already 0 is left untouched and 0 is returned.
+ * device_state is cleared only when all devices accepted the request.
+ */
+static long vfio_log_buf_stop(struct vfio_container *container)
+{
+	struct vfio_log_buf *buf = &container->log_buf;
+	struct vfio_log_buf_ctl ctl = {
+		.argsz = sizeof(struct vfio_log_buf_info),
+		.flags = VFIO_DEVICE_LOG_BUF_FLAG_STOP,
+		.data = (void *)&buf->info,
+	};
+	long err;
+
+	if (!buf->device_state) {
+		pr_warn("device already stopped\n");
+		return 0;
+	}
+
+	err = vfio_dispatch_cmd_to_devices(container, VFIO_DEVICE_LOG_BUF_CTL,
+					   (unsigned long)&ctl);
+	if (!err)
+		buf->device_state = 0;
+
+	return err;
+}
+
+/*
+ * Send a STATUS_QUERY request to every device in the container and return
+ * the dispatch result unchanged.
+ */
+static long vfio_log_buf_query(struct vfio_container *container)
+{
+	struct vfio_log_buf_ctl ctl = {
+		.argsz = sizeof(struct vfio_log_buf_info),
+		.flags = VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY,
+		.data = (void *)&container->log_buf.info,
+	};
+
+	return vfio_dispatch_cmd_to_devices(container, VFIO_DEVICE_LOG_BUF_CTL,
+					    (unsigned long)&ctl);
+}
+
+/*
+ * mmap handler of the log-buffer fd: maps exactly one fragment into the
+ * caller's address space.
+ *
+ * The fragment is selected by vma->vm_pgoff divided by the number of pages
+ * per fragment, and the mapping length must equal info.frag_size.
+ * Returns -EINVAL for any violated precondition, otherwise the result of
+ * remap_pfn_range().
+ *
+ * NOTE(review): no lock is taken here, so a concurrent RELEASE ioctl could
+ * free cpu_addrs while it is being read - confirm how this is serialized.
+ */
+static int vfio_log_buf_fops_mmap(struct file *filep,
+ struct vm_area_struct *vma)
+{
+ struct vfio_container *container = filep->private_data;
+ struct vfio_log_buf *log_buf = &container->log_buf;
+ unsigned long frag_pg_size;
+ unsigned long frag_offset;
+ phys_addr_t pa;
+ int ret = -EINVAL;
+
+ /* cpu_addrs is only non-NULL between a successful setup and release */
+ if (!log_buf->cpu_addrs) {
+ pr_err("mmap before setup, please setup log buf first\n");
+ return ret;
+ }
+
+ /* also guarantees frag_pg_size below is non-zero */
+ if (log_buf->info.frag_size < PAGE_SIZE) {
+ pr_err("mmap frag size should not less than page size!\n");
+ return ret;
+ }
+
+ frag_pg_size = log_buf->info.frag_size / PAGE_SIZE;
+ /* integer division: a page offset inside a fragment is silently dropped */
+ frag_offset = vma->vm_pgoff / frag_pg_size;
+
+ if (frag_offset >= log_buf->info.addrs_size) {
+ pr_err("mmap offset out of range!\n");
+ return ret;
+ }
+
+ if (vma->vm_end - vma->vm_start != log_buf->info.frag_size) {
+ pr_err("mmap size error, should be aligned with frag size!\n");
+ return ret;
+ }
+
+ /*
+ * NOTE(review): cpu_addrs[] holds dma_alloc_coherent() return values;
+ * virt_to_phys() is only meaningful if those lie in the kernel linear
+ * map - confirm for the supported platforms, or consider
+ * dma_mmap_coherent().
+ */
+ pa = virt_to_phys((void *)log_buf->cpu_addrs[frag_offset]);
+ ret = remap_pfn_range(vma, vma->vm_start,
+ pa >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+ if (ret)
+ pr_err("remap_pfn_range error!\n");
+ return ret;
+}
+
+/*
+ * Return the struct device of the first device found in the container,
+ * skipping groups whose device list is empty, or NULL when the container
+ * holds no device. No reference is taken on the returned device.
+ */
+static struct device *vfio_get_dev(struct vfio_container *container)
+{
+	struct vfio_group *grp;
+	struct vfio_device *first;
+
+	list_for_each_entry(grp, &container->group_list, container_next) {
+		if (list_empty(&grp->device_list))
+			continue;
+
+		first = list_first_entry(&grp->device_list,
+					 struct vfio_device, group_next);
+		return first->dev;
+	}
+
+	return NULL;
+}
+
+/*
+ * Free every fragment that has both a CPU address and a DMA address
+ * recorded, then clear both entries so a repeated call is harmless.
+ * The number of slots and the fragment size are taken from log_buf->info.
+ */
+static void vfio_log_buf_release_dma(struct device *dev,
+				      struct vfio_log_buf *log_buf)
+{
+	struct vfio_log_buf_sge *sge = log_buf->info.sgevec;
+	unsigned long *vaddrs = log_buf->cpu_addrs;
+	int idx;
+
+	/* nothing can be freed unless both bookkeeping arrays exist */
+	if (!vaddrs || !sge)
+		return;
+
+	for (idx = 0; idx < log_buf->info.addrs_size; idx++) {
+		if (vaddrs[idx] == 0 || sge[idx].addr == 0)
+			continue;
+
+		dma_free_coherent(dev, log_buf->info.frag_size,
+				  (void *)vaddrs[idx], sge[idx].addr);
+		vaddrs[idx] = 0;
+		sge[idx].addr = 0;
+	}
+}
+
+/*
+ * Allocate info->addrs_size coherent DMA fragments of info->frag_size bytes.
+ *
+ * For fragment i the CPU address is stored in log_buf->cpu_addrs[i] and the
+ * DMA address and length in log_buf->info.sgevec[i]. Both arrays must
+ * already have been allocated.
+ *
+ * Returns 0 on success, or -ENOMEM at the first failing fragment; fragments
+ * allocated before the failure are left in place for the caller to free.
+ */
+static long vfio_log_buf_alloc_dma(struct vfio_log_buf_info *info,
+ struct vfio_log_buf *log_buf, struct device *dev)
+{
+ int i;
+
+ for (i = 0; i < info->addrs_size; i++) {
+ log_buf->cpu_addrs[i] = (unsigned long)dma_alloc_coherent(dev,
+ info->frag_size, &log_buf->info.sgevec[i].addr,
+ GFP_KERNEL);
+ log_buf->info.sgevec[i].len = info->frag_size;
+ /*
+ * NOTE(review): a DMA address of 0 is treated as failure here and as
+ * "not allocated" by vfio_log_buf_release_dma() - confirm 0 can never
+ * be a valid DMA address on the supported platforms.
+ */
+ if (log_buf->cpu_addrs[i] == 0 ||
+ log_buf->info.sgevec[i].addr == 0) {
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Allocate the two zeroed bookkeeping arrays (DMA scatter entries and CPU
+ * addresses), each with info->addrs_size slots. On failure both pointers in
+ * @log_buf are NULL and -ENOMEM is returned.
+ */
+static long vfio_log_buf_alloc_addrs(struct vfio_log_buf_info *info,
+				      struct vfio_log_buf *log_buf)
+{
+	log_buf->info.sgevec = kcalloc(info->addrs_size,
+				       sizeof(*log_buf->info.sgevec),
+				       GFP_KERNEL);
+	if (!log_buf->info.sgevec)
+		return -ENOMEM;
+
+	log_buf->cpu_addrs = kcalloc(info->addrs_size,
+				     sizeof(*log_buf->cpu_addrs), GFP_KERNEL);
+	if (log_buf->cpu_addrs)
+		return 0;
+
+	/* second allocation failed: undo the first one */
+	kfree(log_buf->info.sgevec);
+	log_buf->info.sgevec = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Validate the user-supplied geometry: the fragment count must be in
+ * 1..LOG_BUF_MAX_ADDRS_SIZE and the fragment size must be exactly
+ * LOG_BUF_FRAG_SIZE. Returns 0 when acceptable, -EINVAL otherwise.
+ */
+static long vfio_log_buf_info_valid(struct vfio_log_buf_info *info)
+{
+	bool count_ok = info->addrs_size != 0 &&
+			info->addrs_size <= LOG_BUF_MAX_ADDRS_SIZE;
+
+	if (!count_ok) {
+		pr_err("can`t support vm ram size larger than 1T or equal to 0\n");
+		return -EINVAL;
+	}
+
+	if (info->frag_size == LOG_BUF_FRAG_SIZE)
+		return 0;
+
+	pr_err("only support %d frag size\n", LOG_BUF_FRAG_SIZE);
+	return -EINVAL;
+}
+
+/*
+ * Allocate the container's log buffer from a user-supplied description.
+ *
+ * @container: container that owns the buffer
+ * @data:      user pointer to a struct vfio_log_buf_info
+ *
+ * Returns 0 on success or when a buffer is already set up, -EFAULT if the
+ * description cannot be copied in, -EINVAL if it fails validation, -ENODEV
+ * if the container has no device to allocate DMA memory for, or -ENOMEM if
+ * an allocation fails. On any failure no memory stays allocated.
+ */
+static long vfio_log_buf_setup(struct vfio_container *container,
+			       unsigned long data)
+{
+	struct vfio_log_buf_info info;
+	struct vfio_log_buf *log_buf = &container->log_buf;
+	struct device *dev = NULL;
+	long ret;
+
+	if (log_buf->info.sgevec) {
+		pr_warn("log buf already setup\n");
+		return 0;
+	}
+
+	if (copy_from_user(&info, (void __user *)data, sizeof(info)))
+		return -EFAULT;
+
+	ret = vfio_log_buf_info_valid(&info);
+	if (ret)
+		return ret;
+
+	ret = vfio_log_buf_alloc_addrs(&info, log_buf);
+	if (ret)
+		return ret;
+
+	dev = vfio_get_dev(container);
+	if (!dev) {
+		pr_err("can`t get dev\n");
+		ret = -ENODEV;
+		goto err_free_addrs;
+	}
+
+	/*
+	 * Record the geometry before allocating: vfio_log_buf_release_dma()
+	 * reads addrs_size and frag_size from log_buf->info, so without this
+	 * the error path below would iterate zero times and leak every
+	 * fragment allocated before the failure.
+	 */
+	log_buf->info.frag_size = info.frag_size;
+	log_buf->info.addrs_size = info.addrs_size;
+
+	ret = vfio_log_buf_alloc_dma(&info, log_buf, dev);
+	if (ret)
+		goto err_free_dma_array;
+
+	log_buf->info.uuid = info.uuid;
+	log_buf->info.buffer_size = info.buffer_size;
+	log_buf->buffer_state = 1;
+	return 0;
+
+err_free_dma_array:
+	vfio_log_buf_release_dma(dev, log_buf);
+	log_buf->info.frag_size = 0;
+	log_buf->info.addrs_size = 0;
+err_free_addrs:
+	kfree(log_buf->cpu_addrs);
+	log_buf->cpu_addrs = NULL;
+	kfree(log_buf->info.sgevec);
+	log_buf->info.sgevec = NULL;
+	/* propagate the real cause instead of a blanket -ENOMEM */
+	return ret;
+}
+
+/*
+ * Free the DMA fragments and both bookkeeping arrays of the container's
+ * log buffer.
+ *
+ * Returns 0 on success or when buffer_state is already 0. Returns -EFAULT
+ * when the container has no device left; in that case nothing is freed and
+ * buffer_state stays 1.
+ */
+static long vfio_log_buf_release_buffer(struct vfio_container *container)
+{
+ struct vfio_log_buf *log_buf = &container->log_buf;
+ struct device *dev = NULL;
+
+ if (log_buf->buffer_state == 0) {
+ pr_warn("buffer already released\n");
+ return 0;
+ }
+
+ /* dma_free_coherent() needs a device; the first one in the container is used */
+ dev = vfio_get_dev(container);
+ if (!dev) {
+ pr_err("can`t get dev\n");
+ return -EFAULT;
+ }
+
+ vfio_log_buf_release_dma(dev, log_buf);
+
+ kfree(log_buf->cpu_addrs);
+ log_buf->cpu_addrs = NULL;
+
+ /* a NULL sgevec is what vfio_log_buf_setup() tests for "not set up" */
+ kfree(log_buf->info.sgevec);
+ log_buf->info.sgevec = NULL;
+
+ log_buf->buffer_state = 0;
+ return 0;
+}
+
+/*
+ * release handler of the log-buffer fd: stops device logging, frees the
+ * buffer and resets all log-buffer state of the container (including the
+ * cached fd number). Always returns 0.
+ */
+static int vfio_log_buf_release(struct inode *inode, struct file *filep)
+{
+ struct vfio_container *container = filep->private_data;
+
+ /* return values are ignored: teardown is best effort */
+ vfio_log_buf_stop(container);
+ vfio_log_buf_release_buffer(container);
+ /*
+ * NOTE(review): if vfio_log_buf_release_buffer() failed (no device left
+ * in the container) this memset discards the only pointers to the
+ * arrays and DMA fragments - confirm whether that can leak.
+ */
+ memset(&container->log_buf, 0, sizeof(struct vfio_log_buf));
+ return 0;
+}
+
+/*
+ * Decode a VFIO_LOG_BUF_CTL request and run the matching operation.
+ *
+ * @arg is a user pointer to a struct vfio_log_buf_ctl whose flags field
+ * must equal exactly one of the VFIO_DEVICE_LOG_BUF_FLAG_* values.
+ * Returns -EFAULT if the request cannot be copied in, -EINVAL for an
+ * unknown flag, otherwise the result of the selected operation.
+ */
+static long vfio_ioctl_handle_log_buf_ctl(struct vfio_container *container,
+					  unsigned long arg)
+{
+	struct vfio_log_buf_ctl req;
+
+	if (copy_from_user(&req, (void __user *)arg, sizeof(req)))
+		return -EFAULT;
+
+	switch (req.flags) {
+	case VFIO_DEVICE_LOG_BUF_FLAG_SETUP:
+		return vfio_log_buf_setup(container, (unsigned long)req.data);
+	case VFIO_DEVICE_LOG_BUF_FLAG_RELEASE:
+		return vfio_log_buf_release_buffer(container);
+	case VFIO_DEVICE_LOG_BUF_FLAG_START:
+		return vfio_log_buf_start(container);
+	case VFIO_DEVICE_LOG_BUF_FLAG_STOP:
+		return vfio_log_buf_stop(container);
+	case VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY:
+		return vfio_log_buf_query(container);
+	default:
+		pr_err("log buf control flag incorrect\n");
+		return -EINVAL;
+	}
+}
+
+/*
+ * ioctl entry point of the log-buffer fd. Only VFIO_LOG_BUF_CTL is
+ * accepted; every other command is logged and rejected with -EINVAL.
+ */
+static long vfio_log_buf_fops_unl_ioctl(struct file *filep,
+					unsigned int cmd, unsigned long arg)
+{
+	if (cmd != VFIO_LOG_BUF_CTL) {
+		pr_err("log buf control cmd incorrect\n");
+		return -EINVAL;
+	}
+
+	return vfio_ioctl_handle_log_buf_ctl(filep->private_data, arg);
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * 32-bit compat entry point: converts @arg with compat_ptr() and reuses the
+ * native handler.
+ *
+ * NOTE(review): struct vfio_log_buf_ctl and struct vfio_log_buf_info both
+ * contain pointer members, so their layout presumably differs for a 32-bit
+ * caller - confirm whether a translation step is needed.
+ */
+static long vfio_log_buf_fops_compat_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ arg = (unsigned long)compat_ptr(arg);
+ return vfio_log_buf_fops_unl_ioctl(filep, cmd, arg);
+}
+#endif /* CONFIG_COMPAT */
+
+/*
+ * File operations of the anonymous "[vfio-log-buf]" file created by
+ * vfio_get_log_buf_fd(): control via ioctl, fragment access via mmap,
+ * teardown on release. No read/write handlers are provided.
+ */
+static const struct file_operations vfio_log_buf_fops = {
+ .owner = THIS_MODULE,
+ .mmap = vfio_log_buf_fops_mmap,
+ .unlocked_ioctl = vfio_log_buf_fops_unl_ioctl,
+ .release = vfio_log_buf_release,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = vfio_log_buf_fops_compat_ioctl,
+#endif
+};
+
+/*
+ * Create (once) the log-buffer control file for @container and return its
+ * fd number. @arg is not used.
+ *
+ * The file is an anonymous inode whose private_data is the container. The
+ * fd number is cached in container->log_buf.fd and returned again by later
+ * calls for as long as the cached value is > 0.
+ *
+ * Returns the fd number, or a negative errno if no fd or file could be
+ * obtained.
+ *
+ * NOTE(review): this function takes no reference on the container for the
+ * new file - confirm the container cannot be freed while the file is open.
+ * NOTE(review): the cached number is returned without checking that it
+ * still refers to this file in the calling process - confirm.
+ */
+static int vfio_get_log_buf_fd(struct vfio_container *container,
+ unsigned long arg)
+{
+ struct file *filep = NULL;
+ int ret;
+
+ if (container->log_buf.fd > 0)
+ return container->log_buf.fd;
+
+ ret = get_unused_fd_flags(O_CLOEXEC);
+ if (ret < 0) {
+ pr_err("get_unused_fd_flags get fd failed\n");
+ return ret;
+ }
+
+ filep = anon_inode_getfile("[vfio-log-buf]", &vfio_log_buf_fops,
+ container, O_RDWR);
+ if (IS_ERR(filep)) {
+ pr_err("anon_inode_getfile failed\n");
+ /* give the reserved fd number back before reporting the error */
+ put_unused_fd(ret);
+ ret = PTR_ERR(filep);
+ return ret;
+ }
+
+ filep->f_mode |= (FMODE_READ | FMODE_WRITE | FMODE_LSEEK);
+
+ /* from here on the fd is visible to userspace */
+ fd_install(ret, filep);
+
+ container->log_buf.fd = ret;
+ return ret;
+}
+
static long vfio_fops_unl_ioctl(struct file *filep,
- unsigned int cmd, unsigned long arg)
+ unsigned int cmd, unsigned long arg)
{
struct vfio_container *container = filep->private_data;
struct vfio_iommu_driver *driver;
@@ -1179,6 +1582,9 @@ static long vfio_fops_unl_ioctl(struct file *filep,
case VFIO_SET_IOMMU:
ret = vfio_ioctl_set_iommu(container, arg);
break;
+ case VFIO_GET_LOG_BUF_FD:
+ ret = vfio_get_log_buf_fd(container, arg);
+ break;
default:
driver = container->iommu_driver;
data = container->iommu_data;
@@ -1210,6 +1616,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
INIT_LIST_HEAD(&container->group_list);
init_rwsem(&container->group_lock);
kref_init(&container->kref);
+ memset(&container->log_buf, 0, sizeof(struct vfio_log_buf));
filep->private_data = container;
@@ -1219,9 +1626,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
static int vfio_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_container *container = filep->private_data;
-
filep->private_data = NULL;
-
vfio_container_put(container);
return 0;
diff --git a/include/linux/vfio_pci_migration.h b/include/linux/vfio_pci_migration.h
new file mode 100644
index 0000000..464ffb4
--- /dev/null
+++ b/include/linux/vfio_pci_migration.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022 Huawei Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef VFIO_PCI_MIGRATION_H
+#define VFIO_PCI_MIGRATION_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+
+#define VFIO_REGION_TYPE_MIGRATION (3)
+/* sub-types for VFIO_REGION_TYPE_MIGRATION */
+#define VFIO_REGION_SUBTYPE_MIGRATION (1)
+
+#define VFIO_MIGRATION_BUFFER_MAX_SIZE SZ_256K
+#define VFIO_MIGRATION_REGION_DATA_OFFSET \
+ (sizeof(struct vfio_device_migration_info))
+#define VFIO_DEVICE_MIGRATION_OFFSET(x) \
+ offsetof(struct vfio_device_migration_info, x)
+
+/*
+ * Control header of the migration region.
+ *
+ * VFIO_MIGRATION_REGION_DATA_OFFSET is defined as sizeof() of this
+ * structure, so the device data presumably follows it directly in the
+ * region - TODO confirm against vfio_pci_migration.c.
+ */
+struct vfio_device_migration_info {
+ __u32 device_state; /* VFIO device state */
+#define VFIO_DEVICE_STATE_STOP (0)
+#define VFIO_DEVICE_STATE_RUNNING (1 << 0)
+#define VFIO_DEVICE_STATE_SAVING (1 << 1)
+#define VFIO_DEVICE_STATE_RESUMING (1 << 2)
+#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \
+ VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING)
+ __u32 reserved;
+
+ /* presumably one of the VFIO_DEVICE_STOP/CONTINUE/MIGRATION_CANCEL codes - TODO confirm */
+ __u32 device_cmd;
+ __u32 version_id;
+
+ __u64 pending_bytes;
+ __u64 data_offset;
+ __u64 data_size;
+};
+
+/*
+ * Device command codes, numbered consecutively from 0xffff0001.
+ * NOTE(review): presumably the values written to
+ * vfio_device_migration_info.device_cmd - confirm against the users.
+ */
+enum {
+ VFIO_DEVICE_STOP = 0xffff0001,
+ VFIO_DEVICE_CONTINUE,
+ VFIO_DEVICE_MIGRATION_CANCEL,
+};
+
+/* One fragment of the log buffer as seen by the device. */
+struct vfio_log_buf_sge {
+ __u64 len; /* fragment length in bytes */
+ __u64 addr; /* DMA address returned by dma_alloc_coherent() */
+};
+
+/*
+ * Description of a log buffer. Userspace passes one to the SETUP request;
+ * the kernel keeps its own copy per container and hands a pointer to that
+ * copy to the devices on START/STOP/STATUS_QUERY.
+ *
+ * NOTE(review): the structure is copied from userspace as-is; it has
+ * implicit padding after uuid on ABIs that align __u64 to 8 bytes and a
+ * pointer member whose size depends on the ABI - confirm this is intended.
+ */
+struct vfio_log_buf_info {
+ __u32 uuid; /* stored unchanged by vfio_log_buf_setup() */
+ __u64 buffer_size; /* stored unchanged by vfio_log_buf_setup() */
+ __u64 addrs_size; /* number of fragments, 1..128 */
+ __u64 frag_size; /* bytes per fragment, must be 2 MiB */
+ /* fragment array; allocated by the kernel, the user-supplied value is not used */
+ struct vfio_log_buf_sge *sgevec;
+};
+
+/*
+ * Argument of VFIO_LOG_BUF_CTL (from userspace) and of
+ * VFIO_DEVICE_LOG_BUF_CTL (built by the kernel for each device).
+ */
+struct vfio_log_buf_ctl {
+ /* set to sizeof(struct vfio_log_buf_info) by the kernel; not checked on the user path */
+ __u32 argsz;
+ /* exactly one of the values below; compared for equality, not as a bit mask */
+ __u32 flags;
+ #define VFIO_DEVICE_LOG_BUF_FLAG_SETUP (1 << 0)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_RELEASE (1 << 1)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_START (1 << 2)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_STOP (1 << 3)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY (1 << 4)
+ /* SETUP: user pointer to a struct vfio_log_buf_info; device path: kernel pointer to the container's copy */
+ void *data;
+};
+#define VFIO_LOG_BUF_CTL _IO(VFIO_TYPE, VFIO_BASE + 21)
+#define VFIO_GET_LOG_BUF_FD _IO(VFIO_TYPE, VFIO_BASE + 22)
+#define VFIO_DEVICE_LOG_BUF_CTL _IO(VFIO_TYPE, VFIO_BASE + 23)
+
+/*
+ * Log buffer description passed to the vendor driver's log_start()
+ * callback.
+ * NOTE(review): the fields presumably mirror struct vfio_log_buf_info
+ * (uuid, buffer_size, frag_size, addrs_size, sgevec) - confirm against
+ * the code that fills it in.
+ */
+struct vf_migration_log_info {
+ __u32 dom_uuid;
+ __u64 buffer_size;
+ __u64 sge_len;
+ __u64 sge_num;
+ struct vfio_log_buf_sge *sgevec;
+};
+
+/*
+ * Callbacks a vendor driver provides for one PCI device; registered with
+ * vfio_pci_register_migration_ops(). Callbacks returning int presumably
+ * follow the 0 / negative errno convention - TODO confirm.
+ */
+struct vfio_device_migration_ops {
+ /* Get device information */
+ int (*get_info)(struct pci_dev *pdev,
+ struct vfio_device_migration_info *info);
+ /* Enable a vf device */
+ int (*enable)(struct pci_dev *pdev);
+ /* Disable a vf device */
+ int (*disable)(struct pci_dev *pdev);
+ /* Save a vf device */
+ int (*save)(struct pci_dev *pdev, void *base,
+ uint64_t off, uint64_t count);
+ /* Resuming a vf device */
+ int (*restore)(struct pci_dev *pdev, void *base,
+ uint64_t off, uint64_t count);
+ /* Log start a vf device */
+ int (*log_start)(struct pci_dev *pdev,
+ struct vf_migration_log_info *log_info);
+ /* Log stop a vf device */
+ int (*log_stop)(struct pci_dev *pdev, uint32_t uuid);
+ /* Get vf device log status */
+ int (*get_log_status)(struct pci_dev *pdev);
+ /* Pre enable a vf device(load_setup, before restore a vf) */
+ int (*pre_enable)(struct pci_dev *pdev);
+ /* Cancel a vf device when live migration failed (rollback) */
+ int (*cancel)(struct pci_dev *pdev);
+ /* Init a vf device */
+ int (*init)(struct pci_dev *pdev);
+ /* Uninit a vf device */
+ void (*uninit)(struct pci_dev *pdev);
+ /* Release a vf device */
+ void (*release)(struct pci_dev *pdev);
+};
+
+/*
+ * Bookkeeping record for one registered vendor migration driver.
+ * NOTE(review): presumably created by vfio_pci_register_migration_ops()
+ * and linked into a driver list through @list - confirm against
+ * vfio_pci_migration.c.
+ */
+struct vfio_pci_vendor_mig_driver {
+ struct pci_dev *pdev;
+ unsigned char bus_num;
+ struct vfio_device_migration_ops *dev_mig_ops;
+ struct module *owner;
+ atomic_t count;
+ struct list_head list;
+};
+
+/*
+ * Migration state attached to one VF.
+ * NOTE(review): field meanings are not visible from this header alone;
+ * mig_ctl presumably points at the control header of the migration region
+ * and vf_data at the device data that follows it - confirm against
+ * vfio_pci_migration.c.
+ */
+struct vfio_pci_migration_data {
+ u64 state_size;
+ struct pci_dev *vf_dev;
+ struct vfio_pci_vendor_mig_driver *mig_driver;
+ struct vfio_device_migration_info *mig_ctl;
+ void *vf_data;
+};
+
+int vfio_pci_register_migration_ops(struct vfio_device_migration_ops *ops,
+ struct module *mod, struct pci_dev *pdev);
+void vfio_pci_unregister_migration_ops(struct module *mod,
+ struct pci_dev *pdev);
+
+#endif /* VFIO_PCI_MIGRATION_H */
--
1.8.3.1
3
2
[PATCH openEuler-1.0-LTS 1/4] alinux: sched: Defend cfs and rt bandwidth against overflow
by liuzhengyuan@kylinos.cn 30 Jun '22
by liuzhengyuan@kylinos.cn 30 Jun '22
30 Jun '22
From: Huaixin Chang <changhuaixin(a)linux.alibaba.com>
anolis inclusion
from anolis_master
commit 9d168f216486333f24aa1b33706eddf3b13d7228
category: performance
bugzilla: NA
CVE: NA
---------------------------
Kernel limitation on cpu.cfs_quota_us is insufficient. Some large
numbers might cause overflow in to_ratio() calculation and produce
unexpected results.
For example, if we make two cpu cgroups and then write a reasonable
value into the child's cpu.cfs_quota_us and a large value into the
parent's, the second write fails:
cd /sys/fs/cgroup/cpu
mkdir parent; mkdir parent/child
echo 8000 > parent/child/cpu.cfs_quota_us
# 17592186044416 is (1UL << 44)
echo 17592186044416 > parent/cpu.cfs_quota_us
In this case, quota will overflow and thus fail the __cfs_schedulable
check. Similar overflow also affects rt bandwidth.
Burstable CFS bandwidth controller will also benefit from limiting
quota.
Change-Id: I0f89d1f26b168c5cfa041e886395c7f3068114ae
Reviewed-by: Shanpei Chen <shanpeic(a)linux.alibaba.com>
Signed-off-by: Huaixin Chang <changhuaixin(a)linux.alibaba.com>
Signed-off-by: Zhengyuan Liu <liuzhengyuan(a)kylinos.cn>
---
kernel/sched/core.c | 8 ++++++++
kernel/sched/rt.c | 9 +++++++++
kernel/sched/sched.h | 2 ++
3 files changed, 19 insertions(+)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 36d7422da0ac..51fdd30f188a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6679,6 +6679,8 @@ static DEFINE_MUTEX(cfs_constraints_mutex);
const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
+/* More than 203 days if BW_SHIFT equals 20. */
+const u64 max_cfs_runtime = MAX_BW_USEC * NSEC_PER_USEC;
static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
@@ -6706,6 +6708,12 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
if (period > max_cfs_quota_period)
return -EINVAL;
+ /*
+ * Bound quota to defend quota against overflow during bandwidth shift.
+ */
+ if (quota != RUNTIME_INF && quota > max_cfs_runtime)
+ return -EINVAL;
+
/*
* Prevent race between setting of cfs_rq->runtime_enabled and
* unthrottle_offline_cfs_rqs().
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 301ba04d9130..f31e0aaf1f43 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2518,6 +2518,9 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
return ret;
}
+/* More than 203 days if BW_SHIFT equals 20. */
+static const u64 max_rt_runtime = MAX_BW_USEC * NSEC_PER_USEC;
+
static int tg_set_rt_bandwidth(struct task_group *tg,
u64 rt_period, u64 rt_runtime)
{
@@ -2534,6 +2537,12 @@ static int tg_set_rt_bandwidth(struct task_group *tg,
if (rt_period == 0)
return -EINVAL;
+ /*
+ * Bound quota to defend quota against overflow during bandwidth shift.
+ */
+ if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
+ return -EINVAL;
+
mutex_lock(&rt_constraints_mutex);
read_lock(&tasklist_lock);
err = __rt_schedulable(tg, rt_period, rt_runtime);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ae3068153093..f3808a49ce48 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1732,6 +1732,8 @@ extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
#define BW_SHIFT 20
#define BW_UNIT (1 << BW_SHIFT)
#define RATIO_SHIFT 8
+#define MAX_BW_BITS (64 - BW_SHIFT)
+/*
+ * Largest value that can still be shifted left by BW_SHIFT without
+ * overflowing 64 bits. The constant must be 1ULL: unsigned long is only
+ * 32 bits wide on 32-bit builds, where shifting it by MAX_BW_BITS (44)
+ * would be undefined behaviour.
+ */
+#define MAX_BW_USEC ((1ULL << MAX_BW_BITS) - 1)
unsigned long to_ratio(u64 period, u64 runtime);
extern void init_entity_runnable_average(struct sched_entity *se);
--
2.25.1
3
6
29 Jun '22
From: Zhou Guanghui <zhouguanghui1(a)huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5EORS
CVE: NA
--------------------------------
The single-group mode has no application scenario. Therefore, the
related branch is deleted.
The boot option "enable_sp_multi_group_mode" does not take effect.
Signed-off-by: Zhou Guanghui <zhouguanghui1(a)huawei.com>
Reviewed-by: Weilong Chen <chenweilong(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
mm/share_pool.c | 137 +++++++++---------------------------------------
1 file changed, 25 insertions(+), 112 deletions(-)
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 5ba353ddfabd..48e309e3ddd3 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -65,9 +65,6 @@
#define byte2mb(size) ((size) >> 20)
#define page2kb(page_num) ((page_num) << (PAGE_SHIFT - 10))
-#define SINGLE_GROUP_MODE 1
-#define MULTI_GROUP_MODE 2
-
#define MAX_GROUP_FOR_SYSTEM 50000
#define MAX_GROUP_FOR_TASK 3000
#define MAX_PROC_PER_GROUP 1024
@@ -98,8 +95,6 @@ int sysctl_sp_perf_alloc;
int sysctl_sp_perf_k2u;
-static int share_pool_group_mode = SINGLE_GROUP_MODE;
-
static int system_group_count;
static unsigned int sp_device_number;
@@ -1088,12 +1083,6 @@ static int mm_add_group_init(struct mm_struct *mm, struct sp_group *spg)
struct sp_group_master *master = mm->sp_group_master;
bool exist = false;
- if (share_pool_group_mode == SINGLE_GROUP_MODE && master &&
- master->count == 1) {
- pr_err_ratelimited("at most one sp group for a task is allowed in single mode\n");
- return -EEXIST;
- }
-
master = sp_init_group_master_locked(mm, &exist);
if (IS_ERR(master))
return PTR_ERR(master);
@@ -2235,72 +2224,30 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
if (sp_flags & SP_HUGEPAGE_ONLY)
sp_flags |= SP_HUGEPAGE;
- if (share_pool_group_mode == SINGLE_GROUP_MODE) {
- spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT);
- if (spg) {
- if (spg_id != SPG_ID_DEFAULT && spg->id != spg_id) {
- sp_group_drop(spg);
- return -ENODEV;
- }
-
- /* up_read will be at the end of sp_alloc */
- down_read(&spg->rw_lock);
- if (!spg_valid(spg)) {
- up_read(&spg->rw_lock);
- sp_group_drop(spg);
- pr_err_ratelimited("allocation failed, spg is dead\n");
- return -ENODEV;
- }
- } else { /* alocation pass through scene */
- if (enable_mdc_default_group) {
- int ret = 0;
-
- ret = sp_group_add_task(current->tgid, spg_id);
- if (ret < 0) {
- pr_err_ratelimited("add group failed in pass through\n");
- return ret;
- }
-
- spg = __sp_find_spg(current->pid, SPG_ID_DEFAULT);
-
- /* up_read will be at the end of sp_alloc */
- down_read(&spg->rw_lock);
- if (!spg_valid(spg)) {
- up_read(&spg->rw_lock);
- sp_group_drop(spg);
- pr_err_ratelimited("pass through allocation failed, spg is dead\n");
- return -ENODEV;
- }
- } else {
- spg = spg_none;
- }
+ if (spg_id != SPG_ID_DEFAULT) {
+ spg = __sp_find_spg(current->pid, spg_id);
+ if (!spg) {
+ pr_err_ratelimited("allocation failed, can't find group\n");
+ return -ENODEV;
}
- } else {
- if (spg_id != SPG_ID_DEFAULT) {
- spg = __sp_find_spg(current->pid, spg_id);
- if (!spg) {
- pr_err_ratelimited("allocation failed, can't find group\n");
- return -ENODEV;
- }
- /* up_read will be at the end of sp_alloc */
- down_read(&spg->rw_lock);
- if (!spg_valid(spg)) {
- up_read(&spg->rw_lock);
- sp_group_drop(spg);
- pr_err_ratelimited("allocation failed, spg is dead\n");
- return -ENODEV;
- }
+ /* up_read will be at the end of sp_alloc */
+ down_read(&spg->rw_lock);
+ if (!spg_valid(spg)) {
+ up_read(&spg->rw_lock);
+ sp_group_drop(spg);
+ pr_err_ratelimited("allocation failed, spg is dead\n");
+ return -ENODEV;
+ }
- if (!is_process_in_group(spg, current->mm)) {
- up_read(&spg->rw_lock);
- sp_group_drop(spg);
- pr_err_ratelimited("allocation failed, task not in group\n");
- return -ENODEV;
- }
- } else { /* alocation pass through scene */
- spg = spg_none;
+ if (!is_process_in_group(spg, current->mm)) {
+ up_read(&spg->rw_lock);
+ sp_group_drop(spg);
+ pr_err_ratelimited("allocation failed, task not in group\n");
+ return -ENODEV;
}
+ } else { /* alocation pass through scene */
+ spg = spg_none;
}
if (sp_flags & SP_HUGEPAGE) {
@@ -2914,33 +2861,12 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size,
kc->size_aligned = size_aligned;
kc->sp_flags = sp_flags;
kc->spg_id = spg_id;
- kc->to_task = false;
- return 0;
-}
-
-static int sp_check_k2task(struct sp_k2u_context *kc)
-{
- int ret = 0;
- int spg_id = kc->spg_id;
-
- if (share_pool_group_mode == SINGLE_GROUP_MODE) {
- struct sp_group *spg = get_first_group(current->mm);
+ if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE)
+ kc->to_task = true;
+ else
+ kc->to_task = false;
- if (!spg) {
- if (spg_id != SPG_ID_NONE && spg_id != SPG_ID_DEFAULT)
- ret = -EINVAL;
- else
- kc->to_task = true;
- } else {
- if (spg_id != SPG_ID_DEFAULT && spg_id != spg->id)
- ret = -EINVAL;
- sp_group_drop(spg);
- }
- } else {
- if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE)
- kc->to_task = true;
- }
- return ret;
+ return 0;
}
static void *sp_k2u_finish(void *uva, struct sp_k2u_context *kc)
@@ -2985,12 +2911,6 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
if (ret)
return ERR_PTR(ret);
- ret = sp_check_k2task(&kc);
- if (ret) {
- uva = ERR_PTR(ret);
- goto out;
- }
-
if (kc.to_task)
uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags);
else {
@@ -3738,13 +3658,6 @@ static int __init enable_share_k2u_to_group(char *s)
}
__setup("enable_sp_share_k2u_spg", enable_share_k2u_to_group);
-static int __init enable_sp_multi_group_mode(char *s)
-{
- share_pool_group_mode = MULTI_GROUP_MODE;
- return 1;
-}
-__setup("enable_sp_multi_group_mode", enable_sp_multi_group_mode);
-
/*** Statistical and maintenance functions ***/
static void free_process_spg_proc_stat(struct sp_proc_stat *proc_stat)
--
2.25.1
1
13
[PATCH openEuler-22.03-LTS 1/2] ipmi/watchdog: replace atomic_add() and atomic_sub()
by Miaohe Lin 29 Jun '22
by Miaohe Lin 29 Jun '22
29 Jun '22
From: Yejune Deng <yejune.deng(a)gmail.com>
mainline inclusion
from v5.11-rc1
commit a01a89b1db1066a6af23ae08b9a0c345b7966f0b
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DVR9
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
atomic_inc() and atomic_dec() looks better
Signed-off-by: Yejune Deng <yejune.deng(a)gmail.com>
Message-Id: <1605511807-7135-1-git-send-email-yejune.deng(a)gmail.com>
Signed-off-by: Corey Minyard <cminyard(a)mvista.com>
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
---
drivers/char/ipmi/ipmi_watchdog.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index f78156d93c3f..32c334e34d55 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -495,7 +495,7 @@ static void panic_halt_ipmi_heartbeat(void)
msg.cmd = IPMI_WDOG_RESET_TIMER;
msg.data = NULL;
msg.data_len = 0;
- atomic_add(1, &panic_done_count);
+ atomic_inc(&panic_done_count);
rv = ipmi_request_supply_msgs(watchdog_user,
(struct ipmi_addr *) &addr,
0,
@@ -505,7 +505,7 @@ static void panic_halt_ipmi_heartbeat(void)
&panic_halt_heartbeat_recv_msg,
1);
if (rv)
- atomic_sub(1, &panic_done_count);
+ atomic_dec(&panic_done_count);
}
static struct ipmi_smi_msg panic_halt_smi_msg = {
@@ -529,12 +529,12 @@ static void panic_halt_ipmi_set_timeout(void)
/* Wait for the messages to be free. */
while (atomic_read(&panic_done_count) != 0)
ipmi_poll_interface(watchdog_user);
- atomic_add(1, &panic_done_count);
+ atomic_inc(&panic_done_count);
rv = __ipmi_set_timeout(&panic_halt_smi_msg,
&panic_halt_recv_msg,
&send_heartbeat_now);
if (rv) {
- atomic_sub(1, &panic_done_count);
+ atomic_dec(&panic_done_count);
pr_warn("Unable to extend the watchdog timeout\n");
} else {
if (send_heartbeat_now)
--
2.23.0
1
1
[PATCH openEuler-5.10-LTS 1/8] net: hns3: set port base vlan tbl_sta to false before removing old vlan
by Zheng Zengkai 28 Jun '22
by Zheng Zengkai 28 Jun '22
28 Jun '22
From: Guangbin Huang <huangguangbin2(a)huawei.com>
mainline inclusion
from mainline-v5.19-rc1
commit 9eda7d8bcbdb
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DGNU
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
----------------------------------------------------------------------
When modifying the port base vlan, the port base vlan tbl_sta needs to be
set to false before removing the old vlan, to indicate that the operation
has not finished.
Fixes: c0f46de30c96 ("net: hns3: fix port base vlan add fail when concurrent with reset")
Signed-off-by: Guangbin Huang <huangguangbin2(a)huawei.com>
Signed-off-by: David S. Miller <davem(a)davemloft.net>
Signed-off-by: Jiantao Xiao <xiaojiantao1(a)h-partners.com>
Reviewed-by: Jian Shen <shenjian15(a)huawei.com>
Reviewed-by: Yue Haibing <yuehaibing(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 3e0d8388ad3f..57aaa2f1536f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -10117,6 +10117,7 @@ static int hclge_modify_port_base_vlan_tag(struct hclge_vport *vport,
if (ret)
return ret;
+ vport->port_base_vlan_cfg.tbl_sta = false;
/* remove old VLAN tag */
if (old_info->vlan_tag == 0)
ret = hclge_set_vf_vlan_common(hdev, vport->vport_id,
--
2.20.1
1
7
[PATCH OLK-5.10 v3 1/2] ipmi/watchdog: replace atomic_add() and atomic_sub()
by Miaohe Lin 28 Jun '22
by Miaohe Lin 28 Jun '22
28 Jun '22
From: Yejune Deng <yejune.deng(a)gmail.com>
mainline inclusion
from v5.11-rc1
commit a01a89b1db1066a6af23ae08b9a0c345b7966f0b
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DVR9
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
atomic_inc() and atomic_dec() look better
Signed-off-by: Yejune Deng <yejune.deng(a)gmail.com>
Message-Id: <1605511807-7135-1-git-send-email-yejune.deng(a)gmail.com>
Signed-off-by: Corey Minyard <cminyard(a)mvista.com>
Signed-off-by: Miaohe Lin <linmiaohe(a)huawei.com>
---
drivers/char/ipmi/ipmi_watchdog.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index f78156d93c3f..32c334e34d55 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -495,7 +495,7 @@ static void panic_halt_ipmi_heartbeat(void)
msg.cmd = IPMI_WDOG_RESET_TIMER;
msg.data = NULL;
msg.data_len = 0;
- atomic_add(1, &panic_done_count);
+ atomic_inc(&panic_done_count);
rv = ipmi_request_supply_msgs(watchdog_user,
(struct ipmi_addr *) &addr,
0,
@@ -505,7 +505,7 @@ static void panic_halt_ipmi_heartbeat(void)
&panic_halt_heartbeat_recv_msg,
1);
if (rv)
- atomic_sub(1, &panic_done_count);
+ atomic_dec(&panic_done_count);
}
static struct ipmi_smi_msg panic_halt_smi_msg = {
@@ -529,12 +529,12 @@ static void panic_halt_ipmi_set_timeout(void)
/* Wait for the messages to be free. */
while (atomic_read(&panic_done_count) != 0)
ipmi_poll_interface(watchdog_user);
- atomic_add(1, &panic_done_count);
+ atomic_inc(&panic_done_count);
rv = __ipmi_set_timeout(&panic_halt_smi_msg,
&panic_halt_recv_msg,
&send_heartbeat_now);
if (rv) {
- atomic_sub(1, &panic_done_count);
+ atomic_dec(&panic_done_count);
pr_warn("Unable to extend the watchdog timeout\n");
} else {
if (send_heartbeat_now)
--
2.23.0
1
1
28 Jun '22
From: Daniel Thompson <daniel.thompson(a)linaro.org>
from stable-v5.10.119
commit a8f4d63142f947cd22fa615b8b3b8921cdaf4991
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5A5YP
CVE: CVE-2022-21499
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
commit eadb2f47a3ced5c64b23b90fd2a3463f63726066 upstream.
KGDB and KDB allow read and write access to kernel memory, and thus
should be restricted during lockdown. An attacker with access to a
serial port (for example, via a hypervisor console, which some cloud
vendors provide over the network) could trigger the debugger so it is
important that the debugger respect the lockdown mode when/if it is
triggered.
Fix this by integrating lockdown into kdb's existing permissions
mechanism. Unfortunately kgdb does not have any permissions mechanism
(although it certainly could be added later) so, for now, kgdb is simply
and brutally disabled by immediately exiting the gdb stub without taking
any action.
For lockdowns established early in the boot (e.g. the normal case) then
this should be fine but on systems where kgdb has set breakpoints before
the lockdown is enacted then "bad things" will happen.
CVE: CVE-2022-21499
Co-developed-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Signed-off-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Reviewed-by: Douglas Anderson <dianders(a)chromium.org>
Signed-off-by: Daniel Thompson <daniel.thompson(a)linaro.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Zheng Yejian <zhengyejian1(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
include/linux/security.h | 2 ++
kernel/debug/debug_core.c | 24 ++++++++++++++
kernel/debug/kdb/kdb_main.c | 62 +++++++++++++++++++++++++++++++++++--
security/security.c | 2 ++
4 files changed, 87 insertions(+), 3 deletions(-)
diff --git a/include/linux/security.h b/include/linux/security.h
index 35355429648e..330029ef7e89 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -121,10 +121,12 @@ enum lockdown_reason {
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
LOCKDOWN_BPF_WRITE_USER,
+ LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_INTEGRITY_MAX,
LOCKDOWN_KCORE,
LOCKDOWN_KPROBES,
LOCKDOWN_BPF_READ,
+ LOCKDOWN_DBG_READ_KERNEL,
LOCKDOWN_PERF,
LOCKDOWN_TRACEFS,
LOCKDOWN_XMON_RW,
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index be5b6b97adbf..363f781b56ca 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -56,6 +56,7 @@
#include <linux/vmacache.h>
#include <linux/rcupdate.h>
#include <linux/irq.h>
+#include <linux/security.h>
#include <asm/cacheflush.h>
#include <asm/byteorder.h>
@@ -762,6 +763,29 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
continue;
kgdb_connected = 0;
} else {
+ /*
+ * This is a brutal way to interfere with the debugger
+ * and prevent gdb being used to poke at kernel memory.
+ * This could cause trouble if lockdown is applied when
+ * there is already an active gdb session. For now the
+ * answer is simply "don't do that". Typically lockdown
+ * *will* be applied before the debug core gets started
+ * so only developers using kgdb for fairly advanced
+ * early kernel debug can be biten by this. Hopefully
+ * they are sophisticated enough to take care of
+ * themselves, especially with help from the lockdown
+ * message printed on the console!
+ */
+ if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) {
+ if (IS_ENABLED(CONFIG_KGDB_KDB)) {
+ /* Switch back to kdb if possible... */
+ dbg_kdb_mode = 1;
+ continue;
+ } else {
+ /* ... otherwise just bail */
+ break;
+ }
+ }
error = gdb_serial_stub(ks);
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 930ac1b25ec7..4e09fab52faf 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -45,6 +45,7 @@
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/security.h>
#include "kdb_private.h"
#undef MODULE_PARAM_PREFIX
@@ -197,10 +198,62 @@ struct task_struct *kdb_curr_task(int cpu)
}
/*
- * Check whether the flags of the current command and the permissions
- * of the kdb console has allow a command to be run.
+ * Update the permissions flags (kdb_cmd_enabled) to match the
+ * current lockdown state.
+ *
+ * Within this function the calls to security_locked_down() are "lazy". We
+ * avoid calling them if the current value of kdb_cmd_enabled already excludes
+ * flags that might be subject to lockdown. Additionally we deliberately check
+ * the lockdown flags independently (even though read lockdown implies write
+ * lockdown) since that results in both simpler code and clearer messages to
+ * the user on first-time debugger entry.
+ *
+ * The permission masks during a read+write lockdown permits the following
+ * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE).
+ *
+ * The INSPECT commands are not blocked during lockdown because they are
+ * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes
+ * forcing them to have no arguments) and lsmod. These commands do expose
+ * some kernel state but do not allow the developer seated at the console to
+ * choose what state is reported. SIGNAL and REBOOT should not be controversial,
+ * given these are allowed for root during lockdown already.
+ */
+static void kdb_check_for_lockdown(void)
+{
+ const int write_flags = KDB_ENABLE_MEM_WRITE |
+ KDB_ENABLE_REG_WRITE |
+ KDB_ENABLE_FLOW_CTRL;
+ const int read_flags = KDB_ENABLE_MEM_READ |
+ KDB_ENABLE_REG_READ;
+
+ bool need_to_lockdown_write = false;
+ bool need_to_lockdown_read = false;
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags))
+ need_to_lockdown_write =
+ security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL);
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags))
+ need_to_lockdown_read =
+ security_locked_down(LOCKDOWN_DBG_READ_KERNEL);
+
+ /* De-compose KDB_ENABLE_ALL if required */
+ if (need_to_lockdown_write || need_to_lockdown_read)
+ if (kdb_cmd_enabled & KDB_ENABLE_ALL)
+ kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL;
+
+ if (need_to_lockdown_write)
+ kdb_cmd_enabled &= ~write_flags;
+
+ if (need_to_lockdown_read)
+ kdb_cmd_enabled &= ~read_flags;
+}
+
+/*
+ * Check whether the flags of the current command, the permissions of the kdb
+ * console and the lockdown state allow a command to be run.
*/
-static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
+static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
bool no_args)
{
/* permissions comes from userspace so needs massaging slightly */
@@ -1194,6 +1247,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
kdb_curr_task(raw_smp_processor_id());
KDB_DEBUG_STATE("kdb_local 1", reason);
+
+ kdb_check_for_lockdown();
+
kdb_go_count = 0;
if (reason == KDB_REASON_DEBUG) {
/* special case below */
diff --git a/security/security.c b/security/security.c
index 4fb58543eeb9..2fc40217d49d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -59,10 +59,12 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
[LOCKDOWN_DEBUGFS] = "debugfs access",
[LOCKDOWN_XMON_WR] = "xmon write access",
[LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
+ [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
[LOCKDOWN_INTEGRITY_MAX] = "integrity",
[LOCKDOWN_KCORE] = "/proc/kcore access",
[LOCKDOWN_KPROBES] = "use of kprobes",
[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
+ [LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM",
[LOCKDOWN_PERF] = "unsafe use of perf",
[LOCKDOWN_TRACEFS] = "use of tracefs",
[LOCKDOWN_XMON_RW] = "xmon read and write access",
--
2.20.1
1
4
Backport 5.10.109 LTS patches from upstream
nds32: fix access_ok() checks in get/put_user
wcn36xx: Differentiate wcn3660 from wcn3620
tpm: use try_get_ops() in tpm-space.c
mac80211: fix potential double free on mesh join
rcu: Don't deboost before reporting expedited quiescent state
Revert "ath: add support for special 0x0 regulatory domain"
crypto: qat - disable registration of algorithms
ACPI: video: Force backlight native for Clevo NL5xRU and NL5xNU
ACPI: battery: Add device HID and quirk for Microsoft Surface Go 3
ACPI / x86: Work around broken XSDT on Advantech DAC-BJ01 board
drivers: net: xgene: Fix regression in CRC stripping
ALSA: pci: fix reading of swapped values from pcmreg in AC97 codec
ALSA: cmipci: Restore aux vol on suspend/resume
ALSA: usb-audio: Add mute TLV for playback volumes on RODE NT-USB
ALSA: pcm: Add stream lock during PCM reset ioctl operations
ALSA: pcm: Fix races among concurrent prealloc proc writes
ALSA: pcm: Fix races among concurrent prepare and hw_params/hw_free calls
ALSA: pcm: Fix races among concurrent read/write and buffer changes
ALSA: pcm: Fix races among concurrent hw_params and hw_free calls
ALSA: hda/realtek: Add quirk for ASUS GA402
ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc671
ALSA: hda/realtek: Add quirk for Clevo NP50PNJ
ALSA: hda/realtek: Add quirk for Clevo NP70PNJ
ALSA: usb-audio: add mapping for new Corsair Virtuoso SE
ALSA: oss: Fix PCM OSS buffer allocation overflow
ASoC: sti: Fix deadlock via snd_pcm_stop_xrun() call
staging: fbtft: fb_st7789v: reset display before initialization
tpm: Fix error handling in async work
cgroup-v1: Correct privileges check in release_agent writes
exfat: avoid incorrectly releasing for root inode
net: ipv6: fix skb_over_panic in __ip6_append_data
Already merged:
llc: only change llc->dev when bind() succeeds
netfilter: nf_tables: initialize registers in nft_do_chain()
llc: fix netdevice reference leaks in llc_ui_bind()
cgroup: Use open-time cgroup namespace for process migration perm checks
cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv
nfc: st21nfca: Fix potential buffer overflows in EVT_TRANSACTION
Total patches: 37 - 6 = 31
Arnd Bergmann (1):
nds32: fix access_ok() checks in get/put_user
Brian Norris (1):
Revert "ath: add support for special 0x0 regulatory domain"
Bryan O'Donoghue (1):
wcn36xx: Differentiate wcn3660 from wcn3620
Chen Li (1):
exfat: avoid incorrectly releasing for root inode
Giacomo Guiduzzi (1):
ALSA: pci: fix reading of swapped values from pcmreg in AC97 codec
Giovanni Cabiddu (1):
crypto: qat - disable registration of algorithms
James Bottomley (1):
tpm: use try_get_ops() in tpm-space.c
Jason Zheng (1):
ALSA: hda/realtek: Add quirk for ASUS GA402
Jonathan Teh (1):
ALSA: cmipci: Restore aux vol on suspend/resume
Lars-Peter Clausen (1):
ALSA: usb-audio: Add mute TLV for playback volumes on RODE NT-USB
Linus Lüssing (1):
mac80211: fix potential double free on mesh join
Mark Cilissen (1):
ACPI / x86: Work around broken XSDT on Advantech DAC-BJ01 board
Maximilian Luz (1):
ACPI: battery: Add device HID and quirk for Microsoft Surface Go 3
Michal Koutný (1):
cgroup-v1: Correct privileges check in release_agent writes
Oliver Graute (1):
staging: fbtft: fb_st7789v: reset display before initialization
Paul E. McKenney (1):
rcu: Don't deboost before reporting expedited quiescent state
Reza Jahanbakhshi (1):
ALSA: usb-audio: add mapping for new Corsair Virtuoso SE
Stephane Graber (1):
drivers: net: xgene: Fix regression in CRC stripping
Tadeusz Struk (2):
net: ipv6: fix skb_over_panic in __ip6_append_data
tpm: Fix error handling in async work
Takashi Iwai (7):
ASoC: sti: Fix deadlock via snd_pcm_stop_xrun() call
ALSA: oss: Fix PCM OSS buffer allocation overflow
ALSA: pcm: Fix races among concurrent hw_params and hw_free calls
ALSA: pcm: Fix races among concurrent read/write and buffer changes
ALSA: pcm: Fix races among concurrent prepare and hw_params/hw_free
calls
ALSA: pcm: Fix races among concurrent prealloc proc writes
ALSA: pcm: Add stream lock during PCM reset ioctl operations
Tim Crawford (2):
ALSA: hda/realtek: Add quirk for Clevo NP70PNJ
ALSA: hda/realtek: Add quirk for Clevo NP50PNJ
Werner Sembach (1):
ACPI: video: Force backlight native for Clevo NL5xRU and NL5xNU
huangwenhui (1):
ALSA: hda/realtek - Fix headset mic problem for a HP machine with
alc671
arch/nds32/include/asm/uaccess.h | 22 ++++-
arch/x86/kernel/acpi/boot.c | 24 +++++
drivers/acpi/battery.c | 12 +++
drivers/acpi/video_detect.c | 75 ++++++++++++++
drivers/char/tpm/tpm-dev-common.c | 8 +-
drivers/char/tpm/tpm2-space.c | 8 +-
drivers/crypto/qat/qat_common/qat_crypto.c | 8 ++
.../net/ethernet/apm/xgene/xgene_enet_main.c | 12 ++-
drivers/net/wireless/ath/regd.c | 10 +-
drivers/net/wireless/ath/wcn36xx/main.c | 3 +
drivers/net/wireless/ath/wcn36xx/wcn36xx.h | 1 +
drivers/staging/fbtft/fb_st7789v.c | 2 +
fs/exfat/super.c | 2 +-
include/sound/pcm.h | 1 +
kernel/cgroup/cgroup-v1.c | 6 +-
kernel/rcu/tree_plugin.h | 9 +-
net/ipv6/ip6_output.c | 4 +-
net/mac80211/cfg.c | 3 -
sound/core/oss/pcm_oss.c | 12 ++-
sound/core/oss/pcm_plugin.c | 5 +-
sound/core/pcm.c | 2 +
sound/core/pcm_lib.c | 4 +
sound/core/pcm_memory.c | 11 ++-
sound/core/pcm_native.c | 97 ++++++++++++-------
sound/pci/ac97/ac97_codec.c | 4 +-
sound/pci/cmipci.c | 3 +-
sound/pci/hda/patch_realtek.c | 4 +
sound/soc/sti/uniperif_player.c | 6 +-
sound/soc/sti/uniperif_reader.c | 2 +-
sound/usb/mixer_maps.c | 10 ++
sound/usb/mixer_quirks.c | 7 +-
31 files changed, 289 insertions(+), 88 deletions(-)
--
2.20.1
1
31
[PATCH openEuler-1.0-LTS] io_uring: io_close: Set owner as current->files if req->work.files uninitialized
by Yongqiang Liu 28 Jun '22
by Yongqiang Liu 28 Jun '22
28 Jun '22
From: Zhihao Cheng <chengzhihao1(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: 186543, https://gitee.com/openeuler/kernel/issues/I5BGFA
CVE: NA
--------------------------------
The following process will trigger a use-after-free problem:
1. open /proc/sysvipc/msg and lock it by file lock
fcntl_setlk
do_lock_file_wait
vfs_lock_file
posix_lock_file
locks_insert_lock_ctx
locks_insert_global_locks // Added to lock list
2. Close /proc/sysvipc/msg by io_uring
filp_close(close->put_file, req->work.files) // req->work.files is NULL
here: io_grab_files() initializes it, but non-async operations
don't invoke that function.
locks_remove_posix(filp, NULL)
lock.fl_owner = NULL
vfs_lock_file
posix_lock_file
posix_same_owner // Return false according to fl_owner.
locks_delete_lock_ctx(fl, &dispose) and locks_dispose_list
won't be executed, flock is not removed from lock list
fput(filp) // release filp
3. Read /proc/locks
seq_read
locks_start // Get flock from lock list
locks_show
lock_get_status
file_inode(f->file) // Access released file, UAF occurs!
Fix it by passing current->files when req->work.files is uninitialized.
The io-sq thread shares the same files with the uring_fd task, so this
still works in SQPOLL mode.
Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
fs/io_uring.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index c104425b2557..7ae8ba98e73b 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3903,7 +3903,7 @@ static int io_close(struct io_kiocb *req, bool force_nonblock,
}
/* No ->flush() or already async, safely close from here */
- ret = filp_close(close->put_file, req->work.files);
+ ret = filp_close(close->put_file, req->work.files ? : current->files);
if (ret < 0)
req_set_fail_links(req);
fput(close->put_file);
--
2.25.1
1
0
[PATCH openEuler-5.10-LTS 1/5] lockdown: also lock down previous kgdb use
by Zheng Zengkai 27 Jun '22
by Zheng Zengkai 27 Jun '22
27 Jun '22
From: Daniel Thompson <daniel.thompson(a)linaro.org>
from stable-v5.10.119
commit a8f4d63142f947cd22fa615b8b3b8921cdaf4991
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5A5YP
CVE: CVE-2022-21499
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
commit eadb2f47a3ced5c64b23b90fd2a3463f63726066 upstream.
KGDB and KDB allow read and write access to kernel memory, and thus
should be restricted during lockdown. An attacker with access to a
serial port (for example, via a hypervisor console, which some cloud
vendors provide over the network) could trigger the debugger so it is
important that the debugger respect the lockdown mode when/if it is
triggered.
Fix this by integrating lockdown into kdb's existing permissions
mechanism. Unfortunately kgdb does not have any permissions mechanism
(although it certainly could be added later) so, for now, kgdb is simply
and brutally disabled by immediately exiting the gdb stub without taking
any action.
For lockdowns established early in the boot (e.g. the normal case) then
this should be fine but on systems where kgdb has set breakpoints before
the lockdown is enacted then "bad things" will happen.
CVE: CVE-2022-21499
Co-developed-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Signed-off-by: Stephen Brennan <stephen.s.brennan(a)oracle.com>
Reviewed-by: Douglas Anderson <dianders(a)chromium.org>
Signed-off-by: Daniel Thompson <daniel.thompson(a)linaro.org>
Signed-off-by: Linus Torvalds <torvalds(a)linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Zheng Yejian <zhengyejian1(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
include/linux/security.h | 2 ++
kernel/debug/debug_core.c | 24 ++++++++++++++
kernel/debug/kdb/kdb_main.c | 62 +++++++++++++++++++++++++++++++++++--
security/security.c | 2 ++
4 files changed, 87 insertions(+), 3 deletions(-)
diff --git a/include/linux/security.h b/include/linux/security.h
index 35355429648e..330029ef7e89 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -121,10 +121,12 @@ enum lockdown_reason {
LOCKDOWN_DEBUGFS,
LOCKDOWN_XMON_WR,
LOCKDOWN_BPF_WRITE_USER,
+ LOCKDOWN_DBG_WRITE_KERNEL,
LOCKDOWN_INTEGRITY_MAX,
LOCKDOWN_KCORE,
LOCKDOWN_KPROBES,
LOCKDOWN_BPF_READ,
+ LOCKDOWN_DBG_READ_KERNEL,
LOCKDOWN_PERF,
LOCKDOWN_TRACEFS,
LOCKDOWN_XMON_RW,
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index be5b6b97adbf..363f781b56ca 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -56,6 +56,7 @@
#include <linux/vmacache.h>
#include <linux/rcupdate.h>
#include <linux/irq.h>
+#include <linux/security.h>
#include <asm/cacheflush.h>
#include <asm/byteorder.h>
@@ -762,6 +763,29 @@ static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs,
continue;
kgdb_connected = 0;
} else {
+ /*
+ * This is a brutal way to interfere with the debugger
+ * and prevent gdb being used to poke at kernel memory.
+ * This could cause trouble if lockdown is applied when
+ * there is already an active gdb session. For now the
+ * answer is simply "don't do that". Typically lockdown
+ * *will* be applied before the debug core gets started
+ * so only developers using kgdb for fairly advanced
+ * early kernel debug can be biten by this. Hopefully
+ * they are sophisticated enough to take care of
+ * themselves, especially with help from the lockdown
+ * message printed on the console!
+ */
+ if (security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL)) {
+ if (IS_ENABLED(CONFIG_KGDB_KDB)) {
+ /* Switch back to kdb if possible... */
+ dbg_kdb_mode = 1;
+ continue;
+ } else {
+ /* ... otherwise just bail */
+ break;
+ }
+ }
error = gdb_serial_stub(ks);
}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 930ac1b25ec7..4e09fab52faf 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -45,6 +45,7 @@
#include <linux/proc_fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/security.h>
#include "kdb_private.h"
#undef MODULE_PARAM_PREFIX
@@ -197,10 +198,62 @@ struct task_struct *kdb_curr_task(int cpu)
}
/*
- * Check whether the flags of the current command and the permissions
- * of the kdb console has allow a command to be run.
+ * Update the permissions flags (kdb_cmd_enabled) to match the
+ * current lockdown state.
+ *
+ * Within this function the calls to security_locked_down() are "lazy". We
+ * avoid calling them if the current value of kdb_cmd_enabled already excludes
+ * flags that might be subject to lockdown. Additionally we deliberately check
+ * the lockdown flags independently (even though read lockdown implies write
+ * lockdown) since that results in both simpler code and clearer messages to
+ * the user on first-time debugger entry.
+ *
+ * The permission masks during a read+write lockdown permits the following
+ * flags: INSPECT, SIGNAL, REBOOT (and ALWAYS_SAFE).
+ *
+ * The INSPECT commands are not blocked during lockdown because they are
+ * not arbitrary memory reads. INSPECT covers the backtrace family (sometimes
+ * forcing them to have no arguments) and lsmod. These commands do expose
+ * some kernel state but do not allow the developer seated at the console to
+ * choose what state is reported. SIGNAL and REBOOT should not be controversial,
+ * given these are allowed for root during lockdown already.
+ */
+static void kdb_check_for_lockdown(void)
+{
+ const int write_flags = KDB_ENABLE_MEM_WRITE |
+ KDB_ENABLE_REG_WRITE |
+ KDB_ENABLE_FLOW_CTRL;
+ const int read_flags = KDB_ENABLE_MEM_READ |
+ KDB_ENABLE_REG_READ;
+
+ bool need_to_lockdown_write = false;
+ bool need_to_lockdown_read = false;
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | write_flags))
+ need_to_lockdown_write =
+ security_locked_down(LOCKDOWN_DBG_WRITE_KERNEL);
+
+ if (kdb_cmd_enabled & (KDB_ENABLE_ALL | read_flags))
+ need_to_lockdown_read =
+ security_locked_down(LOCKDOWN_DBG_READ_KERNEL);
+
+ /* De-compose KDB_ENABLE_ALL if required */
+ if (need_to_lockdown_write || need_to_lockdown_read)
+ if (kdb_cmd_enabled & KDB_ENABLE_ALL)
+ kdb_cmd_enabled = KDB_ENABLE_MASK & ~KDB_ENABLE_ALL;
+
+ if (need_to_lockdown_write)
+ kdb_cmd_enabled &= ~write_flags;
+
+ if (need_to_lockdown_read)
+ kdb_cmd_enabled &= ~read_flags;
+}
+
+/*
+ * Check whether the flags of the current command, the permissions of the kdb
+ * console and the lockdown state allow a command to be run.
*/
-static inline bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
+static bool kdb_check_flags(kdb_cmdflags_t flags, int permissions,
bool no_args)
{
/* permissions comes from userspace so needs massaging slightly */
@@ -1194,6 +1247,9 @@ static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
kdb_curr_task(raw_smp_processor_id());
KDB_DEBUG_STATE("kdb_local 1", reason);
+
+ kdb_check_for_lockdown();
+
kdb_go_count = 0;
if (reason == KDB_REASON_DEBUG) {
/* special case below */
diff --git a/security/security.c b/security/security.c
index 4fb58543eeb9..2fc40217d49d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -59,10 +59,12 @@ const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
[LOCKDOWN_DEBUGFS] = "debugfs access",
[LOCKDOWN_XMON_WR] = "xmon write access",
[LOCKDOWN_BPF_WRITE_USER] = "use of bpf to write user RAM",
+ [LOCKDOWN_DBG_WRITE_KERNEL] = "use of kgdb/kdb to write kernel RAM",
[LOCKDOWN_INTEGRITY_MAX] = "integrity",
[LOCKDOWN_KCORE] = "/proc/kcore access",
[LOCKDOWN_KPROBES] = "use of kprobes",
[LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
+ [LOCKDOWN_DBG_READ_KERNEL] = "use of kgdb/kdb to read kernel RAM",
[LOCKDOWN_PERF] = "unsafe use of perf",
[LOCKDOWN_TRACEFS] = "use of tracefs",
[LOCKDOWN_XMON_RW] = "xmon read and write access",
--
2.20.1
1
4
Backport 5.10.109 LTS patches from upstream
nds32: fix access_ok() checks in get/put_user
wcn36xx: Differentiate wcn3660 from wcn3620
tpm: use try_get_ops() in tpm-space.c
mac80211: fix potential double free on mesh join
rcu: Don't deboost before reporting expedited quiescent state
Revert "ath: add support for special 0x0 regulatory domain"
crypto: qat - disable registration of algorithms
ACPI: video: Force backlight native for Clevo NL5xRU and NL5xNU
ACPI: battery: Add device HID and quirk for Microsoft Surface Go 3
ACPI / x86: Work around broken XSDT on Advantech DAC-BJ01 board
drivers: net: xgene: Fix regression in CRC stripping
ALSA: pci: fix reading of swapped values from pcmreg in AC97 codec
ALSA: cmipci: Restore aux vol on suspend/resume
ALSA: usb-audio: Add mute TLV for playback volumes on RODE NT-USB
ALSA: pcm: Add stream lock during PCM reset ioctl operations
ALSA: pcm: Fix races among concurrent prealloc proc writes
ALSA: pcm: Fix races among concurrent prepare and hw_params/hw_free calls
ALSA: pcm: Fix races among concurrent read/write and buffer changes
ALSA: pcm: Fix races among concurrent hw_params and hw_free calls
ALSA: hda/realtek: Add quirk for ASUS GA402
ALSA: hda/realtek - Fix headset mic problem for a HP machine with alc671
ALSA: hda/realtek: Add quirk for Clevo NP50PNJ
ALSA: hda/realtek: Add quirk for Clevo NP70PNJ
ALSA: usb-audio: add mapping for new Corsair Virtuoso SE
ALSA: oss: Fix PCM OSS buffer allocation overflow
ASoC: sti: Fix deadlock via snd_pcm_stop_xrun() call
staging: fbtft: fb_st7789v: reset display before initialization
tpm: Fix error handling in async work
cgroup-v1: Correct privileges check in release_agent writes
exfat: avoid incorrectly releasing for root inode
net: ipv6: fix skb_over_panic in __ip6_append_data
Already merged:
llc: only change llc->dev when bind() succeeds
netfilter: nf_tables: initialize registers in nft_do_chain()
llc: fix netdevice reference leaks in llc_ui_bind()
cgroup: Use open-time cgroup namespace for process migration perm checks
cgroup: Allocate cgroup_file_ctx for kernfs_open_file->priv
nfc: st21nfca: Fix potential buffer overflows in EVT_TRANSACTION
Total patches: 37 - 6 = 31
Arnd Bergmann (1):
nds32: fix access_ok() checks in get/put_user
Brian Norris (1):
Revert "ath: add support for special 0x0 regulatory domain"
Bryan O'Donoghue (1):
wcn36xx: Differentiate wcn3660 from wcn3620
Chen Li (1):
exfat: avoid incorrectly releasing for root inode
Giacomo Guiduzzi (1):
ALSA: pci: fix reading of swapped values from pcmreg in AC97 codec
Giovanni Cabiddu (1):
crypto: qat - disable registration of algorithms
James Bottomley (1):
tpm: use try_get_ops() in tpm-space.c
Jason Zheng (1):
ALSA: hda/realtek: Add quirk for ASUS GA402
Jonathan Teh (1):
ALSA: cmipci: Restore aux vol on suspend/resume
Lars-Peter Clausen (1):
ALSA: usb-audio: Add mute TLV for playback volumes on RODE NT-USB
Linus Lüssing (1):
mac80211: fix potential double free on mesh join
Mark Cilissen (1):
ACPI / x86: Work around broken XSDT on Advantech DAC-BJ01 board
Maximilian Luz (1):
ACPI: battery: Add device HID and quirk for Microsoft Surface Go 3
Michal Koutný (1):
cgroup-v1: Correct privileges check in release_agent writes
Oliver Graute (1):
staging: fbtft: fb_st7789v: reset display before initialization
Paul E. McKenney (1):
rcu: Don't deboost before reporting expedited quiescent state
Reza Jahanbakhshi (1):
ALSA: usb-audio: add mapping for new Corsair Virtuoso SE
Stephane Graber (1):
drivers: net: xgene: Fix regression in CRC stripping
Tadeusz Struk (2):
net: ipv6: fix skb_over_panic in __ip6_append_data
tpm: Fix error handling in async work
Takashi Iwai (7):
ASoC: sti: Fix deadlock via snd_pcm_stop_xrun() call
ALSA: oss: Fix PCM OSS buffer allocation overflow
ALSA: pcm: Fix races among concurrent hw_params and hw_free calls
ALSA: pcm: Fix races among concurrent read/write and buffer changes
ALSA: pcm: Fix races among concurrent prepare and hw_params/hw_free
calls
ALSA: pcm: Fix races among concurrent prealloc proc writes
ALSA: pcm: Add stream lock during PCM reset ioctl operations
Tim Crawford (2):
ALSA: hda/realtek: Add quirk for Clevo NP70PNJ
ALSA: hda/realtek: Add quirk for Clevo NP50PNJ
Werner Sembach (1):
ACPI: video: Force backlight native for Clevo NL5xRU and NL5xNU
huangwenhui (1):
ALSA: hda/realtek - Fix headset mic problem for a HP machine with
alc671
arch/nds32/include/asm/uaccess.h | 22 ++++-
arch/x86/kernel/acpi/boot.c | 24 +++++
drivers/acpi/battery.c | 12 +++
drivers/acpi/video_detect.c | 75 ++++++++++++++
drivers/char/tpm/tpm-dev-common.c | 8 +-
drivers/char/tpm/tpm2-space.c | 8 +-
drivers/crypto/qat/qat_common/qat_crypto.c | 8 ++
.../net/ethernet/apm/xgene/xgene_enet_main.c | 12 ++-
drivers/net/wireless/ath/regd.c | 10 +-
drivers/net/wireless/ath/wcn36xx/main.c | 3 +
drivers/net/wireless/ath/wcn36xx/wcn36xx.h | 1 +
drivers/staging/fbtft/fb_st7789v.c | 2 +
fs/exfat/super.c | 2 +-
include/sound/pcm.h | 1 +
kernel/cgroup/cgroup-v1.c | 6 +-
kernel/rcu/tree_plugin.h | 9 +-
net/ipv6/ip6_output.c | 4 +-
net/mac80211/cfg.c | 3 -
sound/core/oss/pcm_oss.c | 12 ++-
sound/core/oss/pcm_plugin.c | 5 +-
sound/core/pcm.c | 2 +
sound/core/pcm_lib.c | 4 +
sound/core/pcm_memory.c | 11 ++-
sound/core/pcm_native.c | 97 ++++++++++++-------
sound/pci/ac97/ac97_codec.c | 4 +-
sound/pci/cmipci.c | 3 +-
sound/pci/hda/patch_realtek.c | 4 +
sound/soc/sti/uniperif_player.c | 6 +-
sound/soc/sti/uniperif_reader.c | 2 +-
sound/usb/mixer_maps.c | 10 ++
sound/usb/mixer_quirks.c | 7 +-
31 files changed, 289 insertions(+), 88 deletions(-)
--
2.20.1
1
31
[PATCH openEuler-1.0-LTS] mm/memcontrol: fix wrong vmstats for dying memcg
by Yongqiang Liu 27 Jun '22
by Yongqiang Liu 27 Jun '22
27 Jun '22
From: Lu Jialin <lujialin4(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5E8LA
CVE: NA
--------------------------------
At present, stat_cpu->count is only folded into stat when its absolute
value exceeds MEMCG_CHARGE_BATCH, so there is always some lag between
stat and the correct value.
In addition, since a partially deleted memcg is still referenced, it
is not freed immediately after it goes offline. Although the remaining
memcg has released its pages, its stat and its parent's stat can still
be non-zero or too large because of the update lag, which makes the
total_<count> values in the memory.stat file abnormal.
This patch mainly solves the problem of synchronization between
memcg's stat and the correct value during the destruction process
from two aspects:
1) Perform a flush synchronization operation when memcg is offline
2) For memcg in the process of being destroyed, bypass the threshold
judgment when updating vmstats
Signed-off-by: Lu Jialin <lujialin4(a)huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
mm/memcontrol.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2983baf910f4..345a9d159ad8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -697,7 +697,8 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
return;
x = val + __this_cpu_read(memcg->stat_cpu->count[idx]);
- if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
+ if (unlikely(abs(x) > MEMCG_CHARGE_BATCH ||
+ memcg->css.flags & CSS_DYING)) {
struct mem_cgroup *mi;
struct mem_cgroup_extension *memcg_ext;
@@ -3244,8 +3245,10 @@ static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg)
stat[i] = 0;
for_each_online_cpu(cpu)
- for (i = 0; i < MEMCG_NR_STAT; i++)
+ for (i = 0; i < MEMCG_NR_STAT; i++) {
stat[i] += per_cpu(memcg->stat_cpu->count[i], cpu);
+ per_cpu(memcg->stat_cpu->count[i], cpu) = 0;
+ }
for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
for (i = 0; i < MEMCG_NR_STAT; i++)
@@ -3259,9 +3262,11 @@ static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg)
stat[i] = 0;
for_each_online_cpu(cpu)
- for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
stat[i] += per_cpu(
pn->lruvec_stat_cpu->count[i], cpu);
+ per_cpu(pn->lruvec_stat_cpu->count[i], cpu) = 0;
+ }
for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
@@ -3279,9 +3284,11 @@ static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg)
events[i] = 0;
for_each_online_cpu(cpu)
- for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
events[i] += per_cpu(memcg->stat_cpu->events[i],
cpu);
+ per_cpu(memcg->stat_cpu->events[i], cpu) = 0;
+ }
for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
@@ -5106,6 +5113,9 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
memcg_offline_kmem(memcg);
wb_memcg_offline(memcg);
+ memcg_flush_percpu_vmstats(memcg);
+ memcg_flush_percpu_vmevents(memcg);
+
mem_cgroup_id_put(memcg);
}
--
2.25.1
1
0
大家好,
本次Intel Arch例会定于本周二6/28 10:00-11:00AM进行, 欢迎大家提出更多需求或议题和参与讨论。
本次初步议题:
Agenda:
*Status update
*SPR feature PRs merge into intel-kernel & OLK-5.10 kernel
*Compiler support for new instructions
*Support 22.09 release for SPR fundamental features
-----Original Appointment-----
From: openEuler conference <public(a)openeuler.org>
Sent: Monday, June 20, 2022 3:10 PM
To: openEuler conference; jun.j.tian@intel.com,kai.liu@suse.com
Subject: sig-Intel-Arch
When: Tuesday, June 28, 2022 10:00 AM-11:00 AM (UTC+08:00) Beijing, Chongqing, Hong Kong, Urumqi.
Where:
您好!
sig-Intel-Arch SIG 邀请您参加 2022-06-28 10:00 召开的Zoom会议
会议主题:sig-Intel-Arch
会议链接:https://us06web.zoom.us/j/81976528831?pwd=cVIxUkRhUXFGcldFV0ZtNkpvUFpxZz09
会议纪要:https://etherpad.openeuler.org/p/sig-Intel-Arch-meetings
温馨提醒:建议接入会议后修改参会人的姓名,也可以使用您在gitee.com的ID
更多资讯尽在:https://openeuler.org/zh/
Hello!
openEuler sig-Intel-Arch SIG invites you to attend the Zoom conference to be held at 2022-06-28 10:00.
The subject of the conference is sig-Intel-Arch,
You can join the meeting at https://us06web.zoom.us/j/81976528831?pwd=cVIxUkRhUXFGcldFV0ZtNkpvUFpxZz09.
Add topics at https://etherpad.openeuler.org/p/sig-Intel-Arch-meetings.
Note: You are advised to change the participant name after joining the conference or use your ID at gitee.com.
More information: https://openeuler.org/en/
1
0
[PATCH openEuler-1.0-LTS] ext4: recover csum seed of tmp_inode after migrating to extents
by Yongqiang Liu 25 Jun '22
by Yongqiang Liu 25 Jun '22
25 Jun '22
From: Li Lingfeng <lilingfeng3(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: 186944, https://gitee.com/openeuler/kernel/issues/I5DAJY
CVE: NA
--------------------------------
When migrating to extents, the checksum seed of the temporary inode
needs to be replaced by the inode's, otherwise the inode checksums
will be incorrect when swapping the inodes' data.
However, the temporary inode cannot match its checksum to
itself since it has lost its own checksum seed.
mkfs.ext4 -F /dev/sdc
mount /dev/sdc /mnt/sdc
xfs_io -fc "pwrite 4k 4k" -c "fsync" /mnt/sdc/testfile
chattr -e /mnt/sdc/testfile
chattr +e /mnt/sdc/testfile
fsck -fn /dev/sdc
========
...
Pass 1: Checking inodes, blocks, and sizes
Inode 13 passes checks, but checksum does not match inode. Fix? no
...
========
The fix is simple: save the checksum seed of the temporary inode and
restore it after migrating to extents.
Fixes: e81c9302a6c3 ("ext4: set csum seed in tmp inode while migrating to extents")
Signed-off-by: Li Lingfeng <lilingfeng3(a)huawei.com>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
fs/ext4/migrate.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 75a769634b2b..ed9e7816efbb 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -415,7 +415,7 @@ int ext4_ext_migrate(struct inode *inode)
struct inode *tmp_inode = NULL;
struct migrate_struct lb;
unsigned long max_entries;
- __u32 goal;
+ __u32 goal, tmp_csum_seed;
uid_t owner[2];
/*
@@ -463,6 +463,7 @@ int ext4_ext_migrate(struct inode *inode)
* the migration.
*/
ei = EXT4_I(inode);
+ tmp_csum_seed = EXT4_I(tmp_inode)->i_csum_seed;
EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed;
i_size_write(tmp_inode, i_size_read(inode));
/*
@@ -573,6 +574,7 @@ int ext4_ext_migrate(struct inode *inode)
* the inode is not visible to user space.
*/
tmp_inode->i_blocks = 0;
+ EXT4_I(tmp_inode)->i_csum_seed = tmp_csum_seed;
/* Reset the extent details */
ext4_ext_tree_init(handle, tmp_inode);
--
2.25.1
1
0
[PATCH openEuler-1.0-LTS] vfio: framework supporting vfio device hot migration
by RongWang 24 Jun '22
by RongWang 24 Jun '22
24 Jun '22
From: Rong Wang <w_angrong(a)163.com>
kunpeng inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5CO9A
CVE: NA
---------------------------------
For pass-through devices, the hypervisor can't control the status of
the device, and can't track memory dirtied by DMA from the device,
during migration.
The goal of this framework is to cooperate with the hardware to
accomplish the tasks above.
qemu
|status control and dirty memory report
vfio
|ops to hardware
hardware
Signed-off-by: Rong Wang <w_angrong(a)163.com>
Signed-off-by: HuHua Li <18245010845(a)163.com>
Signed-off-by: Ripeng Qiu <965412048(a)qq.com>
---
drivers/vfio/pci/Makefile | 2 +-
drivers/vfio/pci/vfio_pci.c | 54 +++
drivers/vfio/pci/vfio_pci_migration.c | 755 ++++++++++++++++++++++++++++++++++
drivers/vfio/pci/vfio_pci_private.h | 14 +-
drivers/vfio/vfio.c | 411 +++++++++++++++++-
include/linux/vfio_pci_migration.h | 136 ++++++
6 files changed, 1367 insertions(+), 5 deletions(-)
create mode 100644 drivers/vfio/pci/vfio_pci_migration.c
create mode 100644 include/linux/vfio_pci_migration.h
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 76d8ec0..80a777d 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -1,5 +1,5 @@
-vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
+vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio_pci_migration.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 51b791c..59d8280 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -30,6 +30,7 @@
#include <linux/vgaarb.h>
#include <linux/nospec.h>
#include <linux/sched/mm.h>
+#include <linux/vfio_pci_migration.h>
#include "vfio_pci_private.h"
@@ -296,6 +297,14 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
vfio_pci_probe_mmaps(vdev);
+ if (vfio_dev_migration_is_supported(pdev)) {
+ ret = vfio_pci_migration_init(vdev);
+ if (ret) {
+ dev_warn(&vdev->pdev->dev, "Failed to init vfio_pci_migration\n");
+ vfio_pci_disable(vdev);
+ return ret;
+ }
+ }
return 0;
}
@@ -392,6 +401,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
out:
pci_disable_device(pdev);
+ vfio_pci_migration_exit(vdev);
vfio_pci_try_bus_reset(vdev);
if (!disable_idle_d3)
@@ -642,6 +652,41 @@ struct vfio_devices {
int max_index;
};
+static long vfio_pci_handle_log_buf_ctl(struct vfio_pci_device *vdev,
+ const unsigned long arg)
+{
+ struct vfio_log_buf_ctl *log_buf_ctl = NULL;
+ struct vfio_log_buf_info *log_buf_info = NULL;
+ struct vf_migration_log_info migration_log_info;
+ long ret = 0;
+
+ log_buf_ctl = (struct vfio_log_buf_ctl *)arg;
+ log_buf_info = (struct vfio_log_buf_info *)log_buf_ctl->data;
+
+ switch (log_buf_ctl->flags) {
+ case VFIO_DEVICE_LOG_BUF_FLAG_START:
+ migration_log_info.dom_uuid = log_buf_info->uuid;
+ migration_log_info.buffer_size =
+ log_buf_info->buffer_size;
+ migration_log_info.sge_num = log_buf_info->addrs_size;
+ migration_log_info.sge_len = log_buf_info->frag_size;
+ migration_log_info.sgevec = log_buf_info->sgevec;
+ ret = vfio_pci_device_log_start(vdev,
+ &migration_log_info);
+ break;
+ case VFIO_DEVICE_LOG_BUF_FLAG_STOP:
+ ret = vfio_pci_device_log_stop(vdev,
+ log_buf_info->uuid);
+ break;
+ case VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY:
+ ret = vfio_pci_device_log_status_query(vdev);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
static long vfio_pci_ioctl(void *device_data,
unsigned int cmd, unsigned long arg)
{
@@ -1142,6 +1187,8 @@ static long vfio_pci_ioctl(void *device_data,
return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
ioeventfd.data, count, ioeventfd.fd);
+ } else if (cmd == VFIO_DEVICE_LOG_BUF_CTL) {
+ return vfio_pci_handle_log_buf_ctl(vdev, arg);
}
return -ENOTTY;
@@ -1566,6 +1613,9 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_power_state(pdev, PCI_D3hot);
}
+ if (vfio_dev_migration_is_supported(pdev))
+ ret = vfio_pci_device_init(pdev);
+
return ret;
}
@@ -1591,6 +1641,10 @@ static void vfio_pci_remove(struct pci_dev *pdev)
if (!disable_idle_d3)
pci_set_power_state(pdev, PCI_D0);
+
+ if (vfio_dev_migration_is_supported(pdev)) {
+ vfio_pci_device_uninit(pdev);
+ }
}
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
diff --git a/drivers/vfio/pci/vfio_pci_migration.c b/drivers/vfio/pci/vfio_pci_migration.c
new file mode 100644
index 0000000..f69cd13
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_migration.c
@@ -0,0 +1,755 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Huawei Technologies Co., Ltd. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <linux/vfio_pci_migration.h>
+
+#include "vfio_pci_private.h"
+
+static LIST_HEAD(vfio_pci_mig_drivers_list);
+static DEFINE_MUTEX(vfio_pci_mig_drivers_mutex);
+
+static void vfio_pci_add_mig_drv(struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ mutex_lock(&vfio_pci_mig_drivers_mutex);
+ atomic_set(&mig_drv->count, 1);
+ list_add_tail(&mig_drv->list, &vfio_pci_mig_drivers_list);
+ mutex_unlock(&vfio_pci_mig_drivers_mutex);
+}
+
+static void vfio_pci_remove_mig_drv(struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ mutex_lock(&vfio_pci_mig_drivers_mutex);
+ list_del(&mig_drv->list);
+ mutex_unlock(&vfio_pci_mig_drivers_mutex);
+}
+
+static struct vfio_pci_vendor_mig_driver *
+ vfio_pci_find_mig_drv(struct pci_dev *pdev, struct module *module)
+{
+ struct vfio_pci_vendor_mig_driver *mig_drv = NULL;
+
+ mutex_lock(&vfio_pci_mig_drivers_mutex);
+ list_for_each_entry(mig_drv, &vfio_pci_mig_drivers_list, list) {
+ if (mig_drv->owner == module) {
+ if (mig_drv->bus_num == pdev->bus->number)
+ goto out;
+ }
+ }
+ mig_drv = NULL;
+out:
+ mutex_unlock(&vfio_pci_mig_drivers_mutex);
+ return mig_drv;
+}
+
+static struct vfio_pci_vendor_mig_driver *
+ vfio_pci_get_mig_driver(struct pci_dev *pdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_drv = NULL;
+ struct pci_dev *pf_dev = pci_physfn(pdev);
+
+ mutex_lock(&vfio_pci_mig_drivers_mutex);
+ list_for_each_entry(mig_drv, &vfio_pci_mig_drivers_list, list) {
+ if (mig_drv->bus_num == pf_dev->bus->number)
+ goto out;
+ }
+ mig_drv = NULL;
+out:
+ mutex_unlock(&vfio_pci_mig_drivers_mutex);
+ return mig_drv;
+}
+
+bool vfio_dev_migration_is_supported(struct pci_dev *pdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+
+ mig_driver = vfio_pci_get_mig_driver(pdev);
+ if (!mig_driver || !mig_driver->dev_mig_ops) {
+ dev_warn(&pdev->dev, "unable to find a mig_drv module\n");
+ return false;
+ }
+
+ return true;
+}
+
+int vfio_pci_device_log_start(struct vfio_pci_device *vdev,
+ struct vf_migration_log_info *log_info)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver;
+
+ mig_driver = vfio_pci_get_mig_driver(vdev->pdev);
+ if (!mig_driver || !mig_driver->dev_mig_ops) {
+ dev_err(&vdev->pdev->dev, "unable to find a mig_drv module\n");
+ return -EFAULT;
+ }
+
+ if (!mig_driver->dev_mig_ops->log_start ||
+ (mig_driver->dev_mig_ops->log_start(vdev->pdev,
+ log_info) != 0)) {
+ dev_err(&vdev->pdev->dev, "failed to set log start\n");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int vfio_pci_device_log_stop(struct vfio_pci_device *vdev, uint32_t uuid)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver;
+
+ mig_driver = vfio_pci_get_mig_driver(vdev->pdev);
+ if (!mig_driver || !mig_driver->dev_mig_ops) {
+ dev_err(&vdev->pdev->dev, "unable to find a mig_drv module\n");
+ return -EFAULT;
+ }
+
+ if (!mig_driver->dev_mig_ops->log_stop ||
+ (mig_driver->dev_mig_ops->log_stop(vdev->pdev, uuid) != 0)) {
+ dev_err(&vdev->pdev->dev, "failed to set log stop\n");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int vfio_pci_device_log_status_query(struct vfio_pci_device *vdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver;
+
+ mig_driver = vfio_pci_get_mig_driver(vdev->pdev);
+ if (!mig_driver || !mig_driver->dev_mig_ops) {
+ dev_err(&vdev->pdev->dev, "unable to find a mig_drv module\n");
+ return -EFAULT;
+ }
+
+ if (!mig_driver->dev_mig_ops->get_log_status ||
+ (mig_driver->dev_mig_ops->get_log_status(vdev->pdev) != 0)) {
+ dev_err(&vdev->pdev->dev, "failed to get log status\n");
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int vfio_pci_device_init(struct pci_dev *pdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_drv;
+
+ mig_drv = vfio_pci_get_mig_driver(pdev);
+ if (!mig_drv || !mig_drv->dev_mig_ops) {
+ dev_err(&pdev->dev, "unable to find a mig_drv module\n");
+ return -EFAULT;
+ }
+
+ if (mig_drv->dev_mig_ops->init)
+ return mig_drv->dev_mig_ops->init(pdev);
+
+ return -EFAULT;
+}
+
+void vfio_pci_device_uninit(struct pci_dev *pdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_drv;
+
+ mig_drv = vfio_pci_get_mig_driver(pdev);
+ if (!mig_drv || !mig_drv->dev_mig_ops) {
+ dev_err(&pdev->dev, "unable to find a mig_drv module\n");
+ return;
+ }
+
+ if (mig_drv->dev_mig_ops->uninit)
+ mig_drv->dev_mig_ops->uninit(pdev);
+}
+
+static void vfio_pci_device_release(struct pci_dev *pdev,
+ struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ if (mig_drv->dev_mig_ops->release)
+ mig_drv->dev_mig_ops->release(pdev);
+}
+
+static int vfio_pci_device_get_info(struct pci_dev *pdev,
+ struct vfio_device_migration_info *mig_info,
+ struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ if (mig_drv->dev_mig_ops->get_info)
+ return mig_drv->dev_mig_ops->get_info(pdev, mig_info);
+ return -EFAULT;
+}
+
+static int vfio_pci_device_enable(struct pci_dev *pdev,
+ struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ if (!mig_drv->dev_mig_ops->enable ||
+ (mig_drv->dev_mig_ops->enable(pdev) != 0)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int vfio_pci_device_disable(struct pci_dev *pdev,
+ struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ if (!mig_drv->dev_mig_ops->disable ||
+ (mig_drv->dev_mig_ops->disable(pdev) != 0))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int vfio_pci_device_pre_enable(struct pci_dev *pdev,
+ struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ if (!mig_drv->dev_mig_ops->pre_enable ||
+ (mig_drv->dev_mig_ops->pre_enable(pdev) != 0))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int vfio_pci_device_state_save(struct pci_dev *pdev,
+ struct vfio_pci_migration_data *data)
+{
+ struct vfio_device_migration_info *mig_info = data->mig_ctl;
+ struct vfio_pci_vendor_mig_driver *mig_drv = data->mig_driver;
+ void *base = (void *)mig_info;
+ int ret = 0;
+
+ if ((mig_info->device_state & VFIO_DEVICE_STATE_RUNNING) != 0) {
+ ret = vfio_pci_device_disable(pdev, mig_drv);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to stop VF function!\n");
+ return ret;
+ }
+ mig_info->device_state &= ~VFIO_DEVICE_STATE_RUNNING;
+ }
+
+ if (mig_drv->dev_mig_ops && mig_drv->dev_mig_ops->save) {
+ ret = mig_drv->dev_mig_ops->save(pdev, base,
+ mig_info->data_offset, data->state_size);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to save device state!\n");
+ return -EINVAL;
+ }
+ } else {
+ return -EFAULT;
+ }
+
+ mig_info->data_size = data->state_size;
+ mig_info->pending_bytes = mig_info->data_size;
+ return ret;
+}
+
+static int vfio_pci_device_state_restore(struct vfio_pci_migration_data *data)
+{
+ struct vfio_device_migration_info *mig_info = data->mig_ctl;
+ struct vfio_pci_vendor_mig_driver *mig_drv = data->mig_driver;
+ struct pci_dev *pdev = data->vf_dev;
+ void *base = (void *)mig_info;
+ int ret;
+
+ if (mig_drv->dev_mig_ops && mig_drv->dev_mig_ops->restore) {
+ ret = mig_drv->dev_mig_ops->restore(pdev, base,
+ mig_info->data_offset, mig_info->data_size);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to restore device state!\n");
+ return -EINVAL;
+ }
+ return 0;
+ }
+
+ return -EFAULT;
+}
+
+static int vfio_pci_set_device_state(struct vfio_pci_migration_data *data,
+ u32 state)
+{
+ struct vfio_device_migration_info *mig_ctl = data->mig_ctl;
+ struct vfio_pci_vendor_mig_driver *mig_drv = data->mig_driver;
+ struct pci_dev *pdev = data->vf_dev;
+ int ret = 0;
+
+ if (state == mig_ctl->device_state)
+ return 0;
+
+ if (!mig_drv->dev_mig_ops)
+ return -EINVAL;
+
+ switch (state) {
+ case VFIO_DEVICE_STATE_RUNNING:
+ if (!(mig_ctl->device_state &
+ VFIO_DEVICE_STATE_RUNNING))
+ ret = vfio_pci_device_enable(pdev, mig_drv);
+ break;
+ case VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING:
+ /*
+ * (pre-copy) - device should start logging data.
+ */
+ ret = 0;
+ break;
+ case VFIO_DEVICE_STATE_SAVING:
+ /* stop the vf function, save state */
+ ret = vfio_pci_device_state_save(pdev, data);
+ break;
+ case VFIO_DEVICE_STATE_STOP:
+ if (mig_ctl->device_state & VFIO_DEVICE_STATE_RUNNING)
+ ret = vfio_pci_device_disable(pdev, mig_drv);
+ break;
+ case VFIO_DEVICE_STATE_RESUMING:
+ ret = vfio_pci_device_pre_enable(pdev, mig_drv);
+ break;
+ default:
+ ret = -EFAULT;
+ break;
+ }
+
+ if (ret)
+ return ret;
+
+ mig_ctl->device_state = state;
+ return 0;
+}
+
+static ssize_t vfio_pci_handle_mig_dev_state(
+ struct vfio_pci_migration_data *data,
+ char __user *buf, size_t count, bool iswrite)
+{
+ struct vfio_device_migration_info *mig_ctl = data->mig_ctl;
+ u32 device_state;
+ int ret;
+
+ if (count != sizeof(device_state))
+ return -EINVAL;
+
+ if (iswrite) {
+ if (copy_from_user(&device_state, buf, count))
+ return -EFAULT;
+
+ ret = vfio_pci_set_device_state(data, device_state);
+ if (ret)
+ return ret;
+ } else {
+ if (copy_to_user(buf, &mig_ctl->device_state, count))
+ return -EFAULT;
+ }
+
+ return count;
+}
+
+static ssize_t vfio_pci_handle_mig_pending_bytes(
+ struct vfio_device_migration_info *mig_info,
+ char __user *buf, size_t count, bool iswrite)
+{
+ u64 pending_bytes;
+
+ if (count != sizeof(pending_bytes) || iswrite)
+ return -EINVAL;
+
+ if (mig_info->device_state ==
+ (VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING)) {
+ /* In pre-copy state we have no data to return for now,
+ * return 0 pending bytes
+ */
+ pending_bytes = 0;
+ } else {
+ pending_bytes = mig_info->pending_bytes;
+ }
+
+ if (copy_to_user(buf, &pending_bytes, count))
+ return -EFAULT;
+
+ return count;
+}
+
+static ssize_t vfio_pci_handle_mig_data_offset(
+ struct vfio_device_migration_info *mig_info,
+ char __user *buf, size_t count, bool iswrite)
+{
+ u64 data_offset = mig_info->data_offset;
+
+ if (count != sizeof(data_offset) || iswrite)
+ return -EINVAL;
+
+ if (copy_to_user(buf, &data_offset, count))
+ return -EFAULT;
+
+ return count;
+}
+
+static ssize_t vfio_pci_handle_mig_data_size(
+ struct vfio_device_migration_info *mig_info,
+ char __user *buf, size_t count, bool iswrite)
+{
+ u64 data_size;
+
+ if (count != sizeof(data_size))
+ return -EINVAL;
+
+ if (iswrite) {
+ /* data_size is writable only during resuming state */
+ if (mig_info->device_state != VFIO_DEVICE_STATE_RESUMING)
+ return -EINVAL;
+
+ if (copy_from_user(&data_size, buf, sizeof(data_size)))
+ return -EFAULT;
+
+ mig_info->data_size = data_size;
+ } else {
+ if (mig_info->device_state != VFIO_DEVICE_STATE_SAVING)
+ return -EINVAL;
+
+ if (copy_to_user(buf, &mig_info->data_size,
+ sizeof(data_size)))
+ return -EFAULT;
+ }
+
+ return count;
+}
+
+static ssize_t vfio_pci_handle_mig_dev_cmd(struct vfio_pci_migration_data *data,
+ char __user *buf, size_t count, bool iswrite)
+{
+ struct vfio_pci_vendor_mig_driver *mig_drv = data->mig_driver;
+ struct pci_dev *pdev = data->vf_dev;
+ u32 device_cmd;
+ int ret = -EFAULT;
+
+ if (count != sizeof(device_cmd) || !iswrite || !mig_drv->dev_mig_ops)
+ return -EINVAL;
+
+ if (copy_from_user(&device_cmd, buf, count))
+ return -EFAULT;
+
+ switch (device_cmd) {
+ case VFIO_DEVICE_MIGRATION_CANCEL:
+ if (mig_drv->dev_mig_ops->cancel)
+ ret = mig_drv->dev_mig_ops->cancel(pdev);
+ break;
+ default:
+ dev_err(&pdev->dev, "cmd is invaild\n");
+ return -EINVAL;
+ }
+
+ if (ret != 0)
+ return ret;
+
+ return count;
+}
+
+static ssize_t vfio_pci_handle_mig_drv_version(
+ struct vfio_device_migration_info *mig_info,
+ char __user *buf, size_t count, bool iswrite)
+{
+ u32 version_id = mig_info->version_id;
+
+ if (count != sizeof(version_id) || iswrite)
+ return -EINVAL;
+
+ if (copy_to_user(buf, &version_id, count))
+ return -EFAULT;
+
+ return count;
+}
+
+static ssize_t vfio_pci_handle_mig_data_rw(
+ struct vfio_pci_migration_data *data,
+ char __user *buf, size_t count, u64 pos, bool iswrite)
+{
+ struct vfio_device_migration_info *mig_ctl = data->mig_ctl;
+ void *data_addr = data->vf_data;
+
+ if (count == 0) {
+ dev_err(&data->vf_dev->dev, "qemu operation data size error!\n");
+ return -EINVAL;
+ }
+
+ data_addr += pos - mig_ctl->data_offset;
+ if (iswrite) {
+ if (copy_from_user(data_addr, buf, count))
+ return -EFAULT;
+
+ mig_ctl->pending_bytes += count;
+ if (mig_ctl->pending_bytes > data->state_size)
+ return -EINVAL;
+ } else {
+ if (copy_to_user(buf, data_addr, count))
+ return -EFAULT;
+
+ if (mig_ctl->pending_bytes < count)
+ return -EINVAL;
+
+ mig_ctl->pending_bytes -= count;
+ }
+
+ return count;
+}
+
+static ssize_t vfio_pci_dev_migrn_rw(struct vfio_pci_device *vdev,
+ char __user *buf, size_t count, loff_t *ppos, bool iswrite)
+{
+ unsigned int index =
+ VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+ struct vfio_pci_migration_data *data =
+ (struct vfio_pci_migration_data *)vdev->region[index].data;
+ loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+ struct vfio_device_migration_info *mig_ctl = data->mig_ctl;
+ int ret;
+
+ if (pos >= vdev->region[index].size)
+ return -EINVAL;
+
+ count = min(count, (size_t)(vdev->region[index].size - pos));
+ if (pos >= VFIO_MIGRATION_REGION_DATA_OFFSET)
+ return vfio_pci_handle_mig_data_rw(data,
+ buf, count, pos, iswrite);
+
+ switch (pos) {
+ case VFIO_DEVICE_MIGRATION_OFFSET(device_state):
+ ret = vfio_pci_handle_mig_dev_state(data,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(pending_bytes):
+ ret = vfio_pci_handle_mig_pending_bytes(mig_ctl,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(data_offset):
+ ret = vfio_pci_handle_mig_data_offset(mig_ctl,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(data_size):
+ ret = vfio_pci_handle_mig_data_size(mig_ctl,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(device_cmd):
+ ret = vfio_pci_handle_mig_dev_cmd(data,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(version_id):
+ ret = vfio_pci_handle_mig_drv_version(mig_ctl,
+ buf, count, iswrite);
+ break;
+ default:
+ dev_err(&vdev->pdev->dev, "invalid pos offset\n");
+ ret = -EFAULT;
+ break;
+ }
+
+ if (mig_ctl->device_state == VFIO_DEVICE_STATE_RESUMING &&
+ mig_ctl->pending_bytes == data->state_size &&
+ mig_ctl->data_size == data->state_size) {
+ if (vfio_pci_device_state_restore(data) != 0) {
+ dev_err(&vdev->pdev->dev, "Failed to restore device state!\n");
+ return -EFAULT;
+ }
+ mig_ctl->pending_bytes = 0;
+ mig_ctl->data_size = 0;
+ }
+
+ return ret;
+}
+
+static void vfio_pci_dev_migrn_release(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region)
+{
+ struct vfio_pci_migration_data *data = region->data;
+
+ if (data) {
+ kfree(data->mig_ctl);
+ kfree(data);
+ }
+}
+
+static const struct vfio_pci_regops vfio_pci_migration_regops = {
+ .rw = vfio_pci_dev_migrn_rw,
+ .release = vfio_pci_dev_migrn_release,
+};
+
+static int vfio_pci_migration_info_init(struct pci_dev *pdev,
+ struct vfio_device_migration_info *mig_info,
+ struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ int ret;
+
+ ret = vfio_pci_device_get_info(pdev, mig_info, mig_drv);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to get device info\n");
+ return ret;
+ }
+
+ if (mig_info->data_size > VFIO_MIGRATION_BUFFER_MAX_SIZE) {
+ dev_err(&pdev->dev, "mig_info->data_size %llu is invalid\n",
+ mig_info->data_size);
+ return -EINVAL;
+ }
+
+ mig_info->data_offset = VFIO_MIGRATION_REGION_DATA_OFFSET;
+ return ret;
+}
+
+static int vfio_device_mig_data_init(struct vfio_pci_device *vdev,
+ struct vfio_pci_migration_data *data)
+{
+ struct vfio_device_migration_info *mig_ctl;
+ u64 mig_offset;
+ int ret;
+
+ mig_ctl = kzalloc(sizeof(*mig_ctl), GFP_KERNEL);
+ if (!mig_ctl)
+ return -ENOMEM;
+
+ ret = vfio_pci_migration_info_init(vdev->pdev, mig_ctl,
+ data->mig_driver);
+ if (ret) {
+ dev_err(&vdev->pdev->dev, "get device info error!\n");
+ goto err;
+ }
+
+ mig_offset = sizeof(struct vfio_device_migration_info);
+ data->state_size = mig_ctl->data_size;
+ data->mig_ctl = krealloc(mig_ctl, mig_offset + data->state_size,
+ GFP_KERNEL);
+ if (!data->mig_ctl) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ data->vf_data = (void *)((char *)data->mig_ctl + mig_offset);
+ memset(data->vf_data, 0, data->state_size);
+ data->mig_ctl->data_size = 0;
+
+ ret = vfio_pci_register_dev_region(vdev, VFIO_REGION_TYPE_MIGRATION,
+ VFIO_REGION_SUBTYPE_MIGRATION,
+ &vfio_pci_migration_regops, mig_offset + data->state_size,
+ VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE, data);
+ if (ret) {
+ kfree(data->mig_ctl);
+ return ret;
+ }
+
+ return 0;
+err:
+ kfree(mig_ctl);
+ return ret;
+}
+
+int vfio_pci_migration_init(struct vfio_pci_device *vdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+ struct vfio_pci_migration_data *data = NULL;
+ struct pci_dev *pdev = vdev->pdev;
+ int ret;
+
+ mig_driver = vfio_pci_get_mig_driver(pdev);
+ if (!mig_driver || !mig_driver->dev_mig_ops) {
+ dev_err(&pdev->dev, "unable to find a mig_driver module\n");
+ return -EINVAL;
+ }
+
+ if (!try_module_get(mig_driver->owner)) {
+ pr_err("module %s is not live\n", mig_driver->owner->name);
+ return -ENODEV;
+ }
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ module_put(mig_driver->owner);
+ return -ENOMEM;
+ }
+
+ data->mig_driver = mig_driver;
+ data->vf_dev = pdev;
+
+ ret = vfio_device_mig_data_init(vdev, data);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to init vfio device migration data!\n");
+ goto err;
+ }
+
+ return ret;
+err:
+ kfree(data);
+ module_put(mig_driver->owner);
+ return ret;
+}
+
+void vfio_pci_migration_exit(struct vfio_pci_device *vdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+
+ mig_driver = vfio_pci_get_mig_driver(vdev->pdev);
+ if (!mig_driver || !mig_driver->dev_mig_ops) {
+ dev_warn(&vdev->pdev->dev, "mig_driver is not found\n");
+ return;
+ }
+
+ if (module_refcount(mig_driver->owner) > 0) {
+ vfio_pci_device_release(vdev->pdev, mig_driver);
+ module_put(mig_driver->owner);
+ }
+}
+
+int vfio_pci_register_migration_ops(struct vfio_device_migration_ops *ops,
+ struct module *mod, struct pci_dev *pdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+
+ if (!ops || !mod || !pdev)
+ return -EINVAL;
+
+ mig_driver = vfio_pci_find_mig_drv(pdev, mod);
+ if (mig_driver) {
+ pr_info("%s migration ops has already been registered\n",
+ mod->name);
+ atomic_add(1, &mig_driver->count);
+ return 0;
+ }
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ mig_driver = kzalloc(sizeof(*mig_driver), GFP_KERNEL);
+ if (!mig_driver) {
+ module_put(THIS_MODULE);
+ return -ENOMEM;
+ }
+
+ mig_driver->pdev = pdev;
+ mig_driver->bus_num = pdev->bus->number;
+ mig_driver->owner = mod;
+ mig_driver->dev_mig_ops = ops;
+
+ vfio_pci_add_mig_drv(mig_driver);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_register_migration_ops);
+
+void vfio_pci_unregister_migration_ops(struct module *mod, struct pci_dev *pdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+
+ if (!mod || !pdev)
+ return;
+
+ mig_driver = vfio_pci_find_mig_drv(pdev, mod);
+ if (!mig_driver) {
+ pr_err("mig_driver is not found\n");
+ return;
+ }
+
+ if (atomic_sub_and_test(1, &mig_driver->count)) {
+ vfio_pci_remove_mig_drv(mig_driver);
+ kfree(mig_driver);
+ module_put(THIS_MODULE);
+ pr_info("%s succeed to unregister migration ops\n",
+ THIS_MODULE->name);
+ }
+}
+EXPORT_SYMBOL_GPL(vfio_pci_unregister_migration_ops);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 17d2bae..03af269 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -15,6 +15,7 @@
#include <linux/pci.h>
#include <linux/irqbypass.h>
#include <linux/types.h>
+#include <linux/vfio_pci_migration.h>
#ifndef VFIO_PCI_PRIVATE_H
#define VFIO_PCI_PRIVATE_H
@@ -55,7 +56,7 @@ struct vfio_pci_irq_ctx {
struct vfio_pci_region;
struct vfio_pci_regops {
- size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
+ ssize_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite);
void (*release)(struct vfio_pci_device *vdev,
struct vfio_pci_region *region);
@@ -173,4 +174,15 @@ static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
return -ENODEV;
}
#endif
+
+extern bool vfio_dev_migration_is_supported(struct pci_dev *pdev);
+extern int vfio_pci_migration_init(struct vfio_pci_device *vdev);
+extern void vfio_pci_migration_exit(struct vfio_pci_device *vdev);
+extern int vfio_pci_device_log_start(struct vfio_pci_device *vdev,
+ struct vf_migration_log_info *log_info);
+extern int vfio_pci_device_log_stop(struct vfio_pci_device *vdev,
+ uint32_t uuid);
+extern int vfio_pci_device_log_status_query(struct vfio_pci_device *vdev);
+extern int vfio_pci_device_init(struct pci_dev *pdev);
+extern void vfio_pci_device_uninit(struct pci_dev *pdev);
#endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 7a386fb..35f2a29 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
+#include <linux/vfio_pci_migration.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
@@ -40,6 +41,9 @@
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson(a)redhat.com>"
#define DRIVER_DESC "VFIO - User Level meta-driver"
+#define LOG_BUF_FRAG_SIZE (2 * 1024 * 1024) // fix to 2M
+#define LOG_BUF_MAX_ADDRS_SIZE 128 // max vm ram size is 1T
+
static struct vfio {
struct class *class;
struct list_head iommu_drivers_list;
@@ -57,6 +61,14 @@ struct vfio_iommu_driver {
struct list_head vfio_next;
};
+struct vfio_log_buf {
+ struct vfio_log_buf_info info;
+ int fd;
+ int buffer_state;
+ int device_state;
+ unsigned long *cpu_addrs;
+};
+
struct vfio_container {
struct kref kref;
struct list_head group_list;
@@ -64,6 +76,7 @@ struct vfio_container {
struct vfio_iommu_driver *iommu_driver;
void *iommu_data;
bool noiommu;
+ struct vfio_log_buf log_buf;
};
struct vfio_unbound_dev {
@@ -1158,8 +1171,398 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
return ret;
}
+/*
+ * Forward @cmd/@arg to the ioctl handler of every device in every group
+ * attached to @container.
+ *
+ * Returns 0 when every device accepted the command, the first non-zero
+ * handler result otherwise (the remaining devices are then skipped),
+ * -EINVAL when a device has no ioctl handler, or -ENXIO when the container
+ * holds no device at all.
+ *
+ * NOTE(review): the group and device lists are walked without taking any
+ * lock here - confirm that callers serialise against attach/detach.
+ */
+static long vfio_dispatch_cmd_to_devices(const struct vfio_container *container,
+		unsigned int cmd, unsigned long arg)
+{
+	struct vfio_group *group = NULL;
+	struct vfio_device *device = NULL;
+	long ret = -ENXIO;
+
+	list_for_each_entry(group, &container->group_list, container_next) {
+		list_for_each_entry(device, &group->device_list, group_next) {
+			/* Guard against a bus driver that left ->ioctl unset. */
+			if (!device->ops || !device->ops->ioctl) {
+				pr_err("dispatch cmd to devices failed\n");
+				return -EINVAL;
+			}
+
+			ret = device->ops->ioctl(device->device_data, cmd, arg);
+			if (ret) {
+				pr_err("dispatch cmd to devices failed\n");
+				return ret;
+			}
+		}
+	}
+	return ret;
+}
+
+/*
+ * Ask every device of @container to start dirty logging into the buffer
+ * described by container->log_buf.info.
+ *
+ * Returns -EINVAL when no buffer has been set up yet, the dispatch error
+ * if any device refuses, 0 on success (device_state is then set to 1).
+ */
+static long vfio_log_buf_start(struct vfio_container *container)
+{
+	struct vfio_log_buf_ctl log_buf_ctl;
+	long ret;
+
+	/*
+	 * Without a prior SETUP the scatter list is NULL and the geometry is
+	 * all zero; never hand that to the devices.
+	 */
+	if (container->log_buf.buffer_state == 0 ||
+	    !container->log_buf.info.sgevec) {
+		pr_err("start before setup, please setup log buf first\n");
+		return -EINVAL;
+	}
+
+	log_buf_ctl.argsz = sizeof(struct vfio_log_buf_info);
+	log_buf_ctl.flags = VFIO_DEVICE_LOG_BUF_FLAG_START;
+	log_buf_ctl.data = (void *)&container->log_buf.info;
+	ret = vfio_dispatch_cmd_to_devices(container, VFIO_DEVICE_LOG_BUF_CTL,
+			(unsigned long)&log_buf_ctl);
+	if (ret)
+		return ret;
+
+	container->log_buf.device_state = 1;
+	return 0;
+}
+
+/*
+ * Ask every device of @container to stop dirty logging.
+ * A container whose device_state is already 0 is left alone (returns 0).
+ * device_state is cleared only when the dispatch succeeded.
+ */
+static long vfio_log_buf_stop(struct vfio_container *container)
+{
+	struct vfio_log_buf *lb = &container->log_buf;
+	struct vfio_log_buf_ctl ctl;
+	long err;
+
+	if (!lb->device_state) {
+		pr_warn("device already stopped\n");
+		return 0;
+	}
+
+	ctl.argsz = sizeof(struct vfio_log_buf_info);
+	ctl.flags = VFIO_DEVICE_LOG_BUF_FLAG_STOP;
+	ctl.data = (void *)&lb->info;
+
+	err = vfio_dispatch_cmd_to_devices(container, VFIO_DEVICE_LOG_BUF_CTL,
+					   (unsigned long)&ctl);
+	if (!err)
+		lb->device_state = 0;
+
+	return err;
+}
+
+/*
+ * Dispatch a STATUS_QUERY log-buffer command to every device of
+ * @container and return the dispatch result unchanged.
+ */
+static long vfio_log_buf_query(struct vfio_container *container)
+{
+	struct vfio_log_buf_ctl ctl;
+	unsigned long ctl_arg = (unsigned long)&ctl;
+
+	ctl.data = (void *)&container->log_buf.info;
+	ctl.flags = VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY;
+	ctl.argsz = sizeof(struct vfio_log_buf_info);
+
+	return vfio_dispatch_cmd_to_devices(container, VFIO_DEVICE_LOG_BUF_CTL,
+					    ctl_arg);
+}
+
+/*
+ * mmap handler of the log-buffer fd: maps exactly one fragment.
+ *
+ * vma->vm_pgoff selects the fragment (in pages, must be a multiple of the
+ * fragment size) and the mapping length must equal one fragment.
+ * Returns 0 on success, -EINVAL for any geometry violation, or the
+ * remap_pfn_range() error.
+ *
+ * NOTE(review): the fragments come from dma_alloc_coherent(); translating
+ * them with virt_to_phys() is only valid when that memory lives in the
+ * linear map. dma_mmap_coherent() is the portable way to do this - confirm
+ * on the targeted platform.
+ */
+static int vfio_log_buf_fops_mmap(struct file *filep,
+		struct vm_area_struct *vma)
+{
+	struct vfio_container *container = filep->private_data;
+	struct vfio_log_buf *log_buf = &container->log_buf;
+	unsigned long frag_pg_size;
+	unsigned long frag_offset;
+	unsigned long cpu_addr;
+	phys_addr_t pa;
+	int ret = -EINVAL;
+
+	if (!log_buf->cpu_addrs) {
+		pr_err("mmap before setup, please setup log buf first\n");
+		return ret;
+	}
+
+	if (log_buf->info.frag_size < PAGE_SIZE) {
+		pr_err("mmap frag size should not less than page size!\n");
+		return ret;
+	}
+
+	frag_pg_size = log_buf->info.frag_size / PAGE_SIZE;
+
+	/*
+	 * Only whole fragments can be mapped (see the size check below), so
+	 * an offset pointing into the middle of a fragment cannot be honoured;
+	 * reject it instead of silently mapping the fragment start.
+	 */
+	if (vma->vm_pgoff % frag_pg_size) {
+		pr_err("mmap offset out of range!\n");
+		return ret;
+	}
+	frag_offset = vma->vm_pgoff / frag_pg_size;
+
+	if (frag_offset >= log_buf->info.addrs_size) {
+		pr_err("mmap offset out of range!\n");
+		return ret;
+	}
+
+	if (vma->vm_end - vma->vm_start != log_buf->info.frag_size) {
+		pr_err("mmap size error, should be aligned with frag size!\n");
+		return ret;
+	}
+
+	/* A zero slot means the fragment was never allocated or already freed. */
+	cpu_addr = log_buf->cpu_addrs[frag_offset];
+	if (!cpu_addr) {
+		pr_err("mmap before setup, please setup log buf first\n");
+		return ret;
+	}
+
+	pa = virt_to_phys((void *)cpu_addr);
+	ret = remap_pfn_range(vma, vma->vm_start,
+			pa >> PAGE_SHIFT,
+			vma->vm_end - vma->vm_start,
+			vma->vm_page_prot);
+	if (ret)
+		pr_err("remap_pfn_range error!\n");
+	return ret;
+}
+
+/*
+ * Return the struct device of the first vfio device found while walking
+ * the groups of @container in list order, or NULL when no group holds a
+ * device.
+ */
+static struct device *vfio_get_dev(struct vfio_container *container)
+{
+	struct vfio_device *first;
+	struct vfio_group *grp;
+
+	list_for_each_entry(grp, &container->group_list, container_next) {
+		first = list_first_entry_or_null(&grp->device_list,
+						 struct vfio_device,
+						 group_next);
+		if (first)
+			return first->dev;
+	}
+
+	return NULL;
+}
+
+/*
+ * Free every DMA fragment recorded in @log_buf and clear its slot.
+ *
+ * Walks log_buf->info.addrs_size entries; a slot whose CPU address is 0 is
+ * treated as "never allocated" and skipped, so the function is safe to call
+ * on a partially populated buffer. Does nothing when either bookkeeping
+ * array is missing.
+ */
+static void vfio_log_buf_release_dma(struct device *dev,
+		struct vfio_log_buf *log_buf)
+{
+	u64 i;
+
+	/* The arrays do not change inside the loop; test them once. */
+	if (!log_buf->cpu_addrs || !log_buf->info.sgevec)
+		return;
+
+	for (i = 0; i < log_buf->info.addrs_size; i++) {
+		/*
+		 * Only the CPU address tells whether the slot is populated:
+		 * 0 can be a legitimate DMA handle, so it must not be used as
+		 * an "empty" marker or the fragment would be leaked.
+		 */
+		if (!log_buf->cpu_addrs[i])
+			continue;
+
+		dma_free_coherent(dev, log_buf->info.frag_size,
+				(void *)log_buf->cpu_addrs[i],
+				log_buf->info.sgevec[i].addr);
+		log_buf->cpu_addrs[i] = 0;
+		log_buf->info.sgevec[i].addr = 0;
+	}
+}
+
+/*
+ * Allocate info->addrs_size coherent DMA fragments of info->frag_size bytes
+ * for @dev, recording the CPU address in log_buf->cpu_addrs[] and the DMA
+ * address/length in log_buf->info.sgevec[].
+ *
+ * Returns 0 on success, -ENOMEM as soon as one fragment yields a zero CPU
+ * or DMA address; fragments allocated so far are left for the caller.
+ */
+static long vfio_log_buf_alloc_dma(struct vfio_log_buf_info *info,
+		struct vfio_log_buf *log_buf, struct device *dev)
+{
+	struct vfio_log_buf_sge *sge;
+	void *vaddr;
+	int idx;
+
+	for (idx = 0; idx < info->addrs_size; idx++) {
+		sge = &log_buf->info.sgevec[idx];
+
+		vaddr = dma_alloc_coherent(dev, info->frag_size, &sge->addr,
+					   GFP_KERNEL);
+		log_buf->cpu_addrs[idx] = (unsigned long)vaddr;
+		sge->len = info->frag_size;
+
+		if (!log_buf->cpu_addrs[idx] || !sge->addr)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate the two zeroed bookkeeping arrays of @log_buf (scatter list and
+ * CPU address table), each with info->addrs_size entries.
+ * Returns 0 on success; on -ENOMEM neither array is left allocated.
+ */
+static long vfio_log_buf_alloc_addrs(struct vfio_log_buf_info *info,
+		struct vfio_log_buf *log_buf)
+{
+	log_buf->info.sgevec = kcalloc(info->addrs_size,
+				       sizeof(struct vfio_log_buf_sge),
+				       GFP_KERNEL);
+	if (!log_buf->info.sgevec)
+		return -ENOMEM;
+
+	log_buf->cpu_addrs = kcalloc(info->addrs_size, sizeof(unsigned long),
+				     GFP_KERNEL);
+	if (log_buf->cpu_addrs)
+		return 0;
+
+	/* second allocation failed: undo the first one */
+	kfree(log_buf->info.sgevec);
+	log_buf->info.sgevec = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * Validate the user supplied geometry: the fragment count must lie in
+ * 1..LOG_BUF_MAX_ADDRS_SIZE and the fragment size must be exactly
+ * LOG_BUF_FRAG_SIZE. Returns 0 when valid, -EINVAL otherwise.
+ */
+static long vfio_log_buf_info_valid(struct vfio_log_buf_info *info)
+{
+	bool count_ok = info->addrs_size != 0 &&
+			info->addrs_size <= LOG_BUF_MAX_ADDRS_SIZE;
+
+	if (!count_ok) {
+		pr_err("can`t support vm ram size larger than 1T or equal to 0\n");
+		return -EINVAL;
+	}
+
+	if (info->frag_size == LOG_BUF_FRAG_SIZE)
+		return 0;
+
+	pr_err("only support %d frag size\n", LOG_BUF_FRAG_SIZE);
+	return -EINVAL;
+}
+
+/*
+ * Handle VFIO_DEVICE_LOG_BUF_FLAG_SETUP: copy a struct vfio_log_buf_info
+ * from user address @data, validate it and allocate the bookkeeping arrays
+ * plus one coherent DMA fragment per entry.
+ *
+ * Returns 0 on success or when a buffer already exists, -EFAULT when the
+ * user copy fails, -EINVAL for bad geometry, -ENODEV when the container has
+ * no device to allocate against and -ENOMEM when an allocation fails.
+ * On failure everything allocated here is released again.
+ */
+static long vfio_log_buf_setup(struct vfio_container *container,
+		unsigned long data)
+{
+	struct vfio_log_buf_info info;
+	struct vfio_log_buf *log_buf = &container->log_buf;
+	struct device *dev = NULL;
+	long ret;
+
+	if (log_buf->info.sgevec) {
+		pr_warn("log buf already setup\n");
+		return 0;
+	}
+
+	if (copy_from_user(&info, (void __user *)data,
+			sizeof(struct vfio_log_buf_info)))
+		return -EFAULT;
+
+	ret = vfio_log_buf_info_valid(&info);
+	if (ret)
+		return ret;
+
+	ret = vfio_log_buf_alloc_addrs(&info, log_buf);
+	if (ret)
+		return ret;
+
+	dev = vfio_get_dev(container);
+	if (!dev) {
+		pr_err("can`t get dev\n");
+		ret = -ENODEV;
+		goto err_free_addrs;
+	}
+
+	/*
+	 * Publish the geometry before allocating: the release helper walks
+	 * log_buf->info.addrs_size entries and frees log_buf->info.frag_size
+	 * bytes each, so with these still zero a partial allocation would
+	 * never be freed on the error path.
+	 */
+	log_buf->info.frag_size = info.frag_size;
+	log_buf->info.addrs_size = info.addrs_size;
+
+	ret = vfio_log_buf_alloc_dma(&info, log_buf, dev);
+	if (ret)
+		goto err_free_dma_array;
+
+	log_buf->info.uuid = info.uuid;
+	log_buf->info.buffer_size = info.buffer_size;
+	log_buf->buffer_state = 1;
+	return 0;
+
+err_free_dma_array:
+	vfio_log_buf_release_dma(dev, log_buf);
+	log_buf->info.frag_size = 0;
+	log_buf->info.addrs_size = 0;
+err_free_addrs:
+	kfree(log_buf->cpu_addrs);
+	log_buf->cpu_addrs = NULL;
+	kfree(log_buf->info.sgevec);
+	log_buf->info.sgevec = NULL;
+	return ret;
+}
+
+/*
+ * Free the DMA fragments and both bookkeeping arrays of the container's
+ * log buffer and mark it released.
+ * Returns 0 on success or when nothing is set up, -EFAULT when no device
+ * can be found to free the DMA memory against.
+ *
+ * NOTE(review): device_state is not consulted here - confirm that callers
+ * stop device logging before the fragments are freed.
+ */
+static long vfio_log_buf_release_buffer(struct vfio_container *container)
+{
+	struct vfio_log_buf *lb = &container->log_buf;
+	struct device *dma_dev;
+
+	if (!lb->buffer_state) {
+		pr_warn("buffer already released\n");
+		return 0;
+	}
+
+	dma_dev = vfio_get_dev(container);
+	if (!dma_dev) {
+		pr_err("can`t get dev\n");
+		return -EFAULT;
+	}
+
+	vfio_log_buf_release_dma(dma_dev, lb);
+
+	kfree(lb->cpu_addrs);
+	kfree(lb->info.sgevec);
+	lb->cpu_addrs = NULL;
+	lb->info.sgevec = NULL;
+	lb->buffer_state = 0;
+
+	return 0;
+}
+
+/*
+ * ->release of the log-buffer fd: stop device logging, free the buffer and
+ * wipe the container's log_buf state (including the cached fd number).
+ * Failures of the two helpers are ignored; always returns 0.
+ */
+static int vfio_log_buf_release(struct inode *inode, struct file *filep)
+{
+	struct vfio_container *container = filep->private_data;
+	struct vfio_log_buf *lb = &container->log_buf;
+
+	vfio_log_buf_stop(container);
+	vfio_log_buf_release_buffer(container);
+	memset(lb, 0, sizeof(struct vfio_log_buf));
+
+	return 0;
+}
+
+/*
+ * Decode a struct vfio_log_buf_ctl copied from user address @arg and run
+ * the operation selected by its flags field.
+ * Returns -EFAULT when the copy fails, -EINVAL for an unknown flag value,
+ * otherwise the result of the selected operation.
+ */
+static long vfio_ioctl_handle_log_buf_ctl(struct vfio_container *container,
+		unsigned long arg)
+{
+	struct vfio_log_buf_ctl ctl;
+
+	if (copy_from_user(&ctl, (void __user *)arg,
+			   sizeof(struct vfio_log_buf_ctl)))
+		return -EFAULT;
+
+	switch (ctl.flags) {
+	case VFIO_DEVICE_LOG_BUF_FLAG_SETUP:
+		return vfio_log_buf_setup(container, (unsigned long)ctl.data);
+	case VFIO_DEVICE_LOG_BUF_FLAG_RELEASE:
+		return vfio_log_buf_release_buffer(container);
+	case VFIO_DEVICE_LOG_BUF_FLAG_START:
+		return vfio_log_buf_start(container);
+	case VFIO_DEVICE_LOG_BUF_FLAG_STOP:
+		return vfio_log_buf_stop(container);
+	case VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY:
+		return vfio_log_buf_query(container);
+	default:
+		break;
+	}
+
+	pr_err("log buf control flag incorrect\n");
+	return -EINVAL;
+}
+
+/*
+ * ioctl entry point of the log-buffer fd. VFIO_LOG_BUF_CTL is the only
+ * command understood; anything else is logged and answered with -EINVAL.
+ */
+static long vfio_log_buf_fops_unl_ioctl(struct file *filep,
+		unsigned int cmd, unsigned long arg)
+{
+	struct vfio_container *container = filep->private_data;
+
+	if (cmd != VFIO_LOG_BUF_CTL) {
+		pr_err("log buf control cmd incorrect\n");
+		return -EINVAL;
+	}
+
+	return vfio_ioctl_handle_log_buf_ctl(container, arg);
+}
+
+#ifdef CONFIG_COMPAT
+/*
+ * 32-bit compat entry point: convert the user pointer with compat_ptr()
+ * and reuse the native handler.
+ *
+ * NOTE(review): struct vfio_log_buf_ctl and struct vfio_log_buf_info both
+ * embed a pointer, so their 32-bit layout presumably differs from the
+ * native one - confirm that a plain pointer conversion is sufficient.
+ */
+static long vfio_log_buf_fops_compat_ioctl(struct file *filep,
+		unsigned int cmd, unsigned long arg)
+{
+	unsigned long native_arg = (unsigned long)compat_ptr(arg);
+
+	return vfio_log_buf_fops_unl_ioctl(filep, cmd, native_arg);
+}
+#endif /* CONFIG_COMPAT */
+
+static const struct file_operations vfio_log_buf_fops = { /* ops of the anonymous "[vfio-log-buf]" file */
+ .owner = THIS_MODULE,
+ .mmap = vfio_log_buf_fops_mmap, /* maps one DMA fragment per call */
+ .unlocked_ioctl = vfio_log_buf_fops_unl_ioctl, /* VFIO_LOG_BUF_CTL only */
+ .release = vfio_log_buf_release, /* stops logging and frees the buffer */
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = vfio_log_buf_fops_compat_ioctl,
+#endif
+};
+
+/*
+ * Handle VFIO_GET_LOG_BUF_FD: return the cached log-buffer fd number when
+ * one was already handed out, otherwise create an anonymous
+ * "[vfio-log-buf]" file bound to @container, install it and cache its
+ * number. @arg is unused. Returns the fd number or a negative errno.
+ *
+ * NOTE(review): the new file stores @container as private_data without
+ * visibly taking a reference on it - confirm the container cannot be freed
+ * while this fd is still open.
+ */
+static int vfio_get_log_buf_fd(struct vfio_container *container,
+		unsigned long arg)
+{
+	struct file *anon;
+	int fd;
+
+	if (container->log_buf.fd > 0)
+		return container->log_buf.fd;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		pr_err("get_unused_fd_flags get fd failed\n");
+		return fd;
+	}
+
+	anon = anon_inode_getfile("[vfio-log-buf]", &vfio_log_buf_fops,
+				  container, O_RDWR);
+	if (IS_ERR(anon)) {
+		pr_err("anon_inode_getfile failed\n");
+		put_unused_fd(fd);
+		return PTR_ERR(anon);
+	}
+
+	anon->f_mode |= (FMODE_READ | FMODE_WRITE | FMODE_LSEEK);
+	fd_install(fd, anon);
+	container->log_buf.fd = fd;
+
+	return fd;
+}
+
static long vfio_fops_unl_ioctl(struct file *filep,
- unsigned int cmd, unsigned long arg)
+ unsigned int cmd, unsigned long arg)
{
struct vfio_container *container = filep->private_data;
struct vfio_iommu_driver *driver;
@@ -1179,6 +1582,9 @@ static long vfio_fops_unl_ioctl(struct file *filep,
case VFIO_SET_IOMMU:
ret = vfio_ioctl_set_iommu(container, arg);
break;
+ case VFIO_GET_LOG_BUF_FD:
+ ret = vfio_get_log_buf_fd(container, arg);
+ break;
default:
driver = container->iommu_driver;
data = container->iommu_data;
@@ -1210,6 +1616,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
INIT_LIST_HEAD(&container->group_list);
init_rwsem(&container->group_lock);
kref_init(&container->kref);
+ memset(&container->log_buf, 0, sizeof(struct vfio_log_buf));
filep->private_data = container;
@@ -1219,9 +1626,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
static int vfio_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_container *container = filep->private_data;
-
filep->private_data = NULL;
-
vfio_container_put(container);
return 0;
diff --git a/include/linux/vfio_pci_migration.h b/include/linux/vfio_pci_migration.h
new file mode 100644
index 0000000..464ffb4
--- /dev/null
+++ b/include/linux/vfio_pci_migration.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022 Huawei Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef VFIO_PCI_MIGRATION_H
+#define VFIO_PCI_MIGRATION_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+
+#define VFIO_REGION_TYPE_MIGRATION (3)
+/* sub-types for VFIO_REGION_TYPE_MIGRATION */
+#define VFIO_REGION_SUBTYPE_MIGRATION (1)
+
+#define VFIO_MIGRATION_BUFFER_MAX_SIZE SZ_256K
+#define VFIO_MIGRATION_REGION_DATA_OFFSET \
+ (sizeof(struct vfio_device_migration_info))
+#define VFIO_DEVICE_MIGRATION_OFFSET(x) \
+ offsetof(struct vfio_device_migration_info, x)
+
+struct vfio_device_migration_info { /* control header; offsets below its size are decoded as these fields */
+ __u32 device_state; /* VFIO device state */
+#define VFIO_DEVICE_STATE_STOP (0)
+#define VFIO_DEVICE_STATE_RUNNING (1 << 0)
+#define VFIO_DEVICE_STATE_SAVING (1 << 1)
+#define VFIO_DEVICE_STATE_RESUMING (1 << 2)
+#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \
+ VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING)
+ __u32 reserved;
+
+ __u32 device_cmd; /* write-only command word; only VFIO_DEVICE_MIGRATION_CANCEL is handled */
+ __u32 version_id; /* read-only through the region handler */
+
+ __u64 pending_bytes; /* read-only; reported as 0 while SAVING|RUNNING */
+ __u64 data_offset; /* read-only; region offset where the state data window starts */
+ __u64 data_size; /* writable only in RESUMING, readable only in SAVING */
+};
+
+enum { /* presumably the values written to device_cmd - TODO confirm */
+ VFIO_DEVICE_STOP = 0xffff0001, /* exceeds INT_MAX; relies on the compiler accepting wide enum constants */
+ VFIO_DEVICE_CONTINUE,
+ VFIO_DEVICE_MIGRATION_CANCEL, /* the only value the device_cmd handler acts on */
+};
+
+struct vfio_log_buf_sge { /* one fragment of the log buffer */
+ __u64 len; /* fragment length in bytes (set to frag_size at allocation) */
+ __u64 addr; /* DMA address returned by dma_alloc_coherent() */
+};
+
+struct vfio_log_buf_info { /* log buffer description; copied from user space on SETUP */
+ __u32 uuid; /* passed to the vendor driver as dom_uuid / log_stop uuid */
+ __u64 buffer_size; /* stored and forwarded as-is; not validated by the setup path */
+ __u64 addrs_size; /* number of fragments, i.e. entries in sgevec */
+ __u64 frag_size; /* size of one fragment in bytes */
+ struct vfio_log_buf_sge *sgevec; /* kernel-allocated array; the value supplied by user space is not used */
+};
+
+struct vfio_log_buf_ctl { /* argument of VFIO_LOG_BUF_CTL and VFIO_DEVICE_LOG_BUF_CTL */
+ __u32 argsz; /* not checked by the handlers visible in this patch */
+ __u32 flags; /* exactly one of the VFIO_DEVICE_LOG_BUF_FLAG_* values below */
+ #define VFIO_DEVICE_LOG_BUF_FLAG_SETUP (1 << 0)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_RELEASE (1 << 1)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_START (1 << 2)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_STOP (1 << 3)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY (1 << 4)
+ void *data; /* points to a struct vfio_log_buf_info */
+};
+#define VFIO_LOG_BUF_CTL _IO(VFIO_TYPE, VFIO_BASE + 21)
+#define VFIO_GET_LOG_BUF_FD _IO(VFIO_TYPE, VFIO_BASE + 22)
+#define VFIO_DEVICE_LOG_BUF_CTL _IO(VFIO_TYPE, VFIO_BASE + 23)
+
+struct vf_migration_log_info { /* log buffer description handed to the vendor log_start op */
+ __u32 dom_uuid; /* filled from vfio_log_buf_info.uuid */
+ __u64 buffer_size; /* filled from vfio_log_buf_info.buffer_size */
+ __u64 sge_len; /* filled from vfio_log_buf_info.frag_size */
+ __u64 sge_num; /* filled from vfio_log_buf_info.addrs_size */
+ struct vfio_log_buf_sge *sgevec; /* same array as vfio_log_buf_info.sgevec */
+};
+
+struct vfio_device_migration_ops { /* vendor callbacks; the int ops are treated as failed on any non-zero return */
+ /* Get device information */
+ int (*get_info)(struct pci_dev *pdev,
+ struct vfio_device_migration_info *info);
+ /* Enable a vf device */
+ int (*enable)(struct pci_dev *pdev);
+ /* Disable a vf device */
+ int (*disable)(struct pci_dev *pdev);
+ /* Save a vf device */
+ int (*save)(struct pci_dev *pdev, void *base,
+ uint64_t off, uint64_t count);
+ /* Resuming a vf device */
+ int (*restore)(struct pci_dev *pdev, void *base,
+ uint64_t off, uint64_t count);
+ /* Log start a vf device */
+ int (*log_start)(struct pci_dev *pdev,
+ struct vf_migration_log_info *log_info);
+ /* Log stop a vf device */
+ int (*log_stop)(struct pci_dev *pdev, uint32_t uuid);
+ /* Get vf device log status */
+ int (*get_log_status)(struct pci_dev *pdev);
+ /* Pre enable a vf device(load_setup, before restore a vf) */
+ int (*pre_enable)(struct pci_dev *pdev);
+ /* Cancel a vf device when live migration failed (rollback) */
+ int (*cancel)(struct pci_dev *pdev);
+ /* Init a vf device */
+ int (*init)(struct pci_dev *pdev);
+ /* Uninit a vf device */
+ void (*uninit)(struct pci_dev *pdev); /* optional: skipped when NULL */
+ /* Release a vf device */
+ void (*release)(struct pci_dev *pdev); /* optional: skipped when NULL */
+};
+
+struct vfio_pci_vendor_mig_driver { /* one registered vendor migration driver */
+ struct pci_dev *pdev;
+ unsigned char bus_num; /* lookup key: compared with the bus number of the device's physical function */
+ struct vfio_device_migration_ops *dev_mig_ops; /* vendor callbacks; callers treat NULL as "unsupported" */
+ struct module *owner; /* second lookup key used by vfio_pci_find_mig_drv() */
+ atomic_t count; /* set to 1 when the driver is added to the list */
+ struct list_head list; /* link in vfio_pci_mig_drivers_list */
+};
+
+struct vfio_pci_migration_data { /* private data of a migration region */
+ u64 state_size; /* byte count passed to the vendor save op; upper bound for pending_bytes */
+ struct pci_dev *vf_dev; /* device the vendor ops are invoked on */
+ struct vfio_pci_vendor_mig_driver *mig_driver; /* vendor driver providing dev_mig_ops */
+ struct vfio_device_migration_info *mig_ctl; /* control header; also passed as "base" to save/restore */
+ void *vf_data; /* backing memory of the data window accessed at pos - data_offset */
+};
+
+int vfio_pci_register_migration_ops(struct vfio_device_migration_ops *ops,
+ struct module *mod, struct pci_dev *pdev);
+void vfio_pci_unregister_migration_ops(struct module *mod,
+ struct pci_dev *pdev);
+
+#endif /* VFIO_PCI_MIGRATION_H */
--
1.8.3.1
2
1
From: Rong Wang <w_angrong(a)163.com>
kunpeng inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5CO9A
CVE: NA
---------------------------------
For pass-through devices, the hypervisor can't control the status of
the device and can't track memory dirtied by device DMA during
migration.
The goal of this framework is to cooperate with the hardware to
accomplish the tasks above.
qemu
|status control and dirty memory report
vfio
|ops to hardware
hardware
Signed-off-by: Rong Wang <w_angrong(a)163.com>
Signed-off-by: HuHua Li <18245010845(a)163.com>
Signed-off-by: Ripeng Qiu <965412048(a)qq.com>
---
drivers/vfio/pci/Makefile | 2 +-
drivers/vfio/pci/vfio_pci.c | 54 +++
drivers/vfio/pci/vfio_pci_migration.c | 755 ++++++++++++++++++++++++++++++++++
drivers/vfio/pci/vfio_pci_private.h | 14 +-
drivers/vfio/vfio.c | 411 +++++++++++++++++-
include/linux/vfio_pci_migration.h | 136 ++++++
6 files changed, 1367 insertions(+), 5 deletions(-)
create mode 100644 drivers/vfio/pci/vfio_pci_migration.c
create mode 100644 include/linux/vfio_pci_migration.h
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 76d8ec0..80a777d 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -1,5 +1,5 @@
-vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
+vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio_pci_migration.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 51b791c..59d8280 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -30,6 +30,7 @@
#include <linux/vgaarb.h>
#include <linux/nospec.h>
#include <linux/sched/mm.h>
+#include <linux/vfio_pci_migration.h>
#include "vfio_pci_private.h"
@@ -296,6 +297,14 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
vfio_pci_probe_mmaps(vdev);
+ if (vfio_dev_migration_is_supported(pdev)) {
+ ret = vfio_pci_migration_init(vdev);
+ if (ret) {
+ dev_warn(&vdev->pdev->dev, "Failed to init vfio_pci_migration\n");
+ vfio_pci_disable(vdev);
+ return ret;
+ }
+ }
return 0;
}
@@ -392,6 +401,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
out:
pci_disable_device(pdev);
+ vfio_pci_migration_exit(vdev);
vfio_pci_try_bus_reset(vdev);
if (!disable_idle_d3)
@@ -642,6 +652,41 @@ struct vfio_devices {
int max_index;
};
+/*
+ * Handle VFIO_DEVICE_LOG_BUF_CTL for one vfio-pci device.
+ *
+ * @arg is interpreted as a kernel pointer to a struct vfio_log_buf_ctl
+ * whose ->data points to a struct vfio_log_buf_info. START, STOP and
+ * STATUS_QUERY are forwarded to the vendor migration driver; any other
+ * flag, or a NULL control/info pointer, yields -EINVAL.
+ *
+ * NOTE(review): vfio_pci_ioctl() passes its arg here unchanged and the
+ * pointers are dereferenced directly. If this command can also be issued
+ * from user space on the device fd, arg is user-controlled and must not be
+ * trusted as a kernel address - confirm and restrict to in-kernel callers.
+ */
+static long vfio_pci_handle_log_buf_ctl(struct vfio_pci_device *vdev,
+		const unsigned long arg)
+{
+	struct vfio_log_buf_ctl *log_buf_ctl = NULL;
+	struct vfio_log_buf_info *log_buf_info = NULL;
+	struct vf_migration_log_info migration_log_info;
+	long ret = 0;
+
+	log_buf_ctl = (struct vfio_log_buf_ctl *)arg;
+	if (!log_buf_ctl)
+		return -EINVAL;
+
+	log_buf_info = (struct vfio_log_buf_info *)log_buf_ctl->data;
+	if (!log_buf_info)
+		return -EINVAL;
+
+	switch (log_buf_ctl->flags) {
+	case VFIO_DEVICE_LOG_BUF_FLAG_START:
+		migration_log_info.dom_uuid = log_buf_info->uuid;
+		migration_log_info.buffer_size =
+			log_buf_info->buffer_size;
+		migration_log_info.sge_num = log_buf_info->addrs_size;
+		migration_log_info.sge_len = log_buf_info->frag_size;
+		migration_log_info.sgevec = log_buf_info->sgevec;
+		ret = vfio_pci_device_log_start(vdev,
+			&migration_log_info);
+		break;
+	case VFIO_DEVICE_LOG_BUF_FLAG_STOP:
+		ret = vfio_pci_device_log_stop(vdev,
+			log_buf_info->uuid);
+		break;
+	case VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY:
+		ret = vfio_pci_device_log_status_query(vdev);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	return ret;
+}
static long vfio_pci_ioctl(void *device_data,
unsigned int cmd, unsigned long arg)
{
@@ -1142,6 +1187,8 @@ static long vfio_pci_ioctl(void *device_data,
return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
ioeventfd.data, count, ioeventfd.fd);
+ } else if (cmd == VFIO_DEVICE_LOG_BUF_CTL) {
+ return vfio_pci_handle_log_buf_ctl(vdev, arg);
}
return -ENOTTY;
@@ -1566,6 +1613,9 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
pci_set_power_state(pdev, PCI_D3hot);
}
+ if (vfio_dev_migration_is_supported(pdev))
+ ret = vfio_pci_device_init(pdev);
+
return ret;
}
@@ -1591,6 +1641,10 @@ static void vfio_pci_remove(struct pci_dev *pdev)
if (!disable_idle_d3)
pci_set_power_state(pdev, PCI_D0);
+
+ if (vfio_dev_migration_is_supported(pdev)) {
+ vfio_pci_device_uninit(pdev);
+ }
}
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
diff --git a/drivers/vfio/pci/vfio_pci_migration.c b/drivers/vfio/pci/vfio_pci_migration.c
new file mode 100644
index 0000000..f69cd13
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_migration.c
@@ -0,0 +1,755 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Huawei Technologies Co., Ltd. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+#include <linux/vfio_pci_migration.h>
+
+#include "vfio_pci_private.h"
+
+static LIST_HEAD(vfio_pci_mig_drivers_list);
+static DEFINE_MUTEX(vfio_pci_mig_drivers_mutex);
+
+/*
+ * Append @mig_drv to the global vendor driver list with its count set to 1.
+ * Both steps happen under vfio_pci_mig_drivers_mutex.
+ */
+static void vfio_pci_add_mig_drv(struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	struct list_head *drivers = &vfio_pci_mig_drivers_list;
+
+	mutex_lock(&vfio_pci_mig_drivers_mutex);
+
+	atomic_set(&mig_drv->count, 1);
+	list_add_tail(&mig_drv->list, drivers);
+
+	mutex_unlock(&vfio_pci_mig_drivers_mutex);
+}
+
+/* Unlink @mig_drv from the global vendor driver list under the list mutex. */
+static void vfio_pci_remove_mig_drv(struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	struct list_head *node = &mig_drv->list;
+
+	mutex_lock(&vfio_pci_mig_drivers_mutex);
+	list_del(node);
+	mutex_unlock(&vfio_pci_mig_drivers_mutex);
+}
+
+/*
+ * Look up the registered vendor driver owned by @module whose bus number
+ * equals the bus number of @pdev. Returns the first match or NULL.
+ * The list mutex is held only for the duration of the search.
+ */
+static struct vfio_pci_vendor_mig_driver *
+	vfio_pci_find_mig_drv(struct pci_dev *pdev, struct module *module)
+{
+	struct vfio_pci_vendor_mig_driver *found = NULL;
+	struct vfio_pci_vendor_mig_driver *cur;
+
+	mutex_lock(&vfio_pci_mig_drivers_mutex);
+	list_for_each_entry(cur, &vfio_pci_mig_drivers_list, list) {
+		if (cur->owner != module)
+			continue;
+		if (cur->bus_num != pdev->bus->number)
+			continue;
+		found = cur;
+		break;
+	}
+	mutex_unlock(&vfio_pci_mig_drivers_mutex);
+
+	return found;
+}
+
+/*
+ * Return the first registered vendor driver whose bus number equals the
+ * bus number of the physical function of @pdev (pci_physfn()), or NULL.
+ * The list mutex is held only for the duration of the search.
+ */
+static struct vfio_pci_vendor_mig_driver *
+	vfio_pci_get_mig_driver(struct pci_dev *pdev)
+{
+	struct pci_dev *pf_dev = pci_physfn(pdev);
+	struct vfio_pci_vendor_mig_driver *found = NULL;
+	struct vfio_pci_vendor_mig_driver *cur;
+
+	mutex_lock(&vfio_pci_mig_drivers_mutex);
+	list_for_each_entry(cur, &vfio_pci_mig_drivers_list, list) {
+		if (cur->bus_num == pf_dev->bus->number) {
+			found = cur;
+			break;
+		}
+	}
+	mutex_unlock(&vfio_pci_mig_drivers_mutex);
+
+	return found;
+}
+
+/*
+ * Tell whether a vendor migration driver with a non-NULL ops table is
+ * registered for @pdev. Emits a warning and returns false otherwise.
+ */
+bool vfio_dev_migration_is_supported(struct pci_dev *pdev)
+{
+	struct vfio_pci_vendor_mig_driver *drv = vfio_pci_get_mig_driver(pdev);
+
+	if (drv && drv->dev_mig_ops)
+		return true;
+
+	dev_warn(&pdev->dev, "unable to find a mig_drv module\n");
+	return false;
+}
+
+/*
+ * Invoke the vendor log_start op for @vdev with @log_info.
+ * Returns 0 on success; -EFAULT when no vendor driver/ops table exists,
+ * the op is missing, or the op returns non-zero.
+ */
+int vfio_pci_device_log_start(struct vfio_pci_device *vdev,
+		struct vf_migration_log_info *log_info)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_vendor_mig_driver *drv;
+
+	drv = vfio_pci_get_mig_driver(pdev);
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&vdev->pdev->dev, "unable to find a mig_drv module\n");
+		return -EFAULT;
+	}
+
+	if (drv->dev_mig_ops->log_start &&
+	    drv->dev_mig_ops->log_start(pdev, log_info) == 0)
+		return 0;
+
+	dev_err(&vdev->pdev->dev, "failed to set log start\n");
+	return -EFAULT;
+}
+
+/*
+ * Invoke the vendor log_stop op for @vdev with @uuid.
+ * Returns 0 on success; -EFAULT when no vendor driver/ops table exists,
+ * the op is missing, or the op returns non-zero.
+ */
+int vfio_pci_device_log_stop(struct vfio_pci_device *vdev, uint32_t uuid)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_vendor_mig_driver *drv;
+
+	drv = vfio_pci_get_mig_driver(pdev);
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&vdev->pdev->dev, "unable to find a mig_drv module\n");
+		return -EFAULT;
+	}
+
+	if (drv->dev_mig_ops->log_stop &&
+	    drv->dev_mig_ops->log_stop(pdev, uuid) == 0)
+		return 0;
+
+	dev_err(&vdev->pdev->dev, "failed to set log stop\n");
+	return -EFAULT;
+}
+
+/*
+ * Invoke the vendor get_log_status op for @vdev.
+ * Returns 0 when the op returns 0; -EFAULT when no vendor driver/ops table
+ * exists, the op is missing, or the op returns non-zero.
+ */
+int vfio_pci_device_log_status_query(struct vfio_pci_device *vdev)
+{
+	struct pci_dev *pdev = vdev->pdev;
+	struct vfio_pci_vendor_mig_driver *drv;
+
+	drv = vfio_pci_get_mig_driver(pdev);
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&vdev->pdev->dev, "unable to find a mig_drv module\n");
+		return -EFAULT;
+	}
+
+	if (drv->dev_mig_ops->get_log_status &&
+	    drv->dev_mig_ops->get_log_status(pdev) == 0)
+		return 0;
+
+	dev_err(&vdev->pdev->dev, "failed to get log status\n");
+	return -EFAULT;
+}
+
+/*
+ * Run the vendor init op for @pdev and return its result.
+ * Returns -EFAULT when no vendor driver/ops table exists or the op is
+ * not provided.
+ */
+int vfio_pci_device_init(struct pci_dev *pdev)
+{
+	struct vfio_pci_vendor_mig_driver *drv = vfio_pci_get_mig_driver(pdev);
+
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&pdev->dev, "unable to find a mig_drv module\n");
+		return -EFAULT;
+	}
+
+	if (!drv->dev_mig_ops->init)
+		return -EFAULT;
+
+	return drv->dev_mig_ops->init(pdev);
+}
+
+/*
+ * Run the vendor uninit op for @pdev when one is provided. Logs an error
+ * and does nothing when no vendor driver/ops table exists.
+ */
+void vfio_pci_device_uninit(struct pci_dev *pdev)
+{
+	struct vfio_pci_vendor_mig_driver *drv = vfio_pci_get_mig_driver(pdev);
+
+	if (!drv || !drv->dev_mig_ops) {
+		dev_err(&pdev->dev, "unable to find a mig_drv module\n");
+		return;
+	}
+
+	if (!drv->dev_mig_ops->uninit)
+		return;
+
+	drv->dev_mig_ops->uninit(pdev);
+}
+
+/* Call the vendor release op for @pdev if @mig_drv provides one. */
+static void vfio_pci_device_release(struct pci_dev *pdev,
+		struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	if (!mig_drv->dev_mig_ops->release)
+		return;
+
+	mig_drv->dev_mig_ops->release(pdev);
+}
+
+/*
+ * Fill @mig_info through the vendor get_info op and return its result,
+ * or -EFAULT when the op is not provided.
+ */
+static int vfio_pci_device_get_info(struct pci_dev *pdev,
+		struct vfio_device_migration_info *mig_info,
+		struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	if (!mig_drv->dev_mig_ops->get_info)
+		return -EFAULT;
+
+	return mig_drv->dev_mig_ops->get_info(pdev, mig_info);
+}
+
+/*
+ * Run the vendor enable op. Returns 0 when it succeeds, -EINVAL when the
+ * op is missing or returns non-zero.
+ */
+static int vfio_pci_device_enable(struct pci_dev *pdev,
+		struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	if (mig_drv->dev_mig_ops->enable &&
+	    mig_drv->dev_mig_ops->enable(pdev) == 0)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * Run the vendor disable op. Returns 0 when it succeeds, -EINVAL when the
+ * op is missing or returns non-zero.
+ */
+static int vfio_pci_device_disable(struct pci_dev *pdev,
+		struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	if (mig_drv->dev_mig_ops->disable &&
+	    mig_drv->dev_mig_ops->disable(pdev) == 0)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * Run the vendor pre_enable op. Returns 0 when it succeeds, -EINVAL when
+ * the op is missing or returns non-zero.
+ */
+static int vfio_pci_device_pre_enable(struct pci_dev *pdev,
+		struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+	if (mig_drv->dev_mig_ops->pre_enable &&
+	    mig_drv->dev_mig_ops->pre_enable(pdev) == 0)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * Stop the VF if it is still running, then let the vendor save op write
+ * data->state_size bytes of device state at data_offset relative to the
+ * control header. On success data_size and pending_bytes are both set to
+ * state_size.
+ *
+ * Returns 0 on success, the disable error when stopping fails, -EFAULT
+ * when no save op exists and -EINVAL when the save op fails.
+ */
+static int vfio_pci_device_state_save(struct pci_dev *pdev,
+		struct vfio_pci_migration_data *data)
+{
+	struct vfio_pci_vendor_mig_driver *drv = data->mig_driver;
+	struct vfio_device_migration_info *ctl = data->mig_ctl;
+	int err;
+
+	if (ctl->device_state & VFIO_DEVICE_STATE_RUNNING) {
+		err = vfio_pci_device_disable(pdev, drv);
+		if (err) {
+			dev_err(&pdev->dev, "failed to stop VF function!\n");
+			return err;
+		}
+		ctl->device_state &= ~VFIO_DEVICE_STATE_RUNNING;
+	}
+
+	if (!drv->dev_mig_ops || !drv->dev_mig_ops->save)
+		return -EFAULT;
+
+	err = drv->dev_mig_ops->save(pdev, (void *)ctl, ctl->data_offset,
+				     data->state_size);
+	if (err) {
+		dev_err(&pdev->dev, "failed to save device state!\n");
+		return -EINVAL;
+	}
+
+	ctl->data_size = data->state_size;
+	ctl->pending_bytes = ctl->data_size;
+	return 0;
+}
+
+/*
+ * Hand data_size bytes of saved state (at data_offset relative to the
+ * control header) to the vendor restore op.
+ * Returns 0 on success, -EFAULT when no restore op exists and -EINVAL when
+ * the op fails.
+ */
+static int vfio_pci_device_state_restore(struct vfio_pci_migration_data *data)
+{
+	struct vfio_pci_vendor_mig_driver *drv = data->mig_driver;
+	struct vfio_device_migration_info *ctl = data->mig_ctl;
+	struct pci_dev *vf = data->vf_dev;
+
+	if (!drv->dev_mig_ops || !drv->dev_mig_ops->restore)
+		return -EFAULT;
+
+	if (drv->dev_mig_ops->restore(vf, (void *)ctl, ctl->data_offset,
+				      ctl->data_size)) {
+		dev_err(&vf->dev, "failed to restore device state!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Move the device to @state and record it in the control header.
+ *
+ * A request for the current state is a no-op. Supported targets are
+ * RUNNING, SAVING|RUNNING (nothing to do), SAVING, STOP and RESUMING; any
+ * other value yields -EFAULT. The recorded state changes only when the
+ * transition helper succeeded. Returns -EINVAL without an ops table.
+ */
+static int vfio_pci_set_device_state(struct vfio_pci_migration_data *data,
+		u32 state)
+{
+	struct vfio_pci_vendor_mig_driver *drv = data->mig_driver;
+	struct vfio_device_migration_info *ctl = data->mig_ctl;
+	struct pci_dev *vf = data->vf_dev;
+	bool running;
+	int err = 0;
+
+	if (ctl->device_state == state)
+		return 0;
+
+	if (!drv->dev_mig_ops)
+		return -EINVAL;
+
+	running = (ctl->device_state & VFIO_DEVICE_STATE_RUNNING) != 0;
+
+	switch (state) {
+	case VFIO_DEVICE_STATE_RUNNING:
+		if (!running)
+			err = vfio_pci_device_enable(vf, drv);
+		break;
+	case VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RUNNING:
+		/* pre-copy: the device should start logging data */
+		break;
+	case VFIO_DEVICE_STATE_SAVING:
+		/* stops the VF and captures its state */
+		err = vfio_pci_device_state_save(vf, data);
+		break;
+	case VFIO_DEVICE_STATE_STOP:
+		if (running)
+			err = vfio_pci_device_disable(vf, drv);
+		break;
+	case VFIO_DEVICE_STATE_RESUMING:
+		err = vfio_pci_device_pre_enable(vf, drv);
+		break;
+	default:
+		err = -EFAULT;
+		break;
+	}
+
+	if (!err)
+		ctl->device_state = state;
+
+	return err;
+}
+
+/*
+ * Region access to the device_state field: a write requests a state
+ * transition, a read returns the recorded state. @count must be exactly
+ * 4 bytes. Returns @count on success or a negative errno.
+ */
+static ssize_t vfio_pci_handle_mig_dev_state(
+		struct vfio_pci_migration_data *data,
+		char __user *buf, size_t count, bool iswrite)
+{
+	struct vfio_device_migration_info *ctl = data->mig_ctl;
+	u32 requested;
+	int err;
+
+	if (count != sizeof(requested))
+		return -EINVAL;
+
+	if (!iswrite) {
+		if (copy_to_user(buf, &ctl->device_state, count))
+			return -EFAULT;
+		return count;
+	}
+
+	if (copy_from_user(&requested, buf, count))
+		return -EFAULT;
+
+	err = vfio_pci_set_device_state(data, requested);
+	if (err)
+		return err;
+
+	return count;
+}
+
+/*
+ * Region read of pending_bytes (8 bytes, read-only). While the device is
+ * in SAVING|RUNNING (pre-copy) the reported value is always 0.
+ * Returns @count on success, -EINVAL for writes or a wrong size, -EFAULT
+ * when the user copy fails.
+ */
+static ssize_t vfio_pci_handle_mig_pending_bytes(
+		struct vfio_device_migration_info *mig_info,
+		char __user *buf, size_t count, bool iswrite)
+{
+	const u32 precopy = VFIO_DEVICE_STATE_SAVING |
+			    VFIO_DEVICE_STATE_RUNNING;
+	u64 reported;
+
+	if (iswrite || count != sizeof(reported))
+		return -EINVAL;
+
+	/* pre-copy has no data to hand out yet */
+	if (mig_info->device_state == precopy)
+		reported = 0;
+	else
+		reported = mig_info->pending_bytes;
+
+	if (copy_to_user(buf, &reported, count))
+		return -EFAULT;
+
+	return count;
+}
+
+/*
+ * Region read of data_offset (8 bytes, read-only).
+ * Returns @count on success, -EINVAL for writes or a wrong size, -EFAULT
+ * when the user copy fails.
+ */
+static ssize_t vfio_pci_handle_mig_data_offset(
+		struct vfio_device_migration_info *mig_info,
+		char __user *buf, size_t count, bool iswrite)
+{
+	u64 offset;
+
+	if (iswrite || count != sizeof(offset))
+		return -EINVAL;
+
+	offset = mig_info->data_offset;
+	if (copy_to_user(buf, &offset, count))
+		return -EFAULT;
+
+	return count;
+}
+
+/*
+ * Region access to data_size (8 bytes). Writes are accepted only while the
+ * device is in RESUMING, reads only while it is in SAVING.
+ * Returns @count on success, -EINVAL for a wrong size or state, -EFAULT
+ * when the user copy fails.
+ */
+static ssize_t vfio_pci_handle_mig_data_size(
+		struct vfio_device_migration_info *mig_info,
+		char __user *buf, size_t count, bool iswrite)
+{
+	u64 incoming;
+
+	if (count != sizeof(incoming))
+		return -EINVAL;
+
+	if (!iswrite) {
+		if (mig_info->device_state != VFIO_DEVICE_STATE_SAVING)
+			return -EINVAL;
+		if (copy_to_user(buf, &mig_info->data_size, sizeof(incoming)))
+			return -EFAULT;
+		return count;
+	}
+
+	/* data_size may only be set while resuming */
+	if (mig_info->device_state != VFIO_DEVICE_STATE_RESUMING)
+		return -EINVAL;
+	if (copy_from_user(&incoming, buf, sizeof(incoming)))
+		return -EFAULT;
+
+	mig_info->data_size = incoming;
+	return count;
+}
+
+/*
+ * Region write of device_cmd (4 bytes, write-only). The only command acted
+ * on is VFIO_DEVICE_MIGRATION_CANCEL, which runs the vendor cancel op.
+ * Returns @count on success; -EINVAL for reads, a wrong size, a missing ops
+ * table or an unknown command; -EFAULT when the user copy fails or no
+ * cancel op exists; otherwise the cancel op's non-zero result.
+ */
+static ssize_t vfio_pci_handle_mig_dev_cmd(struct vfio_pci_migration_data *data,
+		char __user *buf, size_t count, bool iswrite)
+{
+	struct vfio_pci_vendor_mig_driver *drv = data->mig_driver;
+	struct pci_dev *vf = data->vf_dev;
+	u32 cmd;
+	int err;
+
+	if (!iswrite || count != sizeof(cmd) || !drv->dev_mig_ops)
+		return -EINVAL;
+
+	if (copy_from_user(&cmd, buf, count))
+		return -EFAULT;
+
+	if (cmd != VFIO_DEVICE_MIGRATION_CANCEL) {
+		dev_err(&vf->dev, "cmd is invaild\n");
+		return -EINVAL;
+	}
+
+	if (!drv->dev_mig_ops->cancel)
+		return -EFAULT;
+
+	err = drv->dev_mig_ops->cancel(vf);
+	if (err != 0)
+		return err;
+
+	return count;
+}
+
+/*
+ * Region read of version_id (4 bytes, read-only).
+ * Returns @count on success, -EINVAL for writes or a wrong size, -EFAULT
+ * when the user copy fails.
+ */
+static ssize_t vfio_pci_handle_mig_drv_version(
+		struct vfio_device_migration_info *mig_info,
+		char __user *buf, size_t count, bool iswrite)
+{
+	u32 version;
+
+	if (iswrite || count != sizeof(version))
+		return -EINVAL;
+
+	version = mig_info->version_id;
+	if (copy_to_user(buf, &version, count))
+		return -EFAULT;
+
+	return count;
+}
+
+/*
+ * Copy @count bytes between user space and the device state buffer
+ * (data->vf_data) at region offset @pos, keeping pending_bytes in step:
+ * writes add to it (never beyond data->state_size), reads subtract from it.
+ *
+ * All bounds are checked before anything is copied, so a rejected request
+ * neither touches the state buffer nor changes pending_bytes.
+ * Returns @count on success, -EINVAL for an empty or out-of-range request,
+ * -EFAULT when the user copy fails.
+ *
+ * NOTE(review): the size of vf_data is not visible here - confirm that
+ * (pos - data_offset) + count can never exceed it.
+ */
+static ssize_t vfio_pci_handle_mig_data_rw(
+		struct vfio_pci_migration_data *data,
+		char __user *buf, size_t count, u64 pos, bool iswrite)
+{
+	struct vfio_device_migration_info *mig_ctl = data->mig_ctl;
+	void *data_addr = data->vf_data;
+
+	if (count == 0) {
+		dev_err(&data->vf_dev->dev, "qemu operation data size error!\n");
+		return -EINVAL;
+	}
+
+	/* An offset below the data window would index in front of vf_data. */
+	if (pos < mig_ctl->data_offset)
+		return -EINVAL;
+
+	data_addr += pos - mig_ctl->data_offset;
+	if (iswrite) {
+		/* pending_bytes + count must stay within state_size */
+		if (count > data->state_size ||
+		    mig_ctl->pending_bytes > data->state_size - count)
+			return -EINVAL;
+
+		if (copy_from_user(data_addr, buf, count))
+			return -EFAULT;
+
+		mig_ctl->pending_bytes += count;
+	} else {
+		if (mig_ctl->pending_bytes < count)
+			return -EINVAL;
+
+		if (copy_to_user(buf, data_addr, count))
+			return -EFAULT;
+
+		mig_ctl->pending_bytes -= count;
+	}
+
+	return count;
+}
+
+static ssize_t vfio_pci_dev_migrn_rw(struct vfio_pci_device *vdev,
+ char __user *buf, size_t count, loff_t *ppos, bool iswrite)
+{
+ unsigned int index =
+ VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+ struct vfio_pci_migration_data *data =
+ (struct vfio_pci_migration_data *)vdev->region[index].data;
+ loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+ struct vfio_device_migration_info *mig_ctl = data->mig_ctl;
+ int ret;
+
+ if (pos >= vdev->region[index].size)
+ return -EINVAL;
+
+ count = min(count, (size_t)(vdev->region[index].size - pos));
+ if (pos >= VFIO_MIGRATION_REGION_DATA_OFFSET)
+ return vfio_pci_handle_mig_data_rw(data,
+ buf, count, pos, iswrite);
+
+ switch (pos) {
+ case VFIO_DEVICE_MIGRATION_OFFSET(device_state):
+ ret = vfio_pci_handle_mig_dev_state(data,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(pending_bytes):
+ ret = vfio_pci_handle_mig_pending_bytes(mig_ctl,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(data_offset):
+ ret = vfio_pci_handle_mig_data_offset(mig_ctl,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(data_size):
+ ret = vfio_pci_handle_mig_data_size(mig_ctl,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(device_cmd):
+ ret = vfio_pci_handle_mig_dev_cmd(data,
+ buf, count, iswrite);
+ break;
+ case VFIO_DEVICE_MIGRATION_OFFSET(version_id):
+ ret = vfio_pci_handle_mig_drv_version(mig_ctl,
+ buf, count, iswrite);
+ break;
+ default:
+ dev_err(&vdev->pdev->dev, "invalid pos offset\n");
+ ret = -EFAULT;
+ break;
+ }
+
+ if (mig_ctl->device_state == VFIO_DEVICE_STATE_RESUMING &&
+ mig_ctl->pending_bytes == data->state_size &&
+ mig_ctl->data_size == data->state_size) {
+ if (vfio_pci_device_state_restore(data) != 0) {
+ dev_err(&vdev->pdev->dev, "Failed to restore device state!\n");
+ return -EFAULT;
+ }
+ mig_ctl->pending_bytes = 0;
+ mig_ctl->data_size = 0;
+ }
+
+ return ret;
+}
+
+static void vfio_pci_dev_migrn_release(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region)
+{
+ struct vfio_pci_migration_data *data = region->data;
+
+ if (data) {
+ kfree(data->mig_ctl);
+ kfree(data);
+ }
+}
+
+static const struct vfio_pci_regops vfio_pci_migration_regops = {
+ .rw = vfio_pci_dev_migrn_rw,
+ .release = vfio_pci_dev_migrn_release,
+};
+
+static int vfio_pci_migration_info_init(struct pci_dev *pdev,
+ struct vfio_device_migration_info *mig_info,
+ struct vfio_pci_vendor_mig_driver *mig_drv)
+{
+ int ret;
+
+ ret = vfio_pci_device_get_info(pdev, mig_info, mig_drv);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to get device info\n");
+ return ret;
+ }
+
+ if (mig_info->data_size > VFIO_MIGRATION_BUFFER_MAX_SIZE) {
+ dev_err(&pdev->dev, "mig_info->data_size %llu is invalid\n",
+ mig_info->data_size);
+ return -EINVAL;
+ }
+
+ mig_info->data_offset = VFIO_MIGRATION_REGION_DATA_OFFSET;
+ return ret;
+}
+
+static int vfio_device_mig_data_init(struct vfio_pci_device *vdev,
+ struct vfio_pci_migration_data *data)
+{
+ struct vfio_device_migration_info *mig_ctl;
+ u64 mig_offset;
+ int ret;
+
+ mig_ctl = kzalloc(sizeof(*mig_ctl), GFP_KERNEL);
+ if (!mig_ctl)
+ return -ENOMEM;
+
+ ret = vfio_pci_migration_info_init(vdev->pdev, mig_ctl,
+ data->mig_driver);
+ if (ret) {
+ dev_err(&vdev->pdev->dev, "get device info error!\n");
+ goto err;
+ }
+
+ mig_offset = sizeof(struct vfio_device_migration_info);
+ data->state_size = mig_ctl->data_size;
+ data->mig_ctl = krealloc(mig_ctl, mig_offset + data->state_size,
+ GFP_KERNEL);
+ if (!data->mig_ctl) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ data->vf_data = (void *)((char *)data->mig_ctl + mig_offset);
+ memset(data->vf_data, 0, data->state_size);
+ data->mig_ctl->data_size = 0;
+
+ ret = vfio_pci_register_dev_region(vdev, VFIO_REGION_TYPE_MIGRATION,
+ VFIO_REGION_SUBTYPE_MIGRATION,
+ &vfio_pci_migration_regops, mig_offset + data->state_size,
+ VFIO_REGION_INFO_FLAG_READ | VFIO_REGION_INFO_FLAG_WRITE, data);
+ if (ret) {
+ kfree(data->mig_ctl);
+ return ret;
+ }
+
+ return 0;
+err:
+ kfree(mig_ctl);
+ return ret;
+}
+
+int vfio_pci_migration_init(struct vfio_pci_device *vdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+ struct vfio_pci_migration_data *data = NULL;
+ struct pci_dev *pdev = vdev->pdev;
+ int ret;
+
+ mig_driver = vfio_pci_get_mig_driver(pdev);
+ if (!mig_driver || !mig_driver->dev_mig_ops) {
+ dev_err(&pdev->dev, "unable to find a mig_driver module\n");
+ return -EINVAL;
+ }
+
+ if (!try_module_get(mig_driver->owner)) {
+ pr_err("module %s is not live\n", mig_driver->owner->name);
+ return -ENODEV;
+ }
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ module_put(mig_driver->owner);
+ return -ENOMEM;
+ }
+
+ data->mig_driver = mig_driver;
+ data->vf_dev = pdev;
+
+ ret = vfio_device_mig_data_init(vdev, data);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to init vfio device migration data!\n");
+ goto err;
+ }
+
+ return ret;
+err:
+ kfree(data);
+ module_put(mig_driver->owner);
+ return ret;
+}
+
+void vfio_pci_migration_exit(struct vfio_pci_device *vdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+
+ mig_driver = vfio_pci_get_mig_driver(vdev->pdev);
+ if (!mig_driver || !mig_driver->dev_mig_ops) {
+ dev_warn(&vdev->pdev->dev, "mig_driver is not found\n");
+ return;
+ }
+
+ if (module_refcount(mig_driver->owner) > 0) {
+ vfio_pci_device_release(vdev->pdev, mig_driver);
+ module_put(mig_driver->owner);
+ }
+}
+
+int vfio_pci_register_migration_ops(struct vfio_device_migration_ops *ops,
+ struct module *mod, struct pci_dev *pdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+
+ if (!ops || !mod || !pdev)
+ return -EINVAL;
+
+ mig_driver = vfio_pci_find_mig_drv(pdev, mod);
+ if (mig_driver) {
+ pr_info("%s migration ops has already been registered\n",
+ mod->name);
+ atomic_add(1, &mig_driver->count);
+ return 0;
+ }
+
+ if (!try_module_get(THIS_MODULE))
+ return -ENODEV;
+
+ mig_driver = kzalloc(sizeof(*mig_driver), GFP_KERNEL);
+ if (!mig_driver) {
+ module_put(THIS_MODULE);
+ return -ENOMEM;
+ }
+
+ mig_driver->pdev = pdev;
+ mig_driver->bus_num = pdev->bus->number;
+ mig_driver->owner = mod;
+ mig_driver->dev_mig_ops = ops;
+
+ vfio_pci_add_mig_drv(mig_driver);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_register_migration_ops);
+
+void vfio_pci_unregister_migration_ops(struct module *mod, struct pci_dev *pdev)
+{
+ struct vfio_pci_vendor_mig_driver *mig_driver = NULL;
+
+ if (!mod || !pdev)
+ return;
+
+ mig_driver = vfio_pci_find_mig_drv(pdev, mod);
+ if (!mig_driver) {
+ pr_err("mig_driver is not found\n");
+ return;
+ }
+
+ if (atomic_sub_and_test(1, &mig_driver->count)) {
+ vfio_pci_remove_mig_drv(mig_driver);
+ kfree(mig_driver);
+ module_put(THIS_MODULE);
+ pr_info("%s succeed to unregister migration ops\n",
+ THIS_MODULE->name);
+ }
+}
+EXPORT_SYMBOL_GPL(vfio_pci_unregister_migration_ops);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 17d2bae..03af269 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -15,6 +15,7 @@
#include <linux/pci.h>
#include <linux/irqbypass.h>
#include <linux/types.h>
+#include <linux/vfio_pci_migration.h>
#ifndef VFIO_PCI_PRIVATE_H
#define VFIO_PCI_PRIVATE_H
@@ -55,7 +56,7 @@ struct vfio_pci_irq_ctx {
struct vfio_pci_region;
struct vfio_pci_regops {
- size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
+ ssize_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite);
void (*release)(struct vfio_pci_device *vdev,
struct vfio_pci_region *region);
@@ -173,4 +174,15 @@ static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
return -ENODEV;
}
#endif
+
+extern bool vfio_dev_migration_is_supported(struct pci_dev *pdev);
+extern int vfio_pci_migration_init(struct vfio_pci_device *vdev);
+extern void vfio_pci_migration_exit(struct vfio_pci_device *vdev);
+extern int vfio_pci_device_log_start(struct vfio_pci_device *vdev,
+ struct vf_migration_log_info *log_info);
+extern int vfio_pci_device_log_stop(struct vfio_pci_device *vdev,
+ uint32_t uuid);
+extern int vfio_pci_device_log_status_query(struct vfio_pci_device *vdev);
+extern int vfio_pci_device_init(struct pci_dev *pdev);
+extern void vfio_pci_device_uninit(struct pci_dev *pdev);
#endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 7a386fb..35f2a29 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -33,6 +33,7 @@
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
+#include <linux/vfio_pci_migration.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
@@ -40,6 +41,9 @@
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson(a)redhat.com>"
#define DRIVER_DESC "VFIO - User Level meta-driver"
+#define LOG_BUF_FRAG_SIZE (2 * 1024 * 1024) // fix to 2M
+#define LOG_BUF_MAX_ADDRS_SIZE 128 // max vm ram size is 1T
+
static struct vfio {
struct class *class;
struct list_head iommu_drivers_list;
@@ -57,6 +61,14 @@ struct vfio_iommu_driver {
struct list_head vfio_next;
};
+struct vfio_log_buf {
+ struct vfio_log_buf_info info;
+ int fd;
+ int buffer_state;
+ int device_state;
+ unsigned long *cpu_addrs;
+};
+
struct vfio_container {
struct kref kref;
struct list_head group_list;
@@ -64,6 +76,7 @@ struct vfio_container {
struct vfio_iommu_driver *iommu_driver;
void *iommu_data;
bool noiommu;
+ struct vfio_log_buf log_buf;
};
struct vfio_unbound_dev {
@@ -1158,8 +1171,398 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
return ret;
}
+static long vfio_dispatch_cmd_to_devices(const struct vfio_container *container,
+ unsigned int cmd, unsigned long arg)
+{
+ struct vfio_group *group = NULL;
+ struct vfio_device *device = NULL;
+ long ret = -ENXIO;
+
+ list_for_each_entry(group, &container->group_list, container_next) {
+ list_for_each_entry(device, &group->device_list, group_next) {
+ ret = device->ops->ioctl(device->device_data, cmd, arg);
+ if (ret) {
+ pr_err("dispatch cmd to devices failed\n");
+ return ret;
+ }
+ }
+ }
+ return ret;
+}
+
+static long vfio_log_buf_start(struct vfio_container *container)
+{
+ struct vfio_log_buf_ctl log_buf_ctl;
+ long ret;
+
+ log_buf_ctl.argsz = sizeof(struct vfio_log_buf_info);
+ log_buf_ctl.flags = VFIO_DEVICE_LOG_BUF_FLAG_START;
+ log_buf_ctl.data = (void *)&container->log_buf.info;
+ ret = vfio_dispatch_cmd_to_devices(container, VFIO_DEVICE_LOG_BUF_CTL,
+ (unsigned long)&log_buf_ctl);
+ if (ret)
+ return ret;
+
+ container->log_buf.device_state = 1;
+ return 0;
+}
+
+static long vfio_log_buf_stop(struct vfio_container *container)
+{
+ struct vfio_log_buf_ctl log_buf_ctl;
+ long ret;
+
+ if (container->log_buf.device_state == 0) {
+ pr_warn("device already stopped\n");
+ return 0;
+ }
+
+ log_buf_ctl.argsz = sizeof(struct vfio_log_buf_info);
+ log_buf_ctl.flags = VFIO_DEVICE_LOG_BUF_FLAG_STOP;
+ log_buf_ctl.data = (void *)&container->log_buf.info;
+ ret = vfio_dispatch_cmd_to_devices(container, VFIO_DEVICE_LOG_BUF_CTL,
+ (unsigned long)&log_buf_ctl);
+ if (ret)
+ return ret;
+
+ container->log_buf.device_state = 0;
+ return 0;
+}
+
+static long vfio_log_buf_query(struct vfio_container *container)
+{
+ struct vfio_log_buf_ctl log_buf_ctl;
+
+ log_buf_ctl.argsz = sizeof(struct vfio_log_buf_info);
+ log_buf_ctl.flags = VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY;
+ log_buf_ctl.data = (void *)&container->log_buf.info;
+
+ return vfio_dispatch_cmd_to_devices(container,
+ VFIO_DEVICE_LOG_BUF_CTL, (unsigned long)&log_buf_ctl);
+}
+
+static int vfio_log_buf_fops_mmap(struct file *filep,
+ struct vm_area_struct *vma)
+{
+ struct vfio_container *container = filep->private_data;
+ struct vfio_log_buf *log_buf = &container->log_buf;
+ unsigned long frag_pg_size;
+ unsigned long frag_offset;
+ phys_addr_t pa;
+ int ret = -EINVAL;
+
+ if (!log_buf->cpu_addrs) {
+ pr_err("mmap before setup, please setup log buf first\n");
+ return ret;
+ }
+
+ if (log_buf->info.frag_size < PAGE_SIZE) {
+ pr_err("mmap frag size should not less than page size!\n");
+ return ret;
+ }
+
+ frag_pg_size = log_buf->info.frag_size / PAGE_SIZE;
+ frag_offset = vma->vm_pgoff / frag_pg_size;
+
+ if (frag_offset >= log_buf->info.addrs_size) {
+ pr_err("mmap offset out of range!\n");
+ return ret;
+ }
+
+ if (vma->vm_end - vma->vm_start != log_buf->info.frag_size) {
+ pr_err("mmap size error, should be aligned with frag size!\n");
+ return ret;
+ }
+
+ pa = virt_to_phys((void *)log_buf->cpu_addrs[frag_offset]);
+ ret = remap_pfn_range(vma, vma->vm_start,
+ pa >> PAGE_SHIFT,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+ if (ret)
+ pr_err("remap_pfn_range error!\n");
+ return ret;
+}
+
+static struct device *vfio_get_dev(struct vfio_container *container)
+{
+ struct vfio_group *group = NULL;
+ struct vfio_device *device = NULL;
+
+ list_for_each_entry(group, &container->group_list, container_next) {
+ list_for_each_entry(device, &group->device_list, group_next) {
+ return device->dev;
+ }
+ }
+ return NULL;
+}
+
+static void vfio_log_buf_release_dma(struct device *dev,
+ struct vfio_log_buf *log_buf)
+{
+ int i;
+
+ for (i = 0; i < log_buf->info.addrs_size; i++) {
+ if ((log_buf->cpu_addrs && log_buf->cpu_addrs[i] != 0) &&
+ (log_buf->info.sgevec &&
+ log_buf->info.sgevec[i].addr != 0)) {
+ dma_free_coherent(dev, log_buf->info.frag_size,
+ (void *)log_buf->cpu_addrs[i],
+ log_buf->info.sgevec[i].addr);
+ log_buf->cpu_addrs[i] = 0;
+ log_buf->info.sgevec[i].addr = 0;
+ }
+ }
+}
+
+static long vfio_log_buf_alloc_dma(struct vfio_log_buf_info *info,
+ struct vfio_log_buf *log_buf, struct device *dev)
+{
+ int i;
+
+ for (i = 0; i < info->addrs_size; i++) {
+ log_buf->cpu_addrs[i] = (unsigned long)dma_alloc_coherent(dev,
+ info->frag_size, &log_buf->info.sgevec[i].addr,
+ GFP_KERNEL);
+ log_buf->info.sgevec[i].len = info->frag_size;
+ if (log_buf->cpu_addrs[i] == 0 ||
+ log_buf->info.sgevec[i].addr == 0) {
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+static long vfio_log_buf_alloc_addrs(struct vfio_log_buf_info *info,
+ struct vfio_log_buf *log_buf)
+{
+ log_buf->info.sgevec = kcalloc(info->addrs_size,
+ sizeof(struct vfio_log_buf_sge), GFP_KERNEL);
+ if (!log_buf->info.sgevec)
+ return -ENOMEM;
+
+ log_buf->cpu_addrs = kcalloc(info->addrs_size,
+ sizeof(unsigned long), GFP_KERNEL);
+ if (!log_buf->cpu_addrs) {
+ kfree(log_buf->info.sgevec);
+ log_buf->info.sgevec = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static long vfio_log_buf_info_valid(struct vfio_log_buf_info *info)
+{
+ if (info->addrs_size > LOG_BUF_MAX_ADDRS_SIZE ||
+ info->addrs_size == 0) {
+ pr_err("can`t support vm ram size larger than 1T or equal to 0\n");
+ return -EINVAL;
+ }
+ if (info->frag_size != LOG_BUF_FRAG_SIZE) {
+ pr_err("only support %d frag size\n", LOG_BUF_FRAG_SIZE);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static long vfio_log_buf_setup(struct vfio_container *container,
+ unsigned long data)
+{
+ struct vfio_log_buf_info info;
+ struct vfio_log_buf *log_buf = &container->log_buf;
+ struct device *dev = NULL;
+ long ret;
+
+ if (log_buf->info.sgevec) {
+ pr_warn("log buf already setup\n");
+ return 0;
+ }
+
+ if (copy_from_user(&info, (void __user *)data,
+ sizeof(struct vfio_log_buf_info)))
+ return -EFAULT;
+
+ ret = vfio_log_buf_info_valid(&info);
+ if (ret)
+ return ret;
+
+ ret = vfio_log_buf_alloc_addrs(&info, log_buf);
+ if (ret)
+ goto err_out;
+
+ dev = vfio_get_dev(container);
+ if (!dev) {
+ pr_err("can`t get dev\n");
+ goto err_free_addrs;
+ }
+
+ ret = vfio_log_buf_alloc_dma(&info, log_buf, dev);
+ if (ret)
+ goto err_free_dma_array;
+
+ log_buf->info.uuid = info.uuid;
+ log_buf->info.buffer_size = info.buffer_size;
+ log_buf->info.frag_size = info.frag_size;
+ log_buf->info.addrs_size = info.addrs_size;
+ log_buf->buffer_state = 1;
+ return 0;
+
+err_free_dma_array:
+ vfio_log_buf_release_dma(dev, log_buf);
+err_free_addrs:
+ kfree(log_buf->cpu_addrs);
+ log_buf->cpu_addrs = NULL;
+ kfree(log_buf->info.sgevec);
+ log_buf->info.sgevec = NULL;
+err_out:
+ return -ENOMEM;
+}
+
+static long vfio_log_buf_release_buffer(struct vfio_container *container)
+{
+ struct vfio_log_buf *log_buf = &container->log_buf;
+ struct device *dev = NULL;
+
+ if (log_buf->buffer_state == 0) {
+ pr_warn("buffer already released\n");
+ return 0;
+ }
+
+ dev = vfio_get_dev(container);
+ if (!dev) {
+ pr_err("can`t get dev\n");
+ return -EFAULT;
+ }
+
+ vfio_log_buf_release_dma(dev, log_buf);
+
+ kfree(log_buf->cpu_addrs);
+ log_buf->cpu_addrs = NULL;
+
+ kfree(log_buf->info.sgevec);
+ log_buf->info.sgevec = NULL;
+
+ log_buf->buffer_state = 0;
+ return 0;
+}
+
+static int vfio_log_buf_release(struct inode *inode, struct file *filep)
+{
+ struct vfio_container *container = filep->private_data;
+
+ vfio_log_buf_stop(container);
+ vfio_log_buf_release_buffer(container);
+ memset(&container->log_buf, 0, sizeof(struct vfio_log_buf));
+ return 0;
+}
+
+static long vfio_ioctl_handle_log_buf_ctl(struct vfio_container *container,
+ unsigned long arg)
+{
+ struct vfio_log_buf_ctl log_buf_ctl;
+ long ret = 0;
+
+ if (copy_from_user(&log_buf_ctl, (void __user *)arg,
+ sizeof(struct vfio_log_buf_ctl)))
+ return -EFAULT;
+
+ switch (log_buf_ctl.flags) {
+ case VFIO_DEVICE_LOG_BUF_FLAG_SETUP:
+ ret = vfio_log_buf_setup(container,
+ (unsigned long)log_buf_ctl.data);
+ break;
+ case VFIO_DEVICE_LOG_BUF_FLAG_RELEASE:
+ ret = vfio_log_buf_release_buffer(container);
+ break;
+ case VFIO_DEVICE_LOG_BUF_FLAG_START:
+ ret = vfio_log_buf_start(container);
+ break;
+ case VFIO_DEVICE_LOG_BUF_FLAG_STOP:
+ ret = vfio_log_buf_stop(container);
+ break;
+ case VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY:
+ ret = vfio_log_buf_query(container);
+ break;
+ default:
+ pr_err("log buf control flag incorrect\n");
+ ret = -EINVAL;
+ break;
+ }
+ return ret;
+}
+
+static long vfio_log_buf_fops_unl_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ struct vfio_container *container = filep->private_data;
+ long ret = -EINVAL;
+
+ switch (cmd) {
+ case VFIO_LOG_BUF_CTL:
+ ret = vfio_ioctl_handle_log_buf_ctl(container, arg);
+ break;
+ default:
+ pr_err("log buf control cmd incorrect\n");
+ break;
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long vfio_log_buf_fops_compat_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ arg = (unsigned long)compat_ptr(arg);
+ return vfio_log_buf_fops_unl_ioctl(filep, cmd, arg);
+}
+#endif /* CONFIG_COMPAT */
+
+static const struct file_operations vfio_log_buf_fops = {
+ .owner = THIS_MODULE,
+ .mmap = vfio_log_buf_fops_mmap,
+ .unlocked_ioctl = vfio_log_buf_fops_unl_ioctl,
+ .release = vfio_log_buf_release,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = vfio_log_buf_fops_compat_ioctl,
+#endif
+};
+
+static int vfio_get_log_buf_fd(struct vfio_container *container,
+ unsigned long arg)
+{
+ struct file *filep = NULL;
+ int ret;
+
+ if (container->log_buf.fd > 0)
+ return container->log_buf.fd;
+
+ ret = get_unused_fd_flags(O_CLOEXEC);
+ if (ret < 0) {
+ pr_err("get_unused_fd_flags get fd failed\n");
+ return ret;
+ }
+
+ filep = anon_inode_getfile("[vfio-log-buf]", &vfio_log_buf_fops,
+ container, O_RDWR);
+ if (IS_ERR(filep)) {
+ pr_err("anon_inode_getfile failed\n");
+ put_unused_fd(ret);
+ ret = PTR_ERR(filep);
+ return ret;
+ }
+
+ filep->f_mode |= (FMODE_READ | FMODE_WRITE | FMODE_LSEEK);
+
+ fd_install(ret, filep);
+
+ container->log_buf.fd = ret;
+ return ret;
+}
+
static long vfio_fops_unl_ioctl(struct file *filep,
- unsigned int cmd, unsigned long arg)
+ unsigned int cmd, unsigned long arg)
{
struct vfio_container *container = filep->private_data;
struct vfio_iommu_driver *driver;
@@ -1179,6 +1582,9 @@ static long vfio_fops_unl_ioctl(struct file *filep,
case VFIO_SET_IOMMU:
ret = vfio_ioctl_set_iommu(container, arg);
break;
+ case VFIO_GET_LOG_BUF_FD:
+ ret = vfio_get_log_buf_fd(container, arg);
+ break;
default:
driver = container->iommu_driver;
data = container->iommu_data;
@@ -1210,6 +1616,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
INIT_LIST_HEAD(&container->group_list);
init_rwsem(&container->group_lock);
kref_init(&container->kref);
+ memset(&container->log_buf, 0, sizeof(struct vfio_log_buf));
filep->private_data = container;
@@ -1219,9 +1626,7 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
static int vfio_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_container *container = filep->private_data;
-
filep->private_data = NULL;
-
vfio_container_put(container);
return 0;
diff --git a/include/linux/vfio_pci_migration.h b/include/linux/vfio_pci_migration.h
new file mode 100644
index 0000000..464ffb4
--- /dev/null
+++ b/include/linux/vfio_pci_migration.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022 Huawei Technologies Co., Ltd. All rights reserved.
+ */
+
+#ifndef VFIO_PCI_MIGRATION_H
+#define VFIO_PCI_MIGRATION_H
+
+#include <linux/types.h>
+#include <linux/pci.h>
+
+#define VFIO_REGION_TYPE_MIGRATION (3)
+/* sub-types for VFIO_REGION_TYPE_MIGRATION */
+#define VFIO_REGION_SUBTYPE_MIGRATION (1)
+
+#define VFIO_MIGRATION_BUFFER_MAX_SIZE SZ_256K
+#define VFIO_MIGRATION_REGION_DATA_OFFSET \
+ (sizeof(struct vfio_device_migration_info))
+#define VFIO_DEVICE_MIGRATION_OFFSET(x) \
+ offsetof(struct vfio_device_migration_info, x)
+
+struct vfio_device_migration_info {
+ __u32 device_state; /* VFIO device state */
+#define VFIO_DEVICE_STATE_STOP (0)
+#define VFIO_DEVICE_STATE_RUNNING (1 << 0)
+#define VFIO_DEVICE_STATE_SAVING (1 << 1)
+#define VFIO_DEVICE_STATE_RESUMING (1 << 2)
+#define VFIO_DEVICE_STATE_MASK (VFIO_DEVICE_STATE_RUNNING | \
+ VFIO_DEVICE_STATE_SAVING | VFIO_DEVICE_STATE_RESUMING)
+ __u32 reserved;
+
+ __u32 device_cmd;
+ __u32 version_id;
+
+ __u64 pending_bytes;
+ __u64 data_offset;
+ __u64 data_size;
+};
+
+enum {
+ VFIO_DEVICE_STOP = 0xffff0001,
+ VFIO_DEVICE_CONTINUE,
+ VFIO_DEVICE_MIGRATION_CANCEL,
+};
+
+struct vfio_log_buf_sge {
+ __u64 len;
+ __u64 addr;
+};
+
+struct vfio_log_buf_info {
+ __u32 uuid;
+ __u64 buffer_size;
+ __u64 addrs_size;
+ __u64 frag_size;
+ struct vfio_log_buf_sge *sgevec;
+};
+
+struct vfio_log_buf_ctl {
+ __u32 argsz;
+ __u32 flags;
+ #define VFIO_DEVICE_LOG_BUF_FLAG_SETUP (1 << 0)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_RELEASE (1 << 1)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_START (1 << 2)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_STOP (1 << 3)
+ #define VFIO_DEVICE_LOG_BUF_FLAG_STATUS_QUERY (1 << 4)
+ void *data;
+};
+#define VFIO_LOG_BUF_CTL _IO(VFIO_TYPE, VFIO_BASE + 21)
+#define VFIO_GET_LOG_BUF_FD _IO(VFIO_TYPE, VFIO_BASE + 22)
+#define VFIO_DEVICE_LOG_BUF_CTL _IO(VFIO_TYPE, VFIO_BASE + 23)
+
+struct vf_migration_log_info {
+ __u32 dom_uuid;
+ __u64 buffer_size;
+ __u64 sge_len;
+ __u64 sge_num;
+ struct vfio_log_buf_sge *sgevec;
+};
+
+struct vfio_device_migration_ops {
+ /* Get device information */
+ int (*get_info)(struct pci_dev *pdev,
+ struct vfio_device_migration_info *info);
+ /* Enable a vf device */
+ int (*enable)(struct pci_dev *pdev);
+ /* Disable a vf device */
+ int (*disable)(struct pci_dev *pdev);
+ /* Save a vf device */
+ int (*save)(struct pci_dev *pdev, void *base,
+ uint64_t off, uint64_t count);
+ /* Resuming a vf device */
+ int (*restore)(struct pci_dev *pdev, void *base,
+ uint64_t off, uint64_t count);
+ /* Log start a vf device */
+ int (*log_start)(struct pci_dev *pdev,
+ struct vf_migration_log_info *log_info);
+ /* Log stop a vf device */
+ int (*log_stop)(struct pci_dev *pdev, uint32_t uuid);
+ /* Get vf device log status */
+ int (*get_log_status)(struct pci_dev *pdev);
+ /* Pre enable a vf device(load_setup, before restore a vf) */
+ int (*pre_enable)(struct pci_dev *pdev);
+ /* Cancel a vf device when live migration failed (rollback) */
+ int (*cancel)(struct pci_dev *pdev);
+ /* Init a vf device */
+ int (*init)(struct pci_dev *pdev);
+ /* Uninit a vf device */
+ void (*uninit)(struct pci_dev *pdev);
+ /* Release a vf device */
+ void (*release)(struct pci_dev *pdev);
+};
+
+struct vfio_pci_vendor_mig_driver {
+ struct pci_dev *pdev;
+ unsigned char bus_num;
+ struct vfio_device_migration_ops *dev_mig_ops;
+ struct module *owner;
+ atomic_t count;
+ struct list_head list;
+};
+
+struct vfio_pci_migration_data {
+ u64 state_size;
+ struct pci_dev *vf_dev;
+ struct vfio_pci_vendor_mig_driver *mig_driver;
+ struct vfio_device_migration_info *mig_ctl;
+ void *vf_data;
+};
+
+int vfio_pci_register_migration_ops(struct vfio_device_migration_ops *ops,
+ struct module *mod, struct pci_dev *pdev);
+void vfio_pci_unregister_migration_ops(struct module *mod,
+ struct pci_dev *pdev);
+
+#endif /* VFIO_PCI_MIGRATION_H */
--
1.8.3.1
1
0
[PATCH OLK-5.10 v2 1/2] ipmi/watchdog: replace atomic_add() and atomic_sub()
by Miaohe Lin 24 Jun '22
by Miaohe Lin 24 Jun '22
24 Jun '22
From: Yejune Deng <yejune.deng(a)gmail.com>
mainline inclusion
from v5.11-rc1
commit a01a89b1db1066a6af23ae08b9a0c345b7966f0b
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5DVR9
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a01a89b1db1066a6af23ae08b9a0c345b7966f0b
--------------------------------
atomic_inc() and atomic_dec() look better than atomic_add(1, ...) and atomic_sub(1, ...).
Signed-off-by: Yejune Deng <yejune.deng(a)gmail.com>
Message-Id: <1605511807-7135-1-git-send-email-yejune.deng(a)gmail.com>
Signed-off-by: Corey Minyard <cminyard(a)mvista.com>
---
drivers/char/ipmi/ipmi_watchdog.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index f78156d93c3f..32c334e34d55 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -495,7 +495,7 @@ static void panic_halt_ipmi_heartbeat(void)
msg.cmd = IPMI_WDOG_RESET_TIMER;
msg.data = NULL;
msg.data_len = 0;
- atomic_add(1, &panic_done_count);
+ atomic_inc(&panic_done_count);
rv = ipmi_request_supply_msgs(watchdog_user,
(struct ipmi_addr *) &addr,
0,
@@ -505,7 +505,7 @@ static void panic_halt_ipmi_heartbeat(void)
&panic_halt_heartbeat_recv_msg,
1);
if (rv)
- atomic_sub(1, &panic_done_count);
+ atomic_dec(&panic_done_count);
}
static struct ipmi_smi_msg panic_halt_smi_msg = {
@@ -529,12 +529,12 @@ static void panic_halt_ipmi_set_timeout(void)
/* Wait for the messages to be free. */
while (atomic_read(&panic_done_count) != 0)
ipmi_poll_interface(watchdog_user);
- atomic_add(1, &panic_done_count);
+ atomic_inc(&panic_done_count);
rv = __ipmi_set_timeout(&panic_halt_smi_msg,
&panic_halt_recv_msg,
&send_heartbeat_now);
if (rv) {
- atomic_sub(1, &panic_done_count);
+ atomic_dec(&panic_done_count);
pr_warn("Unable to extend the watchdog timeout\n");
} else {
if (send_heartbeat_now)
--
2.23.0
1
1
23 Jun '22
From: Yejune Deng <yejune.deng(a)gmail.com>
mainline inclusion
from v5.11-rc1
commit a01a89b1db1066a6af23ae08b9a0c345b7966f0b
category: bugfix
bugzilla: NA
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a01a89b1db1066a6af23ae08b9a0c345b7966f0b
--------------------------------
atomic_inc() and atomic_dec() look better than atomic_add(1, ...) and atomic_sub(1, ...).
Signed-off-by: Yejune Deng <yejune.deng(a)gmail.com>
Message-Id: <1605511807-7135-1-git-send-email-yejune.deng(a)gmail.com>
Signed-off-by: Corey Minyard <cminyard(a)mvista.com>
---
drivers/char/ipmi/ipmi_watchdog.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index f78156d93c3f..32c334e34d55 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -495,7 +495,7 @@ static void panic_halt_ipmi_heartbeat(void)
msg.cmd = IPMI_WDOG_RESET_TIMER;
msg.data = NULL;
msg.data_len = 0;
- atomic_add(1, &panic_done_count);
+ atomic_inc(&panic_done_count);
rv = ipmi_request_supply_msgs(watchdog_user,
(struct ipmi_addr *) &addr,
0,
@@ -505,7 +505,7 @@ static void panic_halt_ipmi_heartbeat(void)
&panic_halt_heartbeat_recv_msg,
1);
if (rv)
- atomic_sub(1, &panic_done_count);
+ atomic_dec(&panic_done_count);
}
static struct ipmi_smi_msg panic_halt_smi_msg = {
@@ -529,12 +529,12 @@ static void panic_halt_ipmi_set_timeout(void)
/* Wait for the messages to be free. */
while (atomic_read(&panic_done_count) != 0)
ipmi_poll_interface(watchdog_user);
- atomic_add(1, &panic_done_count);
+ atomic_inc(&panic_done_count);
rv = __ipmi_set_timeout(&panic_halt_smi_msg,
&panic_halt_recv_msg,
&send_heartbeat_now);
if (rv) {
- atomic_sub(1, &panic_done_count);
+ atomic_dec(&panic_done_count);
pr_warn("Unable to extend the watchdog timeout\n");
} else {
if (send_heartbeat_now)
--
2.23.0
2
2
[PATCH openEuler-5.10 01/59] bcache: fix race between setting bdev state to none and new write request direct to backing
by Zheng Zengkai 22 Jun '22
by Zheng Zengkai 22 Jun '22
22 Jun '22
From: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
mainline inclusion
from v5.11-rc1
commit df4ad53242158f9f1f97daf4feddbb4f8b77f080
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I59A5L?from=project-issue
CVE: N/A
-----------------------------------------------
There is a race condition in detaching as below:

A. detaching                      B. Write request
(1) writing back
(2) write back done, set bdev
    state to clean.
(3) cached_dev_put() and
    schedule_work(&dc->detach);
                                  (4) write data [0 - 4K] directly
                                      into backing and ack to user.
(5) power-failure...
When we restart this bcache device, this bdev is clean but not detached,
so a read of [0 - 4K] returns unexpected old data from the cache device.

To fix this problem, set the bdev state to none when writeback is done
during detaching. Then, if a power failure happens as above, the data in
the cache will not be used the next time the bcache device starts: the
bdev is already detached, so we read the correct data directly from the
backing device.
Signed-off-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
Signed-off-by: Coly Li <colyli(a)suse.de>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
drivers/md/bcache/super.c | 9 ---------
drivers/md/bcache/writeback.c | 9 +++++++++
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 81f1cc5b3499..b7d9d1b79ac2 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1151,9 +1151,6 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
static void cached_dev_detach_finish(struct work_struct *w)
{
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
- struct closure cl;
-
- closure_init_stack(&cl);
BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
BUG_ON(refcount_read(&dc->count));
@@ -1167,12 +1164,6 @@ static void cached_dev_detach_finish(struct work_struct *w)
dc->writeback_thread = NULL;
}
- memset(&dc->sb.set_uuid, 0, 16);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);
-
- bch_write_bdev_super(dc, &cl);
- closure_sync(&cl);
-
mutex_lock(&bch_register_lock);
calc_cached_dev_sectors(dc->disk.c);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3c74996978da..a129e4d2707c 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -705,6 +705,15 @@ static int bch_writeback_thread(void *arg)
* bch_cached_dev_detach().
*/
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
+ struct closure cl;
+
+ closure_init_stack(&cl);
+ memset(&dc->sb.set_uuid, 0, 16);
+ SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);
+
+ bch_write_bdev_super(dc, &cl);
+ closure_sync(&cl);
+
up_write(&dc->writeback_lock);
break;
}
--
2.20.1
1
58
Backport 5.10.108 LTS patches from upstream
Revert "selftests/bpf: Add test for bpf_timer overwriting crash"
smsc95xx: Ignore -ENODEV errors when device is unplugged
net: usb: Correct reset handling of smsc95xx
net: usb: Correct PHY handling of smsc95xx
perf symbols: Fix symbol size calculation condition
Input: aiptek - properly check endpoint type
scsi: mpt3sas: Page fault in reply q processing
usb: usbtmc: Fix bug in pipe direction for control transfers
usb: gadget: Fix use-after-free bug by not setting udc->dev.driver
net: mscc: ocelot: fix backwards compatibility with single-chain tc-flower
offload
net: bcmgenet: skip invalid partial checksums
bnx2x: fix built-in kernel driver load failure
net: phy: mscc: Add MODULE_FIRMWARE macros
net: dsa: Add missing of_node_put() in dsa_port_parse_of
net: handle ARPHRD_PIMREG in dev_is_mac_header_xmit()
drm/panel: simple: Fix Innolux G070Y2-L01 BPP settings
drm/imx: parallel-display: Remove bus flags check in
imx_pd_bridge_atomic_check()
hv_netvsc: Add check for kvmalloc_array
atm: eni: Add check for dma_map_single
net/packet: fix slab-out-of-bounds access in packet_recvmsg()
net: phy: marvell: Fix invalid comparison in the resume and suspend functions
esp6: fix check on ipv6_skip_exthdr's return value
vsock: each transport cycles only on its own sockets
efi: fix return value of __setup handlers
mm: swap: get rid of livelock in swapin readahead
ocfs2: fix crash when initialize filecheck kobj fails
crypto: qcom-rng - ensure buffer for generate is completely filled
already merged (2)
esp: Fix possible buffer overflow in ESP transformation
arm64: fix clang warning about TRAMP_VALIAS
Total patches = 30 - 2 = 28
Alan Stern (2):
usb: gadget: Fix use-after-free bug by not setting udc->dev.driver
usb: usbtmc: Fix bug in pipe direction for control transfers
Brian Masney (1):
crypto: qcom-rng - ensure buffer for generate is completely filled
Christoph Niedermaier (1):
drm/imx: parallel-display: Remove bus flags check in
imx_pd_bridge_atomic_check()
Dan Carpenter (1):
usb: gadget: rndis: prevent integer overflow in rndis_set_response()
Doug Berger (1):
net: bcmgenet: skip invalid partial checksums
Eric Dumazet (1):
net/packet: fix slab-out-of-bounds access in packet_recvmsg()
Fabio Estevam (1):
smsc95xx: Ignore -ENODEV errors when device is unplugged
Greg Kroah-Hartman (1):
Revert "selftests/bpf: Add test for bpf_timer overwriting crash"
Guo Ziliang (1):
mm: swap: get rid of livelock in swapin readahead
Jiasheng Jiang (2):
atm: eni: Add check for dma_map_single
hv_netvsc: Add check for kvmalloc_array
Jiyong Park (1):
vsock: each transport cycles only on its own sockets
Joseph Qi (1):
ocfs2: fix crash when initialize filecheck kobj fails
Juerg Haefliger (1):
net: phy: mscc: Add MODULE_FIRMWARE macros
Kurt Cancemi (1):
net: phy: marvell: Fix invalid comparison in the resume and suspend
functions
Manish Chopra (1):
bnx2x: fix built-in kernel driver load failure
Marek Vasut (1):
drm/panel: simple: Fix Innolux G070Y2-L01 BPP settings
Markus Reichl (1):
net: usb: Correct reset handling of smsc95xx
Martyn Welch (1):
net: usb: Correct PHY handling of smsc95xx
Matt Lupfer (1):
scsi: mpt3sas: Page fault in reply q processing
Miaoqian Lin (1):
net: dsa: Add missing of_node_put() in dsa_port_parse_of
Michael Petlan (1):
perf symbols: Fix symbol size calculation condition
Nicolas Dichtel (1):
net: handle ARPHRD_PIMREG in dev_is_mac_header_xmit()
Pavel Skripkin (1):
Input: aiptek - properly check endpoint type
Randy Dunlap (1):
efi: fix return value of __setup handlers
Sabrina Dubroca (1):
esp6: fix check on ipv6_skip_exthdr's return value
Vladimir Oltean (1):
net: mscc: ocelot: fix backwards compatibility with single-chain
tc-flower offload
drivers/atm/eni.c | 2 +
drivers/crypto/qcom-rng.c | 17 ++--
drivers/firmware/efi/apple-properties.c | 2 +-
drivers/firmware/efi/efi.c | 2 +-
drivers/gpu/drm/imx/parallel-display.c | 8 --
drivers/gpu/drm/panel/panel-simple.c | 2 +-
drivers/input/tablet/aiptek.c | 10 +--
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 -
.../net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 28 +++---
.../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 15 +---
.../net/ethernet/broadcom/genet/bcmgenet.c | 6 +-
drivers/net/ethernet/mscc/ocelot_flower.c | 16 +++-
drivers/net/hyperv/netvsc_drv.c | 3 +
drivers/net/phy/marvell.c | 8 +-
drivers/net/phy/mscc/mscc_main.c | 3 +
drivers/net/usb/smsc95xx.c | 86 +++++++++++--------
drivers/scsi/mpt3sas/mpt3sas_base.c | 5 +-
drivers/usb/class/usbtmc.c | 13 ++-
drivers/usb/gadget/function/rndis.c | 1 +
drivers/usb/gadget/udc/core.c | 3 -
drivers/vhost/vsock.c | 3 +-
fs/ocfs2/super.c | 22 ++---
include/linux/if_arp.h | 1 +
include/net/af_vsock.h | 3 +-
mm/swap_state.c | 2 +-
net/dsa/dsa2.c | 1 +
net/ipv6/esp6.c | 3 +-
net/packet/af_packet.c | 11 ++-
net/vmw_vsock/af_vsock.c | 9 +-
net/vmw_vsock/virtio_transport.c | 7 +-
net/vmw_vsock/vmci_transport.c | 5 +-
tools/perf/util/symbol.c | 2 +-
.../selftests/bpf/prog_tests/timer_crash.c | 32 -------
.../testing/selftests/bpf/progs/timer_crash.c | 54 ------------
34 files changed, 175 insertions(+), 212 deletions(-)
delete mode 100644 tools/testing/selftests/bpf/prog_tests/timer_crash.c
delete mode 100644 tools/testing/selftests/bpf/progs/timer_crash.c
--
2.20.1
1
28
22 Jun '22
From: Kaixu Xia <kaixuxia(a)tencent.com>
mainline inclusion
from mainline-v5.11-rc6
commit 237d7887ae723af7d978e8b9a385fdff416f357b
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5AG18
CVE: NA
--------------------------------
The quota option 'usrquota' should be shown if both the XFS_UQUOTA_ACCT
and XFS_UQUOTA_ENFD flags are set. The option 'uqnoenforce' should be
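shown when only the XFS_UQUOTA_ACCT flag is set. The current code logic
is wrong: it tests for either flag first, so 'usrquota' is shown whenever
XFS_UQUOTA_ACCT is set and 'uqnoenforce' is never shown. Fix it and show
the proper options.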
Signed-off-by: Kaixu Xia <kaixuxia(a)tencent.com>
Reviewed-by: Darrick J. Wong <darrick.wong(a)oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong(a)oracle.com>
Signed-off-by: tangbin <tangbin(a)cmss.chinamobile.com>
Reviewed-by: Xuenan Guo <guoxuenan(a)huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin(a)huawei.com>
---
fs/xfs/xfs_super.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e126bc6cdeb8..5d8e3a4d2671 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -505,10 +505,12 @@ xfs_showargs(
seq_printf(m, ",swidth=%d",
(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
- if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
- seq_puts(m, ",usrquota");
- else if (mp->m_qflags & XFS_UQUOTA_ACCT)
- seq_puts(m, ",uqnoenforce");
+ if (mp->m_qflags & XFS_UQUOTA_ACCT) {
+ if (mp->m_qflags & XFS_UQUOTA_ENFD)
+ seq_puts(m, ",usrquota");
+ else
+ seq_puts(m, ",uqnoenforce");
+ }
if (mp->m_qflags & XFS_PQUOTA_ACCT) {
if (mp->m_qflags & XFS_PQUOTA_ENFD)
--
2.25.1
1
0
[PATCH openEuler-5.10-LTS 01/55] bcache: fix race between setting bdev state to none and new write request direct to backing
by Zheng Zengkai 21 Jun '22
by Zheng Zengkai 21 Jun '22
21 Jun '22
From: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
mainline inclusion
from v5.11-rc1
commit df4ad53242158f9f1f97daf4feddbb4f8b77f080
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I59A5L?from=project-issue
CVE: N/A
-----------------------------------------------
There is a race condition in detaching as below:
A. detaching B. Write request
(1) writing back
(2) write back done, set bdev
state to clean.
(3) cached_dev_put() and
schedule_work(&dc->detach);
(4) write data [0 - 4K] directly
into backing and ack to user.
(5) power-failure...
When we restart this bcache device, this bdev is clean but not detached,
so when we read [0 - 4K], we will get unexpected old data from the cache device.
To fix this problem, set the bdev state to none when writeback is done
during detaching. Then, if a power failure happens as above, the data in
the cache will not be used the next time the bcache device starts: it's
detached, so we will read the correct data from the backing device directly.
Signed-off-by: Dongsheng Yang <dongsheng.yang(a)easystack.cn>
Signed-off-by: Coly Li <colyli(a)suse.de>
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Reviewed-by: Jason Yan <yanaijie(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
---
drivers/md/bcache/super.c | 9 ---------
drivers/md/bcache/writeback.c | 9 +++++++++
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 81f1cc5b3499..b7d9d1b79ac2 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1151,9 +1151,6 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
static void cached_dev_detach_finish(struct work_struct *w)
{
struct cached_dev *dc = container_of(w, struct cached_dev, detach);
- struct closure cl;
-
- closure_init_stack(&cl);
BUG_ON(!test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags));
BUG_ON(refcount_read(&dc->count));
@@ -1167,12 +1164,6 @@ static void cached_dev_detach_finish(struct work_struct *w)
dc->writeback_thread = NULL;
}
- memset(&dc->sb.set_uuid, 0, 16);
- SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);
-
- bch_write_bdev_super(dc, &cl);
- closure_sync(&cl);
-
mutex_lock(&bch_register_lock);
calc_cached_dev_sectors(dc->disk.c);
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3c74996978da..a129e4d2707c 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -705,6 +705,15 @@ static int bch_writeback_thread(void *arg)
* bch_cached_dev_detach().
*/
if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) {
+ struct closure cl;
+
+ closure_init_stack(&cl);
+ memset(&dc->sb.set_uuid, 0, 16);
+ SET_BDEV_STATE(&dc->sb, BDEV_STATE_NONE);
+
+ bch_write_bdev_super(dc, &cl);
+ closure_sync(&cl);
+
up_write(&dc->writeback_lock);
break;
}
--
2.20.1
1
54
Backport 5.10.108 LTS patches from upstream
Revert "selftests/bpf: Add test for bpf_timer overwriting crash"
smsc95xx: Ignore -ENODEV errors when device is unplugged
net: usb: Correct reset handling of smsc95xx
net: usb: Correct PHY handling of smsc95xx
perf symbols: Fix symbol size calculation condition
Input: aiptek - properly check endpoint type
scsi: mpt3sas: Page fault in reply q processing
usb: usbtmc: Fix bug in pipe direction for control transfers
usb: gadget: Fix use-after-free bug by not setting udc->dev.driver
net: mscc: ocelot: fix backwards compatibility with single-chain tc-flower
offload
net: bcmgenet: skip invalid partial checksums
bnx2x: fix built-in kernel driver load failure
net: phy: mscc: Add MODULE_FIRMWARE macros
net: dsa: Add missing of_node_put() in dsa_port_parse_of
net: handle ARPHRD_PIMREG in dev_is_mac_header_xmit()
drm/panel: simple: Fix Innolux G070Y2-L01 BPP settings
drm/imx: parallel-display: Remove bus flags check in
imx_pd_bridge_atomic_check()
hv_netvsc: Add check for kvmalloc_array
atm: eni: Add check for dma_map_single
net/packet: fix slab-out-of-bounds access in packet_recvmsg()
net: phy: marvell: Fix invalid comparison in the resume and suspend functions
esp6: fix check on ipv6_skip_exthdr's return value
vsock: each transport cycles only on its own sockets
efi: fix return value of __setup handlers
mm: swap: get rid of livelock in swapin readahead
ocfs2: fix crash when initialize filecheck kobj fails
crypto: qcom-rng - ensure buffer for generate is completely filled
already merged (2)
esp: Fix possible buffer overflow in ESP transformation
arm64: fix clang warning about TRAMP_VALIAS
Total patches = 30 - 2 = 28
Alan Stern (2):
usb: gadget: Fix use-after-free bug by not setting udc->dev.driver
usb: usbtmc: Fix bug in pipe direction for control transfers
Brian Masney (1):
crypto: qcom-rng - ensure buffer for generate is completely filled
Christoph Niedermaier (1):
drm/imx: parallel-display: Remove bus flags check in
imx_pd_bridge_atomic_check()
Dan Carpenter (1):
usb: gadget: rndis: prevent integer overflow in rndis_set_response()
Doug Berger (1):
net: bcmgenet: skip invalid partial checksums
Eric Dumazet (1):
net/packet: fix slab-out-of-bounds access in packet_recvmsg()
Fabio Estevam (1):
smsc95xx: Ignore -ENODEV errors when device is unplugged
Greg Kroah-Hartman (1):
Revert "selftests/bpf: Add test for bpf_timer overwriting crash"
Guo Ziliang (1):
mm: swap: get rid of livelock in swapin readahead
Jiasheng Jiang (2):
atm: eni: Add check for dma_map_single
hv_netvsc: Add check for kvmalloc_array
Jiyong Park (1):
vsock: each transport cycles only on its own sockets
Joseph Qi (1):
ocfs2: fix crash when initialize filecheck kobj fails
Juerg Haefliger (1):
net: phy: mscc: Add MODULE_FIRMWARE macros
Kurt Cancemi (1):
net: phy: marvell: Fix invalid comparison in the resume and suspend
functions
Manish Chopra (1):
bnx2x: fix built-in kernel driver load failure
Marek Vasut (1):
drm/panel: simple: Fix Innolux G070Y2-L01 BPP settings
Markus Reichl (1):
net: usb: Correct reset handling of smsc95xx
Martyn Welch (1):
net: usb: Correct PHY handling of smsc95xx
Matt Lupfer (1):
scsi: mpt3sas: Page fault in reply q processing
Miaoqian Lin (1):
net: dsa: Add missing of_node_put() in dsa_port_parse_of
Michael Petlan (1):
perf symbols: Fix symbol size calculation condition
Nicolas Dichtel (1):
net: handle ARPHRD_PIMREG in dev_is_mac_header_xmit()
Pavel Skripkin (1):
Input: aiptek - properly check endpoint type
Randy Dunlap (1):
efi: fix return value of __setup handlers
Sabrina Dubroca (1):
esp6: fix check on ipv6_skip_exthdr's return value
Vladimir Oltean (1):
net: mscc: ocelot: fix backwards compatibility with single-chain
tc-flower offload
drivers/atm/eni.c | 2 +
drivers/crypto/qcom-rng.c | 17 ++--
drivers/firmware/efi/apple-properties.c | 2 +-
drivers/firmware/efi/efi.c | 2 +-
drivers/gpu/drm/imx/parallel-display.c | 8 --
drivers/gpu/drm/panel/panel-simple.c | 2 +-
drivers/input/tablet/aiptek.c | 10 +--
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 -
.../net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 28 +++---
.../net/ethernet/broadcom/bnx2x/bnx2x_main.c | 15 +---
.../net/ethernet/broadcom/genet/bcmgenet.c | 6 +-
drivers/net/ethernet/mscc/ocelot_flower.c | 16 +++-
drivers/net/hyperv/netvsc_drv.c | 3 +
drivers/net/phy/marvell.c | 8 +-
drivers/net/phy/mscc/mscc_main.c | 3 +
drivers/net/usb/smsc95xx.c | 86 +++++++++++--------
drivers/scsi/mpt3sas/mpt3sas_base.c | 5 +-
drivers/usb/class/usbtmc.c | 13 ++-
drivers/usb/gadget/function/rndis.c | 1 +
drivers/usb/gadget/udc/core.c | 3 -
drivers/vhost/vsock.c | 3 +-
fs/ocfs2/super.c | 22 ++---
include/linux/if_arp.h | 1 +
include/net/af_vsock.h | 3 +-
mm/swap_state.c | 2 +-
net/dsa/dsa2.c | 1 +
net/ipv6/esp6.c | 3 +-
net/packet/af_packet.c | 11 ++-
net/vmw_vsock/af_vsock.c | 9 +-
net/vmw_vsock/virtio_transport.c | 7 +-
net/vmw_vsock/vmci_transport.c | 5 +-
tools/perf/util/symbol.c | 2 +-
.../selftests/bpf/prog_tests/timer_crash.c | 32 -------
.../testing/selftests/bpf/progs/timer_crash.c | 54 ------------
34 files changed, 175 insertions(+), 212 deletions(-)
delete mode 100644 tools/testing/selftests/bpf/prog_tests/timer_crash.c
delete mode 100644 tools/testing/selftests/bpf/progs/timer_crash.c
--
2.20.1
1
28
[PATCH openEuler-1.0-LTS 1/4] sched: Introduce qos smt expeller for co-location
by liuzhengyuan@kylinos.cn 21 Jun '22
by liuzhengyuan@kylinos.cn 21 Jun '22
21 Jun '22
From: Guan Jing <guanjing6(a)huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I52611
CVE: NA
--------------------------------
We introduce the qos smt expeller, which lets
online tasks expel offline tasks on the smt sibling cpus
and exclusively occupy CPU resources. In this way we are
able to improve the QoS of online tasks in co-location.
Change-Id: I1860d20d5e78467773e67cc47b4fa2d1f0110783
Signed-off-by: Guan Jing <guanjing6(a)huawei.com>
Reviewed-by: Chen Hui <judy.chenhui(a)huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai(a)huawei.com>
Signed-off-by: Zhengyuan Liu <liuzhengyuan(a)kylinos.cn>
---
init/Kconfig | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index ac1c864524ac..dd81d19e2fcb 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -794,6 +794,15 @@ config QOS_SCHED
default n
+config QOS_SCHED_SMT_EXPELLER
+ bool "Qos smt expeller"
+ depends on SCHED_SMT
+ depends on QOS_SCHED
+ default n
+ help
+ This feature enable online tasks to expel offline tasks
+ on the smt sibling cpus, and exclusively occupy CPU resources.
+
config FAIR_GROUP_SCHED
bool "Group scheduling for SCHED_OTHER"
depends on CGROUP_SCHED
--
2.25.1
1
3
[PATCH openEuler-1.0-LTS 1/6] drivers core: Use sysfs_emit and sysfs_emit_at for show(device *...) functions
by Yongqiang Liu 21 Jun '22
by Yongqiang Liu 21 Jun '22
21 Jun '22
From: Joe Perches <joe(a)perches.com>
mainline inclusion
from mainline-v5.10-rc1
commit aa838896d87af561a33ecefea1caa4c15a68bc47
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5C32F
CVE: CVE-2022-20166
----------------------------------------------
Convert the various sprintf family calls in sysfs device show functions
to sysfs_emit and sysfs_emit_at for PAGE_SIZE buffer safety.
Done with:
$ spatch -sp-file sysfs_emit_dev.cocci --in-place --max-width=80 .
And cocci script:
$ cat sysfs_emit_dev.cocci
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
return
- strcpy(buf, chr);
+ sysfs_emit(buf, chr);
...>
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- sprintf(buf,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- snprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
len =
- scnprintf(buf, PAGE_SIZE,
+ sysfs_emit(buf,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
identifier len;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
<...
- len += scnprintf(buf + len, PAGE_SIZE - len,
+ len += sysfs_emit_at(buf, len,
...);
...>
return len;
}
@@
identifier d_show;
identifier dev, attr, buf;
expression chr;
@@
ssize_t d_show(struct device *dev, struct device_attribute *attr, char *buf)
{
...
- strcpy(buf, chr);
- return strlen(buf);
+ return sysfs_emit(buf, chr);
}
Signed-off-by: Joe Perches <joe(a)perches.com>
Link: https://lore.kernel.org/r/3d033c33056d88bbe34d4ddb62afd05ee166ab9a.16002859…
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Conflicts:
drivers/base/node.c
Signed-off-by: Guo Mengqi <guomengqi3(a)huawei.com>
Reviewed-by: Weilong Chen <chenweilong(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
drivers/base/arch_topology.c | 3 +-
drivers/base/cacheinfo.c | 18 ++++-----
drivers/base/core.c | 8 ++--
drivers/base/cpu.c | 26 ++++++------
drivers/base/firmware_loader/fallback.c | 2 +-
drivers/base/memory.c | 24 +++++------
drivers/base/node.c | 26 ++++++------
drivers/base/platform.c | 2 +-
drivers/base/power/sysfs.c | 53 +++++++++++++------------
drivers/base/soc.c | 8 ++--
10 files changed, 86 insertions(+), 84 deletions(-)
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 7157e4039b4c..729dded51e7b 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -42,7 +42,8 @@ static ssize_t cpu_capacity_show(struct device *dev,
{
struct cpu *cpu = container_of(dev, struct cpu, dev);
- return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id));
+ return sysfs_emit(buf, "%lu\n",
+ topology_get_cpu_scale(NULL, cpu->dev.id));
}
static DEVICE_ATTR_RO(cpu_capacity);
diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index aee69d78c2e7..f7d107ca1a66 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -415,7 +415,7 @@ static ssize_t size_show(struct device *dev,
{
struct cacheinfo *this_leaf = dev_get_drvdata(dev);
- return sprintf(buf, "%uK\n", this_leaf->size >> 10);
+ return sysfs_emit(buf, "%uK\n", this_leaf->size >> 10);
}
static ssize_t shared_cpumap_show_func(struct device *dev, bool list, char *buf)
@@ -445,11 +445,11 @@ static ssize_t type_show(struct device *dev,
switch (this_leaf->type) {
case CACHE_TYPE_DATA:
- return sprintf(buf, "Data\n");
+ return sysfs_emit(buf, "Data\n");
case CACHE_TYPE_INST:
- return sprintf(buf, "Instruction\n");
+ return sysfs_emit(buf, "Instruction\n");
case CACHE_TYPE_UNIFIED:
- return sprintf(buf, "Unified\n");
+ return sysfs_emit(buf, "Unified\n");
default:
return -EINVAL;
}
@@ -463,11 +463,11 @@ static ssize_t allocation_policy_show(struct device *dev,
int n = 0;
if ((ci_attr & CACHE_READ_ALLOCATE) && (ci_attr & CACHE_WRITE_ALLOCATE))
- n = sprintf(buf, "ReadWriteAllocate\n");
+ n = sysfs_emit(buf, "ReadWriteAllocate\n");
else if (ci_attr & CACHE_READ_ALLOCATE)
- n = sprintf(buf, "ReadAllocate\n");
+ n = sysfs_emit(buf, "ReadAllocate\n");
else if (ci_attr & CACHE_WRITE_ALLOCATE)
- n = sprintf(buf, "WriteAllocate\n");
+ n = sysfs_emit(buf, "WriteAllocate\n");
return n;
}
@@ -479,9 +479,9 @@ static ssize_t write_policy_show(struct device *dev,
int n = 0;
if (ci_attr & CACHE_WRITE_THROUGH)
- n = sprintf(buf, "WriteThrough\n");
+ n = sysfs_emit(buf, "WriteThrough\n");
else if (ci_attr & CACHE_WRITE_BACK)
- n = sprintf(buf, "WriteBack\n");
+ n = sysfs_emit(buf, "WriteBack\n");
return n;
}
diff --git a/drivers/base/core.c b/drivers/base/core.c
index f6d8a4246adf..b4f80e82a91f 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -994,7 +994,7 @@ ssize_t device_show_ulong(struct device *dev,
char *buf)
{
struct dev_ext_attribute *ea = to_ext_attr(attr);
- return snprintf(buf, PAGE_SIZE, "%lx\n", *(unsigned long *)(ea->var));
+ return sysfs_emit(buf, "%lx\n", *(unsigned long *)(ea->var));
}
EXPORT_SYMBOL_GPL(device_show_ulong);
@@ -1019,7 +1019,7 @@ ssize_t device_show_int(struct device *dev,
{
struct dev_ext_attribute *ea = to_ext_attr(attr);
- return snprintf(buf, PAGE_SIZE, "%d\n", *(int *)(ea->var));
+ return sysfs_emit(buf, "%d\n", *(int *)(ea->var));
}
EXPORT_SYMBOL_GPL(device_show_int);
@@ -1040,7 +1040,7 @@ ssize_t device_show_bool(struct device *dev, struct device_attribute *attr,
{
struct dev_ext_attribute *ea = to_ext_attr(attr);
- return snprintf(buf, PAGE_SIZE, "%d\n", *(bool *)(ea->var));
+ return sysfs_emit(buf, "%d\n", *(bool *)(ea->var));
}
EXPORT_SYMBOL_GPL(device_show_bool);
@@ -1273,7 +1273,7 @@ static ssize_t online_show(struct device *dev, struct device_attribute *attr,
device_lock(dev);
val = !dev->offline;
device_unlock(dev);
- return sprintf(buf, "%u\n", val);
+ return sysfs_emit(buf, "%u\n", val);
}
static ssize_t online_store(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index 1df057486176..128e5867c35d 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -156,7 +156,7 @@ static ssize_t show_crash_notes(struct device *dev, struct device_attribute *att
* operation should be safe. No locking required.
*/
addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum));
- rc = sprintf(buf, "%Lx\n", addr);
+ rc = sysfs_emit(buf, "%Lx\n", addr);
return rc;
}
static DEVICE_ATTR(crash_notes, 0400, show_crash_notes, NULL);
@@ -167,7 +167,7 @@ static ssize_t show_crash_notes_size(struct device *dev,
{
ssize_t rc;
- rc = sprintf(buf, "%zu\n", sizeof(note_buf_t));
+ rc = sysfs_emit(buf, "%zu\n", sizeof(note_buf_t));
return rc;
}
static DEVICE_ATTR(crash_notes_size, 0400, show_crash_notes_size, NULL);
@@ -328,8 +328,8 @@ static ssize_t print_cpu_modalias(struct device *dev,
ssize_t n;
u32 i;
- n = sprintf(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:",
- CPU_FEATURE_TYPEVAL);
+ n = sysfs_emit(buf, "cpu:type:" CPU_FEATURE_TYPEFMT ":feature:",
+ CPU_FEATURE_TYPEVAL);
for (i = 0; i < MAX_CPU_FEATURES; i++)
if (cpu_have_feature(i)) {
@@ -519,56 +519,56 @@ static void __init cpu_dev_register_generic(void)
ssize_t __weak cpu_show_meltdown(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Not affected\n");
+ return sysfs_emit(buf, "Not affected\n");
}
ssize_t __weak cpu_show_spectre_v1(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Not affected\n");
+ return sysfs_emit(buf, "Not affected\n");
}
ssize_t __weak cpu_show_spectre_v2(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Not affected\n");
+ return sysfs_emit(buf, "Not affected\n");
}
ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Not affected\n");
+ return sysfs_emit(buf, "Not affected\n");
}
ssize_t __weak cpu_show_l1tf(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Not affected\n");
+ return sysfs_emit(buf, "Not affected\n");
}
ssize_t __weak cpu_show_mds(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Not affected\n");
+ return sysfs_emit(buf, "Not affected\n");
}
ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "Not affected\n");
+ return sysfs_emit(buf, "Not affected\n");
}
ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Not affected\n");
+ return sysfs_emit(buf, "Not affected\n");
}
ssize_t __weak cpu_show_srbds(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "Not affected\n");
+ return sysfs_emit(buf, "Not affected\n");
}
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c
index 818d8c37d70a..ab619da92734 100644
--- a/drivers/base/firmware_loader/fallback.c
+++ b/drivers/base/firmware_loader/fallback.c
@@ -216,7 +216,7 @@ static ssize_t firmware_loading_show(struct device *dev,
loading = fw_sysfs_loading(fw_sysfs->fw_priv);
mutex_unlock(&fw_lock);
- return sprintf(buf, "%d\n", loading);
+ return sysfs_emit(buf, "%d\n", loading);
}
/* one pages buffer should be mapped/unmapped only once */
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 1b97f305173f..54225b812cd7 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -140,7 +140,7 @@ static ssize_t phys_index_show(struct device *dev,
unsigned long phys_index;
phys_index = mem->start_section_nr / sections_per_block;
- return sprintf(buf, "%08lx\n", phys_index);
+ return sysfs_emit(buf, "%08lx\n", phys_index);
}
/*
@@ -164,7 +164,7 @@ static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
}
out:
- return sprintf(buf, "%d\n", ret);
+ return sysfs_emit(buf, "%d\n", ret);
}
/*
@@ -182,17 +182,17 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr,
*/
switch (mem->state) {
case MEM_ONLINE:
- len = sprintf(buf, "online\n");
+ len = sysfs_emit(buf, "online\n");
break;
case MEM_OFFLINE:
- len = sprintf(buf, "offline\n");
+ len = sysfs_emit(buf, "offline\n");
break;
case MEM_GOING_OFFLINE:
- len = sprintf(buf, "going-offline\n");
+ len = sysfs_emit(buf, "going-offline\n");
break;
default:
- len = sprintf(buf, "ERROR-UNKNOWN-%ld\n",
- mem->state);
+ len = sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n",
+ mem->state);
WARN_ON(1);
break;
}
@@ -389,7 +389,7 @@ static ssize_t phys_device_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct memory_block *mem = to_memory_block(dev);
- return sprintf(buf, "%d\n", mem->phys_device);
+ return sysfs_emit(buf, "%d\n", mem->phys_device);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
@@ -427,7 +427,7 @@ static ssize_t valid_zones_show(struct device *dev,
*/
if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages,
&valid_start_pfn, &valid_end_pfn))
- return sprintf(buf, "none\n");
+ return sysfs_emit(buf, "none\n");
start_pfn = valid_start_pfn;
strcat(buf, page_zone(pfn_to_page(start_pfn))->name);
goto out;
@@ -461,7 +461,7 @@ static DEVICE_ATTR_RO(removable);
static ssize_t block_size_bytes_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%lx\n", get_memory_block_size());
+ return sysfs_emit(buf, "%lx\n", get_memory_block_size());
}
static DEVICE_ATTR_RO(block_size_bytes);
@@ -473,8 +473,8 @@ static DEVICE_ATTR_RO(block_size_bytes);
static ssize_t auto_online_blocks_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%s\n",
- online_type_to_str[memhp_default_online_type]);
+ return sysfs_emit(buf, "%s\n",
+ online_type_to_str[memhp_default_online_type]);
}
static ssize_t auto_online_blocks_store(struct device *dev,
diff --git a/drivers/base/node.c b/drivers/base/node.c
index ac44db3f63c7..7a807e471fbf 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -457,19 +457,19 @@ static DEVICE_ATTR(meminfo, S_IRUGO, node_read_meminfo, NULL);
static ssize_t node_read_numastat(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf,
- "numa_hit %lu\n"
- "numa_miss %lu\n"
- "numa_foreign %lu\n"
- "interleave_hit %lu\n"
- "local_node %lu\n"
- "other_node %lu\n",
- sum_zone_numa_state(dev->id, NUMA_HIT),
- sum_zone_numa_state(dev->id, NUMA_MISS),
- sum_zone_numa_state(dev->id, NUMA_FOREIGN),
- sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
- sum_zone_numa_state(dev->id, NUMA_LOCAL),
- sum_zone_numa_state(dev->id, NUMA_OTHER));
+ return sysfs_emit(buf,
+ "numa_hit %lu\n"
+ "numa_miss %lu\n"
+ "numa_foreign %lu\n"
+ "interleave_hit %lu\n"
+ "local_node %lu\n"
+ "other_node %lu\n",
+ sum_zone_numa_state(dev->id, NUMA_HIT),
+ sum_zone_numa_state(dev->id, NUMA_MISS),
+ sum_zone_numa_state(dev->id, NUMA_FOREIGN),
+ sum_zone_numa_state(dev->id, NUMA_INTERLEAVE_HIT),
+ sum_zone_numa_state(dev->id, NUMA_LOCAL),
+ sum_zone_numa_state(dev->id, NUMA_OTHER));
}
static DEVICE_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 349c2754eed7..fe3f4225d80a 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -907,7 +907,7 @@ static ssize_t driver_override_show(struct device *dev,
ssize_t len;
device_lock(dev);
- len = sprintf(buf, "%s\n", pdev->driver_override);
+ len = sysfs_emit(buf, "%s\n", pdev->driver_override);
device_unlock(dev);
return len;
}
diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index d713738ce796..d071d5d1a6d7 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -101,7 +101,7 @@ static const char ctrl_on[] = "on";
static ssize_t control_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%s\n",
+ return sysfs_emit(buf, "%s\n",
dev->power.runtime_auto ? ctrl_auto : ctrl_on);
}
@@ -127,7 +127,8 @@ static ssize_t runtime_active_time_show(struct device *dev,
int ret;
spin_lock_irq(&dev->power.lock);
update_pm_runtime_accounting(dev);
- ret = sprintf(buf, "%i\n", jiffies_to_msecs(dev->power.active_jiffies));
+ ret = sysfs_emit(buf, "%i\n",
+ jiffies_to_msecs(dev->power.active_jiffies));
spin_unlock_irq(&dev->power.lock);
return ret;
}
@@ -140,8 +141,8 @@ static ssize_t runtime_suspended_time_show(struct device *dev,
int ret;
spin_lock_irq(&dev->power.lock);
update_pm_runtime_accounting(dev);
- ret = sprintf(buf, "%i\n",
- jiffies_to_msecs(dev->power.suspended_jiffies));
+ ret = sysfs_emit(buf, "%i\n",
+ jiffies_to_msecs(dev->power.suspended_jiffies));
spin_unlock_irq(&dev->power.lock);
return ret;
}
@@ -175,7 +176,7 @@ static ssize_t runtime_status_show(struct device *dev,
return -EIO;
}
}
- return sprintf(buf, p);
+ return sysfs_emit(buf, p);
}
static DEVICE_ATTR_RO(runtime_status);
@@ -185,7 +186,7 @@ static ssize_t autosuspend_delay_ms_show(struct device *dev,
{
if (!dev->power.use_autosuspend)
return -EIO;
- return sprintf(buf, "%d\n", dev->power.autosuspend_delay);
+ return sysfs_emit(buf, "%d\n", dev->power.autosuspend_delay);
}
static ssize_t autosuspend_delay_ms_store(struct device *dev,
@@ -214,11 +215,11 @@ static ssize_t pm_qos_resume_latency_us_show(struct device *dev,
s32 value = dev_pm_qos_requested_resume_latency(dev);
if (value == 0)
- return sprintf(buf, "n/a\n");
+ return sysfs_emit(buf, "n/a\n");
if (value == PM_QOS_RESUME_LATENCY_NO_CONSTRAINT)
value = 0;
- return sprintf(buf, "%d\n", value);
+ return sysfs_emit(buf, "%d\n", value);
}
static ssize_t pm_qos_resume_latency_us_store(struct device *dev,
@@ -258,11 +259,11 @@ static ssize_t pm_qos_latency_tolerance_us_show(struct device *dev,
s32 value = dev_pm_qos_get_user_latency_tolerance(dev);
if (value < 0)
- return sprintf(buf, "auto\n");
+ return sysfs_emit(buf, "auto\n");
if (value == PM_QOS_LATENCY_ANY)
- return sprintf(buf, "any\n");
+ return sysfs_emit(buf, "any\n");
- return sprintf(buf, "%d\n", value);
+ return sysfs_emit(buf, "%d\n", value);
}
static ssize_t pm_qos_latency_tolerance_us_store(struct device *dev,
@@ -294,8 +295,8 @@ static ssize_t pm_qos_no_power_off_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev)
- & PM_QOS_FLAG_NO_POWER_OFF));
+ return sysfs_emit(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev)
+ & PM_QOS_FLAG_NO_POWER_OFF));
}
static ssize_t pm_qos_no_power_off_store(struct device *dev,
@@ -323,9 +324,9 @@ static const char _disabled[] = "disabled";
static ssize_t wakeup_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%s\n", device_can_wakeup(dev)
- ? (device_may_wakeup(dev) ? _enabled : _disabled)
- : "");
+ return sysfs_emit(buf, "%s\n", device_can_wakeup(dev)
+ ? (device_may_wakeup(dev) ? _enabled : _disabled)
+ : "");
}
static ssize_t wakeup_store(struct device *dev, struct device_attribute *attr,
@@ -511,7 +512,7 @@ static DEVICE_ATTR_RO(wakeup_prevent_sleep_time_ms);
static ssize_t runtime_usage_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- return sprintf(buf, "%d\n", atomic_read(&dev->power.usage_count));
+ return sysfs_emit(buf, "%d\n", atomic_read(&dev->power.usage_count));
}
static DEVICE_ATTR_RO(runtime_usage);
@@ -519,8 +520,8 @@ static ssize_t runtime_active_kids_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", dev->power.ignore_children ?
- 0 : atomic_read(&dev->power.child_count));
+ return sysfs_emit(buf, "%d\n", dev->power.ignore_children ?
+ 0 : atomic_read(&dev->power.child_count));
}
static DEVICE_ATTR_RO(runtime_active_kids);
@@ -528,12 +529,12 @@ static ssize_t runtime_enabled_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
if (dev->power.disable_depth && (dev->power.runtime_auto == false))
- return sprintf(buf, "disabled & forbidden\n");
+ return sysfs_emit(buf, "disabled & forbidden\n");
if (dev->power.disable_depth)
- return sprintf(buf, "disabled\n");
+ return sysfs_emit(buf, "disabled\n");
if (dev->power.runtime_auto == false)
- return sprintf(buf, "forbidden\n");
- return sprintf(buf, "enabled\n");
+ return sysfs_emit(buf, "forbidden\n");
+ return sysfs_emit(buf, "enabled\n");
}
static DEVICE_ATTR_RO(runtime_enabled);
@@ -541,9 +542,9 @@ static DEVICE_ATTR_RO(runtime_enabled);
static ssize_t async_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%s\n",
- device_async_suspend_enabled(dev) ?
- _enabled : _disabled);
+ return sysfs_emit(buf, "%s\n",
+ device_async_suspend_enabled(dev) ?
+ _enabled : _disabled);
}
static ssize_t async_store(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/base/soc.c b/drivers/base/soc.c
index 7e91894a380b..23bc9eb794a2 100644
--- a/drivers/base/soc.c
+++ b/drivers/base/soc.c
@@ -72,13 +72,13 @@ static ssize_t soc_info_get(struct device *dev,
struct soc_device *soc_dev = container_of(dev, struct soc_device, dev);
if (attr == &dev_attr_machine)
- return sprintf(buf, "%s\n", soc_dev->attr->machine);
+ return sysfs_emit(buf, "%s\n", soc_dev->attr->machine);
if (attr == &dev_attr_family)
- return sprintf(buf, "%s\n", soc_dev->attr->family);
+ return sysfs_emit(buf, "%s\n", soc_dev->attr->family);
if (attr == &dev_attr_revision)
- return sprintf(buf, "%s\n", soc_dev->attr->revision);
+ return sysfs_emit(buf, "%s\n", soc_dev->attr->revision);
if (attr == &dev_attr_soc_id)
- return sprintf(buf, "%s\n", soc_dev->attr->soc_id);
+ return sysfs_emit(buf, "%s\n", soc_dev->attr->soc_id);
return -EINVAL;
--
2.25.1
1
5