Hi Eric,
-----Original Message----- From: Eric Auger [mailto:eric.auger@redhat.com] Sent: 16 November 2020 11:00 To: eric.auger.pro@gmail.com; eric.auger@redhat.com; iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org; kvm@vger.kernel.org; kvmarm@lists.cs.columbia.edu; will@kernel.org; joro@8bytes.org; maz@kernel.org; robin.murphy@arm.com; alex.williamson@redhat.com Cc: jean-philippe@linaro.org; zhangfei.gao@linaro.org; zhangfei.gao@gmail.com; vivek.gautam@arm.com; Shameerali Kolothum Thodi shameerali.kolothum.thodi@huawei.com; jacob.jun.pan@linux.intel.com; yi.l.liu@intel.com; tn@semihalf.com; nicoleotsuka@gmail.com; yuzenghui yuzenghui@huawei.com Subject: [PATCH v11 12/13] vfio/pci: Register a DMA fault response region
In preparation for vSVA, let's register a DMA fault response region, where the userspace will push the page responses and increment the head of the buffer. The kernel will pop those responses and inject them on iommu side.
Signed-off-by: Eric Auger eric.auger@redhat.com
drivers/vfio/pci/vfio_pci.c | 114 +++++++++++++++++++++++++--- drivers/vfio/pci/vfio_pci_private.h | 5 ++ drivers/vfio/pci/vfio_pci_rdwr.c | 39 ++++++++++ include/uapi/linux/vfio.h | 32 ++++++++ 4 files changed, 181 insertions(+), 9 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 65a83fd0e8c0..e9a904ce3f0d 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -318,9 +318,20 @@ static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev, kfree(vdev->fault_pages); }
-static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vm_area_struct *vma)
+static void +vfio_pci_dma_fault_response_release(struct vfio_pci_device *vdev,
struct vfio_pci_region *region) {
- if (vdev->dma_fault_response_wq)
destroy_workqueue(vdev->dma_fault_response_wq);
- kfree(vdev->fault_response_pages);
- vdev->fault_response_pages = NULL;
+}
+static int __vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vm_area_struct *vma,
u8 *pages)
{ u64 phys_len, req_len, pgoff, req_start; unsigned long long addr; @@ -333,14 +344,14 @@ static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev, ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); req_start = pgoff << PAGE_SHIFT;
- /* only the second page of the producer fault region is mmappable */
/* only the second page of the fault region is mmappable */ if (req_start < PAGE_SIZE) return -EINVAL;
if (req_start + req_len > phys_len) return -EINVAL;
- addr = virt_to_phys(vdev->fault_pages);
- addr = virt_to_phys(pages); vma->vm_private_data = vdev; vma->vm_pgoff = (addr >> PAGE_SHIFT) + pgoff;
@@ -349,13 +360,29 @@ static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev, return ret; }
-static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vfio_info_cap *caps)
+static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vm_area_struct *vma)
+{
- return __vfio_pci_dma_fault_mmap(vdev, region, vma,
vdev->fault_pages); +}
+static int +vfio_pci_dma_fault_response_mmap(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vm_area_struct *vma)
+{
- return __vfio_pci_dma_fault_mmap(vdev, region, vma,
vdev->fault_response_pages); +}
+static int __vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vfio_info_cap *caps,
u32 cap_id)
{ struct vfio_region_info_cap_sparse_mmap *sparse = NULL; struct vfio_region_info_cap_fault cap = {
.header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
.header.version = 1, .version = 1, };.header.id = cap_id,
@@ -383,6 +410,14 @@ static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev, return ret; }
+static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vfio_info_cap *caps) {
- return __vfio_pci_dma_fault_add_capability(vdev, region, caps,
VFIO_REGION_INFO_CAP_DMA_FAULT); }
static const struct vfio_pci_regops vfio_pci_dma_fault_regops = { .rw = vfio_pci_dma_fault_rw, .release = vfio_pci_dma_fault_release, @@ -390,6 +425,13 @@ static const struct vfio_pci_regops vfio_pci_dma_fault_regops = { .add_capability = vfio_pci_dma_fault_add_capability, };
+static const struct vfio_pci_regops vfio_pci_dma_fault_response_regops = {
- .rw = vfio_pci_dma_fault_response_rw,
- .release = vfio_pci_dma_fault_response_release,
- .mmap = vfio_pci_dma_fault_response_mmap,
- .add_capability = vfio_pci_dma_fault_add_capability,
As I mentioned in the Qemu patch ([RFC v7 26/26] vfio/pci: Implement return_page_response page response callback), it looks like we are using the VFIO_REGION_INFO_CAP_DMA_FAULT cap id for the dma_fault_response region here as well. Is that intentional? (I was wondering how it worked in the first place and noticed this.)
Please check.
Thanks, Shameer
Hi Shameer,
On 2/18/21 11:36 AM, Shameerali Kolothum Thodi wrote:
Hi Eric,
-----Original Message----- From: Eric Auger [mailto:eric.auger@redhat.com] Sent: 16 November 2020 11:00 To: eric.auger.pro@gmail.com; eric.auger@redhat.com; iommu@lists.linux-foundation.org; linux-kernel@vger.kernel.org; kvm@vger.kernel.org; kvmarm@lists.cs.columbia.edu; will@kernel.org; joro@8bytes.org; maz@kernel.org; robin.murphy@arm.com; alex.williamson@redhat.com Cc: jean-philippe@linaro.org; zhangfei.gao@linaro.org; zhangfei.gao@gmail.com; vivek.gautam@arm.com; Shameerali Kolothum Thodi shameerali.kolothum.thodi@huawei.com; jacob.jun.pan@linux.intel.com; yi.l.liu@intel.com; tn@semihalf.com; nicoleotsuka@gmail.com; yuzenghui yuzenghui@huawei.com Subject: [PATCH v11 12/13] vfio/pci: Register a DMA fault response region
In preparation for vSVA, let's register a DMA fault response region, where the userspace will push the page responses and increment the head of the buffer. The kernel will pop those responses and inject them on iommu side.
Signed-off-by: Eric Auger eric.auger@redhat.com
drivers/vfio/pci/vfio_pci.c | 114 +++++++++++++++++++++++++--- drivers/vfio/pci/vfio_pci_private.h | 5 ++ drivers/vfio/pci/vfio_pci_rdwr.c | 39 ++++++++++ include/uapi/linux/vfio.h | 32 ++++++++ 4 files changed, 181 insertions(+), 9 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 65a83fd0e8c0..e9a904ce3f0d 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -318,9 +318,20 @@ static void vfio_pci_dma_fault_release(struct vfio_pci_device *vdev, kfree(vdev->fault_pages); }
-static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vm_area_struct *vma)
+static void +vfio_pci_dma_fault_response_release(struct vfio_pci_device *vdev,
struct vfio_pci_region *region) {
- if (vdev->dma_fault_response_wq)
destroy_workqueue(vdev->dma_fault_response_wq);
- kfree(vdev->fault_response_pages);
- vdev->fault_response_pages = NULL;
+}
+static int __vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vm_area_struct *vma,
u8 *pages)
{ u64 phys_len, req_len, pgoff, req_start; unsigned long long addr; @@ -333,14 +344,14 @@ static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev, ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); req_start = pgoff << PAGE_SHIFT;
- /* only the second page of the producer fault region is mmappable */
/* only the second page of the fault region is mmappable */ if (req_start < PAGE_SIZE) return -EINVAL;
if (req_start + req_len > phys_len) return -EINVAL;
- addr = virt_to_phys(vdev->fault_pages);
- addr = virt_to_phys(pages); vma->vm_private_data = vdev; vma->vm_pgoff = (addr >> PAGE_SHIFT) + pgoff;
@@ -349,13 +360,29 @@ static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev, return ret; }
-static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vfio_info_cap *caps)
+static int vfio_pci_dma_fault_mmap(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vm_area_struct *vma)
+{
- return __vfio_pci_dma_fault_mmap(vdev, region, vma,
vdev->fault_pages); +}
+static int +vfio_pci_dma_fault_response_mmap(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vm_area_struct *vma)
+{
- return __vfio_pci_dma_fault_mmap(vdev, region, vma,
vdev->fault_response_pages); +}
+static int __vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vfio_info_cap *caps,
u32 cap_id)
{ struct vfio_region_info_cap_sparse_mmap *sparse = NULL; struct vfio_region_info_cap_fault cap = {
.header.id = VFIO_REGION_INFO_CAP_DMA_FAULT,
.header.version = 1, .version = 1, };.header.id = cap_id,
@@ -383,6 +410,14 @@ static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev, return ret; }
+static int vfio_pci_dma_fault_add_capability(struct vfio_pci_device *vdev,
struct vfio_pci_region *region,
struct vfio_info_cap *caps) {
- return __vfio_pci_dma_fault_add_capability(vdev, region, caps,
VFIO_REGION_INFO_CAP_DMA_FAULT); }
static const struct vfio_pci_regops vfio_pci_dma_fault_regops = { .rw = vfio_pci_dma_fault_rw, .release = vfio_pci_dma_fault_release, @@ -390,6 +425,13 @@ static const struct vfio_pci_regops vfio_pci_dma_fault_regops = { .add_capability = vfio_pci_dma_fault_add_capability, };
+static const struct vfio_pci_regops vfio_pci_dma_fault_response_regops = {
- .rw = vfio_pci_dma_fault_response_rw,
- .release = vfio_pci_dma_fault_response_release,
- .mmap = vfio_pci_dma_fault_response_mmap,
- .add_capability = vfio_pci_dma_fault_add_capability,
As I mentioned in the Qemu patch ([RFC v7 26/26] vfio/pci: Implement return_page_response page response callback), it looks like we are using the VFIO_REGION_INFO_CAP_DMA_FAULT cap id for the dma_fault_response region here as well. Is that intentional? (I was wondering how it worked in the first place and noticed this.)
yep, copy paste error :-(
Thanks
Eric
Please check.
Thanks, Shameer