Diffstat (limited to 'drivers/vfio')
-rw-r--r--  drivers/vfio/pci/Kconfig             |   3 +
-rw-r--r--  drivers/vfio/pci/Makefile            |   1 +
-rw-r--r--  drivers/vfio/pci/nvgrace-gpu/main.c  |  52 +++-
-rw-r--r--  drivers/vfio/pci/vfio_pci.c          |   5 +
-rw-r--r--  drivers/vfio/pci/vfio_pci_config.c   |  22 ++-
-rw-r--r--  drivers/vfio/pci/vfio_pci_core.c     |  53 ++--
-rw-r--r--  drivers/vfio/pci/vfio_pci_dmabuf.c   | 316 ++++++++++++++
-rw-r--r--  drivers/vfio/pci/vfio_pci_priv.h     |  23 ++
-rw-r--r--  drivers/vfio/vfio_main.c             |   2 +
9 files changed, 455 insertions(+), 22 deletions(-)
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 2b0172f54665..2b9fca00e9e8 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -55,6 +55,9 @@ config VFIO_PCI_ZDEV_KVM
To enable s390x KVM vfio-pci extensions, say Y.
+config VFIO_PCI_DMABUF
+ def_bool y if VFIO_PCI_CORE && PCI_P2PDMA && DMA_SHARED_BUFFER
+
source "drivers/vfio/pci/mlx5/Kconfig"
source "drivers/vfio/pci/hisilicon/Kconfig"
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index cf00c0a7e55c..53f59226ae01 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -2,6 +2,7 @@
vfio-pci-core-y := vfio_pci_core.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
vfio-pci-core-$(CONFIG_VFIO_PCI_ZDEV_KVM) += vfio_pci_zdev.o
+vfio-pci-core-$(CONFIG_VFIO_PCI_DMABUF) += vfio_pci_dmabuf.o
obj-$(CONFIG_VFIO_PCI_CORE) += vfio-pci-core.o
vfio-pci-y := vfio_pci.o
diff --git a/drivers/vfio/pci/nvgrace-gpu/main.c b/drivers/vfio/pci/nvgrace-gpu/main.c
index 5a6f77d5c81e..e33f24fbb0a4 100644
--- a/drivers/vfio/pci/nvgrace-gpu/main.c
+++ b/drivers/vfio/pci/nvgrace-gpu/main.c
@@ -7,6 +7,7 @@
#include <linux/vfio_pci_core.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
+#include <linux/pci-p2pdma.h>
/*
* The device memory usable to the workloads running in the VM is cached
@@ -652,6 +653,50 @@ nvgrace_gpu_write(struct vfio_device *core_vdev,
return vfio_pci_core_write(core_vdev, buf, count, ppos);
}
+static int nvgrace_get_dmabuf_phys(struct vfio_pci_core_device *core_vdev,
+ struct p2pdma_provider **provider,
+ unsigned int region_index,
+ struct dma_buf_phys_vec *phys_vec,
+ struct vfio_region_dma_range *dma_ranges,
+ size_t nr_ranges)
+{
+ struct nvgrace_gpu_pci_core_device *nvdev = container_of(
+ core_vdev, struct nvgrace_gpu_pci_core_device, core_device);
+ struct pci_dev *pdev = core_vdev->pdev;
+ struct mem_region *mem_region;
+
+ /*
+ * if (nvdev->resmem.memlength && region_index == RESMEM_REGION_INDEX) {
+ * The P2P properties of the non-BAR memory are the same as the
+ * BAR memory, so just use the provider for index 0. Someday
+ * when CXL gets P2P support we could create CXLish providers
+ * for the non-BAR memory.
+ * } else if (region_index == USEMEM_REGION_INDEX) {
+ * This is actually cacheable memory and isn't treated as P2P in
+ * the chip. For now we have no way to push cacheable memory
+ * through everything and the Grace HW doesn't care what caching
+ * attribute is programmed into the SMMU. So use BAR 0.
+ * }
+ */
+ mem_region = nvgrace_gpu_memregion(region_index, nvdev);
+ if (mem_region) {
+ *provider = pcim_p2pdma_provider(pdev, 0);
+ if (!*provider)
+ return -EINVAL;
+ return vfio_pci_core_fill_phys_vec(phys_vec, dma_ranges,
+ nr_ranges,
+ mem_region->memphys,
+ mem_region->memlength);
+ }
+
+ return vfio_pci_core_get_dmabuf_phys(core_vdev, provider, region_index,
+ phys_vec, dma_ranges, nr_ranges);
+}
+
+static const struct vfio_pci_device_ops nvgrace_gpu_pci_dev_ops = {
+ .get_dmabuf_phys = nvgrace_get_dmabuf_phys,
+};
+
static const struct vfio_device_ops nvgrace_gpu_pci_ops = {
.name = "nvgrace-gpu-vfio-pci",
.init = vfio_pci_core_init_dev,
@@ -673,6 +718,10 @@ static const struct vfio_device_ops nvgrace_gpu_pci_ops = {
.detach_ioas = vfio_iommufd_physical_detach_ioas,
};
+static const struct vfio_pci_device_ops nvgrace_gpu_pci_dev_core_ops = {
+ .get_dmabuf_phys = vfio_pci_core_get_dmabuf_phys,
+};
+
static const struct vfio_device_ops nvgrace_gpu_pci_core_ops = {
.name = "nvgrace-gpu-vfio-pci-core",
.init = vfio_pci_core_init_dev,
@@ -936,6 +985,9 @@ static int nvgrace_gpu_probe(struct pci_dev *pdev,
memphys, memlength);
if (ret)
goto out_put_vdev;
+ nvdev->core_device.pci_ops = &nvgrace_gpu_pci_dev_ops;
+ } else {
+ nvdev->core_device.pci_ops = &nvgrace_gpu_pci_dev_core_ops;
}
ret = vfio_pci_core_register_device(&nvdev->core_device);
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index a3e49d42c771..0c771064c0b8 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -148,6 +148,10 @@ static const struct vfio_device_ops vfio_pci_ops = {
.pasid_detach_ioas = vfio_iommufd_physical_pasid_detach_ioas,
};
+static const struct vfio_pci_device_ops vfio_pci_dev_ops = {
+ .get_dmabuf_phys = vfio_pci_core_get_dmabuf_phys,
+};
+
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct vfio_pci_core_device *vdev;
@@ -162,6 +166,7 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return PTR_ERR(vdev);
dev_set_drvdata(&pdev->dev, vdev);
+ vdev->pci_ops = &vfio_pci_dev_ops;
ret = vfio_pci_core_register_device(vdev);
if (ret)
goto out_put_vdev;
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 8f02f236b5b4..1f6008eabf23 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -589,10 +589,12 @@ static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,
virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY);
new_mem = !!(new_cmd & PCI_COMMAND_MEMORY);
- if (!new_mem)
+ if (!new_mem) {
vfio_pci_zap_and_down_write_memory_lock(vdev);
- else
+ vfio_pci_dma_buf_move(vdev, true);
+ } else {
down_write(&vdev->memory_lock);
+ }
/*
* If the user is writing mem/io enable (new_mem/io) and we
@@ -627,6 +629,8 @@ static int vfio_basic_config_write(struct vfio_pci_core_device *vdev, int pos,
*virt_cmd &= cpu_to_le16(~mask);
*virt_cmd |= cpu_to_le16(new_cmd & mask);
+ if (__vfio_pci_memory_enabled(vdev))
+ vfio_pci_dma_buf_move(vdev, false);
up_write(&vdev->memory_lock);
}
@@ -707,12 +711,16 @@ static int __init init_pci_cap_basic_perm(struct perm_bits *perm)
static void vfio_lock_and_set_power_state(struct vfio_pci_core_device *vdev,
pci_power_t state)
{
- if (state >= PCI_D3hot)
+ if (state >= PCI_D3hot) {
vfio_pci_zap_and_down_write_memory_lock(vdev);
- else
+ vfio_pci_dma_buf_move(vdev, true);
+ } else {
down_write(&vdev->memory_lock);
+ }
vfio_pci_set_power_state(vdev, state);
+ if (__vfio_pci_memory_enabled(vdev))
+ vfio_pci_dma_buf_move(vdev, false);
up_write(&vdev->memory_lock);
}
@@ -900,7 +908,10 @@ static int vfio_exp_config_write(struct vfio_pci_core_device *vdev, int pos,
if (!ret && (cap & PCI_EXP_DEVCAP_FLR)) {
vfio_pci_zap_and_down_write_memory_lock(vdev);
+ vfio_pci_dma_buf_move(vdev, true);
pci_try_reset_function(vdev->pdev);
+ if (__vfio_pci_memory_enabled(vdev))
+ vfio_pci_dma_buf_move(vdev, false);
up_write(&vdev->memory_lock);
}
}
@@ -982,7 +993,10 @@ static int vfio_af_config_write(struct vfio_pci_core_device *vdev, int pos,
if (!ret && (cap & PCI_AF_CAP_FLR) && (cap & PCI_AF_CAP_TP)) {
vfio_pci_zap_and_down_write_memory_lock(vdev);
+ vfio_pci_dma_buf_move(vdev, true);
pci_try_reset_function(vdev->pdev);
+ if (__vfio_pci_memory_enabled(vdev))
+ vfio_pci_dma_buf_move(vdev, false);
up_write(&vdev->memory_lock);
}
}
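
The three hunks above repeat one invariant: any config-space write that can
stop the device decoding memory (clearing PCI_COMMAND_MEMORY, entering D3hot,
or triggering an FLR) must revoke exported dmabufs before the operation and
restore them only if decode is still enabled afterwards, all under
memory_lock. A condensed sketch of the bracket, using only names from this
patch:

    /* Revoke/restore bracket around an operation that may disable
     * MMIO decode; importers observe both edges via move_notify. */
    vfio_pci_zap_and_down_write_memory_lock(vdev);
    vfio_pci_dma_buf_move(vdev, true);              /* revoke */

    /* ... the potentially disabling operation, e.g. an FLR ... */

    if (__vfio_pci_memory_enabled(vdev))
            vfio_pci_dma_buf_move(vdev, false);     /* un-revoke */
    up_write(&vdev->memory_lock);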
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 57c0766fb9f8..79a1a50a4ef7 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -28,6 +28,7 @@
#include <linux/nospec.h>
#include <linux/sched/mm.h>
#include <linux/iommufd.h>
+#include <linux/pci-p2pdma.h>
#if IS_ENABLED(CONFIG_EEH)
#include <asm/eeh.h>
#endif
@@ -286,6 +287,8 @@ static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev,
* semaphore.
*/
vfio_pci_zap_and_down_write_memory_lock(vdev);
+ vfio_pci_dma_buf_move(vdev, true);
+
if (vdev->pm_runtime_engaged) {
up_write(&vdev->memory_lock);
return -EINVAL;
@@ -299,11 +302,9 @@ static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev,
return 0;
}
-static int vfio_pci_core_pm_entry(struct vfio_device *device, u32 flags,
+static int vfio_pci_core_pm_entry(struct vfio_pci_core_device *vdev, u32 flags,
void __user *arg, size_t argsz)
{
- struct vfio_pci_core_device *vdev =
- container_of(device, struct vfio_pci_core_device, vdev);
int ret;
ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0);
@@ -320,12 +321,10 @@ static int vfio_pci_core_pm_entry(struct vfio_device *device, u32 flags,
}
static int vfio_pci_core_pm_entry_with_wakeup(
- struct vfio_device *device, u32 flags,
+ struct vfio_pci_core_device *vdev, u32 flags,
struct vfio_device_low_power_entry_with_wakeup __user *arg,
size_t argsz)
{
- struct vfio_pci_core_device *vdev =
- container_of(device, struct vfio_pci_core_device, vdev);
struct vfio_device_low_power_entry_with_wakeup entry;
struct eventfd_ctx *efdctx;
int ret;
@@ -373,14 +372,14 @@ static void vfio_pci_runtime_pm_exit(struct vfio_pci_core_device *vdev)
*/
down_write(&vdev->memory_lock);
__vfio_pci_runtime_pm_exit(vdev);
+ if (__vfio_pci_memory_enabled(vdev))
+ vfio_pci_dma_buf_move(vdev, false);
up_write(&vdev->memory_lock);
}
-static int vfio_pci_core_pm_exit(struct vfio_device *device, u32 flags,
+static int vfio_pci_core_pm_exit(struct vfio_pci_core_device *vdev, u32 flags,
void __user *arg, size_t argsz)
{
- struct vfio_pci_core_device *vdev =
- container_of(device, struct vfio_pci_core_device, vdev);
int ret;
ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0);
@@ -695,6 +694,8 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev)
#endif
vfio_pci_core_disable(vdev);
+ vfio_pci_dma_buf_cleanup(vdev);
+
mutex_lock(&vdev->igate);
if (vdev->err_trigger) {
eventfd_ctx_put(vdev->err_trigger);
@@ -1205,7 +1206,10 @@ static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev,
*/
vfio_pci_set_power_state(vdev, PCI_D0);
+ vfio_pci_dma_buf_move(vdev, true);
ret = pci_try_reset_function(vdev->pdev);
+ if (__vfio_pci_memory_enabled(vdev))
+ vfio_pci_dma_buf_move(vdev, false);
up_write(&vdev->memory_lock);
return ret;
@@ -1449,11 +1453,10 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl);
-static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags,
- uuid_t __user *arg, size_t argsz)
+static int vfio_pci_core_feature_token(struct vfio_pci_core_device *vdev,
+ u32 flags, uuid_t __user *arg,
+ size_t argsz)
{
- struct vfio_pci_core_device *vdev =
- container_of(device, struct vfio_pci_core_device, vdev);
uuid_t uuid;
int ret;
@@ -1480,16 +1483,21 @@ static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags,
int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz)
{
+ struct vfio_pci_core_device *vdev =
+ container_of(device, struct vfio_pci_core_device, vdev);
+
switch (flags & VFIO_DEVICE_FEATURE_MASK) {
case VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY:
- return vfio_pci_core_pm_entry(device, flags, arg, argsz);
+ return vfio_pci_core_pm_entry(vdev, flags, arg, argsz);
case VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP:
- return vfio_pci_core_pm_entry_with_wakeup(device, flags,
+ return vfio_pci_core_pm_entry_with_wakeup(vdev, flags,
arg, argsz);
case VFIO_DEVICE_FEATURE_LOW_POWER_EXIT:
- return vfio_pci_core_pm_exit(device, flags, arg, argsz);
+ return vfio_pci_core_pm_exit(vdev, flags, arg, argsz);
case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
- return vfio_pci_core_feature_token(device, flags, arg, argsz);
+ return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
+ case VFIO_DEVICE_FEATURE_DMA_BUF:
+ return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
default:
return -ENOTTY;
}
@@ -2061,6 +2069,7 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
{
struct vfio_pci_core_device *vdev =
container_of(core_vdev, struct vfio_pci_core_device, vdev);
+ int ret;
vdev->pdev = to_pci_dev(core_vdev->dev);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
@@ -2070,6 +2079,10 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
INIT_LIST_HEAD(&vdev->dummy_resources_list);
INIT_LIST_HEAD(&vdev->ioeventfds_list);
INIT_LIST_HEAD(&vdev->sriov_pfs_item);
+ ret = pcim_p2pdma_init(vdev->pdev);
+ if (ret && ret != -EOPNOTSUPP)
+ return ret;
+ INIT_LIST_HEAD(&vdev->dmabufs);
init_rwsem(&vdev->memory_lock);
xa_init(&vdev->ctx);
@@ -2434,6 +2447,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
break;
}
+ vfio_pci_dma_buf_move(vdev, true);
vfio_pci_zap_bars(vdev);
}
@@ -2462,8 +2476,11 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
err_undo:
list_for_each_entry_from_reverse(vdev, &dev_set->device_list,
- vdev.dev_set_list)
+ vdev.dev_set_list) {
+ if (vdev->vdev.open_count && __vfio_pci_memory_enabled(vdev))
+ vfio_pci_dma_buf_move(vdev, false);
up_write(&vdev->memory_lock);
+ }
list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
pm_runtime_put(&vdev->pdev->dev);
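
With VFIO_DEVICE_FEATURE_DMA_BUF dispatched from
vfio_pci_core_ioctl_feature(), userspace can request a dmabuf fd covering
page-aligned slices of a BAR. A hedged userspace sketch follows; it assumes
the uAPI layout this series adds (struct vfio_device_feature_dma_buf followed
by struct vfio_region_dma_range entries), and the BAR index and 64KiB length
are illustrative:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>

    /* Hedged sketch: ask for a dmabuf covering the first 64KiB of BAR 0.
     * device_fd is an open vfio device fd; returns the dmabuf fd or a
     * negative errno-style result from ioctl(). */
    static int get_bar0_dmabuf(int device_fd)
    {
            char buf[sizeof(struct vfio_device_feature) +
                     sizeof(struct vfio_device_feature_dma_buf) +
                     sizeof(struct vfio_region_dma_range)]
                    __attribute__((aligned(8))) = {};
            struct vfio_device_feature *feat = (void *)buf;
            struct vfio_device_feature_dma_buf *get = (void *)feat->data;
            struct vfio_region_dma_range *range = (void *)(get + 1);

            feat->argsz = sizeof(buf);
            feat->flags = VFIO_DEVICE_FEATURE_GET |
                          VFIO_DEVICE_FEATURE_DMA_BUF;
            get->region_index = 0;          /* BAR 0 */
            get->open_flags = O_CLOEXEC;
            get->nr_ranges = 1;
            range->offset = 0;              /* must be page aligned */
            range->length = 0x10000;        /* page aligned, non-zero */

            return ioctl(device_fd, VFIO_DEVICE_FEATURE, feat);
    }

On success the returned fd behaves like any other dmabuf; as the hunks above
show, vfio revokes the underlying mapping whenever the device stops decoding
memory.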
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
new file mode 100644
index 000000000000..6698f540bdac
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES.
+ */
+#include <linux/dma-buf-mapping.h>
+#include <linux/pci-p2pdma.h>
+#include <linux/dma-resv.h>
+
+#include "vfio_pci_priv.h"
+
+MODULE_IMPORT_NS("DMA_BUF");
+
+struct vfio_pci_dma_buf {
+ struct dma_buf *dmabuf;
+ struct vfio_pci_core_device *vdev;
+ struct list_head dmabufs_elm;
+ size_t size;
+ struct dma_buf_phys_vec *phys_vec;
+ struct p2pdma_provider *provider;
+ u32 nr_ranges;
+ u8 revoked : 1;
+};
+
+static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attachment)
+{
+ struct vfio_pci_dma_buf *priv = dmabuf->priv;
+
+ if (!attachment->peer2peer)
+ return -EOPNOTSUPP;
+
+ if (priv->revoked)
+ return -ENODEV;
+
+ return 0;
+}
+
+static struct sg_table *
+vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
+ enum dma_data_direction dir)
+{
+ struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
+
+ dma_resv_assert_held(priv->dmabuf->resv);
+
+ if (priv->revoked)
+ return ERR_PTR(-ENODEV);
+
+ return dma_buf_phys_vec_to_sgt(attachment, priv->provider,
+ priv->phys_vec, priv->nr_ranges,
+ priv->size, dir);
+}
+
+static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
+{
+ dma_buf_free_sgt(attachment, sgt, dir);
+}
+
+static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf)
+{
+ struct vfio_pci_dma_buf *priv = dmabuf->priv;
+
+ /*
+ * Either this or vfio_pci_dma_buf_cleanup() will remove the entry
+ * from the list; the refcount prevents both from doing so.
+ */
+ if (priv->vdev) {
+ down_write(&priv->vdev->memory_lock);
+ list_del_init(&priv->dmabufs_elm);
+ up_write(&priv->vdev->memory_lock);
+ vfio_device_put_registration(&priv->vdev->vdev);
+ }
+ kfree(priv->phys_vec);
+ kfree(priv);
+}
+
+static const struct dma_buf_ops vfio_pci_dmabuf_ops = {
+ .attach = vfio_pci_dma_buf_attach,
+ .map_dma_buf = vfio_pci_dma_buf_map,
+ .unmap_dma_buf = vfio_pci_dma_buf_unmap,
+ .release = vfio_pci_dma_buf_release,
+};
+
+int vfio_pci_core_fill_phys_vec(struct dma_buf_phys_vec *phys_vec,
+ struct vfio_region_dma_range *dma_ranges,
+ size_t nr_ranges, phys_addr_t start,
+ phys_addr_t len)
+{
+ phys_addr_t max_addr;
+ unsigned int i;
+
+ max_addr = start + len;
+ for (i = 0; i < nr_ranges; i++) {
+ phys_addr_t end;
+
+ if (!dma_ranges[i].length)
+ return -EINVAL;
+
+ if (check_add_overflow(start, dma_ranges[i].offset,
+ &phys_vec[i].paddr) ||
+ check_add_overflow(phys_vec[i].paddr,
+ dma_ranges[i].length, &end))
+ return -EOVERFLOW;
+ if (end > max_addr)
+ return -EINVAL;
+
+ phys_vec[i].len = dma_ranges[i].length;
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_fill_phys_vec);
+
+int vfio_pci_core_get_dmabuf_phys(struct vfio_pci_core_device *vdev,
+ struct p2pdma_provider **provider,
+ unsigned int region_index,
+ struct dma_buf_phys_vec *phys_vec,
+ struct vfio_region_dma_range *dma_ranges,
+ size_t nr_ranges)
+{
+ struct pci_dev *pdev = vdev->pdev;
+
+ *provider = pcim_p2pdma_provider(pdev, region_index);
+ if (!*provider)
+ return -EINVAL;
+
+ return vfio_pci_core_fill_phys_vec(
+ phys_vec, dma_ranges, nr_ranges,
+ pci_resource_start(pdev, region_index),
+ pci_resource_len(pdev, region_index));
+}
+EXPORT_SYMBOL_GPL(vfio_pci_core_get_dmabuf_phys);
+
+static int validate_dmabuf_input(struct vfio_device_feature_dma_buf *dma_buf,
+ struct vfio_region_dma_range *dma_ranges,
+ size_t *lengthp)
+{
+ size_t length = 0;
+ u32 i;
+
+ for (i = 0; i < dma_buf->nr_ranges; i++) {
+ u64 offset = dma_ranges[i].offset;
+ u64 len = dma_ranges[i].length;
+
+ if (!len || !PAGE_ALIGNED(offset) || !PAGE_ALIGNED(len))
+ return -EINVAL;
+
+ if (check_add_overflow(length, len, &length))
+ return -EINVAL;
+ }
+
+ /*
+ * dma_iova_try_alloc() will WARN if userspace proposes a size that
+ * is too big, e.g. with lots of ranges.
+ */
+ if ((u64)(length) & DMA_IOVA_USE_SWIOTLB)
+ return -EINVAL;
+
+ *lengthp = length;
+ return 0;
+}
+
+int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
+ struct vfio_device_feature_dma_buf __user *arg,
+ size_t argsz)
+{
+ struct vfio_device_feature_dma_buf get_dma_buf = {};
+ struct vfio_region_dma_range *dma_ranges;
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+ struct vfio_pci_dma_buf *priv;
+ size_t length;
+ int ret;
+
+ if (!vdev->pci_ops || !vdev->pci_ops->get_dmabuf_phys)
+ return -EOPNOTSUPP;
+
+ ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
+ sizeof(get_dma_buf));
+ if (ret != 1)
+ return ret;
+
+ if (copy_from_user(&get_dma_buf, arg, sizeof(get_dma_buf)))
+ return -EFAULT;
+
+ if (!get_dma_buf.nr_ranges || get_dma_buf.flags)
+ return -EINVAL;
+
+ /*
+ * For PCI, the region_index is the BAR number, like everything else.
+ */
+ if (get_dma_buf.region_index >= VFIO_PCI_ROM_REGION_INDEX)
+ return -ENODEV;
+
+ dma_ranges = memdup_array_user(&arg->dma_ranges, get_dma_buf.nr_ranges,
+ sizeof(*dma_ranges));
+ if (IS_ERR(dma_ranges))
+ return PTR_ERR(dma_ranges);
+
+ ret = validate_dmabuf_input(&get_dma_buf, dma_ranges, &length);
+ if (ret)
+ goto err_free_ranges;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv) {
+ ret = -ENOMEM;
+ goto err_free_ranges;
+ }
+ priv->phys_vec = kcalloc(get_dma_buf.nr_ranges, sizeof(*priv->phys_vec),
+ GFP_KERNEL);
+ if (!priv->phys_vec) {
+ ret = -ENOMEM;
+ goto err_free_priv;
+ }
+
+ priv->vdev = vdev;
+ priv->nr_ranges = get_dma_buf.nr_ranges;
+ priv->size = length;
+ ret = vdev->pci_ops->get_dmabuf_phys(vdev, &priv->provider,
+ get_dma_buf.region_index,
+ priv->phys_vec, dma_ranges,
+ priv->nr_ranges);
+ if (ret)
+ goto err_free_phys;
+
+ kfree(dma_ranges);
+ dma_ranges = NULL;
+
+ if (!vfio_device_try_get_registration(&vdev->vdev)) {
+ ret = -ENODEV;
+ goto err_free_phys;
+ }
+
+ exp_info.ops = &vfio_pci_dmabuf_ops;
+ exp_info.size = priv->size;
+ exp_info.flags = get_dma_buf.open_flags;
+ exp_info.priv = priv;
+
+ priv->dmabuf = dma_buf_export(&exp_info);
+ if (IS_ERR(priv->dmabuf)) {
+ ret = PTR_ERR(priv->dmabuf);
+ goto err_dev_put;
+ }
+
+ /* dma_buf_put() now frees priv */
+ INIT_LIST_HEAD(&priv->dmabufs_elm);
+ down_write(&vdev->memory_lock);
+ dma_resv_lock(priv->dmabuf->resv, NULL);
+ priv->revoked = !__vfio_pci_memory_enabled(vdev);
+ list_add_tail(&priv->dmabufs_elm, &vdev->dmabufs);
+ dma_resv_unlock(priv->dmabuf->resv);
+ up_write(&vdev->memory_lock);
+
+ /*
+ * dma_buf_fd() consumes the reference; when the file closes, the
+ * dmabuf will be released.
+ */
+ ret = dma_buf_fd(priv->dmabuf, get_dma_buf.open_flags);
+ if (ret < 0)
+ goto err_dma_buf;
+ return ret;
+
+err_dma_buf:
+ dma_buf_put(priv->dmabuf);
+err_dev_put:
+ vfio_device_put_registration(&vdev->vdev);
+err_free_phys:
+ kfree(priv->phys_vec);
+err_free_priv:
+ kfree(priv);
+err_free_ranges:
+ kfree(dma_ranges);
+ return ret;
+}
+
+void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
+{
+ struct vfio_pci_dma_buf *priv;
+ struct vfio_pci_dma_buf *tmp;
+
+ lockdep_assert_held_write(&vdev->memory_lock);
+
+ list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
+ if (!get_file_active(&priv->dmabuf->file))
+ continue;
+
+ if (priv->revoked != revoked) {
+ dma_resv_lock(priv->dmabuf->resv, NULL);
+ priv->revoked = revoked;
+ dma_buf_move_notify(priv->dmabuf);
+ dma_resv_unlock(priv->dmabuf->resv);
+ }
+ fput(priv->dmabuf->file);
+ }
+}
+
+void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
+{
+ struct vfio_pci_dma_buf *priv;
+ struct vfio_pci_dma_buf *tmp;
+
+ down_write(&vdev->memory_lock);
+ list_for_each_entry_safe(priv, tmp, &vdev->dmabufs, dmabufs_elm) {
+ if (!get_file_active(&priv->dmabuf->file))
+ continue;
+
+ dma_resv_lock(priv->dmabuf->resv, NULL);
+ list_del_init(&priv->dmabufs_elm);
+ priv->vdev = NULL;
+ priv->revoked = true;
+ dma_buf_move_notify(priv->dmabuf);
+ dma_resv_unlock(priv->dmabuf->resv);
+ vfio_device_put_registration(&vdev->vdev);
+ fput(priv->dmabuf->file);
+ }
+ up_write(&vdev->memory_lock);
+}
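
Because vfio_pci_dmabuf_ops implements no pin callback and its attach rejects
importers without peer2peer support, only dynamic importers can consume these
exports, and they must expect vfio_pci_dma_buf_move() to invalidate a mapping
at any time via dma_buf_move_notify(). A minimal sketch of the importer side,
assuming illustrative names (my_move_notify, my_attach_ops, my_importer_map):

    #include <linux/dma-buf.h>
    #include <linux/dma-direction.h>
    #include <linux/dma-resv.h>

    /* Called with the dmabuf's reservation lock held whenever vfio
     * revokes or restores the export; stop using the old mapping. */
    static void my_move_notify(struct dma_buf_attachment *attach)
    {
    }

    static const struct dma_buf_attach_ops my_attach_ops = {
            .allow_peer2peer = true,        /* required by vfio's attach */
            .move_notify = my_move_notify,
    };

    static int my_importer_map(struct dma_buf *dmabuf, struct device *dev)
    {
            struct dma_buf_attachment *attach;
            struct sg_table *sgt;

            attach = dma_buf_dynamic_attach(dmabuf, dev, &my_attach_ops, NULL);
            if (IS_ERR(attach))
                    return PTR_ERR(attach);

            /* Dynamic importers map under the reservation lock. */
            dma_resv_lock(dmabuf->resv, NULL);
            sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
            dma_resv_unlock(dmabuf->resv);
            if (IS_ERR(sgt)) {
                    dma_buf_detach(dmabuf, attach);
                    return PTR_ERR(sgt);
            }

            /* ... program the device with sgt; on move_notify, unmap
             * under the reservation lock and re-map afterwards ... */
            return 0;
    }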
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
index a9972eacb293..28a405f8b97c 100644
--- a/drivers/vfio/pci/vfio_pci_priv.h
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -107,4 +107,27 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
}
+#ifdef CONFIG_VFIO_PCI_DMABUF
+int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
+ struct vfio_device_feature_dma_buf __user *arg,
+ size_t argsz);
+void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev);
+void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked);
+#else
+static inline int
+vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
+ struct vfio_device_feature_dma_buf __user *arg,
+ size_t argsz)
+{
+ return -ENOTTY;
+}
+static inline void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
+{
+}
+static inline void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev,
+ bool revoked)
+{
+}
+#endif
+
#endif
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index b8fe1a75e48a..f7df90c423b4 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -172,11 +172,13 @@ void vfio_device_put_registration(struct vfio_device *device)
if (refcount_dec_and_test(&device->refcount))
complete(&device->comp);
}
+EXPORT_SYMBOL_GPL(vfio_device_put_registration);
bool vfio_device_try_get_registration(struct vfio_device *device)
{
return refcount_inc_not_zero(&device->refcount);
}
+EXPORT_SYMBOL_GPL(vfio_device_try_get_registration);
/*
* VFIO driver API