summaryrefslogtreecommitdiff
path: root/drivers/vfio/pci/vfio_pci_core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-04 18:42:48 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-04 18:42:48 -0800
commita3ebb59eee2e558e8f8f27fc3f75cd367f17cd8e (patch)
tree934bbee766e0cce55cfcaec5dcae856c0361af37 /drivers/vfio/pci/vfio_pci_core.c
parentce5cfb0fa20dc6454da039612e34325b7b4a8243 (diff)
parentd721f52e31553a848e0e9947ca15a49c5674aef3 (diff)
Merge tag 'vfio-v6.19-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson: - Move libvfio selftest artifacts in preparation of more tightly coupled integration with KVM selftests (David Matlack) - Fix comment typo in mtty driver (Chu Guangqing) - Support for new hardware revision in the hisi_acc vfio-pci variant driver where the migration registers can now be accessed via the PF. When enabled for this support, the full BAR can be exposed to the user (Longfang Liu) - Fix vfio cdev support for VF token passing, using the correct size for the kernel structure, thereby actually allowing userspace to provide a non-zero UUID token. Also set the match token callback for the hisi_acc, fixing VF token support for this this vfio-pci variant driver (Raghavendra Rao Ananta) - Introduce internal callbacks on vfio devices to simplify and consolidate duplicate code for generating VFIO_DEVICE_GET_REGION_INFO data, removing various ioctl intercepts with a more structured solution (Jason Gunthorpe) - Introduce dma-buf support for vfio-pci devices, allowing MMIO regions to be exposed through dma-buf objects with lifecycle managed through move operations. This enables low-level interactions such as a vfio-pci based SPDK drivers interacting directly with dma-buf capable RDMA devices to enable peer-to-peer operations. IOMMUFD is also now able to build upon this support to fill a long standing feature gap versus the legacy vfio type1 IOMMU backend with an implementation of P2P support for VM use cases that better manages the lifecycle of the P2P mapping (Leon Romanovsky, Jason Gunthorpe, Vivek Kasireddy) - Convert eventfd triggering for error and request signals to use RCU mechanisms in order to avoid a 3-way lockdep reported deadlock issue (Alex Williamson) - Fix a 32-bit overflow introduced via dma-buf support manifesting with large DMA buffers (Alex Mastro) - Convert nvgrace-gpu vfio-pci variant driver to insert mappings on fault rather than at mmap time. This conversion serves both to make use of huge PFNMAPs but also to both avoid corrected RAS events during reset by now being subject to vfio-pci-core's use of unmap_mapping_range(), and to enable a device readiness test after reset (Ankit Agrawal) - Refactoring of vfio selftests to support multi-device tests and split code to provide better separation between IOMMU and device objects. This work also enables a new test suite addition to measure parallel device initialization latency (David Matlack) * tag 'vfio-v6.19-rc1' of https://github.com/awilliam/linux-vfio: (65 commits) vfio: selftests: Add vfio_pci_device_init_perf_test vfio: selftests: Eliminate INVALID_IOVA vfio: selftests: Split libvfio.h into separate header files vfio: selftests: Move vfio_selftests_*() helpers into libvfio.c vfio: selftests: Rename vfio_util.h to libvfio.h vfio: selftests: Stop passing device for IOMMU operations vfio: selftests: Move IOVA allocator into iova_allocator.c vfio: selftests: Move IOMMU library code into iommu.c vfio: selftests: Rename struct vfio_dma_region to dma_region vfio: selftests: Upgrade driver logging to dev_err() vfio: selftests: Prefix logs with device BDF where relevant vfio: selftests: Eliminate overly chatty logging vfio: selftests: Support multiple devices in the same container/iommufd vfio: selftests: Introduce struct iommu vfio: selftests: Rename struct vfio_iommu_mode to iommu_mode vfio: selftests: Allow passing multiple BDFs on the command line vfio: selftests: Split run.sh into separate scripts vfio: selftests: Move run.sh into scripts directory vfio/nvgrace-gpu: wait for the GPU mem to be ready vfio/nvgrace-gpu: Inform devmem unmapped after reset ...
Diffstat (limited to 'drivers/vfio/pci/vfio_pci_core.c')
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c300
1 files changed, 156 insertions, 144 deletions
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 7dcf5439dedc..3a11e6f450f7 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -28,6 +28,7 @@
#include <linux/nospec.h>
#include <linux/sched/mm.h>
#include <linux/iommufd.h>
+#include <linux/pci-p2pdma.h>
#if IS_ENABLED(CONFIG_EEH)
#include <asm/eeh.h>
#endif
@@ -41,6 +42,40 @@ static bool nointxmask;
static bool disable_vga;
static bool disable_idle_d3;
+static void vfio_pci_eventfd_rcu_free(struct rcu_head *rcu)
+{
+ struct vfio_pci_eventfd *eventfd =
+ container_of(rcu, struct vfio_pci_eventfd, rcu);
+
+ eventfd_ctx_put(eventfd->ctx);
+ kfree(eventfd);
+}
+
+int vfio_pci_eventfd_replace_locked(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_eventfd __rcu **peventfd,
+ struct eventfd_ctx *ctx)
+{
+ struct vfio_pci_eventfd *new = NULL;
+ struct vfio_pci_eventfd *old;
+
+ lockdep_assert_held(&vdev->igate);
+
+ if (ctx) {
+ new = kzalloc(sizeof(*new), GFP_KERNEL_ACCOUNT);
+ if (!new)
+ return -ENOMEM;
+
+ new->ctx = ctx;
+ }
+
+ old = rcu_replace_pointer(*peventfd, new,
+ lockdep_is_held(&vdev->igate));
+ if (old)
+ call_rcu(&old->rcu, vfio_pci_eventfd_rcu_free);
+
+ return 0;
+}
+
/* List of PF's that vfio_pci_core_sriov_configure() has been called on */
static DEFINE_MUTEX(vfio_pci_sriov_pfs_mutex);
static LIST_HEAD(vfio_pci_sriov_pfs);
@@ -286,6 +321,8 @@ static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev,
* semaphore.
*/
vfio_pci_zap_and_down_write_memory_lock(vdev);
+ vfio_pci_dma_buf_move(vdev, true);
+
if (vdev->pm_runtime_engaged) {
up_write(&vdev->memory_lock);
return -EINVAL;
@@ -299,11 +336,9 @@ static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev,
return 0;
}
-static int vfio_pci_core_pm_entry(struct vfio_device *device, u32 flags,
+static int vfio_pci_core_pm_entry(struct vfio_pci_core_device *vdev, u32 flags,
void __user *arg, size_t argsz)
{
- struct vfio_pci_core_device *vdev =
- container_of(device, struct vfio_pci_core_device, vdev);
int ret;
ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0);
@@ -320,12 +355,10 @@ static int vfio_pci_core_pm_entry(struct vfio_device *device, u32 flags,
}
static int vfio_pci_core_pm_entry_with_wakeup(
- struct vfio_device *device, u32 flags,
+ struct vfio_pci_core_device *vdev, u32 flags,
struct vfio_device_low_power_entry_with_wakeup __user *arg,
size_t argsz)
{
- struct vfio_pci_core_device *vdev =
- container_of(device, struct vfio_pci_core_device, vdev);
struct vfio_device_low_power_entry_with_wakeup entry;
struct eventfd_ctx *efdctx;
int ret;
@@ -373,14 +406,14 @@ static void vfio_pci_runtime_pm_exit(struct vfio_pci_core_device *vdev)
*/
down_write(&vdev->memory_lock);
__vfio_pci_runtime_pm_exit(vdev);
+ if (__vfio_pci_memory_enabled(vdev))
+ vfio_pci_dma_buf_move(vdev, false);
up_write(&vdev->memory_lock);
}
-static int vfio_pci_core_pm_exit(struct vfio_device *device, u32 flags,
+static int vfio_pci_core_pm_exit(struct vfio_pci_core_device *vdev, u32 flags,
void __user *arg, size_t argsz)
{
- struct vfio_pci_core_device *vdev =
- container_of(device, struct vfio_pci_core_device, vdev);
int ret;
ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0);
@@ -695,15 +728,11 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev)
#endif
vfio_pci_core_disable(vdev);
+ vfio_pci_dma_buf_cleanup(vdev);
+
mutex_lock(&vdev->igate);
- if (vdev->err_trigger) {
- eventfd_ctx_put(vdev->err_trigger);
- vdev->err_trigger = NULL;
- }
- if (vdev->req_trigger) {
- eventfd_ctx_put(vdev->req_trigger);
- vdev->req_trigger = NULL;
- }
+ vfio_pci_eventfd_replace_locked(vdev, &vdev->err_trigger, NULL);
+ vfio_pci_eventfd_replace_locked(vdev, &vdev->req_trigger, NULL);
mutex_unlock(&vdev->igate);
}
EXPORT_SYMBOL_GPL(vfio_pci_core_close_device);
@@ -996,42 +1025,36 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
}
-static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
- struct vfio_region_info __user *arg)
+int vfio_pci_ioctl_get_region_info(struct vfio_device *core_vdev,
+ struct vfio_region_info *info,
+ struct vfio_info_cap *caps)
{
- unsigned long minsz = offsetofend(struct vfio_region_info, offset);
+ struct vfio_pci_core_device *vdev =
+ container_of(core_vdev, struct vfio_pci_core_device, vdev);
struct pci_dev *pdev = vdev->pdev;
- struct vfio_region_info info;
- struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
int i, ret;
- if (copy_from_user(&info, arg, minsz))
- return -EFAULT;
-
- if (info.argsz < minsz)
- return -EINVAL;
-
- switch (info.index) {
+ switch (info->index) {
case VFIO_PCI_CONFIG_REGION_INDEX:
- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
- info.size = pdev->cfg_size;
- info.flags = VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE;
+ info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index);
+ info->size = pdev->cfg_size;
+ info->flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE;
break;
case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
- info.size = pci_resource_len(pdev, info.index);
- if (!info.size) {
- info.flags = 0;
+ info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index);
+ info->size = pci_resource_len(pdev, info->index);
+ if (!info->size) {
+ info->flags = 0;
break;
}
- info.flags = VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE;
- if (vdev->bar_mmap_supported[info.index]) {
- info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
- if (info.index == vdev->msix_bar) {
- ret = msix_mmappable_cap(vdev, &caps);
+ info->flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE;
+ if (vdev->bar_mmap_supported[info->index]) {
+ info->flags |= VFIO_REGION_INFO_FLAG_MMAP;
+ if (info->index == vdev->msix_bar) {
+ ret = msix_mmappable_cap(vdev, caps);
if (ret)
return ret;
}
@@ -1043,9 +1066,9 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
size_t size;
u16 cmd;
- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
- info.flags = 0;
- info.size = 0;
+ info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index);
+ info->flags = 0;
+ info->size = 0;
if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) {
/*
@@ -1055,16 +1078,17 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
cmd = vfio_pci_memory_lock_and_enable(vdev);
io = pci_map_rom(pdev, &size);
if (io) {
- info.flags = VFIO_REGION_INFO_FLAG_READ;
+ info->flags = VFIO_REGION_INFO_FLAG_READ;
/* Report the BAR size, not the ROM size. */
- info.size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+ info->size = pci_resource_len(pdev,
+ PCI_ROM_RESOURCE);
pci_unmap_rom(pdev, io);
}
vfio_pci_memory_unlock_and_restore(vdev, cmd);
} else if (pdev->rom && pdev->romlen) {
- info.flags = VFIO_REGION_INFO_FLAG_READ;
+ info->flags = VFIO_REGION_INFO_FLAG_READ;
/* Report BAR size as power of two. */
- info.size = roundup_pow_of_two(pdev->romlen);
+ info->size = roundup_pow_of_two(pdev->romlen);
}
break;
@@ -1073,10 +1097,10 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
if (!vdev->has_vga)
return -EINVAL;
- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
- info.size = 0xc0000;
- info.flags = VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE;
+ info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index);
+ info->size = 0xc0000;
+ info->flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE;
break;
default: {
@@ -1085,53 +1109,36 @@ static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
.header.version = 1
};
- if (info.index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+ if (info->index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
return -EINVAL;
- info.index = array_index_nospec(
- info.index, VFIO_PCI_NUM_REGIONS + vdev->num_regions);
+ info->index = array_index_nospec(
+ info->index, VFIO_PCI_NUM_REGIONS + vdev->num_regions);
- i = info.index - VFIO_PCI_NUM_REGIONS;
+ i = info->index - VFIO_PCI_NUM_REGIONS;
- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
- info.size = vdev->region[i].size;
- info.flags = vdev->region[i].flags;
+ info->offset = VFIO_PCI_INDEX_TO_OFFSET(info->index);
+ info->size = vdev->region[i].size;
+ info->flags = vdev->region[i].flags;
cap_type.type = vdev->region[i].type;
cap_type.subtype = vdev->region[i].subtype;
- ret = vfio_info_add_capability(&caps, &cap_type.header,
+ ret = vfio_info_add_capability(caps, &cap_type.header,
sizeof(cap_type));
if (ret)
return ret;
if (vdev->region[i].ops->add_capability) {
ret = vdev->region[i].ops->add_capability(
- vdev, &vdev->region[i], &caps);
+ vdev, &vdev->region[i], caps);
if (ret)
return ret;
}
}
}
-
- if (caps.size) {
- info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
- if (info.argsz < sizeof(info) + caps.size) {
- info.argsz = sizeof(info) + caps.size;
- info.cap_offset = 0;
- } else {
- vfio_info_cap_shift(&caps, sizeof(info));
- if (copy_to_user(arg + 1, caps.buf, caps.size)) {
- kfree(caps.buf);
- return -EFAULT;
- }
- info.cap_offset = sizeof(*arg);
- }
-
- kfree(caps.buf);
- }
-
- return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
+ return 0;
}
+EXPORT_SYMBOL_GPL(vfio_pci_ioctl_get_region_info);
static int vfio_pci_ioctl_get_irq_info(struct vfio_pci_core_device *vdev,
struct vfio_irq_info __user *arg)
@@ -1227,7 +1234,10 @@ static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev,
*/
vfio_pci_set_power_state(vdev, PCI_D0);
+ vfio_pci_dma_buf_move(vdev, true);
ret = pci_try_reset_function(vdev->pdev);
+ if (__vfio_pci_memory_enabled(vdev))
+ vfio_pci_dma_buf_move(vdev, false);
up_write(&vdev->memory_lock);
return ret;
@@ -1457,8 +1467,6 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
return vfio_pci_ioctl_get_irq_info(vdev, uarg);
case VFIO_DEVICE_GET_PCI_HOT_RESET_INFO:
return vfio_pci_ioctl_get_pci_hot_reset_info(vdev, uarg);
- case VFIO_DEVICE_GET_REGION_INFO:
- return vfio_pci_ioctl_get_region_info(vdev, uarg);
case VFIO_DEVICE_IOEVENTFD:
return vfio_pci_ioctl_ioeventfd(vdev, uarg);
case VFIO_DEVICE_PCI_HOT_RESET:
@@ -1473,11 +1481,10 @@ long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl);
-static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags,
- uuid_t __user *arg, size_t argsz)
+static int vfio_pci_core_feature_token(struct vfio_pci_core_device *vdev,
+ u32 flags, uuid_t __user *arg,
+ size_t argsz)
{
- struct vfio_pci_core_device *vdev =
- container_of(device, struct vfio_pci_core_device, vdev);
uuid_t uuid;
int ret;
@@ -1504,16 +1511,21 @@ static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags,
int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz)
{
+ struct vfio_pci_core_device *vdev =
+ container_of(device, struct vfio_pci_core_device, vdev);
+
switch (flags & VFIO_DEVICE_FEATURE_MASK) {
case VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY:
- return vfio_pci_core_pm_entry(device, flags, arg, argsz);
+ return vfio_pci_core_pm_entry(vdev, flags, arg, argsz);
case VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP:
- return vfio_pci_core_pm_entry_with_wakeup(device, flags,
+ return vfio_pci_core_pm_entry_with_wakeup(vdev, flags,
arg, argsz);
case VFIO_DEVICE_FEATURE_LOW_POWER_EXIT:
- return vfio_pci_core_pm_exit(device, flags, arg, argsz);
+ return vfio_pci_core_pm_exit(vdev, flags, arg, argsz);
case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
- return vfio_pci_core_feature_token(device, flags, arg, argsz);
+ return vfio_pci_core_feature_token(vdev, flags, arg, argsz);
+ case VFIO_DEVICE_FEATURE_DMA_BUF:
+ return vfio_pci_core_feature_dma_buf(vdev, flags, arg, argsz);
default:
return -ENOTTY;
}
@@ -1640,49 +1652,49 @@ static unsigned long vma_to_pfn(struct vm_area_struct *vma)
return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
}
-static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
- unsigned int order)
+vm_fault_t vfio_pci_vmf_insert_pfn(struct vfio_pci_core_device *vdev,
+ struct vm_fault *vmf,
+ unsigned long pfn,
+ unsigned int order)
{
- struct vm_area_struct *vma = vmf->vma;
- struct vfio_pci_core_device *vdev = vma->vm_private_data;
- unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
- unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
- unsigned long pfn = vma_to_pfn(vma) + pgoff;
- vm_fault_t ret = VM_FAULT_SIGBUS;
-
- if (order && (addr < vma->vm_start ||
- addr + (PAGE_SIZE << order) > vma->vm_end ||
- pfn & ((1 << order) - 1))) {
- ret = VM_FAULT_FALLBACK;
- goto out;
- }
-
- down_read(&vdev->memory_lock);
+ lockdep_assert_held_read(&vdev->memory_lock);
if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
- goto out_unlock;
+ return VM_FAULT_SIGBUS;
switch (order) {
case 0:
- ret = vmf_insert_pfn(vma, vmf->address, pfn);
- break;
+ return vmf_insert_pfn(vmf->vma, vmf->address, pfn);
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
case PMD_ORDER:
- ret = vmf_insert_pfn_pmd(vmf, pfn, false);
- break;
+ return vmf_insert_pfn_pmd(vmf, pfn, false);
#endif
#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
case PUD_ORDER:
- ret = vmf_insert_pfn_pud(vmf, pfn, false);
+ return vmf_insert_pfn_pud(vmf, pfn, false);
break;
#endif
default:
- ret = VM_FAULT_FALLBACK;
+ return VM_FAULT_FALLBACK;
+ }
+}
+EXPORT_SYMBOL_GPL(vfio_pci_vmf_insert_pfn);
+
+static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
+ unsigned int order)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct vfio_pci_core_device *vdev = vma->vm_private_data;
+ unsigned long addr = vmf->address & ~((PAGE_SIZE << order) - 1);
+ unsigned long pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+ unsigned long pfn = vma_to_pfn(vma) + pgoff;
+ vm_fault_t ret = VM_FAULT_FALLBACK;
+
+ if (is_aligned_for_order(vma, addr, pfn, order)) {
+ scoped_guard(rwsem_read, &vdev->memory_lock)
+ ret = vfio_pci_vmf_insert_pfn(vdev, vmf, pfn, order);
}
-out_unlock:
- up_read(&vdev->memory_lock);
-out:
dev_dbg_ratelimited(&vdev->pdev->dev,
"%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
__func__, order,
@@ -1749,18 +1761,9 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
* Even though we don't make use of the barmap for the mmap,
* we need to request the region and the barmap tracks that.
*/
- if (!vdev->barmap[index]) {
- ret = pci_request_selected_regions(pdev,
- 1 << index, "vfio-pci");
- if (ret)
- return ret;
-
- vdev->barmap[index] = pci_iomap(pdev, index, 0);
- if (!vdev->barmap[index]) {
- pci_release_selected_regions(pdev, 1 << index);
- return -ENOMEM;
- }
- }
+ ret = vfio_pci_core_setup_barmap(vdev, index);
+ if (ret)
+ return ret;
vma->vm_private_data = vdev;
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
@@ -1800,21 +1803,21 @@ void vfio_pci_core_request(struct vfio_device *core_vdev, unsigned int count)
struct vfio_pci_core_device *vdev =
container_of(core_vdev, struct vfio_pci_core_device, vdev);
struct pci_dev *pdev = vdev->pdev;
+ struct vfio_pci_eventfd *eventfd;
- mutex_lock(&vdev->igate);
-
- if (vdev->req_trigger) {
+ rcu_read_lock();
+ eventfd = rcu_dereference(vdev->req_trigger);
+ if (eventfd) {
if (!(count % 10))
pci_notice_ratelimited(pdev,
"Relaying device request to user (#%u)\n",
count);
- eventfd_signal(vdev->req_trigger);
+ eventfd_signal(eventfd->ctx);
} else if (count == 0) {
pci_warn(pdev,
"No device request channel registered, blocked until released by user\n");
}
-
- mutex_unlock(&vdev->igate);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(vfio_pci_core_request);
@@ -2085,6 +2088,7 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
{
struct vfio_pci_core_device *vdev =
container_of(core_vdev, struct vfio_pci_core_device, vdev);
+ int ret;
vdev->pdev = to_pci_dev(core_vdev->dev);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
@@ -2094,6 +2098,10 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
INIT_LIST_HEAD(&vdev->dummy_resources_list);
INIT_LIST_HEAD(&vdev->ioeventfds_list);
INIT_LIST_HEAD(&vdev->sriov_pfs_item);
+ ret = pcim_p2pdma_init(vdev->pdev);
+ if (ret && ret != -EOPNOTSUPP)
+ return ret;
+ INIT_LIST_HEAD(&vdev->dmabufs);
init_rwsem(&vdev->memory_lock);
xa_init(&vdev->ctx);
@@ -2227,13 +2235,13 @@ pci_ers_result_t vfio_pci_core_aer_err_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);
+ struct vfio_pci_eventfd *eventfd;
- mutex_lock(&vdev->igate);
-
- if (vdev->err_trigger)
- eventfd_signal(vdev->err_trigger);
-
- mutex_unlock(&vdev->igate);
+ rcu_read_lock();
+ eventfd = rcu_dereference(vdev->err_trigger);
+ if (eventfd)
+ eventfd_signal(eventfd->ctx);
+ rcu_read_unlock();
return PCI_ERS_RESULT_CAN_RECOVER;
}
@@ -2458,6 +2466,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
break;
}
+ vfio_pci_dma_buf_move(vdev, true);
vfio_pci_zap_bars(vdev);
}
@@ -2486,8 +2495,11 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
err_undo:
list_for_each_entry_from_reverse(vdev, &dev_set->device_list,
- vdev.dev_set_list)
+ vdev.dev_set_list) {
+ if (vdev->vdev.open_count && __vfio_pci_memory_enabled(vdev))
+ vfio_pci_dma_buf_move(vdev, false);
up_write(&vdev->memory_lock);
+ }
list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list)
pm_runtime_put(&vdev->pdev->dev);