diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-04 18:42:48 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-12-04 18:42:48 -0800 |
| commit | a3ebb59eee2e558e8f8f27fc3f75cd367f17cd8e (patch) | |
| tree | 934bbee766e0cce55cfcaec5dcae856c0361af37 /drivers/pci/p2pdma.c | |
| parent | ce5cfb0fa20dc6454da039612e34325b7b4a8243 (diff) | |
| parent | d721f52e31553a848e0e9947ca15a49c5674aef3 (diff) | |
Merge tag 'vfio-v6.19-rc1' of https://github.com/awilliam/linux-vfio
Pull VFIO updates from Alex Williamson:
- Move libvfio selftest artifacts in preparation of more tightly
coupled integration with KVM selftests (David Matlack)
- Fix comment typo in mtty driver (Chu Guangqing)
- Support for new hardware revision in the hisi_acc vfio-pci variant
driver where the migration registers can now be accessed via the PF.
When enabled for this support, the full BAR can be exposed to the
user (Longfang Liu)
- Fix vfio cdev support for VF token passing, using the correct size
for the kernel structure, thereby actually allowing userspace to
provide a non-zero UUID token. Also set the match token callback for
the hisi_acc, fixing VF token support for this this vfio-pci variant
driver (Raghavendra Rao Ananta)
- Introduce internal callbacks on vfio devices to simplify and
consolidate duplicate code for generating VFIO_DEVICE_GET_REGION_INFO
data, removing various ioctl intercepts with a more structured
solution (Jason Gunthorpe)
- Introduce dma-buf support for vfio-pci devices, allowing MMIO regions
to be exposed through dma-buf objects with lifecycle managed through
move operations. This enables low-level interactions such as a
vfio-pci based SPDK drivers interacting directly with dma-buf capable
RDMA devices to enable peer-to-peer operations. IOMMUFD is also now
able to build upon this support to fill a long standing feature gap
versus the legacy vfio type1 IOMMU backend with an implementation of
P2P support for VM use cases that better manages the lifecycle of the
P2P mapping (Leon Romanovsky, Jason Gunthorpe, Vivek Kasireddy)
- Convert eventfd triggering for error and request signals to use RCU
mechanisms in order to avoid a 3-way lockdep reported deadlock issue
(Alex Williamson)
- Fix a 32-bit overflow introduced via dma-buf support manifesting with
large DMA buffers (Alex Mastro)
- Convert nvgrace-gpu vfio-pci variant driver to insert mappings on
fault rather than at mmap time. This conversion serves both to make
use of huge PFNMAPs but also to both avoid corrected RAS events
during reset by now being subject to vfio-pci-core's use of
unmap_mapping_range(), and to enable a device readiness test after
reset (Ankit Agrawal)
- Refactoring of vfio selftests to support multi-device tests and split
code to provide better separation between IOMMU and device objects.
This work also enables a new test suite addition to measure parallel
device initialization latency (David Matlack)
* tag 'vfio-v6.19-rc1' of https://github.com/awilliam/linux-vfio: (65 commits)
vfio: selftests: Add vfio_pci_device_init_perf_test
vfio: selftests: Eliminate INVALID_IOVA
vfio: selftests: Split libvfio.h into separate header files
vfio: selftests: Move vfio_selftests_*() helpers into libvfio.c
vfio: selftests: Rename vfio_util.h to libvfio.h
vfio: selftests: Stop passing device for IOMMU operations
vfio: selftests: Move IOVA allocator into iova_allocator.c
vfio: selftests: Move IOMMU library code into iommu.c
vfio: selftests: Rename struct vfio_dma_region to dma_region
vfio: selftests: Upgrade driver logging to dev_err()
vfio: selftests: Prefix logs with device BDF where relevant
vfio: selftests: Eliminate overly chatty logging
vfio: selftests: Support multiple devices in the same container/iommufd
vfio: selftests: Introduce struct iommu
vfio: selftests: Rename struct vfio_iommu_mode to iommu_mode
vfio: selftests: Allow passing multiple BDFs on the command line
vfio: selftests: Split run.sh into separate scripts
vfio: selftests: Move run.sh into scripts directory
vfio/nvgrace-gpu: wait for the GPU mem to be ready
vfio/nvgrace-gpu: Inform devmem unmapped after reset
...
Diffstat (limited to 'drivers/pci/p2pdma.c')
| -rw-r--r-- | drivers/pci/p2pdma.c | 186 |
1 files changed, 144 insertions, 42 deletions
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c index 78e108e47254..981a76b6b7c0 100644 --- a/drivers/pci/p2pdma.c +++ b/drivers/pci/p2pdma.c @@ -25,12 +25,12 @@ struct pci_p2pdma { struct gen_pool *pool; bool p2pmem_published; struct xarray map_types; + struct p2pdma_provider mem[PCI_STD_NUM_BARS]; }; struct pci_p2pdma_pagemap { - struct pci_dev *provider; - u64 bus_offset; struct dev_pagemap pgmap; + struct p2pdma_provider *mem; }; static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap) @@ -204,8 +204,8 @@ static void p2pdma_page_free(struct page *page) { struct pci_p2pdma_pagemap *pgmap = to_p2p_pgmap(page_pgmap(page)); /* safe to dereference while a reference is held to the percpu ref */ - struct pci_p2pdma *p2pdma = - rcu_dereference_protected(pgmap->provider->p2pdma, 1); + struct pci_p2pdma *p2pdma = rcu_dereference_protected( + to_pci_dev(pgmap->mem->owner)->p2pdma, 1); struct percpu_ref *ref; gen_pool_free_owner(p2pdma->pool, (uintptr_t)page_to_virt(page), @@ -228,56 +228,136 @@ static void pci_p2pdma_release(void *data) /* Flush and disable pci_alloc_p2p_mem() */ pdev->p2pdma = NULL; - synchronize_rcu(); + if (p2pdma->pool) + synchronize_rcu(); + xa_destroy(&p2pdma->map_types); + + if (!p2pdma->pool) + return; gen_pool_destroy(p2pdma->pool); sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group); - xa_destroy(&p2pdma->map_types); } -static int pci_p2pdma_setup(struct pci_dev *pdev) +/** + * pcim_p2pdma_init - Initialise peer-to-peer DMA providers + * @pdev: The PCI device to enable P2PDMA for + * + * This function initializes the peer-to-peer DMA infrastructure + * for a PCI device. It allocates and sets up the necessary data + * structures to support P2PDMA operations, including mapping type + * tracking. + */ +int pcim_p2pdma_init(struct pci_dev *pdev) { - int error = -ENOMEM; struct pci_p2pdma *p2p; + int i, ret; + + p2p = rcu_dereference_protected(pdev->p2pdma, 1); + if (p2p) + return 0; p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL); if (!p2p) return -ENOMEM; xa_init(&p2p->map_types); + /* + * Iterate over all standard PCI BARs and record only those that + * correspond to MMIO regions. Skip non-memory resources (e.g. I/O + * port BARs) since they cannot be used for peer-to-peer (P2P) + * transactions. + */ + for (i = 0; i < PCI_STD_NUM_BARS; i++) { + if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM)) + continue; - p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev)); - if (!p2p->pool) - goto out; + p2p->mem[i].owner = &pdev->dev; + p2p->mem[i].bus_offset = + pci_bus_address(pdev, i) - pci_resource_start(pdev, i); + } - error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); - if (error) - goto out_pool_destroy; + ret = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev); + if (ret) + goto out_p2p; - error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group); - if (error) + rcu_assign_pointer(pdev->p2pdma, p2p); + return 0; + +out_p2p: + devm_kfree(&pdev->dev, p2p); + return ret; +} +EXPORT_SYMBOL_GPL(pcim_p2pdma_init); + +/** + * pcim_p2pdma_provider - Get peer-to-peer DMA provider + * @pdev: The PCI device to enable P2PDMA for + * @bar: BAR index to get provider + * + * This function gets peer-to-peer DMA provider for a PCI device. The lifetime + * of the provider (and of course the MMIO) is bound to the lifetime of the + * driver. A driver calling this function must ensure that all references to the + * provider, and any DMA mappings created for any MMIO, are all cleaned up + * before the driver remove() completes. + * + * Since P2P is almost always shared with a second driver this means some system + * to notify, invalidate and revoke the MMIO's DMA must be in place to use this + * function. For example a revoke can be built using DMABUF. + */ +struct p2pdma_provider *pcim_p2pdma_provider(struct pci_dev *pdev, int bar) +{ + struct pci_p2pdma *p2p; + + if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM)) + return NULL; + + p2p = rcu_dereference_protected(pdev->p2pdma, 1); + if (WARN_ON(!p2p)) + /* Someone forgot to call to pcim_p2pdma_init() before */ + return NULL; + + return &p2p->mem[bar]; +} +EXPORT_SYMBOL_GPL(pcim_p2pdma_provider); + +static int pci_p2pdma_setup_pool(struct pci_dev *pdev) +{ + struct pci_p2pdma *p2pdma; + int ret; + + p2pdma = rcu_dereference_protected(pdev->p2pdma, 1); + if (p2pdma->pool) + /* We already setup pools, do nothing, */ + return 0; + + p2pdma->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev)); + if (!p2pdma->pool) + return -ENOMEM; + + ret = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group); + if (ret) goto out_pool_destroy; - rcu_assign_pointer(pdev->p2pdma, p2p); return 0; out_pool_destroy: - gen_pool_destroy(p2p->pool); -out: - devm_kfree(&pdev->dev, p2p); - return error; + gen_pool_destroy(p2pdma->pool); + p2pdma->pool = NULL; + return ret; } static void pci_p2pdma_unmap_mappings(void *data) { - struct pci_dev *pdev = data; + struct pci_p2pdma_pagemap *p2p_pgmap = data; /* * Removing the alloc attribute from sysfs will call * unmap_mapping_range() on the inode, teardown any existing userspace * mappings and prevent new ones from being created. */ - sysfs_remove_file_from_group(&pdev->dev.kobj, &p2pmem_alloc_attr.attr, + sysfs_remove_file_from_group(&p2p_pgmap->mem->owner->kobj, + &p2pmem_alloc_attr.attr, p2pmem_group.name); } @@ -295,6 +375,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, u64 offset) { struct pci_p2pdma_pagemap *p2p_pgmap; + struct p2pdma_provider *mem; struct dev_pagemap *pgmap; struct pci_p2pdma *p2pdma; void *addr; @@ -312,11 +393,21 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, if (size + offset > pci_resource_len(pdev, bar)) return -EINVAL; - if (!pdev->p2pdma) { - error = pci_p2pdma_setup(pdev); - if (error) - return error; - } + error = pcim_p2pdma_init(pdev); + if (error) + return error; + + error = pci_p2pdma_setup_pool(pdev); + if (error) + return error; + + mem = pcim_p2pdma_provider(pdev, bar); + /* + * We checked validity of BAR prior to call + * to pcim_p2pdma_provider. It should never return NULL. + */ + if (WARN_ON(!mem)) + return -EINVAL; p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL); if (!p2p_pgmap) @@ -328,10 +419,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, pgmap->nr_range = 1; pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; pgmap->ops = &p2pdma_pgmap_ops; - - p2p_pgmap->provider = pdev; - p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) - - pci_resource_start(pdev, bar); + p2p_pgmap->mem = mem; addr = devm_memremap_pages(&pdev->dev, pgmap); if (IS_ERR(addr)) { @@ -340,7 +428,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size, } error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_unmap_mappings, - pdev); + p2p_pgmap); if (error) goto pages_free; @@ -972,16 +1060,26 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish) } EXPORT_SYMBOL_GPL(pci_p2pmem_publish); -static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, - struct device *dev) +/** + * pci_p2pdma_map_type - Determine the mapping type for P2PDMA transfers + * @provider: P2PDMA provider structure + * @dev: Target device for the transfer + * + * Determines how peer-to-peer DMA transfers should be mapped between + * the provider and the target device. The mapping type indicates whether + * the transfer can be done directly through PCI switches or must go + * through the host bridge. + */ +enum pci_p2pdma_map_type pci_p2pdma_map_type(struct p2pdma_provider *provider, + struct device *dev) { enum pci_p2pdma_map_type type = PCI_P2PDMA_MAP_NOT_SUPPORTED; - struct pci_dev *provider = to_p2p_pgmap(pgmap)->provider; + struct pci_dev *pdev = to_pci_dev(provider->owner); struct pci_dev *client; struct pci_p2pdma *p2pdma; int dist; - if (!provider->p2pdma) + if (!pdev->p2pdma) return PCI_P2PDMA_MAP_NOT_SUPPORTED; if (!dev_is_pci(dev)) @@ -990,7 +1088,7 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, client = to_pci_dev(dev); rcu_read_lock(); - p2pdma = rcu_dereference(provider->p2pdma); + p2pdma = rcu_dereference(pdev->p2pdma); if (p2pdma) type = xa_to_value(xa_load(&p2pdma->map_types, @@ -998,7 +1096,7 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, rcu_read_unlock(); if (type == PCI_P2PDMA_MAP_UNKNOWN) - return calc_map_type_and_dist(provider, client, &dist, true); + return calc_map_type_and_dist(pdev, client, &dist, true); return type; } @@ -1006,9 +1104,13 @@ static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct dev_pagemap *pgmap, void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, struct device *dev, struct page *page) { - state->pgmap = page_pgmap(page); - state->map = pci_p2pdma_map_type(state->pgmap, dev); - state->bus_off = to_p2p_pgmap(state->pgmap)->bus_offset; + struct pci_p2pdma_pagemap *p2p_pgmap = to_p2p_pgmap(page_pgmap(page)); + + if (state->mem == p2p_pgmap->mem) + return; + + state->mem = p2p_pgmap->mem; + state->map = pci_p2pdma_map_type(p2p_pgmap->mem, dev); } /** |