mirror of
https://github.com/torvalds/linux.git
synced 2024-11-10 06:01:57 +00:00
iommufd for 6.2
iommufd is the user API to control the IOMMU subsystem as it relates to managing IO page tables that point at user space memory. It takes over from drivers/vfio/vfio_iommu_type1.c (aka the VFIO container) which is the VFIO specific interface for a similar idea. We see a broad need for extended features, some being highly IOMMU device specific: - Binding iommu_domain's to PASID/SSID - Userspace IO page tables, for ARM, x86 and S390 - Kernel bypassed invalidation of user page tables - Re-use of the KVM page table in the IOMMU - Dirty page tracking in the IOMMU - Runtime Increase/Decrease of IOPTE size - PRI support with faults resolved in userspace Many of these HW features exist to support VM use cases - for instance the combination of PASID, PRI and Userspace IO Page Tables allows an implementation of DMA Shared Virtual Addressing (vSVA) within a guest. Dirty tracking enables VM live migration with SRIOV devices and PASID support allow creating "scalable IOV" devices, among other things. As these features are fundamental to a VM platform they need to be uniformly exposed to all the driver families that do DMA into VMs, which is currently VFIO and VDPA. -----BEGIN PGP SIGNATURE----- iHUEABYIAB0WIQRRRCHOFoQz/8F5bUaFwuHvBreFYQUCY5ct7wAKCRCFwuHvBreF YZZ5AQDciXfcgXLt0UBEmWupNb0f/asT6tk717pdsKm8kAZMNAEAsIyLiKT5HqGl s7fAu+CQ1pr9+9NKGevD+frw8Solsw4= =jJkd -----END PGP SIGNATURE----- Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd Pull iommufd implementation from Jason Gunthorpe: "iommufd is the user API to control the IOMMU subsystem as it relates to managing IO page tables that point at user space memory. It takes over from drivers/vfio/vfio_iommu_type1.c (aka the VFIO container) which is the VFIO specific interface for a similar idea. 
We see a broad need for extended features, some being highly IOMMU device specific: - Binding iommu_domain's to PASID/SSID - Userspace IO page tables, for ARM, x86 and S390 - Kernel bypassed invalidation of user page tables - Re-use of the KVM page table in the IOMMU - Dirty page tracking in the IOMMU - Runtime Increase/Decrease of IOPTE size - PRI support with faults resolved in userspace Many of these HW features exist to support VM use cases - for instance the combination of PASID, PRI and Userspace IO Page Tables allows an implementation of DMA Shared Virtual Addressing (vSVA) within a guest. Dirty tracking enables VM live migration with SRIOV devices and PASID support allow creating "scalable IOV" devices, among other things. As these features are fundamental to a VM platform they need to be uniformly exposed to all the driver families that do DMA into VMs, which is currently VFIO and VDPA" For more background, see the extended explanations in Jason's pull request: https://lore.kernel.org/lkml/Y5dzTU8dlmXTbzoJ@nvidia.com/ * tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (62 commits) iommufd: Change the order of MSI setup iommufd: Improve a few unclear bits of code iommufd: Fix comment typos vfio: Move vfio group specific code into group.c vfio: Refactor dma APIs for emulated devices vfio: Wrap vfio group module init/clean code into helpers vfio: Refactor vfio_device open and close vfio: Make vfio_device_open() truly device specific vfio: Swap order of vfio_device_container_register() and open_device() vfio: Set device->group in helper function vfio: Create wrappers for group register/unregister vfio: Move the sanity check of the group to vfio_create_group() vfio: Simplify vfio_create_group() iommufd: Allow iommufd to supply /dev/vfio/vfio vfio: Make vfio_container optionally compiled vfio: Move container related MODULE_ALIAS statements into container.c vfio-iommufd: Support iommufd for emulated VFIO devices vfio-iommufd: 
Support iommufd for physical VFIO devices vfio-iommufd: Allow iommufd to be used in place of a container fd vfio: Use IOMMU_CAP_ENFORCE_CACHE_COHERENCY for vfio_file_enforced_coherent() ...
This commit is contained in:
commit
08cdc21579
@ -441,8 +441,11 @@ ForEachMacros:
|
||||
- 'inet_lhash2_for_each_icsk'
|
||||
- 'inet_lhash2_for_each_icsk_continue'
|
||||
- 'inet_lhash2_for_each_icsk_rcu'
|
||||
- 'interval_tree_for_each_double_span'
|
||||
- 'interval_tree_for_each_span'
|
||||
- 'intlist__for_each_entry'
|
||||
- 'intlist__for_each_entry_safe'
|
||||
- 'iopt_for_each_contig_area'
|
||||
- 'kcore_copy__for_each_phdr'
|
||||
- 'key_for_each'
|
||||
- 'key_for_each_safe'
|
||||
|
@ -25,6 +25,7 @@ place where this information is gathered.
|
||||
ebpf/index
|
||||
ioctl/index
|
||||
iommu
|
||||
iommufd
|
||||
media/index
|
||||
netlink/index
|
||||
sysfs-platform_profile
|
||||
|
@ -105,6 +105,7 @@ Code Seq# Include File Comments
|
||||
'8' all SNP8023 advanced NIC card
|
||||
<mailto:mcr@solidum.com>
|
||||
';' 64-7F linux/vfio.h
|
||||
';' 80-FF linux/iommufd.h
|
||||
'=' 00-3f uapi/linux/ptp_clock.h <mailto:richardcochran@gmail.com>
|
||||
'@' 00-0F linux/radeonfb.h conflict!
|
||||
'@' 00-0F drivers/video/aty/aty128fb.c conflict!
|
||||
|
223
Documentation/userspace-api/iommufd.rst
Normal file
223
Documentation/userspace-api/iommufd.rst
Normal file
@ -0,0 +1,223 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0+
|
||||
|
||||
=======
|
||||
IOMMUFD
|
||||
=======
|
||||
|
||||
:Author: Jason Gunthorpe
|
||||
:Author: Kevin Tian
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
IOMMUFD is the user API to control the IOMMU subsystem as it relates to managing
|
||||
IO page tables from userspace using file descriptors. It intends to be general
|
||||
and consumable by any driver that wants to expose DMA to userspace. These
|
||||
drivers are eventually expected to deprecate any internal IOMMU logic
|
||||
they may already/historically implement (e.g. vfio_iommu_type1.c).
|
||||
|
||||
At minimum iommufd provides universal support of managing I/O address spaces and
|
||||
I/O page tables for all IOMMUs, with room in the design to add non-generic
|
||||
features to cater to specific hardware functionality.
|
||||
|
||||
In this context the capital letter (IOMMUFD) refers to the subsystem while the
|
||||
small letter (iommufd) refers to the file descriptors created via /dev/iommu for
|
||||
use by userspace.
|
||||
|
||||
Key Concepts
|
||||
============
|
||||
|
||||
User Visible Objects
|
||||
--------------------
|
||||
|
||||
The following IOMMUFD objects are exposed to userspace:
|
||||
|
||||
- IOMMUFD_OBJ_IOAS, representing an I/O address space (IOAS), allowing map/unmap
|
||||
of user space memory into ranges of I/O Virtual Address (IOVA).
|
||||
|
||||
The IOAS is a functional replacement for the VFIO container, and like the VFIO
|
||||
container it copies an IOVA map to a list of iommu_domains held within it.
|
||||
|
||||
- IOMMUFD_OBJ_DEVICE, representing a device that is bound to iommufd by an
|
||||
external driver.
|
||||
|
||||
- IOMMUFD_OBJ_HW_PAGETABLE, representing an actual hardware I/O page table
|
||||
(i.e. a single struct iommu_domain) managed by the iommu driver.
|
||||
|
||||
The IOAS has a list of HW_PAGETABLES that share the same IOVA mapping and
|
||||
it will synchronize its mapping with each member HW_PAGETABLE.
|
||||
|
||||
All user-visible objects are destroyed via the IOMMU_DESTROY uAPI.
|
||||
|
||||
The diagram below shows the relationship between user-visible objects and kernel
|
||||
datastructures (external to iommufd), with numbers referring to the operations
|
||||
creating the objects and links::
|
||||
|
||||
_________________________________________________________
|
||||
| iommufd |
|
||||
| [1] |
|
||||
| _________________ |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | |
|
||||
| | | [3] [2] |
|
||||
| | | ____________ __________ |
|
||||
| | IOAS |<--| |<------| | |
|
||||
| | | |HW_PAGETABLE| | DEVICE | |
|
||||
| | | |____________| |__________| |
|
||||
| | | | | |
|
||||
| | | | | |
|
||||
| | | | | |
|
||||
| | | | | |
|
||||
| | | | | |
|
||||
| |_________________| | | |
|
||||
| | | | |
|
||||
|_________|___________________|___________________|_______|
|
||||
| | |
|
||||
| _____v______ _______v_____
|
||||
| PFN storage | | | |
|
||||
|------------>|iommu_domain| |struct device|
|
||||
|____________| |_____________|
|
||||
|
||||
1. IOMMUFD_OBJ_IOAS is created via the IOMMU_IOAS_ALLOC uAPI. An iommufd can
|
||||
hold multiple IOAS objects. IOAS is the most generic object and does not
|
||||
expose interfaces that are specific to single IOMMU drivers. All operations
|
||||
on the IOAS must operate equally on each of the iommu_domains inside of it.
|
||||
|
||||
2. IOMMUFD_OBJ_DEVICE is created when an external driver calls the IOMMUFD kAPI
|
||||
to bind a device to an iommufd. The driver is expected to implement a set of
|
||||
ioctls to allow userspace to initiate the binding operation. Successful
|
||||
completion of this operation establishes the desired DMA ownership over the
|
||||
device. The driver must also set the driver_managed_dma flag and must not
|
||||
touch the device until this operation succeeds.
|
||||
|
||||
3. IOMMUFD_OBJ_HW_PAGETABLE is created when an external driver calls the IOMMUFD
|
||||
kAPI to attach a bound device to an IOAS. Similarly the external driver uAPI
|
||||
allows userspace to initiate the attaching operation. If a compatible
|
||||
pagetable already exists then it is reused for the attachment. Otherwise a
|
||||
new pagetable object and iommu_domain is created. Successful completion of
|
||||
this operation sets up the linkages among IOAS, device and iommu_domain. Once
|
||||
this completes the device can do DMA.
|
||||
|
||||
Every iommu_domain inside the IOAS is also represented to userspace as a
|
||||
HW_PAGETABLE object.
|
||||
|
||||
.. note::
|
||||
|
||||
Future IOMMUFD updates will provide an API to create and manipulate the
|
||||
HW_PAGETABLE directly.
|
||||
|
||||
A device can only bind to an iommufd due to DMA ownership claim and attach to at
|
||||
most one IOAS object (no support of PASID yet).
|
||||
|
||||
Kernel Datastructure
|
||||
--------------------
|
||||
|
||||
User visible objects are backed by the following datastructures:
|
||||
|
||||
- iommufd_ioas for IOMMUFD_OBJ_IOAS.
|
||||
- iommufd_device for IOMMUFD_OBJ_DEVICE.
|
||||
- iommufd_hw_pagetable for IOMMUFD_OBJ_HW_PAGETABLE.
|
||||
|
||||
Some terminology used when looking at these datastructures:
|
||||
|
||||
- Automatic domain - refers to an iommu domain created automatically when
|
||||
attaching a device to an IOAS object. This is compatible with the semantics of
|
||||
VFIO type1.
|
||||
|
||||
- Manual domain - refers to an iommu domain designated by the user as the
|
||||
target pagetable to be attached to by a device. Though currently there are
|
||||
no uAPIs to directly create such a domain, the datastructures and algorithms
|
||||
are ready for handling that use case.
|
||||
|
||||
- In-kernel user - refers to something like a VFIO mdev that is using the
|
||||
IOMMUFD access interface to access the IOAS. This starts by creating an
|
||||
iommufd_access object that is similar to the domain binding a physical device
|
||||
would do. The access object will then allow converting IOVA ranges into struct
|
||||
page * lists, or doing direct read/write to an IOVA.
|
||||
|
||||
iommufd_ioas serves as the metadata datastructure to manage how IOVA ranges are
|
||||
mapped to memory pages, composed of:
|
||||
|
||||
- struct io_pagetable holding the IOVA map
|
||||
- struct iopt_area's representing populated portions of IOVA
|
||||
- struct iopt_pages representing the storage of PFNs
|
||||
- struct iommu_domain representing the IO page table in the IOMMU
|
||||
- struct iopt_pages_access representing in-kernel users of PFNs
|
||||
- struct xarray pinned_pfns holding a list of pages pinned by in-kernel users
|
||||
|
||||
Each iopt_pages represents a logical linear array of full PFNs. The PFNs are
|
||||
ultimately derived from userspace VAs via an mm_struct. Once they have been
|
||||
pinned the PFNs are stored in IOPTEs of an iommu_domain or inside the pinned_pfns
|
||||
xarray if they have been pinned through an iommufd_access.
|
||||
|
||||
PFNs have to be copied between all combinations of storage locations, depending
|
||||
on what domains are present and what kinds of in-kernel "software access" users
|
||||
exist. The mechanism ensures that a page is pinned only once.
|
||||
|
||||
An io_pagetable is composed of iopt_areas pointing at iopt_pages, along with a
|
||||
list of iommu_domains that mirror the IOVA to PFN map.
|
||||
|
||||
Multiple io_pagetable-s, through their iopt_area-s, can share a single
|
||||
iopt_pages which avoids multi-pinning and double accounting of page
|
||||
consumption.
|
||||
|
||||
iommufd_ioas is sharable between subsystems, e.g. VFIO and VDPA, as long as
|
||||
devices managed by different subsystems are bound to the same iommufd.
|
||||
|
||||
IOMMUFD User API
|
||||
================
|
||||
|
||||
.. kernel-doc:: include/uapi/linux/iommufd.h
|
||||
|
||||
IOMMUFD Kernel API
|
||||
==================
|
||||
|
||||
The IOMMUFD kAPI is device-centric with group-related tricks managed behind the
|
||||
scenes. This allows the external drivers calling such kAPI to implement a simple
|
||||
device-centric uAPI for connecting its device to an iommufd, instead of
|
||||
explicitly imposing the group semantics in its uAPI as VFIO does.
|
||||
|
||||
.. kernel-doc:: drivers/iommu/iommufd/device.c
|
||||
:export:
|
||||
|
||||
.. kernel-doc:: drivers/iommu/iommufd/main.c
|
||||
:export:
|
||||
|
||||
VFIO and IOMMUFD
|
||||
----------------
|
||||
|
||||
Connecting a VFIO device to iommufd can be done in two ways.
|
||||
|
||||
First is a VFIO compatible way by directly implementing the /dev/vfio/vfio
|
||||
container IOCTLs by mapping them into io_pagetable operations. Doing so allows
|
||||
the use of iommufd in legacy VFIO applications by symlinking /dev/vfio/vfio to
|
||||
/dev/iommu or extending VFIO to SET_CONTAINER using an iommufd instead of a
|
||||
container fd.
|
||||
|
||||
The second approach directly extends VFIO to support a new set of device-centric
|
||||
user APIs based on the aforementioned IOMMUFD kernel API. It requires userspace
|
||||
changes but better matches the IOMMUFD API semantics and makes it easier to support new
|
||||
iommufd features when comparing it to the first approach.
|
||||
|
||||
Currently both approaches are still work-in-progress.
|
||||
|
||||
There are still a few gaps to be resolved to catch up with VFIO type1, as
|
||||
documented in iommufd_vfio_check_extension().
|
||||
|
||||
Future TODOs
|
||||
============
|
||||
|
||||
Currently IOMMUFD supports only kernel-managed I/O page tables, similar to VFIO
|
||||
type1. New features on the radar include:
|
||||
|
||||
- Binding iommu_domain's to PASID/SSID
|
||||
- Userspace page tables, for ARM, x86 and S390
|
||||
- Kernel bypass'd invalidation of user page tables
|
||||
- Re-use of the KVM page table in the IOMMU
|
||||
- Dirty page tracking in the IOMMU
|
||||
- Runtime Increase/Decrease of IOPTE size
|
||||
- PRI support with faults resolved in userspace
|
12
MAINTAINERS
12
MAINTAINERS
@ -10793,6 +10793,18 @@ F: drivers/iommu/dma-iommu.h
|
||||
F: drivers/iommu/iova.c
|
||||
F: include/linux/iova.h
|
||||
|
||||
IOMMUFD
|
||||
M: Jason Gunthorpe <jgg@nvidia.com>
|
||||
M: Kevin Tian <kevin.tian@intel.com>
|
||||
L: iommu@lists.linux.dev
|
||||
S: Maintained
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd.git
|
||||
F: Documentation/userspace-api/iommufd.rst
|
||||
F: drivers/iommu/iommufd/
|
||||
F: include/linux/iommufd.h
|
||||
F: include/uapi/linux/iommufd.h
|
||||
F: tools/testing/selftests/iommu/
|
||||
|
||||
IOMMU SUBSYSTEM
|
||||
M: Joerg Roedel <joro@8bytes.org>
|
||||
M: Will Deacon <will@kernel.org>
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <linux/pci.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/sched/task.h>
|
||||
#include <linux/intel-svm.h>
|
||||
#include <linux/io-64-nonatomic-lo-hi.h>
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/fs.h>
|
||||
@ -100,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
|
||||
filp->private_data = ctx;
|
||||
|
||||
if (device_user_pasid_enabled(idxd)) {
|
||||
sva = iommu_sva_bind_device(dev, current->mm, NULL);
|
||||
sva = iommu_sva_bind_device(dev, current->mm);
|
||||
if (IS_ERR(sva)) {
|
||||
rc = PTR_ERR(sva);
|
||||
dev_err(dev, "pasid allocation failed: %d\n", rc);
|
||||
|
@ -14,7 +14,6 @@
|
||||
#include <linux/io-64-nonatomic-lo-hi.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/intel-svm.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <uapi/linux/idxd.h>
|
||||
#include <linux/dmaengine.h>
|
||||
@ -502,29 +501,7 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
|
||||
|
||||
static int idxd_enable_system_pasid(struct idxd_device *idxd)
|
||||
{
|
||||
int flags;
|
||||
unsigned int pasid;
|
||||
struct iommu_sva *sva;
|
||||
|
||||
flags = SVM_FLAG_SUPERVISOR_MODE;
|
||||
|
||||
sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
|
||||
if (IS_ERR(sva)) {
|
||||
dev_warn(&idxd->pdev->dev,
|
||||
"iommu sva bind failed: %ld\n", PTR_ERR(sva));
|
||||
return PTR_ERR(sva);
|
||||
}
|
||||
|
||||
pasid = iommu_sva_get_pasid(sva);
|
||||
if (pasid == IOMMU_PASID_INVALID) {
|
||||
iommu_sva_unbind_device(sva);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
idxd->sva = sva;
|
||||
idxd->pasid = pasid;
|
||||
dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
|
||||
return 0;
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static void idxd_disable_system_pasid(struct idxd_device *idxd)
|
||||
|
@ -669,9 +669,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
|
||||
|
||||
vgpu->attached = true;
|
||||
|
||||
kvmgt_protect_table_init(vgpu);
|
||||
gvt_cache_init(vgpu);
|
||||
|
||||
vgpu->track_node.track_write = kvmgt_page_track_write;
|
||||
vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
|
||||
kvm_get_kvm(vgpu->vfio_device.kvm);
|
||||
@ -715,6 +712,11 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
|
||||
kvmgt_protect_table_destroy(vgpu);
|
||||
gvt_cache_destroy(vgpu);
|
||||
|
||||
WARN_ON(vgpu->nr_cache_entries);
|
||||
|
||||
vgpu->gfn_cache = RB_ROOT;
|
||||
vgpu->dma_addr_cache = RB_ROOT;
|
||||
|
||||
intel_vgpu_release_msi_eventfd_ctx(vgpu);
|
||||
|
||||
vgpu->attached = false;
|
||||
@ -1445,9 +1447,17 @@ static int intel_vgpu_init_dev(struct vfio_device *vfio_dev)
|
||||
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
|
||||
struct intel_vgpu_type *type =
|
||||
container_of(mdev->type, struct intel_vgpu_type, type);
|
||||
int ret;
|
||||
|
||||
vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt;
|
||||
return intel_gvt_create_vgpu(vgpu, type->conf);
|
||||
ret = intel_gvt_create_vgpu(vgpu, type->conf);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
kvmgt_protect_table_init(vgpu);
|
||||
gvt_cache_init(vgpu);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
|
||||
@ -1468,6 +1478,9 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
|
||||
.mmap = intel_vgpu_mmap,
|
||||
.ioctl = intel_vgpu_ioctl,
|
||||
.dma_unmap = intel_vgpu_dma_unmap,
|
||||
.bind_iommufd = vfio_iommufd_emulated_bind,
|
||||
.unbind_iommufd = vfio_iommufd_emulated_unbind,
|
||||
.attach_ioas = vfio_iommufd_emulated_attach_ioas,
|
||||
};
|
||||
|
||||
static int intel_vgpu_probe(struct mdev_device *mdev)
|
||||
|
@ -188,6 +188,7 @@ config MSM_IOMMU
|
||||
|
||||
source "drivers/iommu/amd/Kconfig"
|
||||
source "drivers/iommu/intel/Kconfig"
|
||||
source "drivers/iommu/iommufd/Kconfig"
|
||||
|
||||
config IRQ_REMAP
|
||||
bool "Support for Interrupt Remapping"
|
||||
|
@ -1,5 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
obj-y += amd/ intel/ arm/
|
||||
obj-y += amd/ intel/ arm/ iommufd/
|
||||
obj-$(CONFIG_IOMMU_API) += iommu.o
|
||||
obj-$(CONFIG_IOMMU_API) += iommu-traces.o
|
||||
obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
|
||||
@ -28,6 +28,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
|
||||
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
|
||||
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
|
||||
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
|
||||
obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o
|
||||
obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o
|
||||
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
|
||||
obj-$(CONFIG_APPLE_DART) += apple-dart.o
|
||||
|
@ -2155,21 +2155,13 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
|
||||
static int amd_iommu_attach_device(struct iommu_domain *dom,
|
||||
struct device *dev)
|
||||
{
|
||||
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
|
||||
struct protection_domain *domain = to_pdomain(dom);
|
||||
struct iommu_dev_data *dev_data;
|
||||
struct amd_iommu *iommu;
|
||||
struct amd_iommu *iommu = rlookup_amd_iommu(dev);
|
||||
int ret;
|
||||
|
||||
if (!check_device(dev))
|
||||
return -EINVAL;
|
||||
|
||||
dev_data = dev_iommu_priv_get(dev);
|
||||
dev_data->defer_attach = false;
|
||||
|
||||
iommu = rlookup_amd_iommu(dev);
|
||||
if (!iommu)
|
||||
return -EINVAL;
|
||||
|
||||
if (dev_data->domain)
|
||||
detach_device(dev);
|
||||
|
||||
@ -2286,6 +2278,8 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
|
||||
return false;
|
||||
case IOMMU_CAP_PRE_BOOT_PROTECTION:
|
||||
return amdr_ivrs_remap_support;
|
||||
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
|
||||
return true;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "arm-smmu-v3.h"
|
||||
#include "../../iommu-sva-lib.h"
|
||||
#include "../../iommu-sva.h"
|
||||
#include "../../io-pgtable-arm.h"
|
||||
|
||||
struct arm_smmu_mmu_notifier {
|
||||
@ -344,11 +344,6 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
|
||||
if (!bond)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* Allocate a PASID for this mm if necessary */
|
||||
ret = iommu_sva_alloc_pasid(mm, 1, (1U << master->ssid_bits) - 1);
|
||||
if (ret)
|
||||
goto err_free_bond;
|
||||
|
||||
bond->mm = mm;
|
||||
bond->sva.dev = dev;
|
||||
refcount_set(&bond->refs, 1);
|
||||
@ -367,42 +362,6 @@ err_free_bond:
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
struct iommu_sva *
|
||||
arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
|
||||
{
|
||||
struct iommu_sva *handle;
|
||||
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
|
||||
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
|
||||
|
||||
if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
mutex_lock(&sva_lock);
|
||||
handle = __arm_smmu_sva_bind(dev, mm);
|
||||
mutex_unlock(&sva_lock);
|
||||
return handle;
|
||||
}
|
||||
|
||||
void arm_smmu_sva_unbind(struct iommu_sva *handle)
|
||||
{
|
||||
struct arm_smmu_bond *bond = sva_to_bond(handle);
|
||||
|
||||
mutex_lock(&sva_lock);
|
||||
if (refcount_dec_and_test(&bond->refs)) {
|
||||
list_del(&bond->list);
|
||||
arm_smmu_mmu_notifier_put(bond->smmu_mn);
|
||||
kfree(bond);
|
||||
}
|
||||
mutex_unlock(&sva_lock);
|
||||
}
|
||||
|
||||
u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle)
|
||||
{
|
||||
struct arm_smmu_bond *bond = sva_to_bond(handle);
|
||||
|
||||
return bond->mm->pasid;
|
||||
}
|
||||
|
||||
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
|
||||
{
|
||||
unsigned long reg, fld;
|
||||
@ -550,3 +509,64 @@ void arm_smmu_sva_notifier_synchronize(void)
|
||||
*/
|
||||
mmu_notifier_synchronize();
|
||||
}
|
||||
|
||||
void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
|
||||
struct device *dev, ioasid_t id)
|
||||
{
|
||||
struct mm_struct *mm = domain->mm;
|
||||
struct arm_smmu_bond *bond = NULL, *t;
|
||||
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
|
||||
|
||||
mutex_lock(&sva_lock);
|
||||
list_for_each_entry(t, &master->bonds, list) {
|
||||
if (t->mm == mm) {
|
||||
bond = t;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) {
|
||||
list_del(&bond->list);
|
||||
arm_smmu_mmu_notifier_put(bond->smmu_mn);
|
||||
kfree(bond);
|
||||
}
|
||||
mutex_unlock(&sva_lock);
|
||||
}
|
||||
|
||||
static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
|
||||
struct device *dev, ioasid_t id)
|
||||
{
|
||||
int ret = 0;
|
||||
struct iommu_sva *handle;
|
||||
struct mm_struct *mm = domain->mm;
|
||||
|
||||
mutex_lock(&sva_lock);
|
||||
handle = __arm_smmu_sva_bind(dev, mm);
|
||||
if (IS_ERR(handle))
|
||||
ret = PTR_ERR(handle);
|
||||
mutex_unlock(&sva_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void arm_smmu_sva_domain_free(struct iommu_domain *domain)
|
||||
{
|
||||
kfree(domain);
|
||||
}
|
||||
|
||||
static const struct iommu_domain_ops arm_smmu_sva_domain_ops = {
|
||||
.set_dev_pasid = arm_smmu_sva_set_dev_pasid,
|
||||
.free = arm_smmu_sva_domain_free
|
||||
};
|
||||
|
||||
struct iommu_domain *arm_smmu_sva_domain_alloc(void)
|
||||
{
|
||||
struct iommu_domain *domain;
|
||||
|
||||
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
|
||||
if (!domain)
|
||||
return NULL;
|
||||
domain->ops = &arm_smmu_sva_domain_ops;
|
||||
|
||||
return domain;
|
||||
}
|
||||
|
@ -29,7 +29,7 @@
|
||||
|
||||
#include "arm-smmu-v3.h"
|
||||
#include "../../dma-iommu.h"
|
||||
#include "../../iommu-sva-lib.h"
|
||||
#include "../../iommu-sva.h"
|
||||
|
||||
static bool disable_bypass = true;
|
||||
module_param(disable_bypass, bool, 0444);
|
||||
@ -2009,6 +2009,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
|
||||
{
|
||||
struct arm_smmu_domain *smmu_domain;
|
||||
|
||||
if (type == IOMMU_DOMAIN_SVA)
|
||||
return arm_smmu_sva_domain_alloc();
|
||||
|
||||
if (type != IOMMU_DOMAIN_UNMANAGED &&
|
||||
type != IOMMU_DOMAIN_DMA &&
|
||||
type != IOMMU_DOMAIN_DMA_FQ &&
|
||||
@ -2430,23 +2433,14 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
||||
goto out_unlock;
|
||||
}
|
||||
} else if (smmu_domain->smmu != smmu) {
|
||||
dev_err(dev,
|
||||
"cannot attach to SMMU %s (upstream of %s)\n",
|
||||
dev_name(smmu_domain->smmu->dev),
|
||||
dev_name(smmu->dev));
|
||||
ret = -ENXIO;
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
|
||||
master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
|
||||
dev_err(dev,
|
||||
"cannot attach to incompatible domain (%u SSID bits != %u)\n",
|
||||
smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
|
||||
smmu_domain->stall_enabled != master->stall_enabled) {
|
||||
dev_err(dev, "cannot attach to stall-%s domain\n",
|
||||
smmu_domain->stall_enabled ? "enabled" : "disabled");
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
@ -2838,6 +2832,17 @@ static int arm_smmu_def_domain_type(struct device *dev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
|
||||
{
|
||||
struct iommu_domain *domain;
|
||||
|
||||
domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
|
||||
if (WARN_ON(IS_ERR(domain)) || !domain)
|
||||
return;
|
||||
|
||||
arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
|
||||
}
|
||||
|
||||
static struct iommu_ops arm_smmu_ops = {
|
||||
.capable = arm_smmu_capable,
|
||||
.domain_alloc = arm_smmu_domain_alloc,
|
||||
@ -2846,11 +2851,9 @@ static struct iommu_ops arm_smmu_ops = {
|
||||
.device_group = arm_smmu_device_group,
|
||||
.of_xlate = arm_smmu_of_xlate,
|
||||
.get_resv_regions = arm_smmu_get_resv_regions,
|
||||
.remove_dev_pasid = arm_smmu_remove_dev_pasid,
|
||||
.dev_enable_feat = arm_smmu_dev_enable_feature,
|
||||
.dev_disable_feat = arm_smmu_dev_disable_feature,
|
||||
.sva_bind = arm_smmu_sva_bind,
|
||||
.sva_unbind = arm_smmu_sva_unbind,
|
||||
.sva_get_pasid = arm_smmu_sva_get_pasid,
|
||||
.page_response = arm_smmu_page_response,
|
||||
.def_domain_type = arm_smmu_def_domain_type,
|
||||
.pgsize_bitmap = -1UL, /* Restricted during device attach */
|
||||
@ -3543,6 +3546,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
|
||||
/* SID/SSID sizes */
|
||||
smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
|
||||
smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
|
||||
smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
|
||||
|
||||
/*
|
||||
* If the SMMU supports fewer bits than would fill a single L2 stream
|
||||
|
@ -754,11 +754,10 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
|
||||
int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
|
||||
int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
|
||||
bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
|
||||
struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
|
||||
void *drvdata);
|
||||
void arm_smmu_sva_unbind(struct iommu_sva *handle);
|
||||
u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
|
||||
void arm_smmu_sva_notifier_synchronize(void);
|
||||
struct iommu_domain *arm_smmu_sva_domain_alloc(void);
|
||||
void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
|
||||
struct device *dev, ioasid_t id);
|
||||
#else /* CONFIG_ARM_SMMU_V3_SVA */
|
||||
static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
|
||||
{
|
||||
@ -790,19 +789,17 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline struct iommu_sva *
|
||||
arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
|
||||
{
|
||||
return ERR_PTR(-ENODEV);
|
||||
}
|
||||
|
||||
static inline void arm_smmu_sva_unbind(struct iommu_sva *handle) {}
|
||||
|
||||
static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle)
|
||||
{
|
||||
return IOMMU_PASID_INVALID;
|
||||
}
|
||||
|
||||
static inline void arm_smmu_sva_notifier_synchronize(void) {}
|
||||
|
||||
static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
|
||||
struct device *dev,
|
||||
ioasid_t id)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_ARM_SMMU_V3_SVA */
|
||||
#endif /* _ARM_SMMU_V3_H */
|
||||
|
@ -1150,9 +1150,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
||||
* different SMMUs.
|
||||
*/
|
||||
if (smmu_domain->smmu != smmu) {
|
||||
dev_err(dev,
|
||||
"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
|
||||
dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
|
||||
ret = -EINVAL;
|
||||
goto rpm_put;
|
||||
}
|
||||
|
@ -381,13 +381,8 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev
|
||||
* Sanity check the domain. We don't support domains across
|
||||
* different IOMMUs.
|
||||
*/
|
||||
if (qcom_domain->iommu != qcom_iommu) {
|
||||
dev_err(dev, "cannot attach to IOMMU %s while already "
|
||||
"attached to domain on IOMMU %s\n",
|
||||
dev_name(qcom_domain->iommu->dev),
|
||||
dev_name(qcom_iommu->dev));
|
||||
if (qcom_domain->iommu != qcom_iommu)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -211,7 +211,7 @@ int pamu_config_ppaace(int liodn, u32 omi, u32 stashid, int prot)
|
||||
ppaace->op_encode.index_ot.omi = omi;
|
||||
} else if (~omi != 0) {
|
||||
pr_debug("bad operation mapping index: %d\n", omi);
|
||||
return -EINVAL;
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/* configure stash id */
|
||||
|
@ -258,7 +258,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
|
||||
liodn = of_get_property(dev->of_node, "fsl,liodn", &len);
|
||||
if (!liodn) {
|
||||
pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node);
|
||||
return -EINVAL;
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&dma_domain->domain_lock, flags);
|
||||
@ -267,7 +267,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
|
||||
if (liodn[i] >= PAACE_NUMBER_ENTRIES) {
|
||||
pr_debug("Invalid liodn %d, attach device failed for %pOF\n",
|
||||
liodn[i], dev->of_node);
|
||||
ret = -EINVAL;
|
||||
ret = -ENODEV;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1105,6 +1105,13 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
|
||||
|
||||
raw_spin_lock_init(&iommu->register_lock);
|
||||
|
||||
/*
|
||||
* A value of N in PSS field of eCap register indicates hardware
|
||||
* supports PASID field of N+1 bits.
|
||||
*/
|
||||
if (pasid_supported(iommu))
|
||||
iommu->iommu.max_pasids = 2UL << ecap_pss(iommu->ecap);
|
||||
|
||||
/*
|
||||
* This is only for hotplug; at boot time intel_iommu_enabled won't
|
||||
* be set yet. When intel_iommu_init() runs, it registers the units
|
||||
|
@ -27,7 +27,7 @@
|
||||
#include "iommu.h"
|
||||
#include "../dma-iommu.h"
|
||||
#include "../irq_remapping.h"
|
||||
#include "../iommu-sva-lib.h"
|
||||
#include "../iommu-sva.h"
|
||||
#include "pasid.h"
|
||||
#include "cap_audit.h"
|
||||
|
||||
@ -4188,6 +4188,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
|
||||
return domain;
|
||||
case IOMMU_DOMAIN_IDENTITY:
|
||||
return &si_domain->domain;
|
||||
case IOMMU_DOMAIN_SVA:
|
||||
return intel_svm_domain_alloc();
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@ -4213,19 +4215,15 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
|
||||
return -ENODEV;
|
||||
|
||||
if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
|
||||
return -EOPNOTSUPP;
|
||||
return -EINVAL;
|
||||
|
||||
/* check if this iommu agaw is sufficient for max mapped address */
|
||||
addr_width = agaw_to_width(iommu->agaw);
|
||||
if (addr_width > cap_mgaw(iommu->cap))
|
||||
addr_width = cap_mgaw(iommu->cap);
|
||||
|
||||
if (dmar_domain->max_addr > (1LL << addr_width)) {
|
||||
dev_err(dev, "%s: iommu width (%d) is not "
|
||||
"sufficient for the mapped address (%llx)\n",
|
||||
__func__, addr_width, dmar_domain->max_addr);
|
||||
return -EFAULT;
|
||||
}
|
||||
if (dmar_domain->max_addr > (1LL << addr_width))
|
||||
return -EINVAL;
|
||||
dmar_domain->gaw = addr_width;
|
||||
|
||||
/*
|
||||
@ -4471,14 +4469,20 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
|
||||
|
||||
static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
|
||||
{
|
||||
if (cap == IOMMU_CAP_CACHE_COHERENCY)
|
||||
return true;
|
||||
if (cap == IOMMU_CAP_INTR_REMAP)
|
||||
return irq_remapping_enabled == 1;
|
||||
if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION)
|
||||
return dmar_platform_optin();
|
||||
struct device_domain_info *info = dev_iommu_priv_get(dev);
|
||||
|
||||
return false;
|
||||
switch (cap) {
|
||||
case IOMMU_CAP_CACHE_COHERENCY:
|
||||
return true;
|
||||
case IOMMU_CAP_INTR_REMAP:
|
||||
return irq_remapping_enabled == 1;
|
||||
case IOMMU_CAP_PRE_BOOT_PROTECTION:
|
||||
return dmar_platform_optin();
|
||||
case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
|
||||
return ecap_sc_support(info->iommu->ecap);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static struct iommu_device *intel_iommu_probe_device(struct device *dev)
|
||||
@ -4732,6 +4736,28 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
|
||||
__mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
|
||||
}
|
||||
|
||||
/*
 * Detach whatever domain is attached to @pasid on @dev and clear the PASID
 * table entry. Domain-type specific cleanup runs first; only SVA domains are
 * expected here, anything else triggers a warning.
 */
static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
{
	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
	struct iommu_domain *attached;

	attached = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
	if (attached) {
		if (attached->type == IOMMU_DOMAIN_SVA)
			intel_svm_remove_dev_pasid(dev, pasid);
		else
			WARN_ON(1);	/* should never reach here */
	}

	/* The PASID entry is torn down whether or not a domain was found. */
	intel_pasid_tear_down_entry(iommu, dev, pasid, false);
}
|
||||
|
||||
const struct iommu_ops intel_iommu_ops = {
|
||||
.capable = intel_iommu_capable,
|
||||
.domain_alloc = intel_iommu_domain_alloc,
|
||||
@ -4744,11 +4770,9 @@ const struct iommu_ops intel_iommu_ops = {
|
||||
.dev_disable_feat = intel_iommu_dev_disable_feat,
|
||||
.is_attach_deferred = intel_iommu_is_attach_deferred,
|
||||
.def_domain_type = device_def_domain_type,
|
||||
.remove_dev_pasid = intel_iommu_remove_dev_pasid,
|
||||
.pgsize_bitmap = SZ_4K,
|
||||
#ifdef CONFIG_INTEL_IOMMU_SVM
|
||||
.sva_bind = intel_svm_bind,
|
||||
.sva_unbind = intel_svm_unbind,
|
||||
.sva_get_pasid = intel_svm_get_pasid,
|
||||
.page_response = intel_svm_page_response,
|
||||
#endif
|
||||
.default_domain_ops = &(const struct iommu_domain_ops) {
|
||||
|
@ -480,8 +480,6 @@ enum {
|
||||
#define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1)
|
||||
#define VTD_FLAG_SVM_CAPABLE (1 << 2)
|
||||
|
||||
extern int intel_iommu_sm;
|
||||
|
||||
#define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap))
|
||||
#define pasid_supported(iommu) (sm_supported(iommu) && \
|
||||
ecap_pasid((iommu)->ecap))
|
||||
@ -753,12 +751,10 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
|
||||
extern void intel_svm_check(struct intel_iommu *iommu);
|
||||
extern int intel_svm_enable_prq(struct intel_iommu *iommu);
|
||||
extern int intel_svm_finish_prq(struct intel_iommu *iommu);
|
||||
struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
|
||||
void *drvdata);
|
||||
void intel_svm_unbind(struct iommu_sva *handle);
|
||||
u32 intel_svm_get_pasid(struct iommu_sva *handle);
|
||||
int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
|
||||
struct iommu_page_response *msg);
|
||||
struct iommu_domain *intel_svm_domain_alloc(void);
|
||||
void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid);
|
||||
|
||||
struct intel_svm_dev {
|
||||
struct list_head list;
|
||||
@ -783,6 +779,14 @@ struct intel_svm {
|
||||
};
|
||||
#else
|
||||
static inline void intel_svm_check(struct intel_iommu *iommu) {}
|
||||
static inline struct iommu_domain *intel_svm_domain_alloc(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
|
||||
@ -798,6 +802,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
|
||||
extern const struct iommu_ops intel_iommu_ops;
|
||||
|
||||
#ifdef CONFIG_INTEL_IOMMU
|
||||
extern int intel_iommu_sm;
|
||||
extern int iommu_calculate_agaw(struct intel_iommu *iommu);
|
||||
extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
|
||||
extern int dmar_disabled;
|
||||
@ -813,6 +818,7 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
|
||||
}
|
||||
#define dmar_disabled (1)
|
||||
#define intel_iommu_enabled (0)
|
||||
#define intel_iommu_sm (0)
|
||||
#endif
|
||||
|
||||
static inline const char *decode_prq_descriptor(char *str, size_t size,
|
||||
|
@ -101,8 +101,10 @@ int intel_pasid_alloc_table(struct device *dev)
|
||||
|
||||
might_sleep();
|
||||
info = dev_iommu_priv_get(dev);
|
||||
if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
|
||||
return -EINVAL;
|
||||
if (WARN_ON(!info || !dev_is_pci(dev)))
|
||||
return -ENODEV;
|
||||
if (WARN_ON(info->pasid_table))
|
||||
return -EEXIST;
|
||||
|
||||
pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
|
||||
if (!pasid_table)
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include "iommu.h"
|
||||
#include "pasid.h"
|
||||
#include "perf.h"
|
||||
#include "../iommu-sva-lib.h"
|
||||
#include "../iommu-sva.h"
|
||||
#include "trace.h"
|
||||
|
||||
static irqreturn_t prq_event_thread(int irq, void *d);
|
||||
@ -299,19 +299,9 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
|
||||
unsigned int flags)
|
||||
{
|
||||
ioasid_t max_pasid = dev_is_pci(dev) ?
|
||||
pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
|
||||
|
||||
return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
|
||||
}
|
||||
|
||||
static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
|
||||
struct device *dev,
|
||||
struct mm_struct *mm,
|
||||
unsigned int flags)
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
struct device_domain_info *info = dev_iommu_priv_get(dev);
|
||||
struct intel_svm_dev *sdev;
|
||||
@ -327,22 +317,18 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
|
||||
|
||||
svm->pasid = mm->pasid;
|
||||
svm->mm = mm;
|
||||
svm->flags = flags;
|
||||
INIT_LIST_HEAD_RCU(&svm->devs);
|
||||
|
||||
if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
|
||||
svm->notifier.ops = &intel_mmuops;
|
||||
ret = mmu_notifier_register(&svm->notifier, mm);
|
||||
if (ret) {
|
||||
kfree(svm);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
svm->notifier.ops = &intel_mmuops;
|
||||
ret = mmu_notifier_register(&svm->notifier, mm);
|
||||
if (ret) {
|
||||
kfree(svm);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
ret = pasid_private_add(svm->pasid, svm);
|
||||
if (ret) {
|
||||
if (svm->notifier.ops)
|
||||
mmu_notifier_unregister(&svm->notifier, mm);
|
||||
mmu_notifier_unregister(&svm->notifier, mm);
|
||||
kfree(svm);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
@ -377,9 +363,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
|
||||
}
|
||||
|
||||
/* Setup the pasid table: */
|
||||
sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
|
||||
PASID_FLAG_SUPERVISOR_MODE : 0;
|
||||
sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
|
||||
sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
|
||||
ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
|
||||
FLPT_DEFAULT_DID, sflags);
|
||||
if (ret)
|
||||
@ -393,8 +377,7 @@ free_sdev:
|
||||
kfree(sdev);
|
||||
free_svm:
|
||||
if (list_empty(&svm->devs)) {
|
||||
if (svm->notifier.ops)
|
||||
mmu_notifier_unregister(&svm->notifier, mm);
|
||||
mmu_notifier_unregister(&svm->notifier, mm);
|
||||
pasid_private_remove(mm->pasid);
|
||||
kfree(svm);
|
||||
}
|
||||
@ -787,67 +770,6 @@ prq_advance:
|
||||
return IRQ_RETVAL(handled);
|
||||
}
|
||||
|
||||
struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
|
||||
{
|
||||
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
|
||||
unsigned int flags = 0;
|
||||
struct iommu_sva *sva;
|
||||
int ret;
|
||||
|
||||
if (drvdata)
|
||||
flags = *(unsigned int *)drvdata;
|
||||
|
||||
if (flags & SVM_FLAG_SUPERVISOR_MODE) {
|
||||
if (!ecap_srs(iommu->ecap)) {
|
||||
dev_err(dev, "%s: Supervisor PASID not supported\n",
|
||||
iommu->name);
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
}
|
||||
|
||||
if (mm) {
|
||||
dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
|
||||
iommu->name);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
mm = &init_mm;
|
||||
}
|
||||
|
||||
mutex_lock(&pasid_mutex);
|
||||
ret = intel_svm_alloc_pasid(dev, mm, flags);
|
||||
if (ret) {
|
||||
mutex_unlock(&pasid_mutex);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
sva = intel_svm_bind_mm(iommu, dev, mm, flags);
|
||||
mutex_unlock(&pasid_mutex);
|
||||
|
||||
return sva;
|
||||
}
|
||||
|
||||
void intel_svm_unbind(struct iommu_sva *sva)
|
||||
{
|
||||
struct intel_svm_dev *sdev = to_intel_svm_dev(sva);
|
||||
|
||||
mutex_lock(&pasid_mutex);
|
||||
intel_svm_unbind_mm(sdev->dev, sdev->pasid);
|
||||
mutex_unlock(&pasid_mutex);
|
||||
}
|
||||
|
||||
u32 intel_svm_get_pasid(struct iommu_sva *sva)
|
||||
{
|
||||
struct intel_svm_dev *sdev;
|
||||
u32 pasid;
|
||||
|
||||
mutex_lock(&pasid_mutex);
|
||||
sdev = to_intel_svm_dev(sva);
|
||||
pasid = sdev->pasid;
|
||||
mutex_unlock(&pasid_mutex);
|
||||
|
||||
return pasid;
|
||||
}
|
||||
|
||||
int intel_svm_page_response(struct device *dev,
|
||||
struct iommu_fault_event *evt,
|
||||
struct iommu_page_response *msg)
|
||||
@ -918,3 +840,50 @@ int intel_svm_page_response(struct device *dev,
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Unbind the mm bound to @pasid on @dev. intel_svm_unbind_mm() is called
 * with pasid_mutex held.
 */
void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
{
	mutex_lock(&pasid_mutex);
	intel_svm_unbind_mm(dev, pasid);
	mutex_unlock(&pasid_mutex);
}
|
||||
|
||||
/*
 * .set_dev_pasid callback for SVA domains: bind domain->mm to @dev through
 * intel_svm_bind_mm(), under pasid_mutex.
 *
 * Returns 0 on success or the errno carried by the ERR_PTR that
 * intel_svm_bind_mm() returned.
 */
static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
				   struct device *dev, ioasid_t pasid)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_iommu *iommu = info->iommu;
	struct mm_struct *mm = domain->mm;
	struct iommu_sva *bond;

	mutex_lock(&pasid_mutex);
	bond = intel_svm_bind_mm(iommu, dev, mm);
	mutex_unlock(&pasid_mutex);

	if (IS_ERR(bond))
		return PTR_ERR(bond);

	return 0;
}
|
||||
|
||||
/* .free callback for SVA domains: release the containing dmar_domain. */
static void intel_svm_domain_free(struct iommu_domain *domain)
{
	struct dmar_domain *dmar = to_dmar_domain(domain);

	kfree(dmar);
}
|
||||
|
||||
static const struct iommu_domain_ops intel_svm_domain_ops = {
|
||||
.set_dev_pasid = intel_svm_set_dev_pasid,
|
||||
.free = intel_svm_domain_free
|
||||
};
|
||||
|
||||
struct iommu_domain *intel_svm_domain_alloc(void)
|
||||
{
|
||||
struct dmar_domain *domain;
|
||||
|
||||
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
|
||||
if (!domain)
|
||||
return NULL;
|
||||
domain->domain.ops = &intel_svm_domain_ops;
|
||||
|
||||
return &domain->domain;
|
||||
}
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
#include "iommu-sva-lib.h"
|
||||
#include "iommu-sva.h"
|
||||
|
||||
/**
|
||||
* struct iopf_queue - IO Page Fault queue
|
||||
@ -69,69 +69,18 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
|
||||
return iommu_page_response(dev, &resp);
|
||||
}
|
||||
|
||||
static enum iommu_page_response_code
|
||||
iopf_handle_single(struct iopf_fault *iopf)
|
||||
{
|
||||
vm_fault_t ret;
|
||||
struct mm_struct *mm;
|
||||
struct vm_area_struct *vma;
|
||||
unsigned int access_flags = 0;
|
||||
unsigned int fault_flags = FAULT_FLAG_REMOTE;
|
||||
struct iommu_fault_page_request *prm = &iopf->fault.prm;
|
||||
enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
|
||||
|
||||
if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
|
||||
return status;
|
||||
|
||||
mm = iommu_sva_find(prm->pasid);
|
||||
if (IS_ERR_OR_NULL(mm))
|
||||
return status;
|
||||
|
||||
mmap_read_lock(mm);
|
||||
|
||||
vma = find_extend_vma(mm, prm->addr);
|
||||
if (!vma)
|
||||
/* Unmapped area */
|
||||
goto out_put_mm;
|
||||
|
||||
if (prm->perm & IOMMU_FAULT_PERM_READ)
|
||||
access_flags |= VM_READ;
|
||||
|
||||
if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
|
||||
access_flags |= VM_WRITE;
|
||||
fault_flags |= FAULT_FLAG_WRITE;
|
||||
}
|
||||
|
||||
if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
|
||||
access_flags |= VM_EXEC;
|
||||
fault_flags |= FAULT_FLAG_INSTRUCTION;
|
||||
}
|
||||
|
||||
if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
|
||||
fault_flags |= FAULT_FLAG_USER;
|
||||
|
||||
if (access_flags & ~vma->vm_flags)
|
||||
/* Access fault */
|
||||
goto out_put_mm;
|
||||
|
||||
ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
|
||||
status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
|
||||
IOMMU_PAGE_RESP_SUCCESS;
|
||||
|
||||
out_put_mm:
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void iopf_handle_group(struct work_struct *work)
|
||||
static void iopf_handler(struct work_struct *work)
|
||||
{
|
||||
struct iopf_group *group;
|
||||
struct iommu_domain *domain;
|
||||
struct iopf_fault *iopf, *next;
|
||||
enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
|
||||
|
||||
group = container_of(work, struct iopf_group, work);
|
||||
domain = iommu_get_domain_for_dev_pasid(group->dev,
|
||||
group->last_fault.fault.prm.pasid, 0);
|
||||
if (!domain || !domain->iopf_handler)
|
||||
status = IOMMU_PAGE_RESP_INVALID;
|
||||
|
||||
list_for_each_entry_safe(iopf, next, &group->faults, list) {
|
||||
/*
|
||||
@ -139,7 +88,8 @@ static void iopf_handle_group(struct work_struct *work)
|
||||
* faults in the group if there is an error.
|
||||
*/
|
||||
if (status == IOMMU_PAGE_RESP_SUCCESS)
|
||||
status = iopf_handle_single(iopf);
|
||||
status = domain->iopf_handler(&iopf->fault,
|
||||
domain->fault_data);
|
||||
|
||||
if (!(iopf->fault.prm.flags &
|
||||
IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
|
||||
@ -181,6 +131,13 @@ static void iopf_handle_group(struct work_struct *work)
|
||||
* request completes, outstanding faults will have been dealt with by the time
|
||||
* the PASID is freed.
|
||||
*
|
||||
* Any valid page fault will be eventually routed to an iommu domain and the
|
||||
* page fault handler installed there will get called. The users of this
|
||||
* handling framework should guarantee that the iommu domain could only be
|
||||
* freed after the device has stopped generating page faults (or the iommu
|
||||
* hardware has been set to block the page faults) and the pending page faults
|
||||
* have been flushed.
|
||||
*
|
||||
* Return: 0 on success and <0 on error.
|
||||
*/
|
||||
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
|
||||
@ -235,7 +192,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
|
||||
group->last_fault.fault = *fault;
|
||||
INIT_LIST_HEAD(&group->faults);
|
||||
list_add(&group->last_fault.list, &group->faults);
|
||||
INIT_WORK(&group->work, iopf_handle_group);
|
||||
INIT_WORK(&group->work, iopf_handler);
|
||||
|
||||
/* See if we have partial faults for this group */
|
||||
list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
|
||||
|
@ -1,71 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Helpers for IOMMU drivers implementing SVA
|
||||
*/
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include "iommu-sva-lib.h"
|
||||
|
||||
static DEFINE_MUTEX(iommu_sva_lock);
|
||||
static DECLARE_IOASID_SET(iommu_sva_pasid);
|
||||
|
||||
/**
|
||||
* iommu_sva_alloc_pasid - Allocate a PASID for the mm
|
||||
* @mm: the mm
|
||||
* @min: minimum PASID value (inclusive)
|
||||
* @max: maximum PASID value (inclusive)
|
||||
*
|
||||
* Try to allocate a PASID for this mm, or take a reference to the existing one
|
||||
* provided it fits within the [@min, @max] range. On success the PASID is
|
||||
* available in mm->pasid and will be available for the lifetime of the mm.
|
||||
*
|
||||
* Returns 0 on success and < 0 on error.
|
||||
*/
|
||||
int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
|
||||
{
|
||||
int ret = 0;
|
||||
ioasid_t pasid;
|
||||
|
||||
if (min == INVALID_IOASID || max == INVALID_IOASID ||
|
||||
min == 0 || max < min)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&iommu_sva_lock);
|
||||
/* Is a PASID already associated with this mm? */
|
||||
if (pasid_valid(mm->pasid)) {
|
||||
if (mm->pasid < min || mm->pasid >= max)
|
||||
ret = -EOVERFLOW;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
|
||||
if (!pasid_valid(pasid))
|
||||
ret = -ENOMEM;
|
||||
else
|
||||
mm_pasid_set(mm, pasid);
|
||||
out:
|
||||
mutex_unlock(&iommu_sva_lock);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);
|
||||
|
||||
/* ioasid_find getter() requires a void * argument */
|
||||
static bool __mmget_not_zero(void *mm)
|
||||
{
|
||||
return mmget_not_zero(mm);
|
||||
}
|
||||
|
||||
/**
|
||||
* iommu_sva_find() - Find mm associated to the given PASID
|
||||
* @pasid: Process Address Space ID assigned to the mm
|
||||
*
|
||||
* On success a reference to the mm is taken, and must be released with mmput().
|
||||
*
|
||||
* Returns the mm corresponding to this PASID, or an error if not found.
|
||||
*/
|
||||
struct mm_struct *iommu_sva_find(ioasid_t pasid)
|
||||
{
|
||||
return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_sva_find);
|
240
drivers/iommu/iommu-sva.c
Normal file
240
drivers/iommu/iommu-sva.c
Normal file
@ -0,0 +1,240 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Helpers for IOMMU drivers implementing SVA
|
||||
*/
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/iommu.h>
|
||||
|
||||
#include "iommu-sva.h"
|
||||
|
||||
static DEFINE_MUTEX(iommu_sva_lock);
|
||||
static DECLARE_IOASID_SET(iommu_sva_pasid);
|
||||
|
||||
/**
|
||||
* iommu_sva_alloc_pasid - Allocate a PASID for the mm
|
||||
* @mm: the mm
|
||||
* @min: minimum PASID value (inclusive)
|
||||
* @max: maximum PASID value (inclusive)
|
||||
*
|
||||
* Try to allocate a PASID for this mm, or take a reference to the existing one
|
||||
* provided it fits within the [@min, @max] range. On success the PASID is
|
||||
* available in mm->pasid and will be available for the lifetime of the mm.
|
||||
*
|
||||
* Returns 0 on success and < 0 on error.
|
||||
*/
|
||||
int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
|
||||
{
|
||||
int ret = 0;
|
||||
ioasid_t pasid;
|
||||
|
||||
if (min == INVALID_IOASID || max == INVALID_IOASID ||
|
||||
min == 0 || max < min)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&iommu_sva_lock);
|
||||
/* Is a PASID already associated with this mm? */
|
||||
if (pasid_valid(mm->pasid)) {
|
||||
if (mm->pasid < min || mm->pasid >= max)
|
||||
ret = -EOVERFLOW;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
|
||||
if (!pasid_valid(pasid))
|
||||
ret = -ENOMEM;
|
||||
else
|
||||
mm_pasid_set(mm, pasid);
|
||||
out:
|
||||
mutex_unlock(&iommu_sva_lock);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);
|
||||
|
||||
/* ioasid_find getter() requires a void * argument */
|
||||
static bool __mmget_not_zero(void *mm)
{
	/* Adapter only: hand the untyped pointer on as a struct mm_struct. */
	struct mm_struct *candidate = mm;

	return mmget_not_zero(candidate);
}
|
||||
|
||||
/**
|
||||
* iommu_sva_find() - Find mm associated to the given PASID
|
||||
* @pasid: Process Address Space ID assigned to the mm
|
||||
*
|
||||
* On success a reference to the mm is taken, and must be released with mmput().
|
||||
*
|
||||
* Returns the mm corresponding to this PASID, or an error if not found.
|
||||
*/
|
||||
struct mm_struct *iommu_sva_find(ioasid_t pasid)
{
	struct mm_struct *mm;

	/* Look the PASID up in the SVA set, using __mmget_not_zero as getter. */
	mm = ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);

	return mm;
}
|
||||
EXPORT_SYMBOL_GPL(iommu_sva_find);
|
||||
|
||||
/**
|
||||
* iommu_sva_bind_device() - Bind a process address space to a device
|
||||
* @dev: the device
|
||||
* @mm: the mm to bind, caller must hold a reference to mm_users
|
||||
*
|
||||
* Create a bond between device and address space, allowing the device to
|
||||
* access the mm using the PASID returned by iommu_sva_get_pasid(). If a
|
||||
 * bond already exists between @dev and @mm, an additional internal
|
||||
* reference is taken. Caller must call iommu_sva_unbind_device()
|
||||
* to release each reference.
|
||||
*
|
||||
* iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
|
||||
* initialize the required SVA features.
|
||||
*
|
||||
* On error, returns an ERR_PTR value.
|
||||
*/
|
||||
struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
|
||||
{
|
||||
struct iommu_domain *domain;
|
||||
struct iommu_sva *handle;
|
||||
ioasid_t max_pasids;
|
||||
int ret;
|
||||
|
||||
max_pasids = dev->iommu->max_pasids;
|
||||
if (!max_pasids)
|
||||
return ERR_PTR(-EOPNOTSUPP);
|
||||
|
||||
/* Allocate mm->pasid if necessary. */
|
||||
ret = iommu_sva_alloc_pasid(mm, 1, max_pasids - 1);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
|
||||
if (!handle)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
mutex_lock(&iommu_sva_lock);
|
||||
/* Search for an existing domain. */
|
||||
domain = iommu_get_domain_for_dev_pasid(dev, mm->pasid,
|
||||
IOMMU_DOMAIN_SVA);
|
||||
if (IS_ERR(domain)) {
|
||||
ret = PTR_ERR(domain);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (domain) {
|
||||
domain->users++;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Allocate a new domain and set it on device pasid. */
|
||||
domain = iommu_sva_domain_alloc(dev, mm);
|
||||
if (!domain) {
|
||||
ret = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = iommu_attach_device_pasid(domain, dev, mm->pasid);
|
||||
if (ret)
|
||||
goto out_free_domain;
|
||||
domain->users = 1;
|
||||
out:
|
||||
mutex_unlock(&iommu_sva_lock);
|
||||
handle->dev = dev;
|
||||
handle->domain = domain;
|
||||
|
||||
return handle;
|
||||
|
||||
out_free_domain:
|
||||
iommu_domain_free(domain);
|
||||
out_unlock:
|
||||
mutex_unlock(&iommu_sva_lock);
|
||||
kfree(handle);
|
||||
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
|
||||
|
||||
/**
|
||||
* iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device
|
||||
* @handle: the handle returned by iommu_sva_bind_device()
|
||||
*
|
||||
* Put reference to a bond between device and address space. The device should
|
||||
 * not be issuing any more transactions for this PASID. All outstanding page
|
||||
* requests for this PASID must have been flushed to the IOMMU.
|
||||
*/
|
||||
void iommu_sva_unbind_device(struct iommu_sva *handle)
|
||||
{
|
||||
struct iommu_domain *domain = handle->domain;
|
||||
ioasid_t pasid = domain->mm->pasid;
|
||||
struct device *dev = handle->dev;
|
||||
|
||||
mutex_lock(&iommu_sva_lock);
|
||||
if (--domain->users == 0) {
|
||||
iommu_detach_device_pasid(domain, dev, pasid);
|
||||
iommu_domain_free(domain);
|
||||
}
|
||||
mutex_unlock(&iommu_sva_lock);
|
||||
kfree(handle);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
|
||||
|
||||
u32 iommu_sva_get_pasid(struct iommu_sva *handle)
|
||||
{
|
||||
struct iommu_domain *domain = handle->domain;
|
||||
|
||||
return domain->mm->pasid;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
|
||||
|
||||
/*
|
||||
* I/O page fault handler for SVA
|
||||
*/
|
||||
enum iommu_page_response_code
|
||||
iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
|
||||
{
|
||||
vm_fault_t ret;
|
||||
struct vm_area_struct *vma;
|
||||
struct mm_struct *mm = data;
|
||||
unsigned int access_flags = 0;
|
||||
unsigned int fault_flags = FAULT_FLAG_REMOTE;
|
||||
struct iommu_fault_page_request *prm = &fault->prm;
|
||||
enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
|
||||
|
||||
if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
|
||||
return status;
|
||||
|
||||
if (!mmget_not_zero(mm))
|
||||
return status;
|
||||
|
||||
mmap_read_lock(mm);
|
||||
|
||||
vma = find_extend_vma(mm, prm->addr);
|
||||
if (!vma)
|
||||
/* Unmapped area */
|
||||
goto out_put_mm;
|
||||
|
||||
if (prm->perm & IOMMU_FAULT_PERM_READ)
|
||||
access_flags |= VM_READ;
|
||||
|
||||
if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
|
||||
access_flags |= VM_WRITE;
|
||||
fault_flags |= FAULT_FLAG_WRITE;
|
||||
}
|
||||
|
||||
if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
|
||||
access_flags |= VM_EXEC;
|
||||
fault_flags |= FAULT_FLAG_INSTRUCTION;
|
||||
}
|
||||
|
||||
if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
|
||||
fault_flags |= FAULT_FLAG_USER;
|
||||
|
||||
if (access_flags & ~vma->vm_flags)
|
||||
/* Access fault */
|
||||
goto out_put_mm;
|
||||
|
||||
ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
|
||||
status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
|
||||
IOMMU_PAGE_RESP_SUCCESS;
|
||||
|
||||
out_put_mm:
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
|
||||
return status;
|
||||
}
|
@ -2,8 +2,8 @@
|
||||
/*
|
||||
* SVA library for IOMMU drivers
|
||||
*/
|
||||
#ifndef _IOMMU_SVA_LIB_H
|
||||
#define _IOMMU_SVA_LIB_H
|
||||
#ifndef _IOMMU_SVA_H
|
||||
#define _IOMMU_SVA_H
|
||||
|
||||
#include <linux/ioasid.h>
|
||||
#include <linux/mm_types.h>
|
||||
@ -26,6 +26,8 @@ int iopf_queue_flush_dev(struct device *dev);
|
||||
struct iopf_queue *iopf_queue_alloc(const char *name);
|
||||
void iopf_queue_free(struct iopf_queue *queue);
|
||||
int iopf_queue_discard_partial(struct iopf_queue *queue);
|
||||
enum iommu_page_response_code
|
||||
iommu_sva_handle_iopf(struct iommu_fault *fault, void *data);
|
||||
|
||||
#else /* CONFIG_IOMMU_SVA */
|
||||
static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
|
||||
@ -63,5 +65,11 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
|
||||
{
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
static inline enum iommu_page_response_code
|
||||
iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
|
||||
{
|
||||
return IOMMU_PAGE_RESP_INVALID;
|
||||
}
|
||||
#endif /* CONFIG_IOMMU_SVA */
|
||||
#endif /* _IOMMU_SVA_LIB_H */
|
||||
#endif /* _IOMMU_SVA_H */
|
@ -21,6 +21,7 @@
|
||||
#include <linux/idr.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/pci-ats.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/property.h>
|
||||
@ -28,9 +29,12 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/cc_platform.h>
|
||||
#include <trace/events/iommu.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include "dma-iommu.h"
|
||||
|
||||
#include "iommu-sva.h"
|
||||
|
||||
static struct kset *iommu_group_kset;
|
||||
static DEFINE_IDA(iommu_group_ida);
|
||||
|
||||
@ -42,6 +46,7 @@ struct iommu_group {
|
||||
struct kobject kobj;
|
||||
struct kobject *devices_kobj;
|
||||
struct list_head devices;
|
||||
struct xarray pasid_array;
|
||||
struct mutex mutex;
|
||||
void *iommu_data;
|
||||
void (*iommu_data_release)(void *iommu_data);
|
||||
@ -278,6 +283,24 @@ static void dev_iommu_free(struct device *dev)
|
||||
kfree(param);
|
||||
}
|
||||
|
||||
static u32 dev_iommu_get_max_pasids(struct device *dev)
|
||||
{
|
||||
u32 max_pasids = 0, bits = 0;
|
||||
int ret;
|
||||
|
||||
if (dev_is_pci(dev)) {
|
||||
ret = pci_max_pasids(to_pci_dev(dev));
|
||||
if (ret > 0)
|
||||
max_pasids = ret;
|
||||
} else {
|
||||
ret = device_property_read_u32(dev, "pasid-num-bits", &bits);
|
||||
if (!ret)
|
||||
max_pasids = 1UL << bits;
|
||||
}
|
||||
|
||||
return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
|
||||
}
|
||||
|
||||
static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
|
||||
{
|
||||
const struct iommu_ops *ops = dev->bus->iommu_ops;
|
||||
@ -303,6 +326,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
|
||||
}
|
||||
|
||||
dev->iommu->iommu_dev = iommu_dev;
|
||||
dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
|
||||
|
||||
group = iommu_group_get_for_dev(dev);
|
||||
if (IS_ERR(group)) {
|
||||
@ -703,6 +727,7 @@ struct iommu_group *iommu_group_alloc(void)
|
||||
mutex_init(&group->mutex);
|
||||
INIT_LIST_HEAD(&group->devices);
|
||||
INIT_LIST_HEAD(&group->entry);
|
||||
xa_init(&group->pasid_array);
|
||||
|
||||
ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
|
||||
if (ret < 0) {
|
||||
@ -1912,6 +1937,8 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc);
|
||||
|
||||
void iommu_domain_free(struct iommu_domain *domain)
|
||||
{
|
||||
if (domain->type == IOMMU_DOMAIN_SVA)
|
||||
mmdrop(domain->mm);
|
||||
iommu_put_dma_cookie(domain);
|
||||
domain->ops->free(domain);
|
||||
}
|
||||
@ -1949,6 +1976,18 @@ static int __iommu_attach_device(struct iommu_domain *domain,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* iommu_attach_device - Attach an IOMMU domain to a device
|
||||
* @domain: IOMMU domain to attach
|
||||
* @dev: Device that will be attached
|
||||
*
|
||||
* Returns 0 on success and error code on failure
|
||||
*
|
||||
* Note that EINVAL can be treated as a soft failure, indicating
|
||||
* that certain configuration of the domain is incompatible with
|
||||
* the device. In this case attaching a different domain to the
|
||||
* device may succeed.
|
||||
*/
|
||||
int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
|
||||
{
|
||||
struct iommu_group *group;
|
||||
@ -2075,6 +2114,18 @@ static int __iommu_attach_group(struct iommu_domain *domain,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* iommu_attach_group - Attach an IOMMU domain to an IOMMU group
|
||||
* @domain: IOMMU domain to attach
|
||||
* @group: IOMMU group that will be attached
|
||||
*
|
||||
* Returns 0 on success and error code on failure
|
||||
*
|
||||
* Note that EINVAL can be treated as a soft failure, indicating
|
||||
* that certain configuration of the domain is incompatible with
|
||||
* the group. In this case attaching a different domain to the
|
||||
* group may succeed.
|
||||
*/
|
||||
int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
|
||||
{
|
||||
int ret;
|
||||
@ -2726,98 +2777,6 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
|
||||
|
||||
/**
|
||||
* iommu_sva_bind_device() - Bind a process address space to a device
|
||||
* @dev: the device
|
||||
* @mm: the mm to bind, caller must hold a reference to it
|
||||
* @drvdata: opaque data pointer to pass to bind callback
|
||||
*
|
||||
* Create a bond between device and address space, allowing the device to access
|
||||
* the mm using the returned PASID. If a bond already exists between @device and
|
||||
* @mm, it is returned and an additional reference is taken. Caller must call
|
||||
* iommu_sva_unbind_device() to release each reference.
|
||||
*
|
||||
* iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
|
||||
* initialize the required SVA features.
|
||||
*
|
||||
* On error, returns an ERR_PTR value.
|
||||
*/
|
||||
struct iommu_sva *
|
||||
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
|
||||
{
|
||||
struct iommu_group *group;
|
||||
struct iommu_sva *handle = ERR_PTR(-EINVAL);
|
||||
const struct iommu_ops *ops = dev_iommu_ops(dev);
|
||||
|
||||
if (!ops->sva_bind)
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
group = iommu_group_get(dev);
|
||||
if (!group)
|
||||
return ERR_PTR(-ENODEV);
|
||||
|
||||
/* Ensure device count and domain don't change while we're binding */
|
||||
mutex_lock(&group->mutex);
|
||||
|
||||
/*
|
||||
* To keep things simple, SVA currently doesn't support IOMMU groups
|
||||
* with more than one device. Existing SVA-capable systems are not
|
||||
* affected by the problems that required IOMMU groups (lack of ACS
|
||||
* isolation, device ID aliasing and other hardware issues).
|
||||
*/
|
||||
if (iommu_group_device_count(group) != 1)
|
||||
goto out_unlock;
|
||||
|
||||
handle = ops->sva_bind(dev, mm, drvdata);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&group->mutex);
|
||||
iommu_group_put(group);
|
||||
|
||||
return handle;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
|
||||
|
||||
/**
|
||||
* iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device
|
||||
* @handle: the handle returned by iommu_sva_bind_device()
|
||||
*
|
||||
* Put reference to a bond between device and address space. The device should
|
||||
* not be issuing any more transaction for this PASID. All outstanding page
|
||||
* requests for this PASID must have been flushed to the IOMMU.
|
||||
*/
|
||||
void iommu_sva_unbind_device(struct iommu_sva *handle)
|
||||
{
|
||||
struct iommu_group *group;
|
||||
struct device *dev = handle->dev;
|
||||
const struct iommu_ops *ops = dev_iommu_ops(dev);
|
||||
|
||||
if (!ops->sva_unbind)
|
||||
return;
|
||||
|
||||
group = iommu_group_get(dev);
|
||||
if (!group)
|
||||
return;
|
||||
|
||||
mutex_lock(&group->mutex);
|
||||
ops->sva_unbind(handle);
|
||||
mutex_unlock(&group->mutex);
|
||||
|
||||
iommu_group_put(group);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
|
||||
|
||||
u32 iommu_sva_get_pasid(struct iommu_sva *handle)
|
||||
{
|
||||
const struct iommu_ops *ops = dev_iommu_ops(handle->dev);
|
||||
|
||||
if (!ops->sva_get_pasid)
|
||||
return IOMMU_PASID_INVALID;
|
||||
|
||||
return ops->sva_get_pasid(handle);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
|
||||
|
||||
/*
|
||||
* Changes the default domain of an iommu group that has *only* one device
|
||||
*
|
||||
@ -3087,7 +3046,8 @@ int iommu_device_use_default_domain(struct device *dev)
|
||||
|
||||
mutex_lock(&group->mutex);
|
||||
if (group->owner_cnt) {
|
||||
if (group->owner || !iommu_is_default_domain(group)) {
|
||||
if (group->owner || !iommu_is_default_domain(group) ||
|
||||
!xa_empty(&group->pasid_array)) {
|
||||
ret = -EBUSY;
|
||||
goto unlock_out;
|
||||
}
|
||||
@ -3118,7 +3078,7 @@ void iommu_device_unuse_default_domain(struct device *dev)
|
||||
return;
|
||||
|
||||
mutex_lock(&group->mutex);
|
||||
if (!WARN_ON(!group->owner_cnt))
|
||||
if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array)))
|
||||
group->owner_cnt--;
|
||||
|
||||
mutex_unlock(&group->mutex);
|
||||
@ -3148,40 +3108,49 @@ static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if ((group->domain && group->domain != group->default_domain) ||
|
||||
!xa_empty(&group->pasid_array))
|
||||
return -EBUSY;
|
||||
|
||||
ret = __iommu_group_alloc_blocking_domain(group);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = __iommu_group_set_domain(group, group->blocking_domain);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
group->owner = owner;
|
||||
group->owner_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* iommu_group_claim_dma_owner() - Set DMA ownership of a group
|
||||
* @group: The group.
|
||||
* @owner: Caller specified pointer. Used for exclusive ownership.
|
||||
*
|
||||
* This is to support backward compatibility for vfio which manages
|
||||
* the dma ownership in iommu_group level. New invocations on this
|
||||
* interface should be prohibited.
|
||||
* This is to support backward compatibility for vfio which manages the dma
|
||||
* ownership in iommu_group level. New invocations on this interface should be
|
||||
* prohibited. Only a single owner may exist for a group.
|
||||
*/
|
||||
int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (WARN_ON(!owner))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&group->mutex);
|
||||
if (group->owner_cnt) {
|
||||
ret = -EPERM;
|
||||
goto unlock_out;
|
||||
} else {
|
||||
if (group->domain && group->domain != group->default_domain) {
|
||||
ret = -EBUSY;
|
||||
goto unlock_out;
|
||||
}
|
||||
|
||||
ret = __iommu_group_alloc_blocking_domain(group);
|
||||
if (ret)
|
||||
goto unlock_out;
|
||||
|
||||
ret = __iommu_group_set_domain(group, group->blocking_domain);
|
||||
if (ret)
|
||||
goto unlock_out;
|
||||
group->owner = owner;
|
||||
}
|
||||
|
||||
group->owner_cnt++;
|
||||
ret = __iommu_take_dma_ownership(group, owner);
|
||||
unlock_out:
|
||||
mutex_unlock(&group->mutex);
|
||||
|
||||
@ -3190,29 +3159,91 @@ unlock_out:
|
||||
EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
|
||||
|
||||
/**
|
||||
* iommu_group_release_dma_owner() - Release DMA ownership of a group
|
||||
* @group: The group.
|
||||
* iommu_device_claim_dma_owner() - Set DMA ownership of a device
|
||||
* @dev: The device.
|
||||
* @owner: Caller specified pointer. Used for exclusive ownership.
|
||||
*
|
||||
* Release the DMA ownership claimed by iommu_group_claim_dma_owner().
|
||||
* Claim the DMA ownership of a device. Multiple devices in the same group may
|
||||
* concurrently claim ownership if they present the same owner value. Returns 0
|
||||
* on success and error code on failure
|
||||
*/
|
||||
void iommu_group_release_dma_owner(struct iommu_group *group)
|
||||
int iommu_device_claim_dma_owner(struct device *dev, void *owner)
|
||||
{
|
||||
struct iommu_group *group = iommu_group_get(dev);
|
||||
int ret = 0;
|
||||
|
||||
if (!group)
|
||||
return -ENODEV;
|
||||
if (WARN_ON(!owner))
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&group->mutex);
|
||||
if (group->owner_cnt) {
|
||||
if (group->owner != owner) {
|
||||
ret = -EPERM;
|
||||
goto unlock_out;
|
||||
}
|
||||
group->owner_cnt++;
|
||||
goto unlock_out;
|
||||
}
|
||||
|
||||
ret = __iommu_take_dma_ownership(group, owner);
|
||||
unlock_out:
|
||||
mutex_unlock(&group->mutex);
|
||||
iommu_group_put(group);
|
||||
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
|
||||
|
||||
static void __iommu_release_dma_ownership(struct iommu_group *group)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&group->mutex);
|
||||
if (WARN_ON(!group->owner_cnt || !group->owner))
|
||||
goto unlock_out;
|
||||
if (WARN_ON(!group->owner_cnt || !group->owner ||
|
||||
!xa_empty(&group->pasid_array)))
|
||||
return;
|
||||
|
||||
group->owner_cnt = 0;
|
||||
group->owner = NULL;
|
||||
ret = __iommu_group_set_domain(group, group->default_domain);
|
||||
WARN(ret, "iommu driver failed to attach the default domain");
|
||||
}
|
||||
|
||||
unlock_out:
|
||||
/**
|
||||
* iommu_group_release_dma_owner() - Release DMA ownership of a group
|
||||
* @group: The group.
|
||||
*
|
||||
* Release the DMA ownership claimed by iommu_group_claim_dma_owner().
|
||||
*/
|
||||
void iommu_group_release_dma_owner(struct iommu_group *group)
|
||||
{
|
||||
mutex_lock(&group->mutex);
|
||||
__iommu_release_dma_ownership(group);
|
||||
mutex_unlock(&group->mutex);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
|
||||
|
||||
/**
|
||||
* iommu_device_release_dma_owner() - Release DMA ownership of a device
|
||||
* @dev: The device.
|
||||
*
|
||||
* Release the DMA ownership claimed by iommu_device_claim_dma_owner().
|
||||
*/
|
||||
void iommu_device_release_dma_owner(struct device *dev)
|
||||
{
|
||||
struct iommu_group *group = iommu_group_get(dev);
|
||||
|
||||
mutex_lock(&group->mutex);
|
||||
if (group->owner_cnt > 1)
|
||||
group->owner_cnt--;
|
||||
else
|
||||
__iommu_release_dma_ownership(group);
|
||||
mutex_unlock(&group->mutex);
|
||||
iommu_group_put(group);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);
|
||||
|
||||
/**
|
||||
* iommu_group_dma_owner_claimed() - Query group dma ownership status
|
||||
* @group: The group.
|
||||
@ -3231,3 +3262,150 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group)
|
||||
return user;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
|
||||
|
||||
/*
 * Install @domain on @pasid for every device in @group, stopping at the first
 * device whose driver callback fails. Devices handled before the failure are
 * not rolled back here. Returns 0 or the first error.
 */
static int __iommu_set_group_pasid(struct iommu_domain *domain,
				   struct iommu_group *group, ioasid_t pasid)
{
	struct group_device *gdev;
	int rc;

	list_for_each_entry(gdev, &group->devices, list) {
		rc = domain->ops->set_dev_pasid(domain, gdev->dev, pasid);
		if (rc)
			return rc;
	}

	return 0;
}
|
||||
|
||||
static void __iommu_remove_group_pasid(struct iommu_group *group,
|
||||
ioasid_t pasid)
|
||||
{
|
||||
struct group_device *device;
|
||||
const struct iommu_ops *ops;
|
||||
|
||||
list_for_each_entry(device, &group->devices, list) {
|
||||
ops = dev_iommu_ops(device->dev);
|
||||
ops->remove_dev_pasid(device->dev, pasid);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* iommu_attach_device_pasid() - Attach a domain to pasid of device
|
||||
* @domain: the iommu domain.
|
||||
* @dev: the attached device.
|
||||
* @pasid: the pasid of the device.
|
||||
*
|
||||
* Return: 0 on success, or an error.
|
||||
*/
|
||||
int iommu_attach_device_pasid(struct iommu_domain *domain,
			      struct device *dev, ioasid_t pasid)
{
	struct iommu_group *group;
	void *old;
	int rc;

	/* The domain's driver must implement PASID attachment */
	if (!domain->ops->set_dev_pasid)
		return -EOPNOTSUPP;

	group = iommu_group_get(dev);
	if (!group)
		return -ENODEV;

	mutex_lock(&group->mutex);

	/* Reserve the PASID slot; a non-NULL result means we did not get it */
	old = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
	if (old) {
		/* Either an xarray error, or the slot was already occupied */
		rc = xa_err(old);
		if (!rc)
			rc = -EBUSY;
	} else {
		rc = __iommu_set_group_pasid(domain, group, pasid);
		if (rc) {
			/* Undo on all devices of the group and free the slot */
			__iommu_remove_group_pasid(group, pasid);
			xa_erase(&group->pasid_array, pasid);
		}
	}

	mutex_unlock(&group->mutex);
	iommu_group_put(group);

	return rc;
}
|
||||
EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
|
||||
|
||||
/*
|
||||
* iommu_detach_device_pasid() - Detach the domain from pasid of device
|
||||
* @domain: the iommu domain.
|
||||
* @dev: the attached device.
|
||||
* @pasid: the pasid of the device.
|
||||
*
|
||||
* The @domain must have been attached to @pasid of the @dev with
|
||||
* iommu_attach_device_pasid().
|
||||
*/
|
||||
void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
|
||||
ioasid_t pasid)
|
||||
{
|
||||
struct iommu_group *group = iommu_group_get(dev);
|
||||
|
||||
mutex_lock(&group->mutex);
|
||||
__iommu_remove_group_pasid(group, pasid);
|
||||
WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
|
||||
mutex_unlock(&group->mutex);
|
||||
|
||||
iommu_group_put(group);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
|
||||
|
||||
/*
|
||||
* iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev
|
||||
* @dev: the queried device
|
||||
* @pasid: the pasid of the device
|
||||
* @type: matched domain type, 0 for any match
|
||||
*
|
||||
* This is a variant of iommu_get_domain_for_dev(). It returns the existing
|
||||
* domain attached to pasid of a device. Callers must hold a lock around this
|
||||
* function, and both iommu_attach/detach_device_pasid() whenever a domain of
|
||||
* type is being manipulated. This API does not internally resolve races with
|
||||
* attach/detach.
|
||||
*
|
||||
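* Return: attached domain on success, NULL if the device has no group or no
* domain is attached to @pasid, or ERR_PTR(-EBUSY) if @type is non-zero and
* the attached domain is of a different type.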
|
||||
*/
|
||||
struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
|
||||
ioasid_t pasid,
|
||||
unsigned int type)
|
||||
{
|
||||
struct iommu_domain *domain;
|
||||
struct iommu_group *group;
|
||||
|
||||
group = iommu_group_get(dev);
|
||||
if (!group)
|
||||
return NULL;
|
||||
|
||||
xa_lock(&group->pasid_array);
|
||||
domain = xa_load(&group->pasid_array, pasid);
|
||||
if (type && domain && domain->type != type)
|
||||
domain = ERR_PTR(-EBUSY);
|
||||
xa_unlock(&group->pasid_array);
|
||||
iommu_group_put(group);
|
||||
|
||||
return domain;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
|
||||
|
||||
struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
|
||||
struct mm_struct *mm)
|
||||
{
|
||||
const struct iommu_ops *ops = dev_iommu_ops(dev);
|
||||
struct iommu_domain *domain;
|
||||
|
||||
domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
|
||||
if (!domain)
|
||||
return NULL;
|
||||
|
||||
domain->type = IOMMU_DOMAIN_SVA;
|
||||
mmgrab(mm);
|
||||
domain->mm = mm;
|
||||
domain->iopf_handler = iommu_sva_handle_iopf;
|
||||
domain->fault_data = mm;
|
||||
|
||||
return domain;
|
||||
}
|
||||
|
44
drivers/iommu/iommufd/Kconfig
Normal file
44
drivers/iommu/iommufd/Kconfig
Normal file
@ -0,0 +1,44 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config IOMMUFD
|
||||
tristate "IOMMU Userspace API"
|
||||
select INTERVAL_TREE
|
||||
select INTERVAL_TREE_SPAN_ITER
|
||||
select IOMMU_API
|
||||
default n
|
||||
help
|
||||
Provides /dev/iommu, the user API to control the IOMMU subsystem as
|
||||
it relates to managing IO page tables that point at user space memory.
|
||||
|
||||
If you don't know what to do here, say N.
|
||||
|
||||
if IOMMUFD
|
||||
config IOMMUFD_VFIO_CONTAINER
|
||||
bool "IOMMUFD provides the VFIO container /dev/vfio/vfio"
|
||||
depends on VFIO && !VFIO_CONTAINER
|
||||
default VFIO && !VFIO_CONTAINER
|
||||
help
|
||||
IOMMUFD will provide /dev/vfio/vfio instead of VFIO. This relies on
|
||||
IOMMUFD providing compatibility emulation to give the same ioctls.
|
||||
It provides an option to build a kernel with legacy VFIO components
|
||||
removed.
|
||||
|
||||
IOMMUFD VFIO container emulation is known to lack certain features
|
||||
of the native VFIO container, such as no-IOMMU support, peer-to-peer
|
||||
DMA mapping, PPC IOMMU support, as well as other potentially
|
||||
undiscovered gaps. This option is currently intended for the
|
||||
purpose of testing IOMMUFD with unmodified userspace supporting VFIO
|
||||
and making use of the Type1 VFIO IOMMU backend. General purpose
|
||||
enabling of this option is currently discouraged.
|
||||
|
||||
Unless testing IOMMUFD, say N here.
|
||||
|
||||
config IOMMUFD_TEST
|
||||
bool "IOMMU Userspace API Test support"
|
||||
depends on DEBUG_KERNEL
|
||||
depends on FAULT_INJECTION
|
||||
depends on RUNTIME_TESTING_MENU
|
||||
default n
|
||||
help
|
||||
This is dangerous, do not enable unless running
|
||||
tools/testing/selftests/iommu
|
||||
endif
|
13
drivers/iommu/iommufd/Makefile
Normal file
13
drivers/iommu/iommufd/Makefile
Normal file
@ -0,0 +1,13 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
iommufd-y := \
|
||||
device.o \
|
||||
hw_pagetable.o \
|
||||
io_pagetable.o \
|
||||
ioas.o \
|
||||
main.o \
|
||||
pages.o \
|
||||
vfio_compat.o
|
||||
|
||||
iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
|
||||
|
||||
obj-$(CONFIG_IOMMUFD) += iommufd.o
|
772
drivers/iommu/iommufd/device.c
Normal file
772
drivers/iommu/iommufd/device.c
Normal file
@ -0,0 +1,772 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#include <linux/iommufd.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/irqdomain.h>
|
||||
|
||||
#include "io_pagetable.h"
|
||||
#include "iommufd_private.h"
|
||||
|
||||
static bool allow_unsafe_interrupts;
|
||||
module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
|
||||
MODULE_PARM_DESC(
|
||||
allow_unsafe_interrupts,
|
||||
"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
|
||||
"the MSI interrupt window. Enabling this is a security weakness.");
|
||||
|
||||
/*
|
||||
* An iommufd_device object represents the binding relationship between a
|
||||
* consuming driver and the iommufd. These objects are created/destroyed by
|
||||
* external drivers, not by userspace.
|
||||
*/
|
||||
struct iommufd_device {
|
||||
/* Embedded base object; container_of() on it recovers the device */
struct iommufd_object obj;
|
||||
/* Context the device is bound to; a reference is held until destroy */
struct iommufd_ctx *ictx;
|
||||
/* HW pagetable currently attached, NULL again after detach */
struct iommufd_hw_pagetable *hwpt;
|
||||
/* Head at iommufd_hw_pagetable::devices */
|
||||
struct list_head devices_item;
|
||||
/* always the physical device */
|
||||
struct device *dev;
|
||||
/* Group of @dev; the reference taken at bind is dropped at destroy */
struct iommu_group *group;
|
||||
/* Set at bind from IOMMU_CAP_ENFORCE_CACHE_COHERENCY of @dev */
bool enforce_cache_coherency;
|
||||
};
|
||||
|
||||
void iommufd_device_destroy(struct iommufd_object *obj)
|
||||
{
|
||||
struct iommufd_device *idev =
|
||||
container_of(obj, struct iommufd_device, obj);
|
||||
|
||||
iommu_device_release_dma_owner(idev->dev);
|
||||
iommu_group_put(idev->group);
|
||||
iommufd_ctx_put(idev->ictx);
|
||||
}
|
||||
|
||||
/**
|
||||
* iommufd_device_bind - Bind a physical device to an iommu fd
|
||||
* @ictx: iommufd file descriptor
|
||||
* @dev: Pointer to a physical device struct
|
||||
* @id: Output ID number to return to userspace for this device
|
||||
*
|
||||
* A successful bind establishes an ownership over the device and returns
|
||||
* struct iommufd_device pointer, otherwise returns error pointer.
|
||||
*
|
||||
* A driver using this API must set driver_managed_dma and must not touch
|
||||
* the device until this routine succeeds and establishes ownership.
|
||||
*
|
||||
* Binding a PCI device places the entire RID under iommufd control.
|
||||
*
|
||||
* The caller must undo this with iommufd_device_unbind()
|
||||
*/
|
||||
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
					   struct device *dev, u32 *id)
{
	struct iommufd_device *idev;
	struct iommu_group *grp;
	int err;

	/*
	 * iommufd always maps with IOMMU_CACHE and gives userspace no way to
	 * restore cache coherency, so devices without coherent DMA are
	 * refused.
	 */
	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
		return ERR_PTR(-EINVAL);

	grp = iommu_group_get(dev);
	if (!grp)
		return ERR_PTR(-ENODEV);

	err = iommu_device_claim_dma_owner(dev, ictx);
	if (err)
		goto err_put_group;

	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
	if (IS_ERR(idev)) {
		err = PTR_ERR(idev);
		goto err_release_owner;
	}

	iommufd_ctx_get(ictx);
	idev->ictx = ictx;
	idev->dev = dev;
	/* The group reference taken above now belongs to the iommufd_device */
	idev->group = grp;
	idev->enforce_cache_coherency =
		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
	/* The calling driver counts as a user until iommufd_device_unbind() */
	refcount_inc(&idev->obj.users);

	/*
	 * From here on a failing caller has to go through
	 * iommufd_device_unbind(), which is safe because of the user
	 * reference held above. That reference also keeps the device a leaf in
	 * the object graph: no other object can take a reference on it.
	 */
	iommufd_object_finalize(ictx, &idev->obj);
	*id = idev->obj.id;
	return idev;

err_release_owner:
	iommu_device_release_dma_owner(dev);
err_put_group:
	iommu_group_put(grp);
	return ERR_PTR(err);
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
|
||||
|
||||
/**
|
||||
* iommufd_device_unbind - Undo iommufd_device_bind()
|
||||
* @idev: Device returned by iommufd_device_bind()
|
||||
*
|
||||
* Release the device from iommufd control. The DMA ownership will return back
|
||||
* to unowned with DMA controlled by the DMA API. This invalidates the
|
||||
* iommufd_device pointer, other APIs that consume it must not be called
|
||||
* concurrently.
|
||||
*/
|
||||
void iommufd_device_unbind(struct iommufd_device *idev)
|
||||
{
|
||||
bool was_destroyed;
|
||||
|
||||
was_destroyed = iommufd_object_destroy_user(idev->ictx, &idev->obj);
|
||||
WARN_ON(!was_destroyed);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
|
||||
|
||||
static int iommufd_device_setup_msi(struct iommufd_device *idev,
|
||||
struct iommufd_hw_pagetable *hwpt,
|
||||
phys_addr_t sw_msi_start)
|
||||
{
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to
|
||||
* call iommu_get_msi_cookie() on its behalf. This is necessary to setup
|
||||
* the MSI window so iommu_dma_prepare_msi() can install pages into our
|
||||
* domain after request_irq(). If it is not done interrupts will not
|
||||
* work on this domain.
|
||||
*
|
||||
* FIXME: This is conceptually broken for iommufd since we want to allow
|
||||
* userspace to change the domains, eg switch from an identity IOAS to a
|
||||
* DMA IOAS. There is currently no way to create a MSI window that
|
||||
* matches what the IRQ layer actually expects in a newly created
|
||||
* domain.
|
||||
*/
|
||||
if (sw_msi_start != PHYS_ADDR_MAX && !hwpt->msi_cookie) {
|
||||
rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/*
|
||||
* iommu_get_msi_cookie() can only be called once per domain,
|
||||
* it returns -EBUSY on later calls.
|
||||
*/
|
||||
hwpt->msi_cookie = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* For historical compat with VFIO the insecure interrupt path is
|
||||
* allowed if the module parameter is set. Insecure means that a MemWr
|
||||
* operation from the device (eg a simple DMA) cannot trigger an
|
||||
* interrupt outside this iommufd context.
|
||||
*/
|
||||
if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) &&
|
||||
!irq_domain_check_msi_remap()) {
|
||||
if (!allow_unsafe_interrupts)
|
||||
return -EPERM;
|
||||
|
||||
dev_warn(
|
||||
idev->dev,
|
||||
"MSI interrupts are not secure, they cannot be isolated by the platform. "
|
||||
"Check that platform features like interrupt remapping are enabled. "
|
||||
"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt,
|
||||
struct iommu_group *group)
|
||||
{
|
||||
struct iommufd_device *cur_dev;
|
||||
|
||||
list_for_each_entry(cur_dev, &hwpt->devices, devices_item)
|
||||
if (cur_dev->group == group)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static int iommufd_device_do_attach(struct iommufd_device *idev,
|
||||
struct iommufd_hw_pagetable *hwpt)
|
||||
{
|
||||
phys_addr_t sw_msi_start = PHYS_ADDR_MAX;
|
||||
int rc;
|
||||
|
||||
mutex_lock(&hwpt->devices_lock);
|
||||
|
||||
/*
|
||||
* Try to upgrade the domain we have, it is an iommu driver bug to
|
||||
* report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail
|
||||
* enforce_cache_coherency when there are no devices attached to the
|
||||
* domain.
|
||||
*/
|
||||
if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) {
|
||||
if (hwpt->domain->ops->enforce_cache_coherency)
|
||||
hwpt->enforce_cache_coherency =
|
||||
hwpt->domain->ops->enforce_cache_coherency(
|
||||
hwpt->domain);
|
||||
if (!hwpt->enforce_cache_coherency) {
|
||||
WARN_ON(list_empty(&hwpt->devices));
|
||||
rc = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev,
|
||||
idev->group, &sw_msi_start);
|
||||
if (rc)
|
||||
goto out_unlock;
|
||||
|
||||
rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start);
|
||||
if (rc)
|
||||
goto out_iova;
|
||||
|
||||
/*
|
||||
* FIXME: Hack around missing a device-centric iommu api, only attach to
|
||||
* the group once for the first device that is in the group.
|
||||
*/
|
||||
if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
|
||||
rc = iommu_attach_group(hwpt->domain, idev->group);
|
||||
if (rc)
|
||||
goto out_iova;
|
||||
|
||||
if (list_empty(&hwpt->devices)) {
|
||||
rc = iopt_table_add_domain(&hwpt->ioas->iopt,
|
||||
hwpt->domain);
|
||||
if (rc)
|
||||
goto out_detach;
|
||||
}
|
||||
}
|
||||
|
||||
idev->hwpt = hwpt;
|
||||
refcount_inc(&hwpt->obj.users);
|
||||
list_add(&idev->devices_item, &hwpt->devices);
|
||||
mutex_unlock(&hwpt->devices_lock);
|
||||
return 0;
|
||||
|
||||
out_detach:
|
||||
iommu_detach_group(hwpt->domain, idev->group);
|
||||
out_iova:
|
||||
iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
|
||||
out_unlock:
|
||||
mutex_unlock(&hwpt->devices_lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* When automatically managing the domains we search for a compatible domain in
|
||||
* the iopt and if one is found use it, otherwise create a new domain.
|
||||
* Automatic domain selection will never pick a manually created domain.
|
||||
*/
|
||||
static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
|
||||
struct iommufd_ioas *ioas)
|
||||
{
|
||||
struct iommufd_hw_pagetable *hwpt;
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* There is no differentiation when domains are allocated, so any domain
|
||||
* that is willing to attach to the device is interchangeable with any
|
||||
* other.
|
||||
*/
|
||||
mutex_lock(&ioas->mutex);
|
||||
list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) {
|
||||
if (!hwpt->auto_domain)
|
||||
continue;
|
||||
|
||||
rc = iommufd_device_do_attach(idev, hwpt);
|
||||
|
||||
/*
|
||||
* -EINVAL means the domain is incompatible with the device.
|
||||
* Other error codes should propagate to userspace as failure.
|
||||
* Success means the domain is attached.
|
||||
*/
|
||||
if (rc == -EINVAL)
|
||||
continue;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev->dev);
|
||||
if (IS_ERR(hwpt)) {
|
||||
rc = PTR_ERR(hwpt);
|
||||
goto out_unlock;
|
||||
}
|
||||
hwpt->auto_domain = true;
|
||||
|
||||
rc = iommufd_device_do_attach(idev, hwpt);
|
||||
if (rc)
|
||||
goto out_abort;
|
||||
list_add_tail(&hwpt->hwpt_item, &ioas->hwpt_list);
|
||||
|
||||
mutex_unlock(&ioas->mutex);
|
||||
iommufd_object_finalize(idev->ictx, &hwpt->obj);
|
||||
return 0;
|
||||
|
||||
out_abort:
|
||||
iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
|
||||
out_unlock:
|
||||
mutex_unlock(&ioas->mutex);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* iommufd_device_attach - Connect a device to an iommu_domain
|
||||
* @idev: device to attach
|
||||
* @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
|
||||
* Output the IOMMUFD_OBJ_HW_PAGETABLE ID
|
||||
*
|
||||
* This connects the device to an iommu_domain, either automatically or manually
|
||||
* selected. Once this completes the device could do DMA.
|
||||
*
|
||||
* The caller should return the resulting pt_id back to userspace.
|
||||
* This function is undone by calling iommufd_device_detach().
|
||||
*/
|
||||
int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
{
	struct iommufd_object *pt_obj;
	int rc;

	/* Holds a reference on the object until out_put_pt_obj */
	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
	if (IS_ERR(pt_obj))
		return PTR_ERR(pt_obj);

	switch (pt_obj->type) {
	case IOMMUFD_OBJ_HW_PAGETABLE: {
		struct iommufd_hw_pagetable *hwpt =
			container_of(pt_obj, struct iommufd_hw_pagetable, obj);

		/*
		 * A hwpt is linked into ioas->hwpt_list when it is created by
		 * iommufd_device_auto_get_domain() and stays there until its
		 * last device detaches. It must not be added again here:
		 * list_add_tail() on an already linked entry corrupts the
		 * list.
		 */
		rc = iommufd_device_do_attach(idev, hwpt);
		if (rc)
			goto out_put_pt_obj;
		break;
	}
	case IOMMUFD_OBJ_IOAS: {
		struct iommufd_ioas *ioas =
			container_of(pt_obj, struct iommufd_ioas, obj);

		/* Pick a compatible automatic domain or create a new one */
		rc = iommufd_device_auto_get_domain(idev, ioas);
		if (rc)
			goto out_put_pt_obj;
		break;
	}
	default:
		rc = -EINVAL;
		goto out_put_pt_obj;
	}

	/* The attachment holds a user reference until iommufd_device_detach() */
	refcount_inc(&idev->obj.users);
	/* Report the hw pagetable that was actually attached */
	*pt_id = idev->hwpt->obj.id;
	rc = 0;

out_put_pt_obj:
	iommufd_put_object(pt_obj);
	return rc;
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
|
||||
|
||||
/**
|
||||
* iommufd_device_detach - Disconnect a device from an iommu_domain
|
||||
* @idev: device to detach
|
||||
*
|
||||
* Undo iommufd_device_attach(). This disconnects the idev from the previously
|
||||
* attached pt_id. The device returns back to a blocked DMA translation.
|
||||
*/
|
||||
void iommufd_device_detach(struct iommufd_device *idev)
|
||||
{
|
||||
struct iommufd_hw_pagetable *hwpt = idev->hwpt;
|
||||
|
||||
mutex_lock(&hwpt->ioas->mutex);
|
||||
mutex_lock(&hwpt->devices_lock);
|
||||
list_del(&idev->devices_item);
|
||||
if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
|
||||
if (list_empty(&hwpt->devices)) {
|
||||
iopt_table_remove_domain(&hwpt->ioas->iopt,
|
||||
hwpt->domain);
|
||||
list_del(&hwpt->hwpt_item);
|
||||
}
|
||||
iommu_detach_group(hwpt->domain, idev->group);
|
||||
}
|
||||
iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
|
||||
mutex_unlock(&hwpt->devices_lock);
|
||||
mutex_unlock(&hwpt->ioas->mutex);
|
||||
|
||||
if (hwpt->auto_domain)
|
||||
iommufd_object_destroy_user(idev->ictx, &hwpt->obj);
|
||||
else
|
||||
refcount_dec(&hwpt->obj.users);
|
||||
|
||||
idev->hwpt = NULL;
|
||||
|
||||
refcount_dec(&idev->obj.users);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);
|
||||
|
||||
void iommufd_access_destroy_object(struct iommufd_object *obj)
|
||||
{
|
||||
struct iommufd_access *access =
|
||||
container_of(obj, struct iommufd_access, obj);
|
||||
|
||||
iopt_remove_access(&access->ioas->iopt, access);
|
||||
iommufd_ctx_put(access->ictx);
|
||||
refcount_dec(&access->ioas->obj.users);
|
||||
}
|
||||
|
||||
/**
|
||||
* iommufd_access_create - Create an iommufd_access
|
||||
* @ictx: iommufd file descriptor
|
||||
* @ioas_id: ID for a IOMMUFD_OBJ_IOAS
|
||||
* @ops: Driver's ops to associate with the access
|
||||
* @data: Opaque data to pass into ops functions
|
||||
*
|
||||
* An iommufd_access allows a driver to read/write to the IOAS without using
|
||||
* DMA. The underlying CPU memory can be accessed using the
|
||||
* iommufd_access_pin_pages() or iommufd_access_rw() functions.
|
||||
*
|
||||
* The provided ops are required to use iommufd_access_pin_pages().
|
||||
*/
|
||||
struct iommufd_access *
|
||||
iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
|
||||
const struct iommufd_access_ops *ops, void *data)
|
||||
{
|
||||
struct iommufd_access *access;
|
||||
struct iommufd_object *obj;
|
||||
int rc;
|
||||
|
||||
/*
|
||||
* There is no uAPI for the access object, but to keep things symmetric
|
||||
* use the object infrastructure anyhow.
|
||||
*/
|
||||
access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
|
||||
if (IS_ERR(access))
|
||||
return access;
|
||||
|
||||
access->data = data;
|
||||
access->ops = ops;
|
||||
|
||||
obj = iommufd_get_object(ictx, ioas_id, IOMMUFD_OBJ_IOAS);
|
||||
if (IS_ERR(obj)) {
|
||||
rc = PTR_ERR(obj);
|
||||
goto out_abort;
|
||||
}
|
||||
access->ioas = container_of(obj, struct iommufd_ioas, obj);
|
||||
iommufd_ref_to_users(obj);
|
||||
|
||||
if (ops->needs_pin_pages)
|
||||
access->iova_alignment = PAGE_SIZE;
|
||||
else
|
||||
access->iova_alignment = 1;
|
||||
rc = iopt_add_access(&access->ioas->iopt, access);
|
||||
if (rc)
|
||||
goto out_put_ioas;
|
||||
|
||||
/* The calling driver is a user until iommufd_access_destroy() */
|
||||
refcount_inc(&access->obj.users);
|
||||
access->ictx = ictx;
|
||||
iommufd_ctx_get(ictx);
|
||||
iommufd_object_finalize(ictx, &access->obj);
|
||||
return access;
|
||||
out_put_ioas:
|
||||
refcount_dec(&access->ioas->obj.users);
|
||||
out_abort:
|
||||
iommufd_object_abort(ictx, &access->obj);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
|
||||
|
||||
/**
|
||||
* iommufd_access_destroy - Destroy an iommufd_access
|
||||
* @access: The access to destroy
|
||||
*
|
||||
* The caller must stop using the access before destroying it.
|
||||
*/
|
||||
void iommufd_access_destroy(struct iommufd_access *access)
|
||||
{
|
||||
bool was_destroyed;
|
||||
|
||||
was_destroyed = iommufd_object_destroy_user(access->ictx, &access->obj);
|
||||
WARN_ON(!was_destroyed);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
|
||||
|
||||
/**
|
||||
* iommufd_access_notify_unmap - Notify users of an iopt to stop using it
|
||||
* @iopt: iopt to work on
|
||||
* @iova: Starting iova in the iopt
|
||||
* @length: Number of bytes
|
||||
*
|
||||
* After this function returns there should be no users attached to the pages
|
||||
* linked to this iopt that intersect with iova,length. Anyone that has attached
|
||||
* a user through iopt_access_pages() needs to detach it through
|
||||
* iommufd_access_unpin_pages() before this function returns.
|
||||
*
|
||||
* iommufd_access_destroy() will wait for any outstanding unmap callback to
|
||||
* complete. Once iommufd_access_destroy() no unmap ops are running or will
|
||||
* run in the future. Due to this a driver must not create locking that prevents
|
||||
* unmap to complete while iommufd_access_destroy() is running.
|
||||
*/
|
||||
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
|
||||
unsigned long length)
|
||||
{
|
||||
struct iommufd_ioas *ioas =
|
||||
container_of(iopt, struct iommufd_ioas, iopt);
|
||||
struct iommufd_access *access;
|
||||
unsigned long index;
|
||||
|
||||
xa_lock(&ioas->iopt.access_list);
|
||||
xa_for_each(&ioas->iopt.access_list, index, access) {
|
||||
if (!iommufd_lock_obj(&access->obj))
|
||||
continue;
|
||||
xa_unlock(&ioas->iopt.access_list);
|
||||
|
||||
access->ops->unmap(access->data, iova, length);
|
||||
|
||||
iommufd_put_object(&access->obj);
|
||||
xa_lock(&ioas->iopt.access_list);
|
||||
}
|
||||
xa_unlock(&ioas->iopt.access_list);
|
||||
}
|
||||
|
||||
/**
|
||||
* iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
|
||||
* @access: IOAS access to act on
|
||||
* @iova: Starting IOVA
|
||||
* @length: Number of bytes to access
|
||||
*
|
||||
* Return the struct page's. The caller must stop accessing them before calling
|
||||
* this. The iova/length must exactly match the one provided to access_pages.
|
||||
*/
|
||||
void iommufd_access_unpin_pages(struct iommufd_access *access,
|
||||
unsigned long iova, unsigned long length)
|
||||
{
|
||||
struct io_pagetable *iopt = &access->ioas->iopt;
|
||||
struct iopt_area_contig_iter iter;
|
||||
unsigned long last_iova;
|
||||
struct iopt_area *area;
|
||||
|
||||
if (WARN_ON(!length) ||
|
||||
WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
|
||||
return;
|
||||
|
||||
down_read(&iopt->iova_rwsem);
|
||||
iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
|
||||
iopt_area_remove_access(
|
||||
area, iopt_area_iova_to_index(area, iter.cur_iova),
|
||||
iopt_area_iova_to_index(
|
||||
area,
|
||||
min(last_iova, iopt_area_last_iova(area))));
|
||||
up_read(&iopt->iova_rwsem);
|
||||
WARN_ON(!iopt_area_contig_done(&iter));
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);
|
||||
|
||||
static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
|
||||
{
|
||||
if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
|
||||
return false;
|
||||
|
||||
if (!iopt_area_contig_done(iter) &&
|
||||
(iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
|
||||
PAGE_SIZE) != (PAGE_SIZE - 1))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool check_area_prot(struct iopt_area *area, unsigned int flags)
|
||||
{
|
||||
if (flags & IOMMUFD_ACCESS_RW_WRITE)
|
||||
return area->iommu_prot & IOMMU_WRITE;
|
||||
return area->iommu_prot & IOMMU_READ;
|
||||
}
|
||||
|
||||
/**
|
||||
* iommufd_access_pin_pages() - Return a list of pages under the iova
|
||||
* @access: IOAS access to act on
|
||||
* @iova: Starting IOVA
|
||||
* @length: Number of bytes to access
|
||||
* @out_pages: Output page list
|
||||
* @flags: IOPMMUFD_ACCESS_RW_* flags
|
||||
*
|
||||
* Reads @length bytes starting at iova and returns the struct page * pointers.
|
||||
* These can be kmap'd by the caller for CPU access.
|
||||
*
|
||||
* The caller must perform iommufd_access_unpin_pages() when done to balance
|
||||
* this.
|
||||
*
|
||||
* This API always requires a page aligned iova. This happens naturally if the
|
||||
* ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
|
||||
* smaller alignments have corner cases where this API can fail on otherwise
|
||||
* aligned iova.
|
||||
*/
|
||||
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
|
||||
unsigned long length, struct page **out_pages,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct io_pagetable *iopt = &access->ioas->iopt;
|
||||
struct iopt_area_contig_iter iter;
|
||||
unsigned long last_iova;
|
||||
struct iopt_area *area;
|
||||
int rc;
|
||||
|
||||
/* Driver's ops don't support pin_pages */
|
||||
if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
|
||||
WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
|
||||
return -EINVAL;
|
||||
|
||||
if (!length)
|
||||
return -EINVAL;
|
||||
if (check_add_overflow(iova, length - 1, &last_iova))
|
||||
return -EOVERFLOW;
|
||||
|
||||
down_read(&iopt->iova_rwsem);
|
||||
iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
|
||||
unsigned long last = min(last_iova, iopt_area_last_iova(area));
|
||||
unsigned long last_index = iopt_area_iova_to_index(area, last);
|
||||
unsigned long index =
|
||||
iopt_area_iova_to_index(area, iter.cur_iova);
|
||||
|
||||
if (area->prevent_access ||
|
||||
!iopt_area_contig_is_aligned(&iter)) {
|
||||
rc = -EINVAL;
|
||||
goto err_remove;
|
||||
}
|
||||
|
||||
if (!check_area_prot(area, flags)) {
|
||||
rc = -EPERM;
|
||||
goto err_remove;
|
||||
}
|
||||
|
||||
rc = iopt_area_add_access(area, index, last_index, out_pages,
|
||||
flags);
|
||||
if (rc)
|
||||
goto err_remove;
|
||||
out_pages += last_index - index + 1;
|
||||
}
|
||||
if (!iopt_area_contig_done(&iter)) {
|
||||
rc = -ENOENT;
|
||||
goto err_remove;
|
||||
}
|
||||
|
||||
up_read(&iopt->iova_rwsem);
|
||||
return 0;
|
||||
|
||||
err_remove:
|
||||
if (iova < iter.cur_iova) {
|
||||
last_iova = iter.cur_iova - 1;
|
||||
iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
|
||||
iopt_area_remove_access(
|
||||
area,
|
||||
iopt_area_iova_to_index(area, iter.cur_iova),
|
||||
iopt_area_iova_to_index(
|
||||
area, min(last_iova,
|
||||
iopt_area_last_iova(area))));
|
||||
}
|
||||
up_read(&iopt->iova_rwsem);
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
|
||||
|
||||
/**
|
||||
* iommufd_access_rw - Read or write data under the iova
|
||||
* @access: IOAS access to act on
|
||||
* @iova: Starting IOVA
|
||||
* @data: Kernel buffer to copy to/from
|
||||
* @length: Number of bytes to access
|
||||
* @flags: IOMMUFD_ACCESS_RW_* flags
|
||||
*
|
||||
* Copy kernel to/from data into the range given by IOVA/length. If flags
|
||||
* indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
|
||||
* by changing it into copy_to/from_user().
|
||||
*/
|
||||
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
|
||||
void *data, size_t length, unsigned int flags)
|
||||
{
|
||||
struct io_pagetable *iopt = &access->ioas->iopt;
|
||||
struct iopt_area_contig_iter iter;
|
||||
struct iopt_area *area;
|
||||
unsigned long last_iova;
|
||||
int rc;
|
||||
|
||||
if (!length)
|
||||
return -EINVAL;
|
||||
if (check_add_overflow(iova, length - 1, &last_iova))
|
||||
return -EOVERFLOW;
|
||||
|
||||
down_read(&iopt->iova_rwsem);
|
||||
iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
|
||||
unsigned long last = min(last_iova, iopt_area_last_iova(area));
|
||||
unsigned long bytes = (last - iter.cur_iova) + 1;
|
||||
|
||||
if (area->prevent_access) {
|
||||
rc = -EINVAL;
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
if (!check_area_prot(area, flags)) {
|
||||
rc = -EPERM;
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
rc = iopt_pages_rw_access(
|
||||
area->pages, iopt_area_start_byte(area, iter.cur_iova),
|
||||
data, bytes, flags);
|
||||
if (rc)
|
||||
goto err_out;
|
||||
data += bytes;
|
||||
}
|
||||
if (!iopt_area_contig_done(&iter))
|
||||
rc = -ENOENT;
|
||||
err_out:
|
||||
up_read(&iopt->iova_rwsem);
|
||||
return rc;
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
|
||||
|
||||
#ifdef CONFIG_IOMMUFD_TEST
|
||||
/*
|
||||
* Creating a real iommufd_device is too hard, bypass creating a iommufd_device
|
||||
* and go directly to attaching a domain.
|
||||
*/
|
||||
struct iommufd_hw_pagetable *
|
||||
iommufd_device_selftest_attach(struct iommufd_ctx *ictx,
|
||||
struct iommufd_ioas *ioas,
|
||||
struct device *mock_dev)
|
||||
{
|
||||
struct iommufd_hw_pagetable *hwpt;
|
||||
int rc;
|
||||
|
||||
hwpt = iommufd_hw_pagetable_alloc(ictx, ioas, mock_dev);
|
||||
if (IS_ERR(hwpt))
|
||||
return hwpt;
|
||||
|
||||
rc = iopt_table_add_domain(&hwpt->ioas->iopt, hwpt->domain);
|
||||
if (rc)
|
||||
goto out_hwpt;
|
||||
|
||||
refcount_inc(&hwpt->obj.users);
|
||||
iommufd_object_finalize(ictx, &hwpt->obj);
|
||||
return hwpt;
|
||||
|
||||
out_hwpt:
|
||||
iommufd_object_abort_and_destroy(ictx, &hwpt->obj);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
|
||||
void iommufd_device_selftest_detach(struct iommufd_ctx *ictx,
|
||||
struct iommufd_hw_pagetable *hwpt)
|
||||
{
|
||||
iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain);
|
||||
refcount_dec(&hwpt->obj.users);
|
||||
}
|
||||
#endif
|
53
drivers/iommu/iommufd/double_span.h
Normal file
53
drivers/iommu/iommufd/double_span.h
Normal file
@ -0,0 +1,53 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES.
|
||||
*/
|
||||
#ifndef __IOMMUFD_DOUBLE_SPAN_H
|
||||
#define __IOMMUFD_DOUBLE_SPAN_H
|
||||
|
||||
#include <linux/interval_tree.h>
|
||||
|
||||
/*
|
||||
* This is a variation of the general interval_tree_span_iter that computes the
|
||||
* spans over the union of two different interval trees. Used ranges are broken
|
||||
* up and reported based on the tree that provides the interval. The first span
|
||||
* always takes priority. Like interval_tree_span_iter it is greedy and the same
|
||||
* value of is_used will not repeat on two iteration cycles.
|
||||
*/
|
||||
struct interval_tree_double_span_iter {
|
||||
struct rb_root_cached *itrees[2];
|
||||
struct interval_tree_span_iter spans[2];
|
||||
union {
|
||||
unsigned long start_hole;
|
||||
unsigned long start_used;
|
||||
};
|
||||
union {
|
||||
unsigned long last_hole;
|
||||
unsigned long last_used;
|
||||
};
|
||||
/* 0 = hole, 1 = used span[0], 2 = used span[1], -1 done iteration */
|
||||
int is_used;
|
||||
};
|
||||
|
||||
void interval_tree_double_span_iter_update(
|
||||
struct interval_tree_double_span_iter *iter);
|
||||
void interval_tree_double_span_iter_first(
|
||||
struct interval_tree_double_span_iter *iter,
|
||||
struct rb_root_cached *itree1, struct rb_root_cached *itree2,
|
||||
unsigned long first_index, unsigned long last_index);
|
||||
void interval_tree_double_span_iter_next(
|
||||
struct interval_tree_double_span_iter *iter);
|
||||
|
||||
static inline bool
|
||||
interval_tree_double_span_iter_done(struct interval_tree_double_span_iter *state)
|
||||
{
|
||||
return state->is_used == -1;
|
||||
}
|
||||
|
||||
/*
 * Visit every span (hole or used) reported by the double span iterator for
 * itree1/itree2 over first_index..last_index. @span points at the iterator.
 */
#define interval_tree_for_each_double_span(span, itree1, itree2, first_index, \
					   last_index)                        \
	for (interval_tree_double_span_iter_first(span, itree1, itree2,        \
						  first_index, last_index);    \
	     !interval_tree_double_span_iter_done(span);                       \
	     interval_tree_double_span_iter_next(span))
|
||||
|
||||
#endif
|
57
drivers/iommu/iommufd/hw_pagetable.c
Normal file
57
drivers/iommu/iommufd/hw_pagetable.c
Normal file
@ -0,0 +1,57 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#include <linux/iommu.h>
|
||||
|
||||
#include "iommufd_private.h"
|
||||
|
||||
void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
|
||||
{
|
||||
struct iommufd_hw_pagetable *hwpt =
|
||||
container_of(obj, struct iommufd_hw_pagetable, obj);
|
||||
|
||||
WARN_ON(!list_empty(&hwpt->devices));
|
||||
|
||||
iommu_domain_free(hwpt->domain);
|
||||
refcount_dec(&hwpt->ioas->obj.users);
|
||||
mutex_destroy(&hwpt->devices_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* iommufd_hw_pagetable_alloc() - Get an iommu_domain for a device
|
||||
* @ictx: iommufd context
|
||||
* @ioas: IOAS to associate the domain with
|
||||
* @dev: Device to get an iommu_domain for
|
||||
*
|
||||
* Allocate a new iommu_domain and return it as a hw_pagetable.
|
||||
*/
|
||||
struct iommufd_hw_pagetable *
|
||||
iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
|
||||
struct device *dev)
|
||||
{
|
||||
struct iommufd_hw_pagetable *hwpt;
|
||||
int rc;
|
||||
|
||||
hwpt = iommufd_object_alloc(ictx, hwpt, IOMMUFD_OBJ_HW_PAGETABLE);
|
||||
if (IS_ERR(hwpt))
|
||||
return hwpt;
|
||||
|
||||
hwpt->domain = iommu_domain_alloc(dev->bus);
|
||||
if (!hwpt->domain) {
|
||||
rc = -ENOMEM;
|
||||
goto out_abort;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&hwpt->devices);
|
||||
INIT_LIST_HEAD(&hwpt->hwpt_item);
|
||||
mutex_init(&hwpt->devices_lock);
|
||||
/* Pairs with iommufd_hw_pagetable_destroy() */
|
||||
refcount_inc(&ioas->obj.users);
|
||||
hwpt->ioas = ioas;
|
||||
return hwpt;
|
||||
|
||||
out_abort:
|
||||
iommufd_object_abort(ictx, &hwpt->obj);
|
||||
return ERR_PTR(rc);
|
||||
}
|
1216
drivers/iommu/iommufd/io_pagetable.c
Normal file
1216
drivers/iommu/iommufd/io_pagetable.c
Normal file
File diff suppressed because it is too large
Load Diff
241
drivers/iommu/iommufd/io_pagetable.h
Normal file
241
drivers/iommu/iommufd/io_pagetable.h
Normal file
@ -0,0 +1,241 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
|
||||
*
|
||||
*/
|
||||
#ifndef __IO_PAGETABLE_H
|
||||
#define __IO_PAGETABLE_H
|
||||
|
||||
#include <linux/interval_tree.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/kref.h>
|
||||
#include <linux/xarray.h>
|
||||
|
||||
#include "iommufd_private.h"
|
||||
|
||||
struct iommu_domain;
|
||||
|
||||
/*
|
||||
* Each io_pagetable is composed of intervals of areas which cover regions of
|
||||
* the iova that are backed by something. iova not covered by areas is not
|
||||
* populated in the page table. Each area is fully populated with pages.
|
||||
*
|
||||
* iovas are in byte units, but must be iopt->iova_alignment aligned.
|
||||
*
|
||||
* pages can be NULL, this means some other thread is still working on setting
|
||||
* up or tearing down the area. When observed under the write side of the
|
||||
* domain_rwsem a NULL pages must mean the area is still being setup and no
|
||||
* domains are filled.
|
||||
*
|
||||
* storage_domain points at an arbitrary iommu_domain that is holding the PFNs
|
||||
* for this area. It is locked by the pages->mutex. This simplifies the locking
|
||||
* as the pages code can rely on the storage_domain without having to get the
|
||||
* iopt->domains_rwsem.
|
||||
*
|
||||
* The io_pagetable::iova_rwsem protects node
|
||||
* The iopt_pages::mutex protects pages_node
|
||||
* iopt and iommu_prot are immutable
|
||||
* The pages::mutex protects num_accesses
|
||||
*/
|
||||
struct iopt_area {
|
||||
struct interval_tree_node node;
|
||||
struct interval_tree_node pages_node;
|
||||
struct io_pagetable *iopt;
|
||||
struct iopt_pages *pages;
|
||||
struct iommu_domain *storage_domain;
|
||||
/* How many bytes into the first page the area starts */
|
||||
unsigned int page_offset;
|
||||
/* IOMMU_READ, IOMMU_WRITE, etc */
|
||||
int iommu_prot;
|
||||
bool prevent_access : 1;
|
||||
unsigned int num_accesses;
|
||||
};
|
||||
|
||||
struct iopt_allowed {
|
||||
struct interval_tree_node node;
|
||||
};
|
||||
|
||||
struct iopt_reserved {
|
||||
struct interval_tree_node node;
|
||||
void *owner;
|
||||
};
|
||||
|
||||
int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages);
|
||||
void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages);
|
||||
|
||||
int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain);
|
||||
void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages,
|
||||
struct iommu_domain *domain);
|
||||
void iopt_area_unmap_domain(struct iopt_area *area,
|
||||
struct iommu_domain *domain);
|
||||
|
||||
static inline unsigned long iopt_area_index(struct iopt_area *area)
|
||||
{
|
||||
return area->pages_node.start;
|
||||
}
|
||||
|
||||
static inline unsigned long iopt_area_last_index(struct iopt_area *area)
|
||||
{
|
||||
return area->pages_node.last;
|
||||
}
|
||||
|
||||
static inline unsigned long iopt_area_iova(struct iopt_area *area)
|
||||
{
|
||||
return area->node.start;
|
||||
}
|
||||
|
||||
static inline unsigned long iopt_area_last_iova(struct iopt_area *area)
|
||||
{
|
||||
return area->node.last;
|
||||
}
|
||||
|
||||
static inline size_t iopt_area_length(struct iopt_area *area)
|
||||
{
|
||||
return (area->node.last - area->node.start) + 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Number of bytes from the start of the iopt_pages that the iova begins.
|
||||
* iopt_area_start_byte() / PAGE_SIZE encodes the starting page index
|
||||
* iopt_area_start_byte() % PAGE_SIZE encodes the offset within that page
|
||||
*/
|
||||
static inline unsigned long iopt_area_start_byte(struct iopt_area *area,
|
||||
unsigned long iova)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
|
||||
WARN_ON(iova < iopt_area_iova(area) ||
|
||||
iova > iopt_area_last_iova(area));
|
||||
return (iova - iopt_area_iova(area)) + area->page_offset +
|
||||
iopt_area_index(area) * PAGE_SIZE;
|
||||
}
|
||||
|
||||
static inline unsigned long iopt_area_iova_to_index(struct iopt_area *area,
|
||||
unsigned long iova)
|
||||
{
|
||||
return iopt_area_start_byte(area, iova) / PAGE_SIZE;
|
||||
}
|
||||
|
||||
/*
 * Generate typed iopt_<name>_iter_first()/iopt_<name>_iter_next() helpers.
 * They wrap interval_tree_iter_first()/interval_tree_iter_next() on
 * iopt-><name>_itree and return the containing struct iopt_<name>, or NULL
 * when the interval tree lookup finds nothing. iter_first asserts that
 * iopt->iova_rwsem is held.
 */
#define __make_iopt_iter(name)                                                 \
	static inline struct iopt_##name *iopt_##name##_iter_first(            \
		struct io_pagetable *iopt, unsigned long start,                \
		unsigned long last)                                            \
	{                                                                      \
		struct interval_tree_node *found;                              \
									       \
		lockdep_assert_held(&iopt->iova_rwsem);                        \
		found = interval_tree_iter_first(&iopt->name##_itree, start,   \
						 last);                        \
		return found ? container_of(found, struct iopt_##name, node) : \
			       NULL;                                           \
	}                                                                      \
	static inline struct iopt_##name *iopt_##name##_iter_next(             \
		struct iopt_##name *last_node, unsigned long start,            \
		unsigned long last)                                            \
	{                                                                      \
		struct interval_tree_node *found;                              \
									       \
		found = interval_tree_iter_next(&last_node->node, start,       \
						last);                         \
		return found ? container_of(found, struct iopt_##name, node) : \
			       NULL;                                           \
	}
|
||||
|
||||
__make_iopt_iter(area)
|
||||
__make_iopt_iter(allowed)
|
||||
__make_iopt_iter(reserved)
|
||||
|
||||
/* State for walking a contiguous run of areas, see iopt_for_each_contig_area() */
struct iopt_area_contig_iter {
	/* iova at which the current area is being visited */
	unsigned long cur_iova;
	/* Last iova (inclusive) of the requested range */
	unsigned long last_iova;
	/* Area currently being visited, checked against NULL by iopt_area_contig_done() */
	struct iopt_area *area;
};
|
||||
struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
|
||||
struct io_pagetable *iopt,
|
||||
unsigned long iova,
|
||||
unsigned long last_iova);
|
||||
struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter);
|
||||
|
||||
static inline bool iopt_area_contig_done(struct iopt_area_contig_iter *iter)
|
||||
{
|
||||
return iter->area && iter->last_iova <= iopt_area_last_iova(iter->area);
|
||||
}
|
||||
|
||||
/*
 * Iterate over a contiguous list of areas that span the iova,last_iova range.
 * The loop simply stops when no further area is returned, so the caller must
 * check iopt_area_contig_done() after the loop to learn whether contiguous
 * areas covered the whole range.
 */
#define iopt_for_each_contig_area(iter, area, iopt, iova, last_iova)          \
	for (area = iopt_area_contig_init(iter, iopt, iova, last_iova); area; \
	     area = iopt_area_contig_next(iter))
|
||||
|
||||
/*
 * Page accounting modes.
 * NOTE(review): presumably the values stored in iopt_pages::account_mode;
 * confirm against the users of these constants.
 */
enum {
	IOPT_PAGES_ACCOUNT_NONE = 0,
	IOPT_PAGES_ACCOUNT_USER = 1,
	IOPT_PAGES_ACCOUNT_MM = 2,
};
|
||||
|
||||
/*
|
||||
* This holds a pinned page list for multiple areas of IO address space. The
|
||||
* pages always originate from a linear chunk of userspace VA. Multiple
|
||||
* io_pagetable's, through their iopt_area's, can share a single iopt_pages
|
||||
* which avoids multi-pinning and double accounting of page consumption.
|
||||
*
|
||||
* indexes in this structure are measured in PAGE_SIZE units, are 0 based from
|
||||
* the start of the uptr and extend to npages. pages are pinned dynamically
|
||||
* according to the intervals in the access_itree and domains_itree, npinned
|
||||
* records the current number of pages pinned.
|
||||
*/
|
||||
struct iopt_pages {
|
||||
struct kref kref;
|
||||
struct mutex mutex;
|
||||
size_t npages;
|
||||
size_t npinned;
|
||||
size_t last_npinned;
|
||||
struct task_struct *source_task;
|
||||
struct mm_struct *source_mm;
|
||||
struct user_struct *source_user;
|
||||
void __user *uptr;
|
||||
bool writable:1;
|
||||
u8 account_mode;
|
||||
|
||||
struct xarray pinned_pfns;
|
||||
/* Of iopt_pages_access::node */
|
||||
struct rb_root_cached access_itree;
|
||||
/* Of iopt_area::pages_node */
|
||||
struct rb_root_cached domains_itree;
|
||||
};
|
||||
|
||||
struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
|
||||
bool writable);
|
||||
void iopt_release_pages(struct kref *kref);
|
||||
static inline void iopt_put_pages(struct iopt_pages *pages)
|
||||
{
|
||||
kref_put(&pages->kref, iopt_release_pages);
|
||||
}
|
||||
|
||||
void iopt_pages_fill_from_xarray(struct iopt_pages *pages, unsigned long start,
|
||||
unsigned long last, struct page **out_pages);
|
||||
int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start,
|
||||
unsigned long last, struct page **out_pages);
|
||||
void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start,
|
||||
unsigned long last);
|
||||
|
||||
int iopt_area_add_access(struct iopt_area *area, unsigned long start,
|
||||
unsigned long last, struct page **out_pages,
|
||||
unsigned int flags);
|
||||
void iopt_area_remove_access(struct iopt_area *area, unsigned long start,
|
||||
unsigned long last);
|
||||
int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
|
||||
void *data, unsigned long length, unsigned int flags);
|
||||
|
||||
/*
|
||||
* Each interval represents an active iopt_access_pages(), it acts as an
|
||||
* interval lock that keeps the PFNs pinned and stored in the xarray.
|
||||
*/
|
||||
struct iopt_pages_access {
|
||||
struct interval_tree_node node;
|
||||
unsigned int users;
|
||||
};
|
||||
|
||||
#endif
|
398
drivers/iommu/iommufd/ioas.c
Normal file
398
drivers/iommu/iommufd/ioas.c
Normal file
@ -0,0 +1,398 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#include <linux/interval_tree.h>
|
||||
#include <linux/iommufd.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <uapi/linux/iommufd.h>
|
||||
|
||||
#include "io_pagetable.h"
|
||||
|
||||
void iommufd_ioas_destroy(struct iommufd_object *obj)
|
||||
{
|
||||
struct iommufd_ioas *ioas = container_of(obj, struct iommufd_ioas, obj);
|
||||
int rc;
|
||||
|
||||
rc = iopt_unmap_all(&ioas->iopt, NULL);
|
||||
WARN_ON(rc && rc != -ENOENT);
|
||||
iopt_destroy_table(&ioas->iopt);
|
||||
mutex_destroy(&ioas->mutex);
|
||||
}
|
||||
|
||||
struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx)
|
||||
{
|
||||
struct iommufd_ioas *ioas;
|
||||
|
||||
ioas = iommufd_object_alloc(ictx, ioas, IOMMUFD_OBJ_IOAS);
|
||||
if (IS_ERR(ioas))
|
||||
return ioas;
|
||||
|
||||
iopt_init_table(&ioas->iopt);
|
||||
INIT_LIST_HEAD(&ioas->hwpt_list);
|
||||
mutex_init(&ioas->mutex);
|
||||
return ioas;
|
||||
}
|
||||
|
||||
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_ioas_alloc *cmd = ucmd->cmd;
|
||||
struct iommufd_ioas *ioas;
|
||||
int rc;
|
||||
|
||||
if (cmd->flags)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
ioas = iommufd_ioas_alloc(ucmd->ictx);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
cmd->out_ioas_id = ioas->obj.id;
|
||||
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
|
||||
if (rc)
|
||||
goto out_table;
|
||||
iommufd_object_finalize(ucmd->ictx, &ioas->obj);
|
||||
return 0;
|
||||
|
||||
out_table:
|
||||
iommufd_object_abort_and_destroy(ucmd->ictx, &ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_iova_range __user *ranges;
|
||||
struct iommu_ioas_iova_ranges *cmd = ucmd->cmd;
|
||||
struct iommufd_ioas *ioas;
|
||||
struct interval_tree_span_iter span;
|
||||
u32 max_iovas;
|
||||
int rc;
|
||||
|
||||
if (cmd->__reserved)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
down_read(&ioas->iopt.iova_rwsem);
|
||||
max_iovas = cmd->num_iovas;
|
||||
ranges = u64_to_user_ptr(cmd->allowed_iovas);
|
||||
cmd->num_iovas = 0;
|
||||
cmd->out_iova_alignment = ioas->iopt.iova_alignment;
|
||||
interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0,
|
||||
ULONG_MAX) {
|
||||
if (!span.is_hole)
|
||||
continue;
|
||||
if (cmd->num_iovas < max_iovas) {
|
||||
struct iommu_iova_range elm = {
|
||||
.start = span.start_hole,
|
||||
.last = span.last_hole,
|
||||
};
|
||||
|
||||
if (copy_to_user(&ranges[cmd->num_iovas], &elm,
|
||||
sizeof(elm))) {
|
||||
rc = -EFAULT;
|
||||
goto out_put;
|
||||
}
|
||||
}
|
||||
cmd->num_iovas++;
|
||||
}
|
||||
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
|
||||
if (rc)
|
||||
goto out_put;
|
||||
if (cmd->num_iovas > max_iovas)
|
||||
rc = -EMSGSIZE;
|
||||
out_put:
|
||||
up_read(&ioas->iopt.iova_rwsem);
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int iommufd_ioas_load_iovas(struct rb_root_cached *itree,
|
||||
struct iommu_iova_range __user *ranges,
|
||||
u32 num)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
for (i = 0; i != num; i++) {
|
||||
struct iommu_iova_range range;
|
||||
struct iopt_allowed *allowed;
|
||||
|
||||
if (copy_from_user(&range, ranges + i, sizeof(range)))
|
||||
return -EFAULT;
|
||||
|
||||
if (range.start >= range.last)
|
||||
return -EINVAL;
|
||||
|
||||
if (interval_tree_iter_first(itree, range.start, range.last))
|
||||
return -EINVAL;
|
||||
|
||||
allowed = kzalloc(sizeof(*allowed), GFP_KERNEL_ACCOUNT);
|
||||
if (!allowed)
|
||||
return -ENOMEM;
|
||||
allowed->node.start = range.start;
|
||||
allowed->node.last = range.last;
|
||||
|
||||
interval_tree_insert(&allowed->node, itree);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_ioas_allow_iovas *cmd = ucmd->cmd;
|
||||
struct rb_root_cached allowed_iova = RB_ROOT_CACHED;
|
||||
struct interval_tree_node *node;
|
||||
struct iommufd_ioas *ioas;
|
||||
struct io_pagetable *iopt;
|
||||
int rc = 0;
|
||||
|
||||
if (cmd->__reserved)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
iopt = &ioas->iopt;
|
||||
|
||||
rc = iommufd_ioas_load_iovas(&allowed_iova,
|
||||
u64_to_user_ptr(cmd->allowed_iovas),
|
||||
cmd->num_iovas);
|
||||
if (rc)
|
||||
goto out_free;
|
||||
|
||||
/*
|
||||
* We want the allowed tree update to be atomic, so we have to keep the
|
||||
* original nodes around, and keep track of the new nodes as we allocate
|
||||
* memory for them. The simplest solution is to have a new/old tree and
|
||||
* then swap new for old. On success we free the old tree, on failure we
|
||||
* free the new tree.
|
||||
*/
|
||||
rc = iopt_set_allow_iova(iopt, &allowed_iova);
|
||||
out_free:
|
||||
while ((node = interval_tree_iter_first(&allowed_iova, 0, ULONG_MAX))) {
|
||||
interval_tree_remove(node, &allowed_iova);
|
||||
kfree(container_of(node, struct iopt_allowed, node));
|
||||
}
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Translate IOMMU_IOAS_MAP_* uAPI flags into IOMMU_* protection bits. */
static int conv_iommu_prot(u32 map_flags)
{
	int prot;

	/*
	 * Userspace gets no manual cache coherency ioctls and most
	 * architectures make the CPU cache flushing ops privileged, so the
	 * underlying IOMMU is required to support CPU coherent operation.
	 * Support for IOMMU_CACHE is enforced by the
	 * IOMMU_CAP_CACHE_COHERENCY test during bind.
	 */
	prot = IOMMU_CACHE;

	prot |= (map_flags & IOMMU_IOAS_MAP_WRITEABLE) ? IOMMU_WRITE : 0;
	prot |= (map_flags & IOMMU_IOAS_MAP_READABLE) ? IOMMU_READ : 0;
	return prot;
}
|
||||
|
||||
int iommufd_ioas_map(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_ioas_map *cmd = ucmd->cmd;
|
||||
unsigned long iova = cmd->iova;
|
||||
struct iommufd_ioas *ioas;
|
||||
unsigned int flags = 0;
|
||||
int rc;
|
||||
|
||||
if ((cmd->flags &
|
||||
~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
|
||||
IOMMU_IOAS_MAP_READABLE)) ||
|
||||
cmd->__reserved)
|
||||
return -EOPNOTSUPP;
|
||||
if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX)
|
||||
return -EOVERFLOW;
|
||||
|
||||
ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
|
||||
flags = IOPT_ALLOC_IOVA;
|
||||
rc = iopt_map_user_pages(ucmd->ictx, &ioas->iopt, &iova,
|
||||
u64_to_user_ptr(cmd->user_va), cmd->length,
|
||||
conv_iommu_prot(cmd->flags), flags);
|
||||
if (rc)
|
||||
goto out_put;
|
||||
|
||||
cmd->iova = iova;
|
||||
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
|
||||
out_put:
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_ioas_copy *cmd = ucmd->cmd;
|
||||
struct iommufd_ioas *src_ioas;
|
||||
struct iommufd_ioas *dst_ioas;
|
||||
unsigned int flags = 0;
|
||||
LIST_HEAD(pages_list);
|
||||
unsigned long iova;
|
||||
int rc;
|
||||
|
||||
iommufd_test_syz_conv_iova_id(ucmd, cmd->src_ioas_id, &cmd->src_iova,
|
||||
&cmd->flags);
|
||||
|
||||
if ((cmd->flags &
|
||||
~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
|
||||
IOMMU_IOAS_MAP_READABLE)))
|
||||
return -EOPNOTSUPP;
|
||||
if (cmd->length >= ULONG_MAX || cmd->src_iova >= ULONG_MAX ||
|
||||
cmd->dst_iova >= ULONG_MAX)
|
||||
return -EOVERFLOW;
|
||||
|
||||
src_ioas = iommufd_get_ioas(ucmd, cmd->src_ioas_id);
|
||||
if (IS_ERR(src_ioas))
|
||||
return PTR_ERR(src_ioas);
|
||||
rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length,
|
||||
&pages_list);
|
||||
iommufd_put_object(&src_ioas->obj);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
dst_ioas = iommufd_get_ioas(ucmd, cmd->dst_ioas_id);
|
||||
if (IS_ERR(dst_ioas)) {
|
||||
rc = PTR_ERR(dst_ioas);
|
||||
goto out_pages;
|
||||
}
|
||||
|
||||
if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
|
||||
flags = IOPT_ALLOC_IOVA;
|
||||
iova = cmd->dst_iova;
|
||||
rc = iopt_map_pages(&dst_ioas->iopt, &pages_list, cmd->length, &iova,
|
||||
conv_iommu_prot(cmd->flags), flags);
|
||||
if (rc)
|
||||
goto out_put_dst;
|
||||
|
||||
cmd->dst_iova = iova;
|
||||
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
|
||||
out_put_dst:
|
||||
iommufd_put_object(&dst_ioas->obj);
|
||||
out_pages:
|
||||
iopt_free_pages_list(&pages_list);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_ioas_unmap *cmd = ucmd->cmd;
|
||||
struct iommufd_ioas *ioas;
|
||||
unsigned long unmapped = 0;
|
||||
int rc;
|
||||
|
||||
ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
if (cmd->iova == 0 && cmd->length == U64_MAX) {
|
||||
rc = iopt_unmap_all(&ioas->iopt, &unmapped);
|
||||
if (rc)
|
||||
goto out_put;
|
||||
} else {
|
||||
if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) {
|
||||
rc = -EOVERFLOW;
|
||||
goto out_put;
|
||||
}
|
||||
rc = iopt_unmap_iova(&ioas->iopt, cmd->iova, cmd->length,
|
||||
&unmapped);
|
||||
if (rc)
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
cmd->length = unmapped;
|
||||
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
|
||||
|
||||
out_put:
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
|
||||
struct iommufd_ctx *ictx)
|
||||
{
|
||||
if (cmd->object_id)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (cmd->op == IOMMU_OPTION_OP_GET) {
|
||||
cmd->val64 = ictx->account_mode == IOPT_PAGES_ACCOUNT_MM;
|
||||
return 0;
|
||||
}
|
||||
if (cmd->op == IOMMU_OPTION_OP_SET) {
|
||||
int rc = 0;
|
||||
|
||||
if (!capable(CAP_SYS_RESOURCE))
|
||||
return -EPERM;
|
||||
|
||||
xa_lock(&ictx->objects);
|
||||
if (!xa_empty(&ictx->objects)) {
|
||||
rc = -EBUSY;
|
||||
} else {
|
||||
if (cmd->val64 == 0)
|
||||
ictx->account_mode = IOPT_PAGES_ACCOUNT_USER;
|
||||
else if (cmd->val64 == 1)
|
||||
ictx->account_mode = IOPT_PAGES_ACCOUNT_MM;
|
||||
else
|
||||
rc = -EINVAL;
|
||||
}
|
||||
xa_unlock(&ictx->objects);
|
||||
|
||||
return rc;
|
||||
}
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static int iommufd_ioas_option_huge_pages(struct iommu_option *cmd,
|
||||
struct iommufd_ioas *ioas)
|
||||
{
|
||||
if (cmd->op == IOMMU_OPTION_OP_GET) {
|
||||
cmd->val64 = !ioas->iopt.disable_large_pages;
|
||||
return 0;
|
||||
}
|
||||
if (cmd->op == IOMMU_OPTION_OP_SET) {
|
||||
if (cmd->val64 == 0)
|
||||
return iopt_disable_large_pages(&ioas->iopt);
|
||||
if (cmd->val64 == 1) {
|
||||
iopt_enable_large_pages(&ioas->iopt);
|
||||
return 0;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
int iommufd_ioas_option(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_option *cmd = ucmd->cmd;
|
||||
struct iommufd_ioas *ioas;
|
||||
int rc = 0;
|
||||
|
||||
if (cmd->__reserved)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
ioas = iommufd_get_ioas(ucmd, cmd->object_id);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
switch (cmd->option_id) {
|
||||
case IOMMU_OPTION_HUGE_PAGES:
|
||||
rc = iommufd_ioas_option_huge_pages(cmd, ioas);
|
||||
break;
|
||||
default:
|
||||
rc = -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
307
drivers/iommu/iommufd/iommufd_private.h
Normal file
307
drivers/iommu/iommufd/iommufd_private.h
Normal file
@ -0,0 +1,307 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#ifndef __IOMMUFD_PRIVATE_H
|
||||
#define __IOMMUFD_PRIVATE_H
|
||||
|
||||
#include <linux/rwsem.h>
|
||||
#include <linux/xarray.h>
|
||||
#include <linux/refcount.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
struct iommu_domain;
|
||||
struct iommu_group;
|
||||
struct iommu_option;
|
||||
|
||||
struct iommufd_ctx {
|
||||
struct file *file;
|
||||
struct xarray objects;
|
||||
|
||||
u8 account_mode;
|
||||
struct iommufd_ioas *vfio_ioas;
|
||||
};
|
||||
|
||||
/*
|
||||
* The IOVA to PFN map. The map automatically copies the PFNs into multiple
|
||||
* domains and permits sharing of PFNs between io_pagetable instances. This
|
||||
* supports both a design where IOAS's are 1:1 with a domain (eg because the
|
||||
* domain is HW customized), or where the IOAS is 1:N with multiple generic
|
||||
* domains. The io_pagetable holds an interval tree of iopt_areas which point
|
||||
* to shared iopt_pages which hold the pfns mapped to the page table.
|
||||
*
|
||||
* The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
|
||||
*/
|
||||
struct io_pagetable {
|
||||
struct rw_semaphore domains_rwsem;
|
||||
struct xarray domains;
|
||||
struct xarray access_list;
|
||||
unsigned int next_domain_id;
|
||||
|
||||
struct rw_semaphore iova_rwsem;
|
||||
struct rb_root_cached area_itree;
|
||||
/* IOVA that cannot become reserved, struct iopt_allowed */
|
||||
struct rb_root_cached allowed_itree;
|
||||
/* IOVA that cannot be allocated, struct iopt_reserved */
|
||||
struct rb_root_cached reserved_itree;
|
||||
u8 disable_large_pages;
|
||||
unsigned long iova_alignment;
|
||||
};
|
||||
|
||||
void iopt_init_table(struct io_pagetable *iopt);
|
||||
void iopt_destroy_table(struct io_pagetable *iopt);
|
||||
int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
|
||||
unsigned long length, struct list_head *pages_list);
|
||||
void iopt_free_pages_list(struct list_head *pages_list);
|
||||
enum {
|
||||
IOPT_ALLOC_IOVA = 1 << 0,
|
||||
};
|
||||
int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
|
||||
unsigned long *iova, void __user *uptr,
|
||||
unsigned long length, int iommu_prot,
|
||||
unsigned int flags);
|
||||
int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
|
||||
unsigned long length, unsigned long *dst_iova,
|
||||
int iommu_prot, unsigned int flags);
|
||||
int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
|
||||
unsigned long length, unsigned long *unmapped);
|
||||
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
|
||||
|
||||
void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
|
||||
unsigned long length);
|
||||
int iopt_table_add_domain(struct io_pagetable *iopt,
|
||||
struct iommu_domain *domain);
|
||||
void iopt_table_remove_domain(struct io_pagetable *iopt,
|
||||
struct iommu_domain *domain);
|
||||
int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
|
||||
struct device *device,
|
||||
struct iommu_group *group,
|
||||
phys_addr_t *sw_msi_start);
|
||||
int iopt_set_allow_iova(struct io_pagetable *iopt,
|
||||
struct rb_root_cached *allowed_iova);
|
||||
int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
|
||||
unsigned long last, void *owner);
|
||||
void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
|
||||
int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
|
||||
size_t num_iovas);
|
||||
void iopt_enable_large_pages(struct io_pagetable *iopt);
|
||||
int iopt_disable_large_pages(struct io_pagetable *iopt);
|
||||
|
||||
/*
 * Per-ioctl command state handed to every ioctl handler.
 * It is filled in by iommufd_fops_ioctl() before the handler runs.
 */
struct iommufd_ucmd {
|
||||
/* Context of the file the ioctl was issued on */
struct iommufd_ctx *ictx;
|
||||
/* The ioctl argument, i.e. the userspace copy of the command struct */
void __user *ubuffer;
|
||||
/* Struct size declared by userspace, read from the start of ubuffer */
u32 user_size;
|
||||
/* Kernel copy of the command; handlers cast it to their own struct */
void *cmd;
|
||||
};
|
||||
|
||||
int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
|
||||
unsigned long arg);
|
||||
|
||||
/* Copy the response in ucmd->cmd back to userspace. */
|
||||
static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
|
||||
size_t cmd_len)
|
||||
{
|
||||
if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
|
||||
min_t(size_t, ucmd->user_size, cmd_len)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
enum iommufd_object_type {
|
||||
IOMMUFD_OBJ_NONE,
|
||||
IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
|
||||
IOMMUFD_OBJ_DEVICE,
|
||||
IOMMUFD_OBJ_HW_PAGETABLE,
|
||||
IOMMUFD_OBJ_IOAS,
|
||||
IOMMUFD_OBJ_ACCESS,
|
||||
#ifdef CONFIG_IOMMUFD_TEST
|
||||
IOMMUFD_OBJ_SELFTEST,
|
||||
#endif
|
||||
};
|
||||
|
||||
/* Base struct for all objects with a userspace ID handle. */
|
||||
struct iommufd_object {
|
||||
struct rw_semaphore destroy_rwsem;
|
||||
refcount_t users;
|
||||
enum iommufd_object_type type;
|
||||
unsigned int id;
|
||||
};
|
||||
|
||||
static inline bool iommufd_lock_obj(struct iommufd_object *obj)
|
||||
{
|
||||
if (!down_read_trylock(&obj->destroy_rwsem))
|
||||
return false;
|
||||
if (!refcount_inc_not_zero(&obj->users)) {
|
||||
up_read(&obj->destroy_rwsem);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
|
||||
enum iommufd_object_type type);
|
||||
/*
 * Release what iommufd_lock_obj() acquired: drop the users reference and
 * release the read side of destroy_rwsem.
 */
static inline void iommufd_put_object(struct iommufd_object *obj)
|
||||
{
|
||||
refcount_dec(&obj->users);
|
||||
up_read(&obj->destroy_rwsem);
|
||||
}
|
||||
|
||||
/**
|
||||
* iommufd_ref_to_users() - Switch from destroy_rwsem to users refcount
|
||||
* protection
|
||||
* @obj: Object to release
|
||||
*
|
||||
* Objects have two refcount protections (destroy_rwsem and the refcount_t
|
||||
* users). Holding either of these will prevent the object from being destroyed.
|
||||
*
|
||||
* Depending on the use case, one protection or the other is appropriate. In
|
||||
* most cases references are being protected by the destroy_rwsem. This allows
|
||||
* orderly destruction of the object because iommufd_object_destroy_user() will
|
||||
* wait for it to become unlocked. However, as a rwsem, it cannot be held across
|
||||
* a system call return. So cases that have longer term needs must switch
|
||||
* to the weaker users refcount_t.
|
||||
*
|
||||
* With users protection iommufd_object_destroy_user() will return false,
|
||||
* refusing to destroy the object, causing -EBUSY to userspace.
|
||||
*/
|
||||
static inline void iommufd_ref_to_users(struct iommufd_object *obj)
|
||||
{
|
||||
up_read(&obj->destroy_rwsem);
|
||||
/* iommufd_lock_obj() obtains users as well */
|
||||
}
|
||||
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
|
||||
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
|
||||
struct iommufd_object *obj);
|
||||
void iommufd_object_finalize(struct iommufd_ctx *ictx,
|
||||
struct iommufd_object *obj);
|
||||
bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
|
||||
struct iommufd_object *obj);
|
||||
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
|
||||
size_t size,
|
||||
enum iommufd_object_type type);
|
||||
|
||||
#define iommufd_object_alloc(ictx, ptr, type) \
|
||||
container_of(_iommufd_object_alloc( \
|
||||
ictx, \
|
||||
sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \
|
||||
offsetof(typeof(*(ptr)), \
|
||||
obj) != 0), \
|
||||
type), \
|
||||
typeof(*(ptr)), obj)
|
||||
|
||||
/*
|
||||
* The IO Address Space (IOAS) pagetable is a virtual page table backed by the
|
||||
* io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
|
||||
* mapping is copied into all of the associated domains and made available to
|
||||
* in-kernel users.
|
||||
*
|
||||
* Every iommu_domain that is created is wrapped in a iommufd_hw_pagetable
|
||||
* object. When we go to attach a device to an IOAS we need to get an
|
||||
* iommu_domain and wrapping iommufd_hw_pagetable for it.
|
||||
*
|
||||
* An iommu_domain & iommufd_hw_pagetable will be automatically selected
|
||||
* for a device based on the hwpt_list. If no suitable iommu_domain
|
||||
* is found a new iommu_domain will be created.
|
||||
*/
|
||||
struct iommufd_ioas {
|
||||
struct iommufd_object obj;
|
||||
struct io_pagetable iopt;
|
||||
struct mutex mutex;
|
||||
struct list_head hwpt_list;
|
||||
};
|
||||
|
||||
static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ucmd *ucmd,
|
||||
u32 id)
|
||||
{
|
||||
return container_of(iommufd_get_object(ucmd->ictx, id,
|
||||
IOMMUFD_OBJ_IOAS),
|
||||
struct iommufd_ioas, obj);
|
||||
}
|
||||
|
||||
struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
|
||||
int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
|
||||
void iommufd_ioas_destroy(struct iommufd_object *obj);
|
||||
int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
|
||||
int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
|
||||
int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
|
||||
int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
|
||||
int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
|
||||
int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
|
||||
int iommufd_option_rlimit_mode(struct iommu_option *cmd,
|
||||
struct iommufd_ctx *ictx);
|
||||
|
||||
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
|
||||
|
||||
/*
|
||||
* A HW pagetable is called an iommu_domain inside the kernel. This user object
|
||||
* allows directly creating and inspecting the domains. Domains that have kernel
|
||||
* owned page tables will be associated with an iommufd_ioas that provides the
|
||||
* IOVA to PFN map.
|
||||
*/
|
||||
struct iommufd_hw_pagetable {
|
||||
struct iommufd_object obj;
|
||||
struct iommufd_ioas *ioas;
|
||||
struct iommu_domain *domain;
|
||||
bool auto_domain : 1;
|
||||
bool enforce_cache_coherency : 1;
|
||||
bool msi_cookie : 1;
|
||||
/* Head at iommufd_ioas::hwpt_list */
|
||||
struct list_head hwpt_item;
|
||||
struct mutex devices_lock;
|
||||
struct list_head devices;
|
||||
};
|
||||
|
||||
struct iommufd_hw_pagetable *
|
||||
iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
|
||||
struct device *dev);
|
||||
void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
|
||||
|
||||
void iommufd_device_destroy(struct iommufd_object *obj);
|
||||
|
||||
struct iommufd_access {
|
||||
struct iommufd_object obj;
|
||||
struct iommufd_ctx *ictx;
|
||||
struct iommufd_ioas *ioas;
|
||||
const struct iommufd_access_ops *ops;
|
||||
void *data;
|
||||
unsigned long iova_alignment;
|
||||
u32 iopt_access_list_id;
|
||||
};
|
||||
|
||||
int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
|
||||
void iopt_remove_access(struct io_pagetable *iopt,
|
||||
struct iommufd_access *access);
|
||||
void iommufd_access_destroy_object(struct iommufd_object *obj);
|
||||
|
||||
#ifdef CONFIG_IOMMUFD_TEST
|
||||
struct iommufd_hw_pagetable *
|
||||
iommufd_device_selftest_attach(struct iommufd_ctx *ictx,
|
||||
struct iommufd_ioas *ioas,
|
||||
struct device *mock_dev);
|
||||
void iommufd_device_selftest_detach(struct iommufd_ctx *ictx,
|
||||
struct iommufd_hw_pagetable *hwpt);
|
||||
int iommufd_test(struct iommufd_ucmd *ucmd);
|
||||
void iommufd_selftest_destroy(struct iommufd_object *obj);
|
||||
extern size_t iommufd_test_memory_limit;
|
||||
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
|
||||
unsigned int ioas_id, u64 *iova, u32 *flags);
|
||||
bool iommufd_should_fail(void);
|
||||
void __init iommufd_test_init(void);
|
||||
void iommufd_test_exit(void);
|
||||
#else
|
||||
static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
|
||||
unsigned int ioas_id,
|
||||
u64 *iova, u32 *flags)
|
||||
{
|
||||
}
|
||||
static inline bool iommufd_should_fail(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline void __init iommufd_test_init(void)
|
||||
{
|
||||
}
|
||||
static inline void iommufd_test_exit(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
#endif
|
93
drivers/iommu/iommufd/iommufd_test.h
Normal file
93
drivers/iommu/iommufd/iommufd_test.h
Normal file
@ -0,0 +1,93 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
|
||||
*/
|
||||
#ifndef _UAPI_IOMMUFD_TEST_H
|
||||
#define _UAPI_IOMMUFD_TEST_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/iommufd.h>
|
||||
|
||||
enum {
|
||||
IOMMU_TEST_OP_ADD_RESERVED = 1,
|
||||
IOMMU_TEST_OP_MOCK_DOMAIN,
|
||||
IOMMU_TEST_OP_MD_CHECK_MAP,
|
||||
IOMMU_TEST_OP_MD_CHECK_REFS,
|
||||
IOMMU_TEST_OP_CREATE_ACCESS,
|
||||
IOMMU_TEST_OP_DESTROY_ACCESS_PAGES,
|
||||
IOMMU_TEST_OP_ACCESS_PAGES,
|
||||
IOMMU_TEST_OP_ACCESS_RW,
|
||||
IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT,
|
||||
};
|
||||
|
||||
enum {
|
||||
MOCK_APERTURE_START = 1UL << 24,
|
||||
MOCK_APERTURE_LAST = (1UL << 31) - 1,
|
||||
};
|
||||
|
||||
enum {
|
||||
MOCK_FLAGS_ACCESS_WRITE = 1 << 0,
|
||||
MOCK_FLAGS_ACCESS_SYZ = 1 << 16,
|
||||
};
|
||||
|
||||
enum {
|
||||
MOCK_ACCESS_RW_WRITE = 1 << 0,
|
||||
MOCK_ACCESS_RW_SLOW_PATH = 1 << 2,
|
||||
};
|
||||
|
||||
enum {
|
||||
MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES = 1 << 0,
|
||||
};
|
||||
|
||||
struct iommu_test_cmd {
|
||||
__u32 size;
|
||||
__u32 op;
|
||||
__u32 id;
|
||||
__u32 __reserved;
|
||||
union {
|
||||
struct {
|
||||
__aligned_u64 start;
|
||||
__aligned_u64 length;
|
||||
} add_reserved;
|
||||
struct {
|
||||
__u32 out_device_id;
|
||||
__u32 out_hwpt_id;
|
||||
} mock_domain;
|
||||
struct {
|
||||
__aligned_u64 iova;
|
||||
__aligned_u64 length;
|
||||
__aligned_u64 uptr;
|
||||
} check_map;
|
||||
struct {
|
||||
__aligned_u64 length;
|
||||
__aligned_u64 uptr;
|
||||
__u32 refs;
|
||||
} check_refs;
|
||||
struct {
|
||||
__u32 out_access_fd;
|
||||
__u32 flags;
|
||||
} create_access;
|
||||
struct {
|
||||
__u32 access_pages_id;
|
||||
} destroy_access_pages;
|
||||
struct {
|
||||
__u32 flags;
|
||||
__u32 out_access_pages_id;
|
||||
__aligned_u64 iova;
|
||||
__aligned_u64 length;
|
||||
__aligned_u64 uptr;
|
||||
} access_pages;
|
||||
struct {
|
||||
__aligned_u64 iova;
|
||||
__aligned_u64 length;
|
||||
__aligned_u64 uptr;
|
||||
__u32 flags;
|
||||
} access_rw;
|
||||
struct {
|
||||
__u32 limit;
|
||||
} memory_limit;
|
||||
};
|
||||
__u32 last;
|
||||
};
|
||||
#define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32)
|
||||
|
||||
#endif
|
460
drivers/iommu/iommufd/main.c
Normal file
460
drivers/iommu/iommufd/main.c
Normal file
@ -0,0 +1,460 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
* iommufd provides control over the IOMMU HW objects created by IOMMU kernel
|
||||
* drivers. IOMMU HW objects revolve around IO page tables that map incoming DMA
|
||||
* addresses (IOVA) to CPU addresses.
|
||||
*/
|
||||
#define pr_fmt(fmt) "iommufd: " fmt
|
||||
|
||||
#include <linux/file.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/bug.h>
|
||||
#include <uapi/linux/iommufd.h>
|
||||
#include <linux/iommufd.h>
|
||||
|
||||
#include "io_pagetable.h"
|
||||
#include "iommufd_private.h"
|
||||
#include "iommufd_test.h"
|
||||
|
||||
struct iommufd_object_ops {
|
||||
void (*destroy)(struct iommufd_object *obj);
|
||||
};
|
||||
static const struct iommufd_object_ops iommufd_object_ops[];
|
||||
static struct miscdevice vfio_misc_dev;
|
||||
|
||||
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
|
||||
size_t size,
|
||||
enum iommufd_object_type type)
|
||||
{
|
||||
struct iommufd_object *obj;
|
||||
int rc;
|
||||
|
||||
obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
|
||||
if (!obj)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
obj->type = type;
|
||||
init_rwsem(&obj->destroy_rwsem);
|
||||
refcount_set(&obj->users, 1);
|
||||
|
||||
/*
|
||||
* Reserve an ID in the xarray but do not publish the pointer yet since
|
||||
* the caller hasn't initialized it yet. Once the pointer is published
|
||||
* in the xarray and visible to other threads we can't reliably destroy
|
||||
* it anymore, so the caller must complete all errorable operations
|
||||
* before calling iommufd_object_finalize().
|
||||
*/
|
||||
rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY,
|
||||
xa_limit_32b, GFP_KERNEL_ACCOUNT);
|
||||
if (rc)
|
||||
goto out_free;
|
||||
return obj;
|
||||
out_free:
|
||||
kfree(obj);
|
||||
return ERR_PTR(rc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allow concurrent access to the object.
|
||||
*
|
||||
* Once another thread can see the object pointer it can prevent object
|
||||
* destruction. Except for special kernel-only objects there is no in-kernel way
|
||||
* to reliably destroy a single object. Thus all APIs that are creating objects
|
||||
* must use iommufd_object_abort() to handle their errors and only call
|
||||
* iommufd_object_finalize() once object creation cannot fail.
|
||||
*/
|
||||
void iommufd_object_finalize(struct iommufd_ctx *ictx,
|
||||
struct iommufd_object *obj)
|
||||
{
|
||||
void *old;
|
||||
|
||||
old = xa_store(&ictx->objects, obj->id, obj, GFP_KERNEL);
|
||||
/* obj->id was returned from xa_alloc() so the xa_store() cannot fail */
|
||||
WARN_ON(old);
|
||||
}
|
||||
|
||||
/* Undo _iommufd_object_alloc() if iommufd_object_finalize() was not called */
|
||||
void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj)
|
||||
{
|
||||
void *old;
|
||||
|
||||
old = xa_erase(&ictx->objects, obj->id);
|
||||
WARN_ON(old);
|
||||
kfree(obj);
|
||||
}
|
||||
|
||||
/*
|
||||
* Abort an object that has been fully initialized and needs destroy, but has
|
||||
* not been finalized.
|
||||
*/
|
||||
void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
|
||||
struct iommufd_object *obj)
|
||||
{
|
||||
iommufd_object_ops[obj->type].destroy(obj);
|
||||
iommufd_object_abort(ictx, obj);
|
||||
}
|
||||
|
||||
/*
 * Look up @id in the context and lock the object with iommufd_lock_obj().
 *
 * @type restricts the lookup to one object type unless it is IOMMUFD_OBJ_ANY.
 * Returns the locked object, or ERR_PTR(-ENOENT) if the ID is not present,
 * the type does not match, or the object could not be locked.
 */
struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
					  enum iommufd_object_type type)
{
	struct iommufd_object *obj;
	bool type_ok;

	if (iommufd_should_fail())
		return ERR_PTR(-ENOENT);

	xa_lock(&ictx->objects);
	obj = xa_load(&ictx->objects, id);
	type_ok = obj && (type == IOMMUFD_OBJ_ANY || obj->type == type);
	/* Only attempt the lock once the entry is known to be acceptable. */
	if (!type_ok || !iommufd_lock_obj(obj))
		obj = ERR_PTR(-ENOENT);
	xa_unlock(&ictx->objects);

	return obj;
}
|
||||
|
||||
/*
|
||||
* The caller holds a users refcount and wants to destroy the object. Returns
|
||||
* true if the object was destroyed. In all cases the caller no longer has a
|
||||
* reference on obj.
|
||||
*/
|
||||
bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
|
||||
struct iommufd_object *obj)
|
||||
{
|
||||
/*
|
||||
* The purpose of the destroy_rwsem is to ensure deterministic
|
||||
* destruction of objects used by external drivers and destroyed by this
|
||||
* function. Any temporary increment of the refcount must hold the read
|
||||
* side of this, such as during ioctl execution.
|
||||
*/
|
||||
down_write(&obj->destroy_rwsem);
|
||||
xa_lock(&ictx->objects);
|
||||
refcount_dec(&obj->users);
|
||||
if (!refcount_dec_if_one(&obj->users)) {
|
||||
xa_unlock(&ictx->objects);
|
||||
up_write(&obj->destroy_rwsem);
|
||||
return false;
|
||||
}
|
||||
__xa_erase(&ictx->objects, obj->id);
|
||||
if (ictx->vfio_ioas && &ictx->vfio_ioas->obj == obj)
|
||||
ictx->vfio_ioas = NULL;
|
||||
xa_unlock(&ictx->objects);
|
||||
up_write(&obj->destroy_rwsem);
|
||||
|
||||
iommufd_object_ops[obj->type].destroy(obj);
|
||||
kfree(obj);
|
||||
return true;
|
||||
}
|
||||
|
||||
static int iommufd_destroy(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_destroy *cmd = ucmd->cmd;
|
||||
struct iommufd_object *obj;
|
||||
|
||||
obj = iommufd_get_object(ucmd->ictx, cmd->id, IOMMUFD_OBJ_ANY);
|
||||
if (IS_ERR(obj))
|
||||
return PTR_ERR(obj);
|
||||
iommufd_ref_to_users(obj);
|
||||
/* See iommufd_ref_to_users() */
|
||||
if (!iommufd_object_destroy_user(ucmd->ictx, obj))
|
||||
return -EBUSY;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iommufd_fops_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct iommufd_ctx *ictx;
|
||||
|
||||
ictx = kzalloc(sizeof(*ictx), GFP_KERNEL_ACCOUNT);
|
||||
if (!ictx)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* For compatibility with VFIO when /dev/vfio/vfio is opened we default
|
||||
* to the same rlimit accounting as vfio uses.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) &&
|
||||
filp->private_data == &vfio_misc_dev) {
|
||||
ictx->account_mode = IOPT_PAGES_ACCOUNT_MM;
|
||||
pr_info_once("IOMMUFD is providing /dev/vfio/vfio, not VFIO.\n");
|
||||
}
|
||||
|
||||
xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
|
||||
ictx->file = filp;
|
||||
filp->private_data = ictx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iommufd_fops_release(struct inode *inode, struct file *filp)
|
||||
{
|
||||
struct iommufd_ctx *ictx = filp->private_data;
|
||||
struct iommufd_object *obj;
|
||||
|
||||
/*
|
||||
* The objects in the xarray form a graph of "users" counts, and we have
|
||||
* to destroy them in a depth first manner. Leaf objects will reduce the
|
||||
* users count of interior objects when they are destroyed.
|
||||
*
|
||||
* Repeatedly destroying all the "1 users" leaf objects will progress
|
||||
* until the entire list is destroyed. If this can't progress then there
|
||||
* is some bug related to object refcounting.
|
||||
*/
|
||||
while (!xa_empty(&ictx->objects)) {
|
||||
unsigned int destroyed = 0;
|
||||
unsigned long index;
|
||||
|
||||
xa_for_each(&ictx->objects, index, obj) {
|
||||
if (!refcount_dec_if_one(&obj->users))
|
||||
continue;
|
||||
destroyed++;
|
||||
xa_erase(&ictx->objects, index);
|
||||
iommufd_object_ops[obj->type].destroy(obj);
|
||||
kfree(obj);
|
||||
}
|
||||
/* Bug related to users refcount */
|
||||
if (WARN_ON(!destroyed))
|
||||
break;
|
||||
}
|
||||
kfree(ictx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iommufd_option(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_option *cmd = ucmd->cmd;
|
||||
int rc;
|
||||
|
||||
if (cmd->__reserved)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
switch (cmd->option_id) {
|
||||
case IOMMU_OPTION_RLIMIT_MODE:
|
||||
rc = iommufd_option_rlimit_mode(cmd, ucmd->ictx);
|
||||
break;
|
||||
case IOMMU_OPTION_HUGE_PAGES:
|
||||
rc = iommufd_ioas_option(ucmd);
|
||||
break;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
if (rc)
|
||||
return rc;
|
||||
if (copy_to_user(&((struct iommu_option __user *)ucmd->ubuffer)->val64,
|
||||
&cmd->val64, sizeof(cmd->val64)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
union ucmd_buffer {
|
||||
struct iommu_destroy destroy;
|
||||
struct iommu_ioas_alloc alloc;
|
||||
struct iommu_ioas_allow_iovas allow_iovas;
|
||||
struct iommu_ioas_iova_ranges iova_ranges;
|
||||
struct iommu_ioas_map map;
|
||||
struct iommu_ioas_unmap unmap;
|
||||
#ifdef CONFIG_IOMMUFD_TEST
|
||||
struct iommu_test_cmd test;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* One entry of the ioctl dispatch table, normally built with IOCTL_OP(). */
struct iommufd_ioctl_op {
|
||||
/* sizeof() the kernel's command struct; the copy-in destination size */
unsigned int size;
|
||||
/* Smallest user_size accepted: offsetofend() of the last required field */
unsigned int min_size;
|
||||
/* Full ioctl number; must equal the cmd value passed to the ioctl */
unsigned int ioctl_num;
|
||||
/* Handler invoked with the copied-in command */
int (*execute)(struct iommufd_ucmd *ucmd);
|
||||
};
|
||||
|
||||
#define IOCTL_OP(_ioctl, _fn, _struct, _last) \
|
||||
[_IOC_NR(_ioctl) - IOMMUFD_CMD_BASE] = { \
|
||||
.size = sizeof(_struct) + \
|
||||
BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) < \
|
||||
sizeof(_struct)), \
|
||||
.min_size = offsetofend(_struct, _last), \
|
||||
.ioctl_num = _ioctl, \
|
||||
.execute = _fn, \
|
||||
}
|
||||
static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
|
||||
IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
|
||||
IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
|
||||
struct iommu_ioas_alloc, out_ioas_id),
|
||||
IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
|
||||
struct iommu_ioas_allow_iovas, allowed_iovas),
|
||||
IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy,
|
||||
src_iova),
|
||||
IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges,
|
||||
struct iommu_ioas_iova_ranges, out_iova_alignment),
|
||||
IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map,
|
||||
iova),
|
||||
IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
|
||||
length),
|
||||
IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option,
|
||||
val64),
|
||||
IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
|
||||
__reserved),
|
||||
#ifdef CONFIG_IOMMUFD_TEST
|
||||
IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
|
||||
#endif
|
||||
};
|
||||
|
||||
static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
struct iommufd_ctx *ictx = filp->private_data;
|
||||
const struct iommufd_ioctl_op *op;
|
||||
struct iommufd_ucmd ucmd = {};
|
||||
union ucmd_buffer buf;
|
||||
unsigned int nr;
|
||||
int ret;
|
||||
|
||||
nr = _IOC_NR(cmd);
|
||||
if (nr < IOMMUFD_CMD_BASE ||
|
||||
(nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops))
|
||||
return iommufd_vfio_ioctl(ictx, cmd, arg);
|
||||
|
||||
ucmd.ictx = ictx;
|
||||
ucmd.ubuffer = (void __user *)arg;
|
||||
ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
op = &iommufd_ioctl_ops[nr - IOMMUFD_CMD_BASE];
|
||||
if (op->ioctl_num != cmd)
|
||||
return -ENOIOCTLCMD;
|
||||
if (ucmd.user_size < op->min_size)
|
||||
return -EINVAL;
|
||||
|
||||
ucmd.cmd = &buf;
|
||||
ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
|
||||
ucmd.user_size);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = op->execute(&ucmd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct file_operations iommufd_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.open = iommufd_fops_open,
|
||||
.release = iommufd_fops_release,
|
||||
.unlocked_ioctl = iommufd_fops_ioctl,
|
||||
};
|
||||
|
||||
/**
|
||||
* iommufd_ctx_get - Get a context reference
|
||||
* @ictx: Context to get
|
||||
*
|
||||
* The caller must already hold a valid reference to ictx.
|
||||
*/
|
||||
void iommufd_ctx_get(struct iommufd_ctx *ictx)
|
||||
{
|
||||
get_file(ictx->file);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_get, IOMMUFD);
|
||||
|
||||
/**
|
||||
* iommufd_ctx_from_file - Acquires a reference to the iommufd context
|
||||
* @file: File to obtain the reference from
|
||||
*
|
||||
* Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. The struct file
|
||||
* remains owned by the caller and the caller must still do fput. On success
|
||||
* the caller is responsible to call iommufd_ctx_put().
|
||||
*/
|
||||
struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
|
||||
{
|
||||
struct iommufd_ctx *ictx;
|
||||
|
||||
if (file->f_op != &iommufd_fops)
|
||||
return ERR_PTR(-EBADFD);
|
||||
ictx = file->private_data;
|
||||
iommufd_ctx_get(ictx);
|
||||
return ictx;
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD);
|
||||
|
||||
/**
|
||||
* iommufd_ctx_put - Put back a reference
|
||||
* @ictx: Context to put back
|
||||
*/
|
||||
void iommufd_ctx_put(struct iommufd_ctx *ictx)
|
||||
{
|
||||
fput(ictx->file);
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD);
|
||||
|
||||
static const struct iommufd_object_ops iommufd_object_ops[] = {
|
||||
[IOMMUFD_OBJ_ACCESS] = {
|
||||
.destroy = iommufd_access_destroy_object,
|
||||
},
|
||||
[IOMMUFD_OBJ_DEVICE] = {
|
||||
.destroy = iommufd_device_destroy,
|
||||
},
|
||||
[IOMMUFD_OBJ_IOAS] = {
|
||||
.destroy = iommufd_ioas_destroy,
|
||||
},
|
||||
[IOMMUFD_OBJ_HW_PAGETABLE] = {
|
||||
.destroy = iommufd_hw_pagetable_destroy,
|
||||
},
|
||||
#ifdef CONFIG_IOMMUFD_TEST
|
||||
[IOMMUFD_OBJ_SELFTEST] = {
|
||||
.destroy = iommufd_selftest_destroy,
|
||||
},
|
||||
#endif
|
||||
};
|
||||
|
||||
static struct miscdevice iommu_misc_dev = {
|
||||
.minor = MISC_DYNAMIC_MINOR,
|
||||
.name = "iommu",
|
||||
.fops = &iommufd_fops,
|
||||
.nodename = "iommu",
|
||||
.mode = 0660,
|
||||
};
|
||||
|
||||
|
||||
static struct miscdevice vfio_misc_dev = {
|
||||
.minor = VFIO_MINOR,
|
||||
.name = "vfio",
|
||||
.fops = &iommufd_fops,
|
||||
.nodename = "vfio/vfio",
|
||||
.mode = 0666,
|
||||
};
|
||||
|
||||
/*
 * Module init: register the iommu misc device, optionally the VFIO compat
 * misc device, then set up the selftest support. Returns 0 or the error from
 * misc_register(); a failure of the second registration undoes the first.
 */
static int __init iommufd_init(void)
{
	int ret;

	ret = misc_register(&iommu_misc_dev);
	if (ret)
		return ret;

	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) {
		ret = misc_register(&vfio_misc_dev);
		if (ret) {
			misc_deregister(&iommu_misc_dev);
			return ret;
		}
	}

	iommufd_test_init();
	return 0;
}
|
||||
|
||||
/* Module exit: tear everything down in the reverse order of iommufd_init() */
static void __exit iommufd_exit(void)
{
	iommufd_test_exit();

	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER))
		misc_deregister(&vfio_misc_dev);

	misc_deregister(&iommu_misc_dev);
}
|
||||
|
||||
module_init(iommufd_init);
|
||||
module_exit(iommufd_exit);
|
||||
|
||||
#if IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)
|
||||
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
|
||||
MODULE_ALIAS("devname:vfio/vfio");
|
||||
#endif
|
||||
MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
|
||||
MODULE_LICENSE("GPL");
|
1977
drivers/iommu/iommufd/pages.c
Normal file
1977
drivers/iommu/iommufd/pages.c
Normal file
File diff suppressed because it is too large
Load Diff
853
drivers/iommu/iommufd/selftest.c
Normal file
853
drivers/iommu/iommufd/selftest.c
Normal file
@ -0,0 +1,853 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
|
||||
*
|
||||
* Kernel side components to support tools/testing/selftests/iommu
|
||||
*/
|
||||
#include <linux/slab.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/xarray.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/fault-inject.h>
|
||||
#include <uapi/linux/iommufd.h>
|
||||
|
||||
#include "io_pagetable.h"
|
||||
#include "iommufd_private.h"
|
||||
#include "iommufd_test.h"
|
||||
|
||||
static DECLARE_FAULT_ATTR(fail_iommufd);
|
||||
static struct dentry *dbgfs_root;
|
||||
|
||||
size_t iommufd_test_memory_limit = 65536;
|
||||
|
||||
enum {
|
||||
MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2,
|
||||
|
||||
/*
|
||||
* Like a real page table alignment requires the low bits of the address
|
||||
* to be zero. xarray also requires the high bit to be zero, so we store
|
||||
* the pfns shifted. The upper bits are used for metadata.
|
||||
*/
|
||||
MOCK_PFN_MASK = ULONG_MAX / MOCK_IO_PAGE_SIZE,
|
||||
|
||||
_MOCK_PFN_START = MOCK_PFN_MASK + 1,
|
||||
MOCK_PFN_START_IOVA = _MOCK_PFN_START,
|
||||
MOCK_PFN_LAST_IOVA = _MOCK_PFN_START,
|
||||
};
|
||||
|
||||
/*
|
||||
* Syzkaller has trouble randomizing the correct iova to use since it is linked
|
||||
* to the map ioctl's output, and it has no ide about that. So, simplify things.
|
||||
* In syzkaller mode the 64 bit IOVA is converted into an nth area and offset
|
||||
* value. This has a much smaller randomization space and syzkaller can hit it.
|
||||
*/
|
||||
static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
|
||||
u64 *iova)
|
||||
{
|
||||
struct syz_layout {
|
||||
__u32 nth_area;
|
||||
__u32 offset;
|
||||
};
|
||||
struct syz_layout *syz = (void *)iova;
|
||||
unsigned int nth = syz->nth_area;
|
||||
struct iopt_area *area;
|
||||
|
||||
down_read(&iopt->iova_rwsem);
|
||||
for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
|
||||
area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
|
||||
if (nth == 0) {
|
||||
up_read(&iopt->iova_rwsem);
|
||||
return iopt_area_iova(area) + syz->offset;
|
||||
}
|
||||
nth--;
|
||||
}
|
||||
up_read(&iopt->iova_rwsem);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * If MOCK_FLAGS_ACCESS_SYZ is set in @flags, clear it and rewrite @iova from
 * its syzkaller encoding into a real IOVA of the IOAS identified by @ioas_id.
 * Without the flag nothing is touched. If the IOAS cannot be looked up, @iova
 * is left as is (the flag has already been cleared at that point).
 */
void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
				   unsigned int ioas_id, u64 *iova, u32 *flags)
{
	struct iommufd_ioas *ioas;

	if ((*flags & MOCK_FLAGS_ACCESS_SYZ) == 0)
		return;
	*flags &= ~(u32)MOCK_FLAGS_ACCESS_SYZ;

	ioas = iommufd_get_ioas(ucmd, ioas_id);
	if (!IS_ERR(ioas)) {
		*iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova);
		iommufd_put_object(&ioas->obj);
	}
}
|
||||
|
||||
struct mock_iommu_domain {
|
||||
struct iommu_domain domain;
|
||||
struct xarray pfns;
|
||||
};
|
||||
|
||||
/* Discriminator for the union inside struct selftest_obj */
enum selftest_obj_type {
	TYPE_IDEV,
};
|
||||
|
||||
struct selftest_obj {
|
||||
struct iommufd_object obj;
|
||||
enum selftest_obj_type type;
|
||||
|
||||
union {
|
||||
struct {
|
||||
struct iommufd_hw_pagetable *hwpt;
|
||||
struct iommufd_ctx *ictx;
|
||||
struct device mock_dev;
|
||||
} idev;
|
||||
};
|
||||
};
|
||||
|
||||
static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type)
|
||||
{
|
||||
struct mock_iommu_domain *mock;
|
||||
|
||||
if (WARN_ON(iommu_domain_type != IOMMU_DOMAIN_UNMANAGED))
|
||||
return NULL;
|
||||
|
||||
mock = kzalloc(sizeof(*mock), GFP_KERNEL);
|
||||
if (!mock)
|
||||
return NULL;
|
||||
mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
|
||||
mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
|
||||
mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE;
|
||||
xa_init(&mock->pfns);
|
||||
return &mock->domain;
|
||||
}
|
||||
|
||||
static void mock_domain_free(struct iommu_domain *domain)
|
||||
{
|
||||
struct mock_iommu_domain *mock =
|
||||
container_of(domain, struct mock_iommu_domain, domain);
|
||||
|
||||
WARN_ON(!xa_empty(&mock->pfns));
|
||||
kfree(mock);
|
||||
}
|
||||
|
||||
static int mock_domain_map_pages(struct iommu_domain *domain,
|
||||
unsigned long iova, phys_addr_t paddr,
|
||||
size_t pgsize, size_t pgcount, int prot,
|
||||
gfp_t gfp, size_t *mapped)
|
||||
{
|
||||
struct mock_iommu_domain *mock =
|
||||
container_of(domain, struct mock_iommu_domain, domain);
|
||||
unsigned long flags = MOCK_PFN_START_IOVA;
|
||||
unsigned long start_iova = iova;
|
||||
|
||||
/*
|
||||
* xarray does not reliably work with fault injection because it does a
|
||||
* retry allocation, so put our own failure point.
|
||||
*/
|
||||
if (iommufd_should_fail())
|
||||
return -ENOENT;
|
||||
|
||||
WARN_ON(iova % MOCK_IO_PAGE_SIZE);
|
||||
WARN_ON(pgsize % MOCK_IO_PAGE_SIZE);
|
||||
for (; pgcount; pgcount--) {
|
||||
size_t cur;
|
||||
|
||||
for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) {
|
||||
void *old;
|
||||
|
||||
if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize)
|
||||
flags = MOCK_PFN_LAST_IOVA;
|
||||
old = xa_store(&mock->pfns, iova / MOCK_IO_PAGE_SIZE,
|
||||
xa_mk_value((paddr / MOCK_IO_PAGE_SIZE) |
|
||||
flags),
|
||||
gfp);
|
||||
if (xa_is_err(old)) {
|
||||
for (; start_iova != iova;
|
||||
start_iova += MOCK_IO_PAGE_SIZE)
|
||||
xa_erase(&mock->pfns,
|
||||
start_iova /
|
||||
MOCK_IO_PAGE_SIZE);
|
||||
return xa_err(old);
|
||||
}
|
||||
WARN_ON(old);
|
||||
iova += MOCK_IO_PAGE_SIZE;
|
||||
paddr += MOCK_IO_PAGE_SIZE;
|
||||
*mapped += MOCK_IO_PAGE_SIZE;
|
||||
flags = 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * unmap_pages op of the mock driver.
 *
 * Erases one xarray entry per MOCK_IO_PAGE_SIZE step of the
 * @pgcount * @pgsize range and warns whenever an entry is missing or the
 * boundaries do not line up with what map_pages recorded. Returns the number
 * of bytes walked.
 */
static size_t mock_domain_unmap_pages(struct iommu_domain *domain,
				      unsigned long iova, size_t pgsize,
				      size_t pgcount,
				      struct iommu_iotlb_gather *iotlb_gather)
{
	struct mock_iommu_domain *mock =
		container_of(domain, struct mock_iommu_domain, domain);
	size_t unmapped = 0;
	size_t off;
	void *ent;

	WARN_ON(iova % MOCK_IO_PAGE_SIZE);
	WARN_ON(pgsize % MOCK_IO_PAGE_SIZE);

	while (pgcount) {
		for (off = 0; off != pgsize; off += MOCK_IO_PAGE_SIZE) {
			ent = xa_erase(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
			WARN_ON(!ent);

			/*
			 * iommufd generates unmaps that must be a strict
			 * superset of the maps performed. So the first IOVA
			 * of an unmap must be present and must have been the
			 * first IOVA passed to a map_pages call.
			 */
			if (unmapped == 0)
				WARN_ON(!(xa_to_value(ent) &
					  MOCK_PFN_START_IOVA));

			/* Likewise the final IOVA must have ended a map */
			if (pgcount == 1 && off + MOCK_IO_PAGE_SIZE == pgsize)
				WARN_ON(!(xa_to_value(ent) &
					  MOCK_PFN_LAST_IOVA));

			iova += MOCK_IO_PAGE_SIZE;
			unmapped += MOCK_IO_PAGE_SIZE;
		}
		pgcount--;
	}
	return unmapped;
}
|
||||
|
||||
static phys_addr_t mock_domain_iova_to_phys(struct iommu_domain *domain,
|
||||
dma_addr_t iova)
|
||||
{
|
||||
struct mock_iommu_domain *mock =
|
||||
container_of(domain, struct mock_iommu_domain, domain);
|
||||
void *ent;
|
||||
|
||||
WARN_ON(iova % MOCK_IO_PAGE_SIZE);
|
||||
ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
|
||||
WARN_ON(!ent);
|
||||
return (xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE;
|
||||
}
|
||||
|
||||
static const struct iommu_ops mock_ops = {
|
||||
.owner = THIS_MODULE,
|
||||
.pgsize_bitmap = MOCK_IO_PAGE_SIZE,
|
||||
.domain_alloc = mock_domain_alloc,
|
||||
.default_domain_ops =
|
||||
&(struct iommu_domain_ops){
|
||||
.free = mock_domain_free,
|
||||
.map_pages = mock_domain_map_pages,
|
||||
.unmap_pages = mock_domain_unmap_pages,
|
||||
.iova_to_phys = mock_domain_iova_to_phys,
|
||||
},
|
||||
};
|
||||
|
||||
static inline struct iommufd_hw_pagetable *
|
||||
get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id,
|
||||
struct mock_iommu_domain **mock)
|
||||
{
|
||||
struct iommufd_hw_pagetable *hwpt;
|
||||
struct iommufd_object *obj;
|
||||
|
||||
obj = iommufd_get_object(ucmd->ictx, mockpt_id,
|
||||
IOMMUFD_OBJ_HW_PAGETABLE);
|
||||
if (IS_ERR(obj))
|
||||
return ERR_CAST(obj);
|
||||
hwpt = container_of(obj, struct iommufd_hw_pagetable, obj);
|
||||
if (hwpt->domain->ops != mock_ops.default_domain_ops) {
|
||||
iommufd_put_object(&hwpt->obj);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
*mock = container_of(hwpt->domain, struct mock_iommu_domain, domain);
|
||||
return hwpt;
|
||||
}
|
||||
|
||||
/* Create an hw_pagetable with the mock domain so we can test the domain ops */
|
||||
static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
|
||||
struct iommu_test_cmd *cmd)
|
||||
{
|
||||
static struct bus_type mock_bus = { .iommu_ops = &mock_ops };
|
||||
struct iommufd_hw_pagetable *hwpt;
|
||||
struct selftest_obj *sobj;
|
||||
struct iommufd_ioas *ioas;
|
||||
int rc;
|
||||
|
||||
ioas = iommufd_get_ioas(ucmd, cmd->id);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
sobj = iommufd_object_alloc(ucmd->ictx, sobj, IOMMUFD_OBJ_SELFTEST);
|
||||
if (IS_ERR(sobj)) {
|
||||
rc = PTR_ERR(sobj);
|
||||
goto out_ioas;
|
||||
}
|
||||
sobj->idev.ictx = ucmd->ictx;
|
||||
sobj->type = TYPE_IDEV;
|
||||
sobj->idev.mock_dev.bus = &mock_bus;
|
||||
|
||||
hwpt = iommufd_device_selftest_attach(ucmd->ictx, ioas,
|
||||
&sobj->idev.mock_dev);
|
||||
if (IS_ERR(hwpt)) {
|
||||
rc = PTR_ERR(hwpt);
|
||||
goto out_sobj;
|
||||
}
|
||||
sobj->idev.hwpt = hwpt;
|
||||
|
||||
/* Userspace must destroy both of these IDs to destroy the object */
|
||||
cmd->mock_domain.out_hwpt_id = hwpt->obj.id;
|
||||
cmd->mock_domain.out_device_id = sobj->obj.id;
|
||||
iommufd_object_finalize(ucmd->ictx, &sobj->obj);
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
|
||||
|
||||
out_sobj:
|
||||
iommufd_object_abort(ucmd->ictx, &sobj->obj);
|
||||
out_ioas:
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Add an additional reserved IOVA to the IOAS */
|
||||
static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd,
|
||||
unsigned int mockpt_id,
|
||||
unsigned long start, size_t length)
|
||||
{
|
||||
struct iommufd_ioas *ioas;
|
||||
int rc;
|
||||
|
||||
ioas = iommufd_get_ioas(ucmd, mockpt_id);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
down_write(&ioas->iopt.iova_rwsem);
|
||||
rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL);
|
||||
up_write(&ioas->iopt.iova_rwsem);
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Check that every pfn under each iova matches the pfn under a user VA */
|
||||
static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd,
|
||||
unsigned int mockpt_id, unsigned long iova,
|
||||
size_t length, void __user *uptr)
|
||||
{
|
||||
struct iommufd_hw_pagetable *hwpt;
|
||||
struct mock_iommu_domain *mock;
|
||||
int rc;
|
||||
|
||||
if (iova % MOCK_IO_PAGE_SIZE || length % MOCK_IO_PAGE_SIZE ||
|
||||
(uintptr_t)uptr % MOCK_IO_PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
hwpt = get_md_pagetable(ucmd, mockpt_id, &mock);
|
||||
if (IS_ERR(hwpt))
|
||||
return PTR_ERR(hwpt);
|
||||
|
||||
for (; length; length -= MOCK_IO_PAGE_SIZE) {
|
||||
struct page *pages[1];
|
||||
unsigned long pfn;
|
||||
long npages;
|
||||
void *ent;
|
||||
|
||||
npages = get_user_pages_fast((uintptr_t)uptr & PAGE_MASK, 1, 0,
|
||||
pages);
|
||||
if (npages < 0) {
|
||||
rc = npages;
|
||||
goto out_put;
|
||||
}
|
||||
if (WARN_ON(npages != 1)) {
|
||||
rc = -EFAULT;
|
||||
goto out_put;
|
||||
}
|
||||
pfn = page_to_pfn(pages[0]);
|
||||
put_page(pages[0]);
|
||||
|
||||
ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
|
||||
if (!ent ||
|
||||
(xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE !=
|
||||
pfn * PAGE_SIZE + ((uintptr_t)uptr % PAGE_SIZE)) {
|
||||
rc = -EINVAL;
|
||||
goto out_put;
|
||||
}
|
||||
iova += MOCK_IO_PAGE_SIZE;
|
||||
uptr += MOCK_IO_PAGE_SIZE;
|
||||
}
|
||||
rc = 0;
|
||||
|
||||
out_put:
|
||||
iommufd_put_object(&hwpt->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Check that the page ref count matches, to look for missing pin/unpins */
|
||||
static int iommufd_test_md_check_refs(struct iommufd_ucmd *ucmd,
|
||||
void __user *uptr, size_t length,
|
||||
unsigned int refs)
|
||||
{
|
||||
if (length % PAGE_SIZE || (uintptr_t)uptr % PAGE_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
for (; length; length -= PAGE_SIZE) {
|
||||
struct page *pages[1];
|
||||
long npages;
|
||||
|
||||
npages = get_user_pages_fast((uintptr_t)uptr, 1, 0, pages);
|
||||
if (npages < 0)
|
||||
return npages;
|
||||
if (WARN_ON(npages != 1))
|
||||
return -EFAULT;
|
||||
if (!PageCompound(pages[0])) {
|
||||
unsigned int count;
|
||||
|
||||
count = page_ref_count(pages[0]);
|
||||
if (count / GUP_PIN_COUNTING_BIAS != refs) {
|
||||
put_page(pages[0]);
|
||||
return -EIO;
|
||||
}
|
||||
}
|
||||
put_page(pages[0]);
|
||||
uptr += PAGE_SIZE;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct selftest_access {
|
||||
struct iommufd_access *access;
|
||||
struct file *file;
|
||||
struct mutex lock;
|
||||
struct list_head items;
|
||||
unsigned int next_id;
|
||||
bool destroying;
|
||||
};
|
||||
|
||||
struct selftest_access_item {
|
||||
struct list_head items_elm;
|
||||
unsigned long iova;
|
||||
size_t length;
|
||||
unsigned int id;
|
||||
};
|
||||
|
||||
static const struct file_operations iommfd_test_staccess_fops;
|
||||
|
||||
static struct selftest_access *iommufd_access_get(int fd)
|
||||
{
|
||||
struct file *file;
|
||||
|
||||
file = fget(fd);
|
||||
if (!file)
|
||||
return ERR_PTR(-EBADFD);
|
||||
|
||||
if (file->f_op != &iommfd_test_staccess_fops) {
|
||||
fput(file);
|
||||
return ERR_PTR(-EBADFD);
|
||||
}
|
||||
return file->private_data;
|
||||
}
|
||||
|
||||
static void iommufd_test_access_unmap(void *data, unsigned long iova,
|
||||
unsigned long length)
|
||||
{
|
||||
unsigned long iova_last = iova + length - 1;
|
||||
struct selftest_access *staccess = data;
|
||||
struct selftest_access_item *item;
|
||||
struct selftest_access_item *tmp;
|
||||
|
||||
mutex_lock(&staccess->lock);
|
||||
list_for_each_entry_safe(item, tmp, &staccess->items, items_elm) {
|
||||
if (iova > item->iova + item->length - 1 ||
|
||||
iova_last < item->iova)
|
||||
continue;
|
||||
list_del(&item->items_elm);
|
||||
iommufd_access_unpin_pages(staccess->access, item->iova,
|
||||
item->length);
|
||||
kfree(item);
|
||||
}
|
||||
mutex_unlock(&staccess->lock);
|
||||
}
|
||||
|
||||
static int iommufd_test_access_item_destroy(struct iommufd_ucmd *ucmd,
|
||||
unsigned int access_id,
|
||||
unsigned int item_id)
|
||||
{
|
||||
struct selftest_access_item *item;
|
||||
struct selftest_access *staccess;
|
||||
|
||||
staccess = iommufd_access_get(access_id);
|
||||
if (IS_ERR(staccess))
|
||||
return PTR_ERR(staccess);
|
||||
|
||||
mutex_lock(&staccess->lock);
|
||||
list_for_each_entry(item, &staccess->items, items_elm) {
|
||||
if (item->id == item_id) {
|
||||
list_del(&item->items_elm);
|
||||
iommufd_access_unpin_pages(staccess->access, item->iova,
|
||||
item->length);
|
||||
mutex_unlock(&staccess->lock);
|
||||
kfree(item);
|
||||
fput(staccess->file);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&staccess->lock);
|
||||
fput(staccess->file);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static int iommufd_test_staccess_release(struct inode *inode,
|
||||
struct file *filep)
|
||||
{
|
||||
struct selftest_access *staccess = filep->private_data;
|
||||
|
||||
if (staccess->access) {
|
||||
iommufd_test_access_unmap(staccess, 0, ULONG_MAX);
|
||||
iommufd_access_destroy(staccess->access);
|
||||
}
|
||||
mutex_destroy(&staccess->lock);
|
||||
kfree(staccess);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct iommufd_access_ops selftest_access_ops_pin = {
|
||||
.needs_pin_pages = 1,
|
||||
.unmap = iommufd_test_access_unmap,
|
||||
};
|
||||
|
||||
static const struct iommufd_access_ops selftest_access_ops = {
|
||||
.unmap = iommufd_test_access_unmap,
|
||||
};
|
||||
|
||||
static const struct file_operations iommfd_test_staccess_fops = {
|
||||
.release = iommufd_test_staccess_release,
|
||||
};
|
||||
|
||||
static struct selftest_access *iommufd_test_alloc_access(void)
|
||||
{
|
||||
struct selftest_access *staccess;
|
||||
struct file *filep;
|
||||
|
||||
staccess = kzalloc(sizeof(*staccess), GFP_KERNEL_ACCOUNT);
|
||||
if (!staccess)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
INIT_LIST_HEAD(&staccess->items);
|
||||
mutex_init(&staccess->lock);
|
||||
|
||||
filep = anon_inode_getfile("[iommufd_test_staccess]",
|
||||
&iommfd_test_staccess_fops, staccess,
|
||||
O_RDWR);
|
||||
if (IS_ERR(filep)) {
|
||||
kfree(staccess);
|
||||
return ERR_CAST(filep);
|
||||
}
|
||||
staccess->file = filep;
|
||||
return staccess;
|
||||
}
|
||||
|
||||
static int iommufd_test_create_access(struct iommufd_ucmd *ucmd,
|
||||
unsigned int ioas_id, unsigned int flags)
|
||||
{
|
||||
struct iommu_test_cmd *cmd = ucmd->cmd;
|
||||
struct selftest_access *staccess;
|
||||
struct iommufd_access *access;
|
||||
int fdno;
|
||||
int rc;
|
||||
|
||||
if (flags & ~MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
staccess = iommufd_test_alloc_access();
|
||||
if (IS_ERR(staccess))
|
||||
return PTR_ERR(staccess);
|
||||
|
||||
fdno = get_unused_fd_flags(O_CLOEXEC);
|
||||
if (fdno < 0) {
|
||||
rc = -ENOMEM;
|
||||
goto out_free_staccess;
|
||||
}
|
||||
|
||||
access = iommufd_access_create(
|
||||
ucmd->ictx, ioas_id,
|
||||
(flags & MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES) ?
|
||||
&selftest_access_ops_pin :
|
||||
&selftest_access_ops,
|
||||
staccess);
|
||||
if (IS_ERR(access)) {
|
||||
rc = PTR_ERR(access);
|
||||
goto out_put_fdno;
|
||||
}
|
||||
cmd->create_access.out_access_fd = fdno;
|
||||
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
|
||||
if (rc)
|
||||
goto out_destroy;
|
||||
|
||||
staccess->access = access;
|
||||
fd_install(fdno, staccess->file);
|
||||
return 0;
|
||||
|
||||
out_destroy:
|
||||
iommufd_access_destroy(access);
|
||||
out_put_fdno:
|
||||
put_unused_fd(fdno);
|
||||
out_free_staccess:
|
||||
fput(staccess->file);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Check that the pages in a page array match the pages in the user VA */
|
||||
static int iommufd_test_check_pages(void __user *uptr, struct page **pages,
|
||||
size_t npages)
|
||||
{
|
||||
for (; npages; npages--) {
|
||||
struct page *tmp_pages[1];
|
||||
long rc;
|
||||
|
||||
rc = get_user_pages_fast((uintptr_t)uptr, 1, 0, tmp_pages);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
if (WARN_ON(rc != 1))
|
||||
return -EFAULT;
|
||||
put_page(tmp_pages[0]);
|
||||
if (tmp_pages[0] != *pages)
|
||||
return -EBADE;
|
||||
pages++;
|
||||
uptr += PAGE_SIZE;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
|
||||
unsigned int access_id, unsigned long iova,
|
||||
size_t length, void __user *uptr,
|
||||
u32 flags)
|
||||
{
|
||||
struct iommu_test_cmd *cmd = ucmd->cmd;
|
||||
struct selftest_access_item *item;
|
||||
struct selftest_access *staccess;
|
||||
struct page **pages;
|
||||
size_t npages;
|
||||
int rc;
|
||||
|
||||
/* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */
|
||||
if (length > 16*1024*1024)
|
||||
return -ENOMEM;
|
||||
|
||||
if (flags & ~(MOCK_FLAGS_ACCESS_WRITE | MOCK_FLAGS_ACCESS_SYZ))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
staccess = iommufd_access_get(access_id);
|
||||
if (IS_ERR(staccess))
|
||||
return PTR_ERR(staccess);
|
||||
|
||||
if (staccess->access->ops != &selftest_access_ops_pin) {
|
||||
rc = -EOPNOTSUPP;
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
if (flags & MOCK_FLAGS_ACCESS_SYZ)
|
||||
iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
|
||||
&cmd->access_pages.iova);
|
||||
|
||||
npages = (ALIGN(iova + length, PAGE_SIZE) -
|
||||
ALIGN_DOWN(iova, PAGE_SIZE)) /
|
||||
PAGE_SIZE;
|
||||
pages = kvcalloc(npages, sizeof(*pages), GFP_KERNEL_ACCOUNT);
|
||||
if (!pages) {
|
||||
rc = -ENOMEM;
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drivers will need to think very carefully about this locking. The
|
||||
* core code can do multiple unmaps instantaneously after
|
||||
* iommufd_access_pin_pages() and *all* the unmaps must not return until
|
||||
* the range is unpinned. This simple implementation puts a global lock
|
||||
* around the pin, which may not suit drivers that want this to be a
|
||||
* performance path. drivers that get this wrong will trigger WARN_ON
|
||||
* races and cause EDEADLOCK failures to userspace.
|
||||
*/
|
||||
mutex_lock(&staccess->lock);
|
||||
rc = iommufd_access_pin_pages(staccess->access, iova, length, pages,
|
||||
flags & MOCK_FLAGS_ACCESS_WRITE);
|
||||
if (rc)
|
||||
goto out_unlock;
|
||||
|
||||
/* For syzkaller allow uptr to be NULL to skip this check */
|
||||
if (uptr) {
|
||||
rc = iommufd_test_check_pages(
|
||||
uptr - (iova - ALIGN_DOWN(iova, PAGE_SIZE)), pages,
|
||||
npages);
|
||||
if (rc)
|
||||
goto out_unaccess;
|
||||
}
|
||||
|
||||
item = kzalloc(sizeof(*item), GFP_KERNEL_ACCOUNT);
|
||||
if (!item) {
|
||||
rc = -ENOMEM;
|
||||
goto out_unaccess;
|
||||
}
|
||||
|
||||
item->iova = iova;
|
||||
item->length = length;
|
||||
item->id = staccess->next_id++;
|
||||
list_add_tail(&item->items_elm, &staccess->items);
|
||||
|
||||
cmd->access_pages.out_access_pages_id = item->id;
|
||||
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
|
||||
if (rc)
|
||||
goto out_free_item;
|
||||
goto out_unlock;
|
||||
|
||||
out_free_item:
|
||||
list_del(&item->items_elm);
|
||||
kfree(item);
|
||||
out_unaccess:
|
||||
iommufd_access_unpin_pages(staccess->access, iova, length);
|
||||
out_unlock:
|
||||
mutex_unlock(&staccess->lock);
|
||||
kvfree(pages);
|
||||
out_put:
|
||||
fput(staccess->file);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd,
|
||||
unsigned int access_id, unsigned long iova,
|
||||
size_t length, void __user *ubuf,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct iommu_test_cmd *cmd = ucmd->cmd;
|
||||
struct selftest_access *staccess;
|
||||
void *tmp;
|
||||
int rc;
|
||||
|
||||
/* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */
|
||||
if (length > 16*1024*1024)
|
||||
return -ENOMEM;
|
||||
|
||||
if (flags & ~(MOCK_ACCESS_RW_WRITE | MOCK_ACCESS_RW_SLOW_PATH |
|
||||
MOCK_FLAGS_ACCESS_SYZ))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
staccess = iommufd_access_get(access_id);
|
||||
if (IS_ERR(staccess))
|
||||
return PTR_ERR(staccess);
|
||||
|
||||
tmp = kvzalloc(length, GFP_KERNEL_ACCOUNT);
|
||||
if (!tmp) {
|
||||
rc = -ENOMEM;
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
if (flags & MOCK_ACCESS_RW_WRITE) {
|
||||
if (copy_from_user(tmp, ubuf, length)) {
|
||||
rc = -EFAULT;
|
||||
goto out_free;
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & MOCK_FLAGS_ACCESS_SYZ)
|
||||
iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
|
||||
&cmd->access_rw.iova);
|
||||
|
||||
rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags);
|
||||
if (rc)
|
||||
goto out_free;
|
||||
if (!(flags & MOCK_ACCESS_RW_WRITE)) {
|
||||
if (copy_to_user(ubuf, tmp, length)) {
|
||||
rc = -EFAULT;
|
||||
goto out_free;
|
||||
}
|
||||
}
|
||||
|
||||
out_free:
|
||||
kvfree(tmp);
|
||||
out_put:
|
||||
fput(staccess->file);
|
||||
return rc;
|
||||
}
|
||||
/*
 * iommufd_test_access_rw() passes the test flags straight to
 * iommufd_access_rw(), so the two flag encodings have to agree.
 */
static_assert((unsigned int)MOCK_ACCESS_RW_WRITE == IOMMUFD_ACCESS_RW_WRITE);
static_assert((unsigned int)MOCK_ACCESS_RW_SLOW_PATH ==
	      __IOMMUFD_ACCESS_RW_SLOW_PATH);
|
||||
|
||||
void iommufd_selftest_destroy(struct iommufd_object *obj)
|
||||
{
|
||||
struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj);
|
||||
|
||||
switch (sobj->type) {
|
||||
case TYPE_IDEV:
|
||||
iommufd_device_selftest_detach(sobj->idev.ictx,
|
||||
sobj->idev.hwpt);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int iommufd_test(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_test_cmd *cmd = ucmd->cmd;
|
||||
|
||||
switch (cmd->op) {
|
||||
case IOMMU_TEST_OP_ADD_RESERVED:
|
||||
return iommufd_test_add_reserved(ucmd, cmd->id,
|
||||
cmd->add_reserved.start,
|
||||
cmd->add_reserved.length);
|
||||
case IOMMU_TEST_OP_MOCK_DOMAIN:
|
||||
return iommufd_test_mock_domain(ucmd, cmd);
|
||||
case IOMMU_TEST_OP_MD_CHECK_MAP:
|
||||
return iommufd_test_md_check_pa(
|
||||
ucmd, cmd->id, cmd->check_map.iova,
|
||||
cmd->check_map.length,
|
||||
u64_to_user_ptr(cmd->check_map.uptr));
|
||||
case IOMMU_TEST_OP_MD_CHECK_REFS:
|
||||
return iommufd_test_md_check_refs(
|
||||
ucmd, u64_to_user_ptr(cmd->check_refs.uptr),
|
||||
cmd->check_refs.length, cmd->check_refs.refs);
|
||||
case IOMMU_TEST_OP_CREATE_ACCESS:
|
||||
return iommufd_test_create_access(ucmd, cmd->id,
|
||||
cmd->create_access.flags);
|
||||
case IOMMU_TEST_OP_ACCESS_PAGES:
|
||||
return iommufd_test_access_pages(
|
||||
ucmd, cmd->id, cmd->access_pages.iova,
|
||||
cmd->access_pages.length,
|
||||
u64_to_user_ptr(cmd->access_pages.uptr),
|
||||
cmd->access_pages.flags);
|
||||
case IOMMU_TEST_OP_ACCESS_RW:
|
||||
return iommufd_test_access_rw(
|
||||
ucmd, cmd->id, cmd->access_rw.iova,
|
||||
cmd->access_rw.length,
|
||||
u64_to_user_ptr(cmd->access_rw.uptr),
|
||||
cmd->access_rw.flags);
|
||||
case IOMMU_TEST_OP_DESTROY_ACCESS_PAGES:
|
||||
return iommufd_test_access_item_destroy(
|
||||
ucmd, cmd->id, cmd->destroy_access_pages.access_pages_id);
|
||||
case IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT:
|
||||
/* Protect _batch_init(), can not be less than elmsz */
|
||||
if (cmd->memory_limit.limit <
|
||||
sizeof(unsigned long) + sizeof(u32))
|
||||
return -EINVAL;
|
||||
iommufd_test_memory_limit = cmd->memory_limit.limit;
|
||||
return 0;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
bool iommufd_should_fail(void)
|
||||
{
|
||||
return should_fail(&fail_iommufd, 1);
|
||||
}
|
||||
|
||||
/* Expose the fail_iommufd fault attribute through debugfs */
void __init iommufd_test_init(void)
{
	dbgfs_root = fault_create_debugfs_attr("fail_iommufd", NULL,
					       &fail_iommufd);
}
|
||||
|
||||
void iommufd_test_exit(void)
|
||||
{
|
||||
debugfs_remove_recursive(dbgfs_root);
|
||||
}
|
472
drivers/iommu/iommufd/vfio_compat.c
Normal file
472
drivers/iommu/iommufd/vfio_compat.c
Normal file
@ -0,0 +1,472 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#include <linux/file.h>
|
||||
#include <linux/interval_tree.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/iommufd.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/vfio.h>
|
||||
#include <uapi/linux/vfio.h>
|
||||
#include <uapi/linux/iommufd.h>
|
||||
|
||||
#include "iommufd_private.h"
|
||||
|
||||
static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx)
|
||||
{
|
||||
struct iommufd_ioas *ioas = ERR_PTR(-ENODEV);
|
||||
|
||||
xa_lock(&ictx->objects);
|
||||
if (!ictx->vfio_ioas || !iommufd_lock_obj(&ictx->vfio_ioas->obj))
|
||||
goto out_unlock;
|
||||
ioas = ictx->vfio_ioas;
|
||||
out_unlock:
|
||||
xa_unlock(&ictx->objects);
|
||||
return ioas;
|
||||
}
|
||||
|
||||
/**
|
||||
* iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use
|
||||
* @ictx: Context to operate on
|
||||
* @out_ioas_id: The ioas_id the caller should use
|
||||
*
|
||||
* The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate
|
||||
* on since they do not have an IOAS ID input in their ABI. Only attaching a
|
||||
* group should cause a default creation of the internal ioas, this returns the
|
||||
* existing ioas if it has already been assigned somehow.
|
||||
*/
|
||||
int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
|
||||
{
|
||||
struct iommufd_ioas *ioas = NULL;
|
||||
struct iommufd_ioas *out_ioas;
|
||||
|
||||
ioas = iommufd_ioas_alloc(ictx);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
xa_lock(&ictx->objects);
|
||||
if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj))
|
||||
out_ioas = ictx->vfio_ioas;
|
||||
else {
|
||||
out_ioas = ioas;
|
||||
ictx->vfio_ioas = ioas;
|
||||
}
|
||||
xa_unlock(&ictx->objects);
|
||||
|
||||
*out_ioas_id = out_ioas->obj.id;
|
||||
if (out_ioas != ioas) {
|
||||
iommufd_put_object(&out_ioas->obj);
|
||||
iommufd_object_abort(ictx, &ioas->obj);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* An automatically created compat IOAS is treated as a userspace
|
||||
* created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET,
|
||||
* and if not manually destroyed it will be destroyed automatically
|
||||
* at iommufd release.
|
||||
*/
|
||||
iommufd_object_finalize(ictx, &ioas->obj);
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO);
|
||||
|
||||
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
|
||||
{
|
||||
struct iommu_vfio_ioas *cmd = ucmd->cmd;
|
||||
struct iommufd_ioas *ioas;
|
||||
|
||||
if (cmd->__reserved)
|
||||
return -EOPNOTSUPP;
|
||||
switch (cmd->op) {
|
||||
case IOMMU_VFIO_IOAS_GET:
|
||||
ioas = get_compat_ioas(ucmd->ictx);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
cmd->ioas_id = ioas->obj.id;
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
|
||||
|
||||
case IOMMU_VFIO_IOAS_SET:
|
||||
ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
xa_lock(&ucmd->ictx->objects);
|
||||
ucmd->ictx->vfio_ioas = ioas;
|
||||
xa_unlock(&ucmd->ictx->objects);
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return 0;
|
||||
|
||||
case IOMMU_VFIO_IOAS_CLEAR:
|
||||
xa_lock(&ucmd->ictx->objects);
|
||||
ucmd->ictx->vfio_ioas = NULL;
|
||||
xa_unlock(&ucmd->ictx->objects);
|
||||
return 0;
|
||||
default:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd,
|
||||
void __user *arg)
|
||||
{
|
||||
u32 supported_flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
|
||||
size_t minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
|
||||
struct vfio_iommu_type1_dma_map map;
|
||||
int iommu_prot = IOMMU_CACHE;
|
||||
struct iommufd_ioas *ioas;
|
||||
unsigned long iova;
|
||||
int rc;
|
||||
|
||||
if (copy_from_user(&map, arg, minsz))
|
||||
return -EFAULT;
|
||||
|
||||
if (map.argsz < minsz || map.flags & ~supported_flags)
|
||||
return -EINVAL;
|
||||
|
||||
if (map.flags & VFIO_DMA_MAP_FLAG_READ)
|
||||
iommu_prot |= IOMMU_READ;
|
||||
if (map.flags & VFIO_DMA_MAP_FLAG_WRITE)
|
||||
iommu_prot |= IOMMU_WRITE;
|
||||
|
||||
ioas = get_compat_ioas(ictx);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
/*
|
||||
* Maps created through the legacy interface always use VFIO compatible
|
||||
* rlimit accounting. If the user wishes to use the faster user based
|
||||
* rlimit accounting then they must use the new interface.
|
||||
*/
|
||||
iova = map.iova;
|
||||
rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr),
|
||||
map.size, iommu_prot, 0);
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd,
|
||||
void __user *arg)
|
||||
{
|
||||
size_t minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
|
||||
/*
|
||||
* VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is obsoleted by the new
|
||||
* dirty tracking direction:
|
||||
* https://lore.kernel.org/kvm/20220731125503.142683-1-yishaih@nvidia.com/
|
||||
* https://lore.kernel.org/kvm/20220428210933.3583-1-joao.m.martins@oracle.com/
|
||||
*/
|
||||
u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL;
|
||||
struct vfio_iommu_type1_dma_unmap unmap;
|
||||
unsigned long unmapped = 0;
|
||||
struct iommufd_ioas *ioas;
|
||||
int rc;
|
||||
|
||||
if (copy_from_user(&unmap, arg, minsz))
|
||||
return -EFAULT;
|
||||
|
||||
if (unmap.argsz < minsz || unmap.flags & ~supported_flags)
|
||||
return -EINVAL;
|
||||
|
||||
ioas = get_compat_ioas(ictx);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
if (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL) {
|
||||
if (unmap.iova != 0 || unmap.size != 0) {
|
||||
rc = -EINVAL;
|
||||
goto err_put;
|
||||
}
|
||||
rc = iopt_unmap_all(&ioas->iopt, &unmapped);
|
||||
} else {
|
||||
if (READ_ONCE(ioas->iopt.disable_large_pages)) {
|
||||
/*
|
||||
* Create cuts at the start and last of the requested
|
||||
* range. If the start IOVA is 0 then it doesn't need to
|
||||
* be cut.
|
||||
*/
|
||||
unsigned long iovas[] = { unmap.iova + unmap.size - 1,
|
||||
unmap.iova - 1 };
|
||||
|
||||
rc = iopt_cut_iova(&ioas->iopt, iovas,
|
||||
unmap.iova ? 2 : 1);
|
||||
if (rc)
|
||||
goto err_put;
|
||||
}
|
||||
rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size,
|
||||
&unmapped);
|
||||
}
|
||||
unmap.size = unmapped;
|
||||
if (copy_to_user(arg, &unmap, minsz))
|
||||
rc = -EFAULT;
|
||||
|
||||
err_put:
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx)
|
||||
{
|
||||
struct iommufd_hw_pagetable *hwpt;
|
||||
struct iommufd_ioas *ioas;
|
||||
int rc = 1;
|
||||
|
||||
ioas = get_compat_ioas(ictx);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
mutex_lock(&ioas->mutex);
|
||||
list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) {
|
||||
if (!hwpt->enforce_cache_coherency) {
|
||||
rc = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&ioas->mutex);
|
||||
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
|
||||
unsigned long type)
|
||||
{
|
||||
switch (type) {
|
||||
case VFIO_TYPE1_IOMMU:
|
||||
case VFIO_TYPE1v2_IOMMU:
|
||||
case VFIO_UNMAP_ALL:
|
||||
return 1;
|
||||
|
||||
case VFIO_DMA_CC_IOMMU:
|
||||
return iommufd_vfio_cc_iommu(ictx);
|
||||
|
||||
/*
|
||||
* This is obsolete, and to be removed from VFIO. It was an incomplete
|
||||
* idea that got merged.
|
||||
* https://lore.kernel.org/kvm/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/
|
||||
*/
|
||||
case VFIO_TYPE1_NESTING_IOMMU:
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* VFIO_DMA_MAP_FLAG_VADDR
|
||||
* https://lore.kernel.org/kvm/1611939252-7240-1-git-send-email-steven.sistare@oracle.com/
|
||||
* https://lore.kernel.org/all/Yz777bJZjTyLrHEQ@nvidia.com/
|
||||
*
|
||||
* It is hard to see how this could be implemented safely.
|
||||
*/
|
||||
case VFIO_UPDATE_VADDR:
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
|
||||
{
|
||||
struct iommufd_ioas *ioas = NULL;
|
||||
int rc = 0;
|
||||
|
||||
if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU)
|
||||
return -EINVAL;
|
||||
|
||||
/* VFIO fails the set_iommu if there is no group */
|
||||
ioas = get_compat_ioas(ictx);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
/*
|
||||
* The difference between TYPE1 and TYPE1v2 is the ability to unmap in
|
||||
* the middle of mapped ranges. This is complicated by huge page support
|
||||
* which creates single large IOPTEs that cannot be split by the iommu
|
||||
* driver. TYPE1 is very old at this point and likely nothing uses it,
|
||||
* however it is simple enough to emulate by simply disabling the
|
||||
* problematic large IOPTEs. Then we can safely unmap within any range.
|
||||
*/
|
||||
if (type == VFIO_TYPE1_IOMMU)
|
||||
rc = iopt_disable_large_pages(&ioas->iopt);
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static unsigned long iommufd_get_pagesizes(struct iommufd_ioas *ioas)
|
||||
{
|
||||
struct io_pagetable *iopt = &ioas->iopt;
|
||||
unsigned long pgsize_bitmap = ULONG_MAX;
|
||||
struct iommu_domain *domain;
|
||||
unsigned long index;
|
||||
|
||||
down_read(&iopt->domains_rwsem);
|
||||
xa_for_each(&iopt->domains, index, domain)
|
||||
pgsize_bitmap &= domain->pgsize_bitmap;
|
||||
|
||||
/* See vfio_update_pgsize_bitmap() */
|
||||
if (pgsize_bitmap & ~PAGE_MASK) {
|
||||
pgsize_bitmap &= PAGE_MASK;
|
||||
pgsize_bitmap |= PAGE_SIZE;
|
||||
}
|
||||
pgsize_bitmap = max(pgsize_bitmap, ioas->iopt.iova_alignment);
|
||||
up_read(&iopt->domains_rwsem);
|
||||
return pgsize_bitmap;
|
||||
}
|
||||
|
||||
static int iommufd_fill_cap_iova(struct iommufd_ioas *ioas,
|
||||
struct vfio_info_cap_header __user *cur,
|
||||
size_t avail)
|
||||
{
|
||||
struct vfio_iommu_type1_info_cap_iova_range __user *ucap_iovas =
|
||||
container_of(cur,
|
||||
struct vfio_iommu_type1_info_cap_iova_range __user,
|
||||
header);
|
||||
struct vfio_iommu_type1_info_cap_iova_range cap_iovas = {
|
||||
.header = {
|
||||
.id = VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE,
|
||||
.version = 1,
|
||||
},
|
||||
};
|
||||
struct interval_tree_span_iter span;
|
||||
|
||||
interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0,
|
||||
ULONG_MAX) {
|
||||
struct vfio_iova_range range;
|
||||
|
||||
if (!span.is_hole)
|
||||
continue;
|
||||
range.start = span.start_hole;
|
||||
range.end = span.last_hole;
|
||||
if (avail >= struct_size(&cap_iovas, iova_ranges,
|
||||
cap_iovas.nr_iovas + 1) &&
|
||||
copy_to_user(&ucap_iovas->iova_ranges[cap_iovas.nr_iovas],
|
||||
&range, sizeof(range)))
|
||||
return -EFAULT;
|
||||
cap_iovas.nr_iovas++;
|
||||
}
|
||||
if (avail >= struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas) &&
|
||||
copy_to_user(ucap_iovas, &cap_iovas, sizeof(cap_iovas)))
|
||||
return -EFAULT;
|
||||
return struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas);
|
||||
}
|
||||
|
||||
static int iommufd_fill_cap_dma_avail(struct iommufd_ioas *ioas,
|
||||
struct vfio_info_cap_header __user *cur,
|
||||
size_t avail)
|
||||
{
|
||||
struct vfio_iommu_type1_info_dma_avail cap_dma = {
|
||||
.header = {
|
||||
.id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL,
|
||||
.version = 1,
|
||||
},
|
||||
/*
|
||||
* iommufd's limit is based on the cgroup's memory limit.
|
||||
* Normally vfio would return U16_MAX here, and provide a module
|
||||
* parameter to adjust it. Since S390 qemu userspace actually
|
||||
* pays attention and needs a value bigger than U16_MAX return
|
||||
* U32_MAX.
|
||||
*/
|
||||
.avail = U32_MAX,
|
||||
};
|
||||
|
||||
if (avail >= sizeof(cap_dma) &&
|
||||
copy_to_user(cur, &cap_dma, sizeof(cap_dma)))
|
||||
return -EFAULT;
|
||||
return sizeof(cap_dma);
|
||||
}
|
||||
|
||||
static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
|
||||
void __user *arg)
|
||||
{
|
||||
typedef int (*fill_cap_fn)(struct iommufd_ioas *ioas,
|
||||
struct vfio_info_cap_header __user *cur,
|
||||
size_t avail);
|
||||
static const fill_cap_fn fill_fns[] = {
|
||||
iommufd_fill_cap_dma_avail,
|
||||
iommufd_fill_cap_iova,
|
||||
};
|
||||
size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
|
||||
struct vfio_info_cap_header __user *last_cap = NULL;
|
||||
struct vfio_iommu_type1_info info;
|
||||
struct iommufd_ioas *ioas;
|
||||
size_t total_cap_size;
|
||||
int rc;
|
||||
int i;
|
||||
|
||||
if (copy_from_user(&info, arg, minsz))
|
||||
return -EFAULT;
|
||||
|
||||
if (info.argsz < minsz)
|
||||
return -EINVAL;
|
||||
minsz = min_t(size_t, info.argsz, sizeof(info));
|
||||
|
||||
ioas = get_compat_ioas(ictx);
|
||||
if (IS_ERR(ioas))
|
||||
return PTR_ERR(ioas);
|
||||
|
||||
info.flags = VFIO_IOMMU_INFO_PGSIZES;
|
||||
info.iova_pgsizes = iommufd_get_pagesizes(ioas);
|
||||
info.cap_offset = 0;
|
||||
|
||||
down_read(&ioas->iopt.iova_rwsem);
|
||||
total_cap_size = sizeof(info);
|
||||
for (i = 0; i != ARRAY_SIZE(fill_fns); i++) {
|
||||
int cap_size;
|
||||
|
||||
if (info.argsz > total_cap_size)
|
||||
cap_size = fill_fns[i](ioas, arg + total_cap_size,
|
||||
info.argsz - total_cap_size);
|
||||
else
|
||||
cap_size = fill_fns[i](ioas, NULL, 0);
|
||||
if (cap_size < 0) {
|
||||
rc = cap_size;
|
||||
goto out_put;
|
||||
}
|
||||
if (last_cap && info.argsz >= total_cap_size &&
|
||||
put_user(total_cap_size, &last_cap->next)) {
|
||||
rc = -EFAULT;
|
||||
goto out_put;
|
||||
}
|
||||
last_cap = arg + total_cap_size;
|
||||
total_cap_size += cap_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the user did not provide enough space then only some caps are
|
||||
* returned and the argsz will be updated to the correct amount to get
|
||||
* all caps.
|
||||
*/
|
||||
if (info.argsz >= total_cap_size)
|
||||
info.cap_offset = sizeof(info);
|
||||
info.argsz = total_cap_size;
|
||||
info.flags |= VFIO_IOMMU_INFO_CAPS;
|
||||
if (copy_to_user(arg, &info, minsz)) {
|
||||
rc = -EFAULT;
|
||||
goto out_put;
|
||||
}
|
||||
rc = 0;
|
||||
|
||||
out_put:
|
||||
up_read(&ioas->iopt.iova_rwsem);
|
||||
iommufd_put_object(&ioas->obj);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
{
|
||||
void __user *uarg = (void __user *)arg;
|
||||
|
||||
switch (cmd) {
|
||||
case VFIO_GET_API_VERSION:
|
||||
return VFIO_API_VERSION;
|
||||
case VFIO_SET_IOMMU:
|
||||
return iommufd_vfio_set_iommu(ictx, arg);
|
||||
case VFIO_CHECK_EXTENSION:
|
||||
return iommufd_vfio_check_extension(ictx, arg);
|
||||
case VFIO_IOMMU_GET_INFO:
|
||||
return iommufd_vfio_iommu_get_info(ictx, uarg);
|
||||
case VFIO_IOMMU_MAP_DMA:
|
||||
return iommufd_vfio_map_dma(ictx, cmd, uarg);
|
||||
case VFIO_IOMMU_UNMAP_DMA:
|
||||
return iommufd_vfio_unmap_dma(ictx, cmd, uarg);
|
||||
case VFIO_IOMMU_DIRTY_PAGES:
|
||||
default:
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
||||
return -ENOIOCTLCMD;
|
||||
}
|
@ -628,8 +628,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
|
||||
* Something is wrong, we can't attach two devices using
|
||||
* different IOMMUs to the same domain.
|
||||
*/
|
||||
dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n",
|
||||
dev_name(mmu->dev), dev_name(domain->mmu->dev));
|
||||
ret = -EINVAL;
|
||||
} else
|
||||
dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id);
|
||||
|
@ -609,7 +609,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
|
||||
dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
|
||||
if (!dom->iop) {
|
||||
dev_err(data->dev, "Failed to alloc io pgtable\n");
|
||||
return -EINVAL;
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Update our support page sizes bitmap */
|
||||
@ -668,7 +668,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
|
||||
ret = mtk_iommu_domain_finalise(dom, frstdata, region_id);
|
||||
if (ret) {
|
||||
mutex_unlock(&dom->mutex);
|
||||
return -ENODEV;
|
||||
return ret;
|
||||
}
|
||||
dom->bank = &data->bank[bankid];
|
||||
}
|
||||
|
@ -1414,7 +1414,7 @@ static int omap_iommu_attach_init(struct device *dev,
|
||||
|
||||
odomain->num_iommus = omap_iommu_count(dev);
|
||||
if (!odomain->num_iommus)
|
||||
return -EINVAL;
|
||||
return -ENODEV;
|
||||
|
||||
odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu),
|
||||
GFP_ATOMIC);
|
||||
@ -1464,7 +1464,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
||||
|
||||
if (!arch_data || !arch_data->iommu_dev) {
|
||||
dev_err(dev, "device doesn't have an associated iommu\n");
|
||||
return -EINVAL;
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
spin_lock(&omap_domain->lock);
|
||||
@ -1472,7 +1472,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
||||
/* only a single client device can be attached to a domain */
|
||||
if (omap_domain->dev) {
|
||||
dev_err(dev, "iommu domain is already attached\n");
|
||||
ret = -EBUSY;
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -237,10 +237,8 @@ static int sprd_iommu_attach_device(struct iommu_domain *domain,
|
||||
struct sprd_iommu_domain *dom = to_sprd_domain(domain);
|
||||
size_t pgt_size = sprd_iommu_pgt_size(domain);
|
||||
|
||||
if (dom->sdev) {
|
||||
pr_err("There's already a device attached to this domain.\n");
|
||||
if (dom->sdev)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL);
|
||||
if (!dom->pgt_va)
|
||||
|
@ -112,7 +112,7 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain,
|
||||
spin_lock(&gart->dom_lock);
|
||||
|
||||
if (gart->active_domain && gart->active_domain != domain) {
|
||||
ret = -EBUSY;
|
||||
ret = -EINVAL;
|
||||
} else if (dev_iommu_priv_get(dev) != domain) {
|
||||
dev_iommu_priv_set(dev, domain);
|
||||
gart->active_domain = domain;
|
||||
|
@ -670,7 +670,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev,
|
||||
dev_err(vdev->dev,
|
||||
"granule 0x%lx larger than system page size 0x%lx\n",
|
||||
viommu_page_size, PAGE_SIZE);
|
||||
return -EINVAL;
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain,
|
||||
@ -697,7 +697,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev,
|
||||
if (ret) {
|
||||
ida_free(&viommu->domain_ids, vdomain->id);
|
||||
vdomain->viommu = NULL;
|
||||
return -EOPNOTSUPP;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
@ -734,8 +734,7 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
|
||||
*/
|
||||
ret = viommu_domain_finalise(vdev, domain);
|
||||
} else if (vdomain->viommu != vdev->viommu) {
|
||||
dev_err(dev, "cannot attach to foreign vIOMMU\n");
|
||||
ret = -EXDEV;
|
||||
ret = -EINVAL;
|
||||
}
|
||||
mutex_unlock(&vdomain->mutex);
|
||||
|
||||
|
@ -108,7 +108,7 @@ static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q)
|
||||
if (!(uacce->flags & UACCE_DEV_SVA))
|
||||
return 0;
|
||||
|
||||
handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL);
|
||||
handle = iommu_sva_bind_device(uacce->parent, current->mm);
|
||||
if (IS_ERR(handle))
|
||||
return PTR_ERR(handle);
|
||||
|
||||
|
@ -382,6 +382,9 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
|
||||
if (!pasid)
|
||||
return -EINVAL;
|
||||
|
||||
if (!pci_acs_path_enabled(pdev, NULL, PCI_ACS_RR | PCI_ACS_UF))
|
||||
return -EINVAL;
|
||||
|
||||
pci_read_config_word(pdev, pasid + PCI_PASID_CAP, &supported);
|
||||
supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
|
||||
|
||||
|
@ -588,6 +588,9 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
|
||||
.ioctl = vfio_ccw_mdev_ioctl,
|
||||
.request = vfio_ccw_mdev_request,
|
||||
.dma_unmap = vfio_ccw_dma_unmap,
|
||||
.bind_iommufd = vfio_iommufd_emulated_bind,
|
||||
.unbind_iommufd = vfio_iommufd_emulated_unbind,
|
||||
.attach_ioas = vfio_iommufd_emulated_attach_ioas,
|
||||
};
|
||||
|
||||
struct mdev_driver vfio_ccw_mdev_driver = {
|
||||
|
@ -1535,13 +1535,29 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Disable interrupts on every queue of @matrix_mdev whose saved_iova lies in
 * [@iova, @iova + @length).
 */
static void unmap_iova(struct ap_matrix_mdev *matrix_mdev, u64 iova, u64 length)
{
	struct ap_queue_table *qtable = &matrix_mdev->qtable;
	const u64 end = iova + length;
	struct vfio_ap_queue *queue;
	int bkt;

	hash_for_each(qtable->queues, bkt, queue, mdev_qnode) {
		if (queue->saved_iova < iova)
			continue;
		if (queue->saved_iova >= end)
			continue;
		vfio_ap_irq_disable(queue);
	}
}
|
||||
|
||||
static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
|
||||
u64 length)
|
||||
{
|
||||
struct ap_matrix_mdev *matrix_mdev =
|
||||
container_of(vdev, struct ap_matrix_mdev, vdev);
|
||||
|
||||
vfio_unpin_pages(&matrix_mdev->vdev, iova, 1);
|
||||
mutex_lock(&matrix_dev->mdevs_lock);
|
||||
|
||||
unmap_iova(matrix_mdev, iova, length);
|
||||
|
||||
mutex_unlock(&matrix_dev->mdevs_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1789,6 +1805,9 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
|
||||
.close_device = vfio_ap_mdev_close_device,
|
||||
.ioctl = vfio_ap_mdev_ioctl,
|
||||
.dma_unmap = vfio_ap_mdev_dma_unmap,
|
||||
.bind_iommufd = vfio_iommufd_emulated_bind,
|
||||
.unbind_iommufd = vfio_iommufd_emulated_unbind,
|
||||
.attach_ioas = vfio_iommufd_emulated_attach_ioas,
|
||||
};
|
||||
|
||||
static struct mdev_driver vfio_ap_matrix_driver = {
|
||||
|
@ -2,8 +2,9 @@
|
||||
menuconfig VFIO
|
||||
tristate "VFIO Non-Privileged userspace driver framework"
|
||||
select IOMMU_API
|
||||
select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
|
||||
depends on IOMMUFD || !IOMMUFD
|
||||
select INTERVAL_TREE
|
||||
select VFIO_CONTAINER if IOMMUFD=n
|
||||
help
|
||||
VFIO provides a framework for secure userspace device drivers.
|
||||
See Documentation/driver-api/vfio.rst for more details.
|
||||
@ -11,6 +12,18 @@ menuconfig VFIO
|
||||
If you don't know what to do here, say N.
|
||||
|
||||
if VFIO
|
||||
config VFIO_CONTAINER
|
||||
bool "Support for the VFIO container /dev/vfio/vfio"
|
||||
select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
|
||||
default y
|
||||
help
|
||||
The VFIO container is the classic interface to VFIO for establishing
|
||||
IOMMU mappings. If N is selected here then IOMMUFD must be used to
|
||||
manage the mappings.
|
||||
|
||||
Unless testing IOMMUFD say Y here.
|
||||
|
||||
if VFIO_CONTAINER
|
||||
config VFIO_IOMMU_TYPE1
|
||||
tristate
|
||||
default n
|
||||
@ -20,16 +33,6 @@ config VFIO_IOMMU_SPAPR_TCE
|
||||
depends on SPAPR_TCE_IOMMU
|
||||
default VFIO
|
||||
|
||||
config VFIO_SPAPR_EEH
|
||||
tristate
|
||||
depends on EEH && VFIO_IOMMU_SPAPR_TCE
|
||||
default VFIO
|
||||
|
||||
config VFIO_VIRQFD
|
||||
tristate
|
||||
select EVENTFD
|
||||
default n
|
||||
|
||||
config VFIO_NOIOMMU
|
||||
bool "VFIO No-IOMMU support"
|
||||
help
|
||||
@ -43,6 +46,17 @@ config VFIO_NOIOMMU
|
||||
this mode since there is no IOMMU to provide DMA translation.
|
||||
|
||||
If you don't know what to do here, say N.
|
||||
endif
|
||||
|
||||
config VFIO_SPAPR_EEH
|
||||
tristate
|
||||
depends on EEH && VFIO_IOMMU_SPAPR_TCE
|
||||
default VFIO
|
||||
|
||||
config VFIO_VIRQFD
|
||||
tristate
|
||||
select EVENTFD
|
||||
default n
|
||||
|
||||
source "drivers/vfio/pci/Kconfig"
|
||||
source "drivers/vfio/platform/Kconfig"
|
||||
|
@ -4,8 +4,10 @@ vfio_virqfd-y := virqfd.o
|
||||
obj-$(CONFIG_VFIO) += vfio.o
|
||||
|
||||
vfio-y += vfio_main.o \
|
||||
iova_bitmap.o \
|
||||
container.o
|
||||
group.o \
|
||||
iova_bitmap.o
|
||||
vfio-$(CONFIG_IOMMUFD) += iommufd.o
|
||||
vfio-$(CONFIG_VFIO_CONTAINER) += container.o
|
||||
|
||||
obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
|
||||
obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
|
||||
|
@ -188,8 +188,9 @@ void vfio_device_container_unregister(struct vfio_device *device)
|
||||
device->group->container->iommu_data, device);
|
||||
}
|
||||
|
||||
long vfio_container_ioctl_check_extension(struct vfio_container *container,
|
||||
unsigned long arg)
|
||||
static long
|
||||
vfio_container_ioctl_check_extension(struct vfio_container *container,
|
||||
unsigned long arg)
|
||||
{
|
||||
struct vfio_iommu_driver *driver;
|
||||
long ret = 0;
|
||||
@ -511,14 +512,15 @@ void vfio_group_detach_container(struct vfio_group *group)
|
||||
vfio_container_put(container);
|
||||
}
|
||||
|
||||
int vfio_device_assign_container(struct vfio_device *device)
|
||||
int vfio_group_use_container(struct vfio_group *group)
|
||||
{
|
||||
struct vfio_group *group = device->group;
|
||||
|
||||
lockdep_assert_held(&group->group_lock);
|
||||
|
||||
if (!group->container || !group->container->iommu_driver ||
|
||||
WARN_ON(!group->container_users))
|
||||
/*
|
||||
* The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
|
||||
* VFIO_SET_IOMMU hasn't been done yet.
|
||||
*/
|
||||
if (!group->container->iommu_driver)
|
||||
return -EINVAL;
|
||||
|
||||
if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
|
||||
@ -529,122 +531,56 @@ int vfio_device_assign_container(struct vfio_device *device)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vfio_device_unassign_container(struct vfio_device *device)
|
||||
void vfio_group_unuse_container(struct vfio_group *group)
|
||||
{
|
||||
mutex_lock(&device->group->group_lock);
|
||||
WARN_ON(device->group->container_users <= 1);
|
||||
device->group->container_users--;
|
||||
fput(device->group->opened_file);
|
||||
mutex_unlock(&device->group->group_lock);
|
||||
lockdep_assert_held(&group->group_lock);
|
||||
|
||||
WARN_ON(group->container_users <= 1);
|
||||
group->container_users--;
|
||||
fput(group->opened_file);
|
||||
}
|
||||
|
||||
/*
|
||||
* Pin contiguous user pages and return their associated host pages for local
|
||||
* domain only.
|
||||
* @device [in] : device
|
||||
* @iova [in] : starting IOVA of user pages to be pinned.
|
||||
* @npage [in] : count of pages to be pinned. This count should not
|
||||
* be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
|
||||
* @prot [in] : protection flags
|
||||
* @pages[out] : array of host pages
|
||||
* Return error or number of pages pinned.
|
||||
*
|
||||
* A driver may only call this function if the vfio_device was created
|
||||
* by vfio_register_emulated_iommu_dev().
|
||||
*/
|
||||
int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
|
||||
int npage, int prot, struct page **pages)
|
||||
int vfio_device_container_pin_pages(struct vfio_device *device,
|
||||
dma_addr_t iova, int npage,
|
||||
int prot, struct page **pages)
|
||||
{
|
||||
struct vfio_container *container;
|
||||
struct vfio_group *group = device->group;
|
||||
struct vfio_iommu_driver *driver;
|
||||
int ret;
|
||||
|
||||
if (!pages || !npage || !vfio_assert_device_open(device))
|
||||
return -EINVAL;
|
||||
struct vfio_container *container = device->group->container;
|
||||
struct iommu_group *iommu_group = device->group->iommu_group;
|
||||
struct vfio_iommu_driver *driver = container->iommu_driver;
|
||||
|
||||
if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
|
||||
return -E2BIG;
|
||||
|
||||
/* group->container cannot change while a vfio device is open */
|
||||
container = group->container;
|
||||
driver = container->iommu_driver;
|
||||
if (likely(driver && driver->ops->pin_pages))
|
||||
ret = driver->ops->pin_pages(container->iommu_data,
|
||||
group->iommu_group, iova,
|
||||
npage, prot, pages);
|
||||
else
|
||||
ret = -ENOTTY;
|
||||
|
||||
return ret;
|
||||
if (unlikely(!driver || !driver->ops->pin_pages))
|
||||
return -ENOTTY;
|
||||
return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
|
||||
npage, prot, pages);
|
||||
}
|
||||
EXPORT_SYMBOL(vfio_pin_pages);
|
||||
|
||||
/*
|
||||
* Unpin contiguous host pages for local domain only.
|
||||
* @device [in] : device
|
||||
* @iova [in] : starting address of user pages to be unpinned.
|
||||
* @npage [in] : count of pages to be unpinned. This count should not
|
||||
* be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
|
||||
*/
|
||||
void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
|
||||
void vfio_device_container_unpin_pages(struct vfio_device *device,
|
||||
dma_addr_t iova, int npage)
|
||||
{
|
||||
struct vfio_container *container;
|
||||
struct vfio_iommu_driver *driver;
|
||||
struct vfio_container *container = device->group->container;
|
||||
|
||||
if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
|
||||
return;
|
||||
|
||||
if (WARN_ON(!vfio_assert_device_open(device)))
|
||||
return;
|
||||
|
||||
/* group->container cannot change while a vfio device is open */
|
||||
container = device->group->container;
|
||||
driver = container->iommu_driver;
|
||||
|
||||
driver->ops->unpin_pages(container->iommu_data, iova, npage);
|
||||
container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
|
||||
npage);
|
||||
}
|
||||
EXPORT_SYMBOL(vfio_unpin_pages);
|
||||
|
||||
/*
|
||||
* This interface allows the CPUs to perform some sort of virtual DMA on
|
||||
* behalf of the device.
|
||||
*
|
||||
* CPUs read/write from/into a range of IOVAs pointing to user space memory
|
||||
* into/from a kernel buffer.
|
||||
*
|
||||
* As the read/write of user space memory is conducted via the CPUs and is
|
||||
* not a real device DMA, it is not necessary to pin the user space memory.
|
||||
*
|
||||
* @device [in] : VFIO device
|
||||
* @iova [in] : base IOVA of a user space buffer
|
||||
* @data [in] : pointer to kernel buffer
|
||||
* @len [in] : kernel buffer length
|
||||
* @write : indicate read or write
|
||||
* Return error code on failure or 0 on success.
|
||||
*/
|
||||
int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
|
||||
size_t len, bool write)
|
||||
int vfio_device_container_dma_rw(struct vfio_device *device,
|
||||
dma_addr_t iova, void *data,
|
||||
size_t len, bool write)
|
||||
{
|
||||
struct vfio_container *container;
|
||||
struct vfio_iommu_driver *driver;
|
||||
int ret = 0;
|
||||
struct vfio_container *container = device->group->container;
|
||||
struct vfio_iommu_driver *driver = container->iommu_driver;
|
||||
|
||||
if (!data || len <= 0 || !vfio_assert_device_open(device))
|
||||
return -EINVAL;
|
||||
|
||||
/* group->container cannot change while a vfio device is open */
|
||||
container = device->group->container;
|
||||
driver = container->iommu_driver;
|
||||
|
||||
if (likely(driver && driver->ops->dma_rw))
|
||||
ret = driver->ops->dma_rw(container->iommu_data,
|
||||
iova, data, len, write);
|
||||
else
|
||||
ret = -ENOTTY;
|
||||
return ret;
|
||||
if (unlikely(!driver || !driver->ops->dma_rw))
|
||||
return -ENOTTY;
|
||||
return driver->ops->dma_rw(container->iommu_data, iova, data, len,
|
||||
write);
|
||||
}
|
||||
EXPORT_SYMBOL(vfio_dma_rw);
|
||||
|
||||
int __init vfio_container_init(void)
|
||||
{
|
||||
@ -678,3 +614,6 @@ void vfio_container_cleanup(void)
|
||||
misc_deregister(&vfio_dev);
|
||||
mutex_destroy(&vfio.iommu_drivers_lock);
|
||||
}
|
||||
|
||||
MODULE_ALIAS_MISCDEV(VFIO_MINOR);
|
||||
MODULE_ALIAS("devname:vfio/vfio");
|
||||
|
@ -592,6 +592,9 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = {
|
||||
.read = vfio_fsl_mc_read,
|
||||
.write = vfio_fsl_mc_write,
|
||||
.mmap = vfio_fsl_mc_mmap,
|
||||
.bind_iommufd = vfio_iommufd_physical_bind,
|
||||
.unbind_iommufd = vfio_iommufd_physical_unbind,
|
||||
.attach_ioas = vfio_iommufd_physical_attach_ioas,
|
||||
};
|
||||
|
||||
static struct fsl_mc_driver vfio_fsl_mc_driver = {
|
||||
|
877
drivers/vfio/group.c
Normal file
877
drivers/vfio/group.c
Normal file
@ -0,0 +1,877 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* VFIO core
|
||||
*
|
||||
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
|
||||
* Author: Alex Williamson <alex.williamson@redhat.com>
|
||||
*
|
||||
* Derived from original vfio:
|
||||
* Copyright 2010 Cisco Systems, Inc. All rights reserved.
|
||||
* Author: Tom Lyon, pugs@cisco.com
|
||||
*/
|
||||
|
||||
#include <linux/vfio.h>
|
||||
#include <linux/iommufd.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include "vfio.h"
|
||||
|
||||
static struct vfio {
|
||||
struct class *class;
|
||||
struct list_head group_list;
|
||||
struct mutex group_lock; /* locks group_list */
|
||||
struct ida group_ida;
|
||||
dev_t group_devt;
|
||||
} vfio;
|
||||
|
||||
static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
|
||||
char *buf)
|
||||
{
|
||||
struct vfio_device *it, *device = ERR_PTR(-ENODEV);
|
||||
|
||||
mutex_lock(&group->device_lock);
|
||||
list_for_each_entry(it, &group->device_list, group_next) {
|
||||
int ret;
|
||||
|
||||
if (it->ops->match) {
|
||||
ret = it->ops->match(it, buf);
|
||||
if (ret < 0) {
|
||||
device = ERR_PTR(ret);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
ret = !strcmp(dev_name(it->dev), buf);
|
||||
}
|
||||
|
||||
if (ret && vfio_device_try_get_registration(it)) {
|
||||
device = it;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&group->device_lock);
|
||||
|
||||
return device;
|
||||
}
|
||||
|
||||
/*
|
||||
* VFIO Group fd, /dev/vfio/$GROUP
|
||||
*/
|
||||
static bool vfio_group_has_iommu(struct vfio_group *group)
|
||||
{
|
||||
lockdep_assert_held(&group->group_lock);
|
||||
/*
|
||||
* There can only be users if there is a container, and if there is a
|
||||
* container there must be users.
|
||||
*/
|
||||
WARN_ON(!group->container != !group->container_users);
|
||||
|
||||
return group->container || group->iommufd;
|
||||
}
|
||||
|
||||
/*
|
||||
* VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
|
||||
* if there was no container to unset. Since the ioctl is called on
|
||||
* the group, we know that still exists, therefore the only valid
|
||||
* transition here is 1->0.
|
||||
*/
|
||||
static int vfio_group_ioctl_unset_container(struct vfio_group *group)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&group->group_lock);
|
||||
if (!vfio_group_has_iommu(group)) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
if (group->container) {
|
||||
if (group->container_users != 1) {
|
||||
ret = -EBUSY;
|
||||
goto out_unlock;
|
||||
}
|
||||
vfio_group_detach_container(group);
|
||||
}
|
||||
if (group->iommufd) {
|
||||
iommufd_ctx_put(group->iommufd);
|
||||
group->iommufd = NULL;
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&group->group_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vfio_group_ioctl_set_container(struct vfio_group *group,
|
||||
int __user *arg)
|
||||
{
|
||||
struct vfio_container *container;
|
||||
struct iommufd_ctx *iommufd;
|
||||
struct fd f;
|
||||
int ret;
|
||||
int fd;
|
||||
|
||||
if (get_user(fd, arg))
|
||||
return -EFAULT;
|
||||
|
||||
f = fdget(fd);
|
||||
if (!f.file)
|
||||
return -EBADF;
|
||||
|
||||
mutex_lock(&group->group_lock);
|
||||
if (vfio_group_has_iommu(group)) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
if (!group->iommu_group) {
|
||||
ret = -ENODEV;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
container = vfio_container_from_file(f.file);
|
||||
if (container) {
|
||||
ret = vfio_container_attach_group(container, group);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
iommufd = iommufd_ctx_from_file(f.file);
|
||||
if (!IS_ERR(iommufd)) {
|
||||
u32 ioas_id;
|
||||
|
||||
ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
|
||||
if (ret) {
|
||||
iommufd_ctx_put(group->iommufd);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
group->iommufd = iommufd;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* The FD passed is not recognized. */
|
||||
ret = -EBADFD;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&group->group_lock);
|
||||
fdput(f);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vfio_device_group_open(struct vfio_device *device)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mutex_lock(&device->group->group_lock);
|
||||
if (!vfio_group_has_iommu(device->group)) {
|
||||
ret = -EINVAL;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Here we pass the KVM pointer with the group under the lock. If the
|
||||
* device driver will use it, it must obtain a reference and release it
|
||||
* during close_device.
|
||||
*/
|
||||
ret = vfio_device_open(device, device->group->iommufd,
|
||||
device->group->kvm);
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&device->group->group_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void vfio_device_group_close(struct vfio_device *device)
|
||||
{
|
||||
mutex_lock(&device->group->group_lock);
|
||||
vfio_device_close(device, device->group->iommufd);
|
||||
mutex_unlock(&device->group->group_lock);
|
||||
}
|
||||
|
||||
static struct file *vfio_device_open_file(struct vfio_device *device)
|
||||
{
|
||||
struct file *filep;
|
||||
int ret;
|
||||
|
||||
ret = vfio_device_group_open(device);
|
||||
if (ret)
|
||||
goto err_out;
|
||||
|
||||
/*
|
||||
* We can't use anon_inode_getfd() because we need to modify
|
||||
* the f_mode flags directly to allow more than just ioctls
|
||||
*/
|
||||
filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
|
||||
device, O_RDWR);
|
||||
if (IS_ERR(filep)) {
|
||||
ret = PTR_ERR(filep);
|
||||
goto err_close_device;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: add an anon_inode interface to do this.
|
||||
* Appears to be missing by lack of need rather than
|
||||
* explicitly prevented. Now there's need.
|
||||
*/
|
||||
filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
|
||||
|
||||
if (device->group->type == VFIO_NO_IOMMU)
|
||||
dev_warn(device->dev, "vfio-noiommu device opened by user "
|
||||
"(%s:%d)\n", current->comm, task_pid_nr(current));
|
||||
/*
|
||||
* On success the ref of device is moved to the file and
|
||||
* put in vfio_device_fops_release()
|
||||
*/
|
||||
return filep;
|
||||
|
||||
err_close_device:
|
||||
vfio_device_group_close(device);
|
||||
err_out:
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
|
||||
char __user *arg)
|
||||
{
|
||||
struct vfio_device *device;
|
||||
struct file *filep;
|
||||
char *buf;
|
||||
int fdno;
|
||||
int ret;
|
||||
|
||||
buf = strndup_user(arg, PAGE_SIZE);
|
||||
if (IS_ERR(buf))
|
||||
return PTR_ERR(buf);
|
||||
|
||||
device = vfio_device_get_from_name(group, buf);
|
||||
kfree(buf);
|
||||
if (IS_ERR(device))
|
||||
return PTR_ERR(device);
|
||||
|
||||
fdno = get_unused_fd_flags(O_CLOEXEC);
|
||||
if (fdno < 0) {
|
||||
ret = fdno;
|
||||
goto err_put_device;
|
||||
}
|
||||
|
||||
filep = vfio_device_open_file(device);
|
||||
if (IS_ERR(filep)) {
|
||||
ret = PTR_ERR(filep);
|
||||
goto err_put_fdno;
|
||||
}
|
||||
|
||||
fd_install(fdno, filep);
|
||||
return fdno;
|
||||
|
||||
err_put_fdno:
|
||||
put_unused_fd(fdno);
|
||||
err_put_device:
|
||||
vfio_device_put_registration(device);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vfio_group_ioctl_get_status(struct vfio_group *group,
|
||||
struct vfio_group_status __user *arg)
|
||||
{
|
||||
unsigned long minsz = offsetofend(struct vfio_group_status, flags);
|
||||
struct vfio_group_status status;
|
||||
|
||||
if (copy_from_user(&status, arg, minsz))
|
||||
return -EFAULT;
|
||||
|
||||
if (status.argsz < minsz)
|
||||
return -EINVAL;
|
||||
|
||||
status.flags = 0;
|
||||
|
||||
mutex_lock(&group->group_lock);
|
||||
if (!group->iommu_group) {
|
||||
mutex_unlock(&group->group_lock);
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/*
|
||||
* With the container FD the iommu_group_claim_dma_owner() is done
|
||||
* during SET_CONTAINER but for IOMMFD this is done during
|
||||
* VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
|
||||
* VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
|
||||
* to viability.
|
||||
*/
|
||||
if (vfio_group_has_iommu(group))
|
||||
status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
|
||||
VFIO_GROUP_FLAGS_VIABLE;
|
||||
else if (!iommu_group_dma_owner_claimed(group->iommu_group))
|
||||
status.flags |= VFIO_GROUP_FLAGS_VIABLE;
|
||||
mutex_unlock(&group->group_lock);
|
||||
|
||||
if (copy_to_user(arg, &status, minsz))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long vfio_group_fops_unl_ioctl(struct file *filep,
|
||||
unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct vfio_group *group = filep->private_data;
|
||||
void __user *uarg = (void __user *)arg;
|
||||
|
||||
switch (cmd) {
|
||||
case VFIO_GROUP_GET_DEVICE_FD:
|
||||
return vfio_group_ioctl_get_device_fd(group, uarg);
|
||||
case VFIO_GROUP_GET_STATUS:
|
||||
return vfio_group_ioctl_get_status(group, uarg);
|
||||
case VFIO_GROUP_SET_CONTAINER:
|
||||
return vfio_group_ioctl_set_container(group, uarg);
|
||||
case VFIO_GROUP_UNSET_CONTAINER:
|
||||
return vfio_group_ioctl_unset_container(group);
|
||||
default:
|
||||
return -ENOTTY;
|
||||
}
|
||||
}
|
||||
|
||||
static int vfio_group_fops_open(struct inode *inode, struct file *filep)
|
||||
{
|
||||
struct vfio_group *group =
|
||||
container_of(inode->i_cdev, struct vfio_group, cdev);
|
||||
int ret;
|
||||
|
||||
mutex_lock(&group->group_lock);
|
||||
|
||||
/*
|
||||
* drivers can be zero if this races with vfio_device_remove_group(), it
|
||||
* will be stable at 0 under the group rwsem
|
||||
*/
|
||||
if (refcount_read(&group->drivers) == 0) {
|
||||
ret = -ENODEV;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
|
||||
ret = -EPERM;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do we need multiple instances of the group open? Seems not.
|
||||
*/
|
||||
if (group->opened_file) {
|
||||
ret = -EBUSY;
|
||||
goto out_unlock;
|
||||
}
|
||||
group->opened_file = filep;
|
||||
filep->private_data = group;
|
||||
ret = 0;
|
||||
out_unlock:
|
||||
mutex_unlock(&group->group_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int vfio_group_fops_release(struct inode *inode, struct file *filep)
|
||||
{
|
||||
struct vfio_group *group = filep->private_data;
|
||||
|
||||
filep->private_data = NULL;
|
||||
|
||||
mutex_lock(&group->group_lock);
|
||||
/*
|
||||
* Device FDs hold a group file reference, therefore the group release
|
||||
* is only called when there are no open devices.
|
||||
*/
|
||||
WARN_ON(group->notifier.head);
|
||||
if (group->container)
|
||||
vfio_group_detach_container(group);
|
||||
if (group->iommufd) {
|
||||
iommufd_ctx_put(group->iommufd);
|
||||
group->iommufd = NULL;
|
||||
}
|
||||
group->opened_file = NULL;
|
||||
mutex_unlock(&group->group_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct file_operations vfio_group_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.unlocked_ioctl = vfio_group_fops_unl_ioctl,
|
||||
.compat_ioctl = compat_ptr_ioctl,
|
||||
.open = vfio_group_fops_open,
|
||||
.release = vfio_group_fops_release,
|
||||
};
|
||||
|
||||
/*
|
||||
* Group objects - create, release, get, put, search
|
||||
*/
|
||||
static struct vfio_group *
|
||||
vfio_group_find_from_iommu(struct iommu_group *iommu_group)
|
||||
{
|
||||
struct vfio_group *group;
|
||||
|
||||
lockdep_assert_held(&vfio.group_lock);
|
||||
|
||||
/*
|
||||
* group->iommu_group from the vfio.group_list cannot be NULL
|
||||
* under the vfio.group_lock.
|
||||
*/
|
||||
list_for_each_entry(group, &vfio.group_list, vfio_next) {
|
||||
if (group->iommu_group == iommu_group)
|
||||
return group;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void vfio_group_release(struct device *dev)
|
||||
{
|
||||
struct vfio_group *group = container_of(dev, struct vfio_group, dev);
|
||||
|
||||
mutex_destroy(&group->device_lock);
|
||||
mutex_destroy(&group->group_lock);
|
||||
WARN_ON(group->iommu_group);
|
||||
ida_free(&vfio.group_ida, MINOR(group->dev.devt));
|
||||
kfree(group);
|
||||
}
|
||||
|
||||
static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
|
||||
enum vfio_group_type type)
|
||||
{
|
||||
struct vfio_group *group;
|
||||
int minor;
|
||||
|
||||
group = kzalloc(sizeof(*group), GFP_KERNEL);
|
||||
if (!group)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
|
||||
if (minor < 0) {
|
||||
kfree(group);
|
||||
return ERR_PTR(minor);
|
||||
}
|
||||
|
||||
device_initialize(&group->dev);
|
||||
group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
|
||||
group->dev.class = vfio.class;
|
||||
group->dev.release = vfio_group_release;
|
||||
cdev_init(&group->cdev, &vfio_group_fops);
|
||||
group->cdev.owner = THIS_MODULE;
|
||||
|
||||
refcount_set(&group->drivers, 1);
|
||||
mutex_init(&group->group_lock);
|
||||
INIT_LIST_HEAD(&group->device_list);
|
||||
mutex_init(&group->device_lock);
|
||||
group->iommu_group = iommu_group;
|
||||
/* put in vfio_group_release() */
|
||||
iommu_group_ref_get(iommu_group);
|
||||
group->type = type;
|
||||
BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
|
||||
|
||||
return group;
|
||||
}
|
||||
|
||||
static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
|
||||
enum vfio_group_type type)
|
||||
{
|
||||
struct vfio_group *group;
|
||||
struct vfio_group *ret;
|
||||
int err;
|
||||
|
||||
lockdep_assert_held(&vfio.group_lock);
|
||||
|
||||
group = vfio_group_alloc(iommu_group, type);
|
||||
if (IS_ERR(group))
|
||||
return group;
|
||||
|
||||
err = dev_set_name(&group->dev, "%s%d",
|
||||
group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
|
||||
iommu_group_id(iommu_group));
|
||||
if (err) {
|
||||
ret = ERR_PTR(err);
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
err = cdev_device_add(&group->cdev, &group->dev);
|
||||
if (err) {
|
||||
ret = ERR_PTR(err);
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
list_add(&group->vfio_next, &vfio.group_list);
|
||||
|
||||
return group;
|
||||
|
||||
err_put:
|
||||
put_device(&group->dev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
|
||||
enum vfio_group_type type)
|
||||
{
|
||||
struct iommu_group *iommu_group;
|
||||
struct vfio_group *group;
|
||||
int ret;
|
||||
|
||||
iommu_group = iommu_group_alloc();
|
||||
if (IS_ERR(iommu_group))
|
||||
return ERR_CAST(iommu_group);
|
||||
|
||||
ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
|
||||
if (ret)
|
||||
goto out_put_group;
|
||||
ret = iommu_group_add_device(iommu_group, dev);
|
||||
if (ret)
|
||||
goto out_put_group;
|
||||
|
||||
mutex_lock(&vfio.group_lock);
|
||||
group = vfio_create_group(iommu_group, type);
|
||||
mutex_unlock(&vfio.group_lock);
|
||||
if (IS_ERR(group)) {
|
||||
ret = PTR_ERR(group);
|
||||
goto out_remove_device;
|
||||
}
|
||||
iommu_group_put(iommu_group);
|
||||
return group;
|
||||
|
||||
out_remove_device:
|
||||
iommu_group_remove_device(dev);
|
||||
out_put_group:
|
||||
iommu_group_put(iommu_group);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
|
||||
{
|
||||
struct vfio_device *device;
|
||||
|
||||
mutex_lock(&group->device_lock);
|
||||
list_for_each_entry(device, &group->device_list, group_next) {
|
||||
if (device->dev == dev) {
|
||||
mutex_unlock(&group->device_lock);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&group->device_lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
|
||||
{
|
||||
struct iommu_group *iommu_group;
|
||||
struct vfio_group *group;
|
||||
|
||||
iommu_group = iommu_group_get(dev);
|
||||
if (!iommu_group && vfio_noiommu) {
|
||||
/*
|
||||
* With noiommu enabled, create an IOMMU group for devices that
|
||||
* don't already have one, implying no IOMMU hardware/driver
|
||||
* exists. Taint the kernel because we're about to give a DMA
|
||||
* capable device to a user without IOMMU protection.
|
||||
*/
|
||||
group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
|
||||
if (!IS_ERR(group)) {
|
||||
add_taint(TAINT_USER, LOCKDEP_STILL_OK);
|
||||
dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
|
||||
}
|
||||
return group;
|
||||
}
|
||||
|
||||
if (!iommu_group)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
/*
|
||||
* VFIO always sets IOMMU_CACHE because we offer no way for userspace to
|
||||
* restore cache coherency. It has to be checked here because it is only
|
||||
* valid for cases where we are using iommu groups.
|
||||
*/
|
||||
if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
|
||||
iommu_group_put(iommu_group);
|
||||
return ERR_PTR(-EINVAL);
|
||||
}
|
||||
|
||||
mutex_lock(&vfio.group_lock);
|
||||
group = vfio_group_find_from_iommu(iommu_group);
|
||||
if (group) {
|
||||
if (WARN_ON(vfio_group_has_device(group, dev)))
|
||||
group = ERR_PTR(-EINVAL);
|
||||
else
|
||||
refcount_inc(&group->drivers);
|
||||
} else {
|
||||
group = vfio_create_group(iommu_group, VFIO_IOMMU);
|
||||
}
|
||||
mutex_unlock(&vfio.group_lock);
|
||||
|
||||
/* The vfio_group holds a reference to the iommu_group */
|
||||
iommu_group_put(iommu_group);
|
||||
return group;
|
||||
}
|
||||
|
||||
int vfio_device_set_group(struct vfio_device *device,
|
||||
enum vfio_group_type type)
|
||||
{
|
||||
struct vfio_group *group;
|
||||
|
||||
if (type == VFIO_IOMMU)
|
||||
group = vfio_group_find_or_alloc(device->dev);
|
||||
else
|
||||
group = vfio_noiommu_group_alloc(device->dev, type);
|
||||
|
||||
if (IS_ERR(group))
|
||||
return PTR_ERR(group);
|
||||
|
||||
/* Our reference on group is moved to the device */
|
||||
device->group = group;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vfio_device_remove_group(struct vfio_device *device)
|
||||
{
|
||||
struct vfio_group *group = device->group;
|
||||
struct iommu_group *iommu_group;
|
||||
|
||||
if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
|
||||
iommu_group_remove_device(device->dev);
|
||||
|
||||
/* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
|
||||
if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
|
||||
return;
|
||||
list_del(&group->vfio_next);
|
||||
|
||||
/*
|
||||
* We could concurrently probe another driver in the group that might
|
||||
* race vfio_device_remove_group() with vfio_get_group(), so we have to
|
||||
* ensure that the sysfs is all cleaned up under lock otherwise the
|
||||
* cdev_device_add() will fail due to the name aready existing.
|
||||
*/
|
||||
cdev_device_del(&group->cdev, &group->dev);
|
||||
|
||||
mutex_lock(&group->group_lock);
|
||||
/*
|
||||
* These data structures all have paired operations that can only be
|
||||
* undone when the caller holds a live reference on the device. Since
|
||||
* all pairs must be undone these WARN_ON's indicate some caller did not
|
||||
* properly hold the group reference.
|
||||
*/
|
||||
WARN_ON(!list_empty(&group->device_list));
|
||||
WARN_ON(group->notifier.head);
|
||||
|
||||
/*
|
||||
* Revoke all users of group->iommu_group. At this point we know there
|
||||
* are no devices active because we are unplugging the last one. Setting
|
||||
* iommu_group to NULL blocks all new users.
|
||||
*/
|
||||
if (group->container)
|
||||
vfio_group_detach_container(group);
|
||||
iommu_group = group->iommu_group;
|
||||
group->iommu_group = NULL;
|
||||
mutex_unlock(&group->group_lock);
|
||||
mutex_unlock(&vfio.group_lock);
|
||||
|
||||
iommu_group_put(iommu_group);
|
||||
put_device(&group->dev);
|
||||
}
|
||||
|
||||
void vfio_device_group_register(struct vfio_device *device)
|
||||
{
|
||||
mutex_lock(&device->group->device_lock);
|
||||
list_add(&device->group_next, &device->group->device_list);
|
||||
mutex_unlock(&device->group->device_lock);
|
||||
}
|
||||
|
||||
void vfio_device_group_unregister(struct vfio_device *device)
|
||||
{
|
||||
mutex_lock(&device->group->device_lock);
|
||||
list_del(&device->group_next);
|
||||
mutex_unlock(&device->group->device_lock);
|
||||
}
|
||||
|
||||
int vfio_device_group_use_iommu(struct vfio_device *device)
|
||||
{
|
||||
struct vfio_group *group = device->group;
|
||||
int ret = 0;
|
||||
|
||||
lockdep_assert_held(&group->group_lock);
|
||||
|
||||
if (WARN_ON(!group->container))
|
||||
return -EINVAL;
|
||||
|
||||
ret = vfio_group_use_container(group);
|
||||
if (ret)
|
||||
return ret;
|
||||
vfio_device_container_register(device);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void vfio_device_group_unuse_iommu(struct vfio_device *device)
|
||||
{
|
||||
struct vfio_group *group = device->group;
|
||||
|
||||
lockdep_assert_held(&group->group_lock);
|
||||
|
||||
if (WARN_ON(!group->container))
|
||||
return;
|
||||
|
||||
vfio_device_container_unregister(device);
|
||||
vfio_group_unuse_container(group);
|
||||
}
|
||||
|
||||
bool vfio_device_has_container(struct vfio_device *device)
|
||||
{
|
||||
return device->group->container;
|
||||
}
|
||||
|
||||
/**
|
||||
* vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
|
||||
* @file: VFIO group file
|
||||
*
|
||||
* The returned iommu_group is valid as long as a ref is held on the file. This
|
||||
* returns a reference on the group. This function is deprecated, only the SPAPR
|
||||
* path in kvm should call it.
|
||||
*/
|
||||
struct iommu_group *vfio_file_iommu_group(struct file *file)
|
||||
{
|
||||
struct vfio_group *group = file->private_data;
|
||||
struct iommu_group *iommu_group = NULL;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
|
||||
return NULL;
|
||||
|
||||
if (!vfio_file_is_group(file))
|
||||
return NULL;
|
||||
|
||||
mutex_lock(&group->group_lock);
|
||||
if (group->iommu_group) {
|
||||
iommu_group = group->iommu_group;
|
||||
iommu_group_ref_get(iommu_group);
|
||||
}
|
||||
mutex_unlock(&group->group_lock);
|
||||
return iommu_group;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
|
||||
|
||||
/**
|
||||
* vfio_file_is_group - True if the file is usable with VFIO aPIS
|
||||
* @file: VFIO group file
|
||||
*/
|
||||
bool vfio_file_is_group(struct file *file)
|
||||
{
|
||||
return file->f_op == &vfio_group_fops;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfio_file_is_group);
|
||||
|
||||
/**
|
||||
* vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
|
||||
* is always CPU cache coherent
|
||||
* @file: VFIO group file
|
||||
*
|
||||
* Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
|
||||
* bit in DMA transactions. A return of false indicates that the user has
|
||||
* rights to access additional instructions such as wbinvd on x86.
|
||||
*/
|
||||
bool vfio_file_enforced_coherent(struct file *file)
|
||||
{
|
||||
struct vfio_group *group = file->private_data;
|
||||
struct vfio_device *device;
|
||||
bool ret = true;
|
||||
|
||||
if (!vfio_file_is_group(file))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
|
||||
* any domain later attached to it will also not support it. If the cap
|
||||
* is set then the iommu_domain eventually attached to the device/group
|
||||
* must use a domain with enforce_cache_coherency().
|
||||
*/
|
||||
mutex_lock(&group->device_lock);
|
||||
list_for_each_entry(device, &group->device_list, group_next) {
|
||||
if (!device_iommu_capable(device->dev,
|
||||
IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&group->device_lock);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
|
||||
|
||||
/**
|
||||
* vfio_file_set_kvm - Link a kvm with VFIO drivers
|
||||
* @file: VFIO group file
|
||||
* @kvm: KVM to link
|
||||
*
|
||||
* When a VFIO device is first opened the KVM will be available in
|
||||
* device->kvm if one was associated with the group.
|
||||
*/
|
||||
void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
|
||||
{
|
||||
struct vfio_group *group = file->private_data;
|
||||
|
||||
if (!vfio_file_is_group(file))
|
||||
return;
|
||||
|
||||
mutex_lock(&group->group_lock);
|
||||
group->kvm = kvm;
|
||||
mutex_unlock(&group->group_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
|
||||
|
||||
/**
|
||||
* vfio_file_has_dev - True if the VFIO file is a handle for device
|
||||
* @file: VFIO file to check
|
||||
* @device: Device that must be part of the file
|
||||
*
|
||||
* Returns true if given file has permission to manipulate the given device.
|
||||
*/
|
||||
bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
|
||||
{
|
||||
struct vfio_group *group = file->private_data;
|
||||
|
||||
if (!vfio_file_is_group(file))
|
||||
return false;
|
||||
|
||||
return group == device->group;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfio_file_has_dev);
|
||||
|
||||
/*
 * Class devnode callback: build the node name "vfio/<device name>" in a
 * kasprintf()-allocated string. @mode is left untouched.
 */
static char *vfio_devnode(struct device *dev, umode_t *mode)
{
	const char *name = dev_name(dev);

	return kasprintf(GFP_KERNEL, "vfio/%s", name);
}
|
||||
|
||||
/*
 * Module-init helper for the group layer: initialises the global vfio state,
 * the container code, the "vfio" class and the group char device region.
 * Returns 0 or a negative errno, undoing earlier steps on failure.
 */
int __init vfio_group_init(void)
{
	int ret;

	ida_init(&vfio.group_ida);
	mutex_init(&vfio.group_lock);
	INIT_LIST_HEAD(&vfio.group_list);

	ret = vfio_container_init();
	if (ret)
		return ret;

	/* /dev/vfio/$GROUP */
	vfio.class = class_create(THIS_MODULE, "vfio");
	if (IS_ERR(vfio.class)) {
		ret = PTR_ERR(vfio.class);
		goto undo_container;
	}
	vfio.class->devnode = vfio_devnode;

	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
	if (!ret)
		return 0;

	class_destroy(vfio.class);
	vfio.class = NULL;
undo_container:
	vfio_container_cleanup();
	return ret;
}
|
||||
|
||||
void vfio_group_cleanup(void)
|
||||
{
|
||||
WARN_ON(!list_empty(&vfio.group_list));
|
||||
ida_destroy(&vfio.group_ida);
|
||||
unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
|
||||
class_destroy(vfio.class);
|
||||
vfio.class = NULL;
|
||||
vfio_container_cleanup();
|
||||
}
|
158
drivers/vfio/iommufd.c
Normal file
158
drivers/vfio/iommufd.c
Normal file
@ -0,0 +1,158 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#include <linux/vfio.h>
|
||||
#include <linux/iommufd.h>
|
||||
|
||||
#include "vfio.h"
|
||||
|
||||
MODULE_IMPORT_NS(IOMMUFD);
|
||||
MODULE_IMPORT_NS(IOMMUFD_VFIO);
|
||||
|
||||
int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
|
||||
{
|
||||
u32 ioas_id;
|
||||
u32 device_id;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&vdev->dev_set->lock);
|
||||
|
||||
/*
|
||||
* If the driver doesn't provide this op then it means the device does
|
||||
* not do DMA at all. So nothing to do.
|
||||
*/
|
||||
if (!vdev->ops->bind_iommufd)
|
||||
return 0;
|
||||
|
||||
ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
|
||||
if (ret)
|
||||
goto err_unbind;
|
||||
ret = vdev->ops->attach_ioas(vdev, &ioas_id);
|
||||
if (ret)
|
||||
goto err_unbind;
|
||||
|
||||
/*
|
||||
* The legacy path has no way to return the device id or the selected
|
||||
* pt_id
|
||||
*/
|
||||
return 0;
|
||||
|
||||
err_unbind:
|
||||
if (vdev->ops->unbind_iommufd)
|
||||
vdev->ops->unbind_iommufd(vdev);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void vfio_iommufd_unbind(struct vfio_device *vdev)
|
||||
{
|
||||
lockdep_assert_held(&vdev->dev_set->lock);
|
||||
|
||||
if (vdev->ops->unbind_iommufd)
|
||||
vdev->ops->unbind_iommufd(vdev);
|
||||
}
|
||||
|
||||
/*
|
||||
* The physical standard ops mean that the iommufd_device is bound to the
|
||||
* physical device vdev->dev that was provided to vfio_init_group_dev(). Drivers
|
||||
* using this ops set should call vfio_register_group_dev()
|
||||
*/
|
||||
/*
 * bind_iommufd op for physical devices: bind vdev->dev to @ictx and remember
 * the resulting iommufd_device in @vdev. @out_device_id is passed through to
 * iommufd_device_bind(). Returns 0 or the bind error.
 */
int vfio_iommufd_physical_bind(struct vfio_device *vdev,
			       struct iommufd_ctx *ictx, u32 *out_device_id)
{
	struct iommufd_device *bound;

	bound = iommufd_device_bind(ictx, vdev->dev, out_device_id);
	if (IS_ERR(bound))
		return PTR_ERR(bound);

	vdev->iommufd_device = bound;
	return 0;
}
|
||||
EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind);
|
||||
|
||||
void vfio_iommufd_physical_unbind(struct vfio_device *vdev)
|
||||
{
|
||||
lockdep_assert_held(&vdev->dev_set->lock);
|
||||
|
||||
if (vdev->iommufd_attached) {
|
||||
iommufd_device_detach(vdev->iommufd_device);
|
||||
vdev->iommufd_attached = false;
|
||||
}
|
||||
iommufd_device_unbind(vdev->iommufd_device);
|
||||
vdev->iommufd_device = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind);
|
||||
|
||||
/*
 * attach_ioas op for physical devices: attach the bound iommufd_device to
 * the page table identified by *@pt_id and record that it is attached.
 * Returns 0 or the attach error.
 */
int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
{
	int rc = iommufd_device_attach(vdev->iommufd_device, pt_id);

	if (!rc)
		vdev->iommufd_attached = true;

	return rc;
}
|
||||
EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas);
|
||||
|
||||
/*
|
||||
* The emulated standard ops mean that vfio_device is going to use the
|
||||
* "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this
|
||||
* ops set should call vfio_register_emulated_iommu_dev().
|
||||
*/
|
||||
|
||||
static void vfio_emulated_unmap(void *data, unsigned long iova,
|
||||
unsigned long length)
|
||||
{
|
||||
struct vfio_device *vdev = data;
|
||||
|
||||
vdev->ops->dma_unmap(vdev, iova, length);
|
||||
}
|
||||
|
||||
static const struct iommufd_access_ops vfio_user_ops = {
|
||||
.needs_pin_pages = 1,
|
||||
.unmap = vfio_emulated_unmap,
|
||||
};
|
||||
|
||||
/*
 * bind_iommufd op for emulated devices: take a reference on @ictx and store
 * it in @vdev. @out_device_id is not written. Caller must hold
 * vdev->dev_set->lock. Always returns 0.
 */
int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
			       struct iommufd_ctx *ictx, u32 *out_device_id)
{
	lockdep_assert_held(&vdev->dev_set->lock);

	iommufd_ctx_get(ictx);
	vdev->iommufd_ictx = ictx;

	return 0;
}
|
||||
EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_bind);
|
||||
|
||||
void vfio_iommufd_emulated_unbind(struct vfio_device *vdev)
|
||||
{
|
||||
lockdep_assert_held(&vdev->dev_set->lock);
|
||||
|
||||
if (vdev->iommufd_access) {
|
||||
iommufd_access_destroy(vdev->iommufd_access);
|
||||
vdev->iommufd_access = NULL;
|
||||
}
|
||||
iommufd_ctx_put(vdev->iommufd_ictx);
|
||||
vdev->iommufd_ictx = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_unbind);
|
||||
|
||||
/*
 * attach_ioas op for emulated devices: create an iommufd_access on the IOAS
 * identified by *@pt_id, using vfio_user_ops with @vdev as callback data, and
 * store it in @vdev. Caller must hold vdev->dev_set->lock. Returns 0 or the
 * error from iommufd_access_create().
 */
int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
{
	struct iommufd_access *access;

	lockdep_assert_held(&vdev->dev_set->lock);

	access = iommufd_access_create(vdev->iommufd_ictx, *pt_id,
				       &vfio_user_ops, vdev);
	if (IS_ERR(access))
		return PTR_ERR(access);

	vdev->iommufd_access = access;
	return 0;
}
|
||||
EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_attach_ioas);
|
@ -1246,6 +1246,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
|
||||
.mmap = hisi_acc_vfio_pci_mmap,
|
||||
.request = vfio_pci_core_request,
|
||||
.match = vfio_pci_core_match,
|
||||
.bind_iommufd = vfio_iommufd_physical_bind,
|
||||
.unbind_iommufd = vfio_iommufd_physical_unbind,
|
||||
.attach_ioas = vfio_iommufd_physical_attach_ioas,
|
||||
};
|
||||
|
||||
static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
|
||||
@ -1261,6 +1264,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
|
||||
.mmap = vfio_pci_core_mmap,
|
||||
.request = vfio_pci_core_request,
|
||||
.match = vfio_pci_core_match,
|
||||
.bind_iommufd = vfio_iommufd_physical_bind,
|
||||
.unbind_iommufd = vfio_iommufd_physical_unbind,
|
||||
.attach_ioas = vfio_iommufd_physical_attach_ioas,
|
||||
};
|
||||
|
||||
static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
|
@ -623,6 +623,9 @@ static const struct vfio_device_ops mlx5vf_pci_ops = {
|
||||
.mmap = vfio_pci_core_mmap,
|
||||
.request = vfio_pci_core_request,
|
||||
.match = vfio_pci_core_match,
|
||||
.bind_iommufd = vfio_iommufd_physical_bind,
|
||||
.unbind_iommufd = vfio_iommufd_physical_unbind,
|
||||
.attach_ioas = vfio_iommufd_physical_attach_ioas,
|
||||
};
|
||||
|
||||
static int mlx5vf_pci_probe(struct pci_dev *pdev,
|
||||
|
@ -138,6 +138,9 @@ static const struct vfio_device_ops vfio_pci_ops = {
|
||||
.mmap = vfio_pci_core_mmap,
|
||||
.request = vfio_pci_core_request,
|
||||
.match = vfio_pci_core_match,
|
||||
.bind_iommufd = vfio_iommufd_physical_bind,
|
||||
.unbind_iommufd = vfio_iommufd_physical_unbind,
|
||||
.attach_ioas = vfio_iommufd_physical_attach_ioas,
|
||||
};
|
||||
|
||||
static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
||||
|
@ -117,6 +117,9 @@ static const struct vfio_device_ops vfio_amba_ops = {
|
||||
.read = vfio_platform_read,
|
||||
.write = vfio_platform_write,
|
||||
.mmap = vfio_platform_mmap,
|
||||
.bind_iommufd = vfio_iommufd_physical_bind,
|
||||
.unbind_iommufd = vfio_iommufd_physical_unbind,
|
||||
.attach_ioas = vfio_iommufd_physical_attach_ioas,
|
||||
};
|
||||
|
||||
static const struct amba_id pl330_ids[] = {
|
||||
|
@ -106,6 +106,9 @@ static const struct vfio_device_ops vfio_platform_ops = {
|
||||
.read = vfio_platform_read,
|
||||
.write = vfio_platform_write,
|
||||
.mmap = vfio_platform_mmap,
|
||||
.bind_iommufd = vfio_iommufd_physical_bind,
|
||||
.unbind_iommufd = vfio_iommufd_physical_unbind,
|
||||
.attach_ioas = vfio_iommufd_physical_attach_ioas,
|
||||
};
|
||||
|
||||
static struct platform_driver vfio_platform_driver = {
|
||||
|
@ -6,14 +6,25 @@
|
||||
#ifndef __VFIO_VFIO_H__
|
||||
#define __VFIO_VFIO_H__
|
||||
|
||||
#include <linux/file.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
struct iommufd_ctx;
|
||||
struct iommu_group;
|
||||
struct vfio_device;
|
||||
struct vfio_container;
|
||||
|
||||
void vfio_device_put_registration(struct vfio_device *device);
|
||||
bool vfio_device_try_get_registration(struct vfio_device *device);
|
||||
int vfio_device_open(struct vfio_device *device,
|
||||
struct iommufd_ctx *iommufd, struct kvm *kvm);
|
||||
void vfio_device_close(struct vfio_device *device,
|
||||
struct iommufd_ctx *iommufd);
|
||||
|
||||
extern const struct file_operations vfio_device_fops;
|
||||
|
||||
enum vfio_group_type {
|
||||
/*
|
||||
* Physical device with IOMMU backing.
|
||||
@ -54,14 +65,30 @@ struct vfio_group {
|
||||
struct list_head device_list;
|
||||
struct mutex device_lock;
|
||||
struct list_head vfio_next;
|
||||
#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
|
||||
struct list_head container_next;
|
||||
#endif
|
||||
enum vfio_group_type type;
|
||||
struct mutex group_lock;
|
||||
struct kvm *kvm;
|
||||
struct file *opened_file;
|
||||
struct blocking_notifier_head notifier;
|
||||
struct iommufd_ctx *iommufd;
|
||||
};
|
||||
|
||||
int vfio_device_set_group(struct vfio_device *device,
|
||||
enum vfio_group_type type);
|
||||
void vfio_device_remove_group(struct vfio_device *device);
|
||||
void vfio_device_group_register(struct vfio_device *device);
|
||||
void vfio_device_group_unregister(struct vfio_device *device);
|
||||
int vfio_device_group_use_iommu(struct vfio_device *device);
|
||||
void vfio_device_group_unuse_iommu(struct vfio_device *device);
|
||||
void vfio_device_group_close(struct vfio_device *device);
|
||||
bool vfio_device_has_container(struct vfio_device *device);
|
||||
int __init vfio_group_init(void);
|
||||
void vfio_group_cleanup(void);
|
||||
|
||||
#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
|
||||
/* events for the backend driver notify callback */
|
||||
enum vfio_iommu_notify_type {
|
||||
VFIO_IOMMU_CONTAINER_CLOSE = 0,
|
||||
@ -109,20 +136,101 @@ struct vfio_iommu_driver {
|
||||
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
|
||||
void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
|
||||
|
||||
bool vfio_assert_device_open(struct vfio_device *device);
|
||||
|
||||
struct vfio_container *vfio_container_from_file(struct file *filep);
|
||||
int vfio_device_assign_container(struct vfio_device *device);
|
||||
void vfio_device_unassign_container(struct vfio_device *device);
|
||||
int vfio_group_use_container(struct vfio_group *group);
|
||||
void vfio_group_unuse_container(struct vfio_group *group);
|
||||
int vfio_container_attach_group(struct vfio_container *container,
|
||||
struct vfio_group *group);
|
||||
void vfio_group_detach_container(struct vfio_group *group);
|
||||
void vfio_device_container_register(struct vfio_device *device);
|
||||
void vfio_device_container_unregister(struct vfio_device *device);
|
||||
long vfio_container_ioctl_check_extension(struct vfio_container *container,
|
||||
unsigned long arg);
|
||||
int vfio_device_container_pin_pages(struct vfio_device *device,
|
||||
dma_addr_t iova, int npage,
|
||||
int prot, struct page **pages);
|
||||
void vfio_device_container_unpin_pages(struct vfio_device *device,
|
||||
dma_addr_t iova, int npage);
|
||||
int vfio_device_container_dma_rw(struct vfio_device *device,
|
||||
dma_addr_t iova, void *data,
|
||||
size_t len, bool write);
|
||||
|
||||
int __init vfio_container_init(void);
|
||||
void vfio_container_cleanup(void);
|
||||
#else
|
||||
/*
 * Fallbacks for builds where CONFIG_VFIO_CONTAINER is not enabled.
 * Lookups yield no container, operations that would need one fail with
 * -EOPNOTSUPP, and the remaining hooks do nothing.
 */

/* There is never a container behind @filep; always returns NULL. */
static inline struct vfio_container *
vfio_container_from_file(struct file *filep)
{
	return NULL;
}

/* Always fails with -EOPNOTSUPP. */
static inline int vfio_group_use_container(struct vfio_group *group)
{
	return -EOPNOTSUPP;
}

/* No-op counterpart of vfio_group_use_container(). */
static inline void vfio_group_unuse_container(struct vfio_group *group)
{
}

/* Always fails with -EOPNOTSUPP. */
static inline int vfio_container_attach_group(struct vfio_container *container,
					      struct vfio_group *group)
{
	return -EOPNOTSUPP;
}

/* No-op counterpart of vfio_container_attach_group(). */
static inline void vfio_group_detach_container(struct vfio_group *group)
{
}

/* No-op. */
static inline void vfio_device_container_register(struct vfio_device *device)
{
}

/* No-op. */
static inline void vfio_device_container_unregister(struct vfio_device *device)
{
}
|
||||
|
||||
static inline int vfio_device_container_pin_pages(struct vfio_device *device,
|
||||
dma_addr_t iova, int npage,
|
||||
int prot, struct page **pages)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline void vfio_device_container_unpin_pages(struct vfio_device *device,
|
||||
dma_addr_t iova, int npage)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int vfio_device_container_dma_rw(struct vfio_device *device,
|
||||
dma_addr_t iova, void *data,
|
||||
size_t len, bool write)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
/*
 * Module init/cleanup fallbacks for builds where CONFIG_VFIO_CONTAINER is
 * not enabled: there is nothing to set up, so init reports success.
 */
static inline int vfio_container_init(void)
{
	return 0;
}

static inline void vfio_container_cleanup(void)
{
}
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_IOMMUFD)
|
||||
int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx);
|
||||
void vfio_iommufd_unbind(struct vfio_device *device);
|
||||
#else
|
||||
/*
 * Fallbacks for builds where CONFIG_IOMMUFD is not enabled: binding a
 * device to an iommufd context always fails with -EOPNOTSUPP, and
 * unbinding is a no-op.
 */
static inline int vfio_iommufd_bind(struct vfio_device *device,
				    struct iommufd_ctx *ictx)
{
	return -EOPNOTSUPP;
}

static inline void vfio_iommufd_unbind(struct vfio_device *device)
{
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_VFIO_NOIOMMU
|
||||
extern bool vfio_noiommu __read_mostly;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -13,17 +13,4 @@
|
||||
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
|
||||
#define PRQ_DEPTH ((0x1000 << PRQ_ORDER) >> 5)
|
||||
|
||||
/*
|
||||
* The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
|
||||
* for access to kernel addresses. No IOTLB flushes are automatically done
|
||||
* for kernel mappings; it is valid only for access to the kernel's static
|
||||
* 1:1 mapping of physical memory — not to vmalloc or even module mappings.
|
||||
* A future API addition may permit the use of such ranges, by means of an
|
||||
* explicit IOTLB flush call (akin to the DMA API's unmap method).
|
||||
*
|
||||
* It is unlikely that we will ever hook into flush_tlb_kernel_range() to
|
||||
* do such IOTLB flushes automatically.
|
||||
*/
|
||||
#define SVM_FLAG_SUPERVISOR_MODE BIT(0)
|
||||
|
||||
#endif /* __INTEL_SVM_H__ */
|
||||
|
@ -27,4 +27,62 @@ extern struct interval_tree_node *
|
||||
interval_tree_iter_next(struct interval_tree_node *node,
|
||||
unsigned long start, unsigned long last);
|
||||
|
||||
/**
 * struct interval_tree_span_iter - Find used and unused spans.
 * @start_hole: Start of an interval for a hole when is_hole == 1
 * @last_hole: Inclusive end of an interval for a hole when is_hole == 1
 * @start_used: Start of a used interval when is_hole == 0
 * @last_used: Inclusive end of a used interval when is_hole == 0
 * @is_hole: 0 == used, 1 == is_hole, -1 == done iteration
 *
 * This iterator travels over spans in an interval tree. It does not return
 * nodes but classifies each span as either a hole, where no nodes intersect, or
 * a used, which is fully covered by nodes. Each iteration step toggles between
 * hole and used until the entire range is covered. The returned spans always
 * fully cover the requested range.
 *
 * The iterator is greedy, it always returns the largest hole or used possible,
 * consolidating all consecutive nodes.
 *
 * @start_hole/@start_used and @last_hole/@last_used are two names for the same
 * storage (anonymous unions); which name is meaningful depends on @is_hole.
 *
 * Use interval_tree_span_iter_done() to detect end of iteration.
 */
struct interval_tree_span_iter {
	/* private: not for use by the caller */
	struct interval_tree_node *nodes[2];
	unsigned long first_index;
	unsigned long last_index;

	/* public: */
	union {
		unsigned long start_hole;
		unsigned long start_used;
	};
	union {
		unsigned long last_hole;
		unsigned long last_used;
	};
	int is_hole;
};

void interval_tree_span_iter_first(struct interval_tree_span_iter *state,
				   struct rb_root_cached *itree,
				   unsigned long first_index,
				   unsigned long last_index);
void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter,
				     struct rb_root_cached *itree,
				     unsigned long new_index);
void interval_tree_span_iter_next(struct interval_tree_span_iter *state);

/*
 * Returns true once the iteration is over, i.e. when @state->is_hole holds
 * the -1 "done" sentinel documented on struct interval_tree_span_iter.
 */
static inline bool
interval_tree_span_iter_done(struct interval_tree_span_iter *state)
{
	return state->is_hole == -1;
}
|
||||
|
||||
/*
 * Loop over every span of @itree in [@first_index, @last_index].
 * @span is a pointer to a caller-provided struct interval_tree_span_iter; it
 * is set up by interval_tree_span_iter_first(), stepped by
 * interval_tree_span_iter_next(), and the loop stops as soon as
 * interval_tree_span_iter_done() reports the end of iteration.
 */
#define interval_tree_for_each_span(span, itree, first_index, last_index)      \
	for (interval_tree_span_iter_first(span, itree,                        \
					   first_index, last_index);           \
	     !interval_tree_span_iter_done(span);                              \
	     interval_tree_span_iter_next(span))
|
||||
|
||||
#endif /* _LINUX_INTERVAL_TREE_H */
|
||||
|
@ -64,6 +64,8 @@ struct iommu_domain_geometry {
|
||||
#define __IOMMU_DOMAIN_PT (1U << 2) /* Domain is identity mapped */
|
||||
#define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */
|
||||
|
||||
#define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */
|
||||
|
||||
/*
|
||||
* These are the possible domain-types
|
||||
*
|
||||
@ -77,6 +79,8 @@ struct iommu_domain_geometry {
|
||||
* certain optimizations for these domains
|
||||
* IOMMU_DOMAIN_DMA_FQ - As above, but definitely using batched TLB
|
||||
* invalidation.
|
||||
* IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses
|
||||
* represented by mm_struct's.
|
||||
*/
|
||||
#define IOMMU_DOMAIN_BLOCKED (0U)
|
||||
#define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT)
|
||||
@ -86,15 +90,27 @@ struct iommu_domain_geometry {
|
||||
#define IOMMU_DOMAIN_DMA_FQ (__IOMMU_DOMAIN_PAGING | \
|
||||
__IOMMU_DOMAIN_DMA_API | \
|
||||
__IOMMU_DOMAIN_DMA_FQ)
|
||||
#define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA)
|
||||
|
||||
struct iommu_domain {
|
||||
unsigned type;
|
||||
const struct iommu_domain_ops *ops;
|
||||
unsigned long pgsize_bitmap; /* Bitmap of page sizes in use */
|
||||
iommu_fault_handler_t handler;
|
||||
void *handler_token;
|
||||
struct iommu_domain_geometry geometry;
|
||||
struct iommu_dma_cookie *iova_cookie;
|
||||
enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
|
||||
void *data);
|
||||
void *fault_data;
|
||||
union {
|
||||
struct {
|
||||
iommu_fault_handler_t handler;
|
||||
void *handler_token;
|
||||
};
|
||||
struct { /* IOMMU_DOMAIN_SVA */
|
||||
struct mm_struct *mm;
|
||||
int users;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
|
||||
@ -108,6 +124,11 @@ enum iommu_cap {
|
||||
IOMMU_CAP_NOEXEC, /* IOMMU_NOEXEC flag */
|
||||
IOMMU_CAP_PRE_BOOT_PROTECTION, /* Firmware says it used the IOMMU for
|
||||
DMA protection and we should too */
|
||||
/*
|
||||
* Per-device flag indicating if enforce_cache_coherency() will work on
|
||||
* this device.
|
||||
*/
|
||||
IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
|
||||
};
|
||||
|
||||
/* These are the possible reserved region types */
|
||||
@ -214,15 +235,15 @@ struct iommu_iotlb_gather {
|
||||
* driver init to device driver init (default no)
|
||||
* @dev_enable/disable_feat: per device entries to enable/disable
|
||||
* iommu specific features.
|
||||
* @sva_bind: Bind process address space to device
|
||||
* @sva_unbind: Unbind process address space from device
|
||||
* @sva_get_pasid: Get PASID associated to a SVA handle
|
||||
* @page_response: handle page request response
|
||||
* @def_domain_type: device default domain type, return value:
|
||||
* - IOMMU_DOMAIN_IDENTITY: must use an identity domain
|
||||
* - IOMMU_DOMAIN_DMA: must use a dma domain
|
||||
* - 0: use the default setting
|
||||
* @default_domain_ops: the default ops for domains
|
||||
* @remove_dev_pasid: Remove any translation configurations of a specific
|
||||
* pasid, so that any DMA transactions with this pasid
|
||||
* will be blocked by the hardware.
|
||||
* @pgsize_bitmap: bitmap of all possible supported page sizes
|
||||
* @owner: Driver module providing these ops
|
||||
*/
|
||||
@ -247,16 +268,12 @@ struct iommu_ops {
|
||||
int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
|
||||
int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
|
||||
|
||||
struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
|
||||
void *drvdata);
|
||||
void (*sva_unbind)(struct iommu_sva *handle);
|
||||
u32 (*sva_get_pasid)(struct iommu_sva *handle);
|
||||
|
||||
int (*page_response)(struct device *dev,
|
||||
struct iommu_fault_event *evt,
|
||||
struct iommu_page_response *msg);
|
||||
|
||||
int (*def_domain_type)(struct device *dev);
|
||||
void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid);
|
||||
|
||||
const struct iommu_domain_ops *default_domain_ops;
|
||||
unsigned long pgsize_bitmap;
|
||||
@ -266,7 +283,20 @@ struct iommu_ops {
|
||||
/**
|
||||
* struct iommu_domain_ops - domain specific operations
|
||||
* @attach_dev: attach an iommu domain to a device
|
||||
* Return:
|
||||
* * 0 - success
|
||||
* * EINVAL - can indicate that device and domain are incompatible due to
|
||||
* some previous configuration of the domain, in which case the
|
||||
* driver shouldn't log an error, since it is legitimate for a
|
||||
* caller to test reuse of existing domains. Otherwise, it may
|
||||
* still represent some other fundamental problem
|
||||
* * ENOMEM - out of memory
|
||||
* * ENOSPC - non-ENOMEM type of resource allocation failures
|
||||
* * EBUSY - device is attached to a domain and cannot be changed
|
||||
* * ENODEV - device specific errors, not able to be attached
|
||||
* * <others> - treated as ENODEV by the caller. Use is discouraged
|
||||
* @detach_dev: detach an iommu domain from a device
|
||||
* @set_dev_pasid: set an iommu domain to a pasid of device
|
||||
* @map: map a physically contiguous memory region to an iommu domain
|
||||
* @map_pages: map a physically contiguous set of pages of the same size to
|
||||
* an iommu domain.
|
||||
@ -287,6 +317,8 @@ struct iommu_ops {
|
||||
struct iommu_domain_ops {
|
||||
int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
|
||||
void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
|
||||
int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
|
||||
ioasid_t pasid);
|
||||
|
||||
int (*map)(struct iommu_domain *domain, unsigned long iova,
|
||||
phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
|
||||
@ -322,12 +354,14 @@ struct iommu_domain_ops {
|
||||
* @list: Used by the iommu-core to keep a list of registered iommus
|
||||
* @ops: iommu-ops for talking to this iommu
|
||||
* @dev: struct device for sysfs handling
|
||||
* @max_pasids: number of supported PASIDs
|
||||
*/
|
||||
struct iommu_device {
|
||||
struct list_head list;
|
||||
const struct iommu_ops *ops;
|
||||
struct fwnode_handle *fwnode;
|
||||
struct device *dev;
|
||||
u32 max_pasids;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -366,6 +400,7 @@ struct iommu_fault_param {
|
||||
* @fwspec: IOMMU fwspec data
|
||||
* @iommu_dev: IOMMU device this device is linked to
|
||||
* @priv: IOMMU Driver private data
|
||||
* @max_pasids: number of PASIDs this device can consume
|
||||
*
|
||||
* TODO: migrate other per device data pointers under iommu_dev_data, e.g.
|
||||
* struct iommu_group *iommu_group;
|
||||
@ -377,6 +412,7 @@ struct dev_iommu {
|
||||
struct iommu_fwspec *fwspec;
|
||||
struct iommu_device *iommu_dev;
|
||||
void *priv;
|
||||
u32 max_pasids;
|
||||
};
|
||||
|
||||
int iommu_device_register(struct iommu_device *iommu,
|
||||
@ -626,6 +662,7 @@ struct iommu_fwspec {
|
||||
*/
|
||||
struct iommu_sva {
|
||||
struct device *dev;
|
||||
struct iommu_domain *domain;
|
||||
};
|
||||
|
||||
int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
|
||||
@ -667,12 +704,6 @@ void iommu_release_device(struct device *dev);
|
||||
int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
|
||||
int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
|
||||
|
||||
struct iommu_sva *iommu_sva_bind_device(struct device *dev,
|
||||
struct mm_struct *mm,
|
||||
void *drvdata);
|
||||
void iommu_sva_unbind_device(struct iommu_sva *handle);
|
||||
u32 iommu_sva_get_pasid(struct iommu_sva *handle);
|
||||
|
||||
int iommu_device_use_default_domain(struct device *dev);
|
||||
void iommu_device_unuse_default_domain(struct device *dev);
|
||||
|
||||
@ -680,6 +711,18 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner);
|
||||
void iommu_group_release_dma_owner(struct iommu_group *group);
|
||||
bool iommu_group_dma_owner_claimed(struct iommu_group *group);
|
||||
|
||||
int iommu_device_claim_dma_owner(struct device *dev, void *owner);
|
||||
void iommu_device_release_dma_owner(struct device *dev);
|
||||
|
||||
struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
|
||||
struct mm_struct *mm);
|
||||
int iommu_attach_device_pasid(struct iommu_domain *domain,
|
||||
struct device *dev, ioasid_t pasid);
|
||||
void iommu_detach_device_pasid(struct iommu_domain *domain,
|
||||
struct device *dev, ioasid_t pasid);
|
||||
struct iommu_domain *
|
||||
iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
|
||||
unsigned int type);
|
||||
#else /* CONFIG_IOMMU_API */
|
||||
|
||||
struct iommu_ops {};
|
||||
@ -999,21 +1042,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
static inline struct iommu_sva *
|
||||
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
|
||||
{
|
||||
}
|
||||
|
||||
static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
|
||||
{
|
||||
return IOMMU_PASID_INVALID;
|
||||
}
|
||||
|
||||
static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
|
||||
{
|
||||
return NULL;
|
||||
@ -1042,6 +1070,39 @@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Fallbacks from the section closed by "#endif / * CONFIG_IOMMU_API * /"
 * (presumably the branch used when CONFIG_IOMMU_API is off -- confirm against
 * the full header): DMA ownership cannot be claimed and no SVA domain can be
 * allocated.
 */

/* No-op: nothing can have been claimed, see the claim stub below. */
static inline void iommu_device_release_dma_owner(struct device *dev)
{
}

/* Always fails with -ENODEV. */
static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
{
	return -ENODEV;
}

/* Always returns NULL. */
static inline struct iommu_domain *
iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
{
	return NULL;
}
|
||||
|
||||
/*
 * Per-PASID attach/detach/lookup fallbacks from the section closed by
 * "#endif / * CONFIG_IOMMU_API * /": attaching fails with -ENODEV, detaching
 * does nothing and no domain is ever found.
 */

/* Always fails with -ENODEV. */
static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
					    struct device *dev, ioasid_t pasid)
{
	return -ENODEV;
}

/* No-op counterpart of iommu_attach_device_pasid(). */
static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
					     struct device *dev, ioasid_t pasid)
{
}

/* Always returns NULL. */
static inline struct iommu_domain *
iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
			       unsigned int type)
{
	return NULL;
}
|
||||
#endif /* CONFIG_IOMMU_API */
|
||||
|
||||
/**
|
||||
@ -1124,4 +1185,26 @@ static inline bool tegra_dev_iommu_get_stream_id(struct device *dev, u32 *stream
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_IOMMU_SVA
|
||||
struct iommu_sva *iommu_sva_bind_device(struct device *dev,
|
||||
struct mm_struct *mm);
|
||||
void iommu_sva_unbind_device(struct iommu_sva *handle);
|
||||
u32 iommu_sva_get_pasid(struct iommu_sva *handle);
|
||||
#else
|
||||
static inline struct iommu_sva *
|
||||
iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
|
||||
{
|
||||
}
|
||||
|
||||
static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
|
||||
{
|
||||
return IOMMU_PASID_INVALID;
|
||||
}
|
||||
#endif /* CONFIG_IOMMU_SVA */
|
||||
|
||||
#endif /* __LINUX_IOMMU_H */
|
||||
|
98
include/linux/iommufd.h
Normal file
98
include/linux/iommufd.h
Normal file
@ -0,0 +1,98 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
|
||||
*/
|
||||
#ifndef __LINUX_IOMMUFD_H
|
||||
#define __LINUX_IOMMUFD_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/err.h>
|
||||
|
||||
struct device;
|
||||
struct iommufd_device;
|
||||
struct page;
|
||||
struct iommufd_ctx;
|
||||
struct iommufd_access;
|
||||
struct file;
|
||||
|
||||
struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
|
||||
struct device *dev, u32 *id);
|
||||
void iommufd_device_unbind(struct iommufd_device *idev);
|
||||
|
||||
int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id);
|
||||
void iommufd_device_detach(struct iommufd_device *idev);
|
||||
|
||||
struct iommufd_access_ops {
|
||||
u8 needs_pin_pages : 1;
|
||||
void (*unmap)(void *data, unsigned long iova, unsigned long length);
|
||||
};
|
||||
|
||||
/*
 * Flag bits for the @flags argument of iommufd_access_rw() (and, for the
 * selftest bit, related access helpers). READ is the absence of WRITE.
 */
enum {
	IOMMUFD_ACCESS_RW_READ = 0,
	IOMMUFD_ACCESS_RW_WRITE = 1 << 0,
	/* Set when the caller is in a kthread; rw will then use kthread_use_mm() */
	IOMMUFD_ACCESS_RW_KTHREAD = 1 << 1,

	/* Only for use by selftest */
	__IOMMUFD_ACCESS_RW_SLOW_PATH = 1 << 2,
};
|
||||
|
||||
struct iommufd_access *
|
||||
iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
|
||||
const struct iommufd_access_ops *ops, void *data);
|
||||
void iommufd_access_destroy(struct iommufd_access *access);
|
||||
|
||||
void iommufd_ctx_get(struct iommufd_ctx *ictx);
|
||||
|
||||
#if IS_ENABLED(CONFIG_IOMMUFD)
|
||||
struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
|
||||
void iommufd_ctx_put(struct iommufd_ctx *ictx);
|
||||
|
||||
int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
|
||||
unsigned long length, struct page **out_pages,
|
||||
unsigned int flags);
|
||||
void iommufd_access_unpin_pages(struct iommufd_access *access,
|
||||
unsigned long iova, unsigned long length);
|
||||
int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
|
||||
void *data, size_t len, unsigned int flags);
|
||||
int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
|
||||
#else /* !CONFIG_IOMMUFD */
|
||||
/*
 * !CONFIG_IOMMUFD fallback: no file can be an iommufd, so the lookup always
 * returns ERR_PTR(-EOPNOTSUPP). Callers must test the result with IS_ERR(),
 * not against NULL.
 */
static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
{
	return ERR_PTR(-EOPNOTSUPP);
}
|
||||
|
||||
/*
 * !CONFIG_IOMMUFD fallbacks: dropping a context reference and unpinning are
 * no-ops, while pinning and read/write always fail with -EOPNOTSUPP.
 */

/* No-op. */
static inline void iommufd_ctx_put(struct iommufd_ctx *ictx)
{
}

/* Always fails with -EOPNOTSUPP; @out_pages is left untouched. */
static inline int iommufd_access_pin_pages(struct iommufd_access *access,
					   unsigned long iova,
					   unsigned long length,
					   struct page **out_pages,
					   unsigned int flags)
{
	return -EOPNOTSUPP;
}

/* No-op: pin_pages above never succeeds in this configuration. */
static inline void iommufd_access_unpin_pages(struct iommufd_access *access,
					      unsigned long iova,
					      unsigned long length)
{
}

/* Always fails with -EOPNOTSUPP; @data is neither read nor written. */
static inline int iommufd_access_rw(struct iommufd_access *access,
				    unsigned long iova, void *data,
				    size_t len, unsigned int flags)
{
	return -EOPNOTSUPP;
}
|
||||
|
||||
static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx,
|
||||
u32 *out_ioas_id)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
#endif /* CONFIG_IOMMUFD */
|
||||
#endif
|
@ -25,7 +25,7 @@ struct user_struct {
|
||||
|
||||
#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
|
||||
defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \
|
||||
defined(CONFIG_VFIO_PCI_ZDEV_KVM)
|
||||
defined(CONFIG_VFIO_PCI_ZDEV_KVM) || IS_ENABLED(CONFIG_IOMMUFD)
|
||||
atomic_long_t locked_vm;
|
||||
#endif
|
||||
#ifdef CONFIG_WATCH_QUEUE
|
||||
|
@ -17,6 +17,9 @@
|
||||
#include <linux/iova_bitmap.h>
|
||||
|
||||
struct kvm;
|
||||
struct iommufd_ctx;
|
||||
struct iommufd_device;
|
||||
struct iommufd_access;
|
||||
|
||||
/*
|
||||
* VFIO devices can be placed in a set, this allows all devices to share this
|
||||
@ -54,6 +57,12 @@ struct vfio_device {
|
||||
struct completion comp;
|
||||
struct list_head group_next;
|
||||
struct list_head iommu_entry;
|
||||
struct iommufd_access *iommufd_access;
|
||||
#if IS_ENABLED(CONFIG_IOMMUFD)
|
||||
struct iommufd_device *iommufd_device;
|
||||
struct iommufd_ctx *iommufd_ictx;
|
||||
bool iommufd_attached;
|
||||
#endif
|
||||
};
|
||||
|
||||
/**
|
||||
@ -80,6 +89,10 @@ struct vfio_device_ops {
|
||||
char *name;
|
||||
int (*init)(struct vfio_device *vdev);
|
||||
void (*release)(struct vfio_device *vdev);
|
||||
int (*bind_iommufd)(struct vfio_device *vdev,
|
||||
struct iommufd_ctx *ictx, u32 *out_device_id);
|
||||
void (*unbind_iommufd)(struct vfio_device *vdev);
|
||||
int (*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
|
||||
int (*open_device)(struct vfio_device *vdev);
|
||||
void (*close_device)(struct vfio_device *vdev);
|
||||
ssize_t (*read)(struct vfio_device *vdev, char __user *buf,
|
||||
@ -96,6 +109,32 @@ struct vfio_device_ops {
|
||||
void __user *arg, size_t argsz);
|
||||
};
|
||||
|
||||
#if IS_ENABLED(CONFIG_IOMMUFD)
|
||||
int vfio_iommufd_physical_bind(struct vfio_device *vdev,
|
||||
struct iommufd_ctx *ictx, u32 *out_device_id);
|
||||
void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
|
||||
int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
|
||||
int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
|
||||
struct iommufd_ctx *ictx, u32 *out_device_id);
|
||||
void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
|
||||
int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
|
||||
#else
|
||||
/*
 * Without CONFIG_IOMMUFD the helper names expand to NULL cast to the matching
 * struct vfio_device_ops callback type, so driver ops tables that assign
 * e.g. ".bind_iommufd = vfio_iommufd_physical_bind" still compile and simply
 * end up with NULL callbacks.
 */
#define vfio_iommufd_physical_bind                                      \
	((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,   \
		  u32 *out_device_id)) NULL)
#define vfio_iommufd_physical_unbind \
	((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_physical_attach_ioas \
	((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
#define vfio_iommufd_emulated_bind                                      \
	((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,   \
		  u32 *out_device_id)) NULL)
#define vfio_iommufd_emulated_unbind \
	((void (*)(struct vfio_device *vdev)) NULL)
#define vfio_iommufd_emulated_attach_ioas \
	((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @migration_set_state: Optional callback to change the migration state for
|
||||
* devices that support migration. It's mandatory for
|
||||
|
347
include/uapi/linux/iommufd.h
Normal file
347
include/uapi/linux/iommufd.h
Normal file
@ -0,0 +1,347 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
|
||||
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
|
||||
*/
|
||||
#ifndef _UAPI_IOMMUFD_H
|
||||
#define _UAPI_IOMMUFD_H
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/ioctl.h>
|
||||
|
||||
#define IOMMUFD_TYPE (';')
|
||||
|
||||
/**
|
||||
* DOC: General ioctl format
|
||||
*
|
||||
* The ioctl interface follows a general format to allow for extensibility. Each
|
||||
* ioctl is passed in a structure pointer as the argument providing the size of
|
||||
* the structure in the first u32. The kernel checks that any structure space
|
||||
* beyond what it understands is 0. This allows userspace to use the backward
|
||||
* compatible portion while consistently using the newer, larger, structures.
|
||||
*
|
||||
* ioctls use a standard meaning for common errnos:
|
||||
*
|
||||
* - ENOTTY: The IOCTL number itself is not supported at all
|
||||
* - E2BIG: The IOCTL number is supported, but the provided structure has
|
||||
* non-zero in a part the kernel does not understand.
|
||||
* - EOPNOTSUPP: The IOCTL number is supported, and the structure is
|
||||
* understood, however a known field has a value the kernel does not
|
||||
* understand or support.
|
||||
* - EINVAL: Everything about the IOCTL was understood, but a field is not
|
||||
* correct.
|
||||
* - ENOENT: An ID or IOVA provided does not exist.
|
||||
* - ENOMEM: Out of memory.
|
||||
* - EOVERFLOW: Mathematics overflowed.
|
||||
*
|
||||
* As well as additional errnos, within specific ioctls.
|
||||
*/
|
||||
/*
 * ioctl command numbers. Numbering starts at IOMMUFD_CMD_BASE (0x80) and
 * each following entry takes the next consecutive value; the IOMMU_* ioctl
 * macros in this header are built from these with _IO(IOMMUFD_TYPE, ...).
 * NOTE(review): as this is a uapi header the values are presumably ABI, so
 * new commands should only be appended -- confirm with the maintainers.
 */
enum {
	IOMMUFD_CMD_BASE = 0x80,
	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
	IOMMUFD_CMD_IOAS_ALLOC,
	IOMMUFD_CMD_IOAS_ALLOW_IOVAS,
	IOMMUFD_CMD_IOAS_COPY,
	IOMMUFD_CMD_IOAS_IOVA_RANGES,
	IOMMUFD_CMD_IOAS_MAP,
	IOMMUFD_CMD_IOAS_UNMAP,
	IOMMUFD_CMD_OPTION,
	IOMMUFD_CMD_VFIO_IOAS,
};
|
||||
|
||||
/**
|
||||
* struct iommu_destroy - ioctl(IOMMU_DESTROY)
|
||||
* @size: sizeof(struct iommu_destroy)
|
||||
* @id: iommufd object ID to destroy. Can be any destroyable object type.
|
||||
*
|
||||
* Destroy any object held within iommufd.
|
||||
*/
|
||||
struct iommu_destroy {
|
||||
__u32 size;
|
||||
__u32 id;
|
||||
};
|
||||
#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)
|
||||
|
||||
/**
|
||||
* struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
|
||||
* @size: sizeof(struct iommu_ioas_alloc)
|
||||
* @flags: Must be 0
|
||||
* @out_ioas_id: Output IOAS ID for the allocated object
|
||||
*
|
||||
* Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
|
||||
* to memory mapping.
|
||||
*/
|
||||
struct iommu_ioas_alloc {
|
||||
__u32 size;
|
||||
__u32 flags;
|
||||
__u32 out_ioas_id;
|
||||
};
|
||||
#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
|
||||
|
||||
/**
|
||||
* struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE)
|
||||
* @start: First IOVA
|
||||
* @last: Inclusive last IOVA
|
||||
*
|
||||
* An interval in IOVA space.
|
||||
*/
|
||||
struct iommu_iova_range {
|
||||
__aligned_u64 start;
|
||||
__aligned_u64 last;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
|
||||
* @size: sizeof(struct iommu_ioas_iova_ranges)
|
||||
* @ioas_id: IOAS ID to read ranges from
|
||||
* @num_iovas: Input/Output total number of ranges in the IOAS
|
||||
* @__reserved: Must be 0
|
||||
* @allowed_iovas: Pointer to the output array of struct iommu_iova_range
|
||||
* @out_iova_alignment: Minimum alignment required for mapping IOVA
|
||||
*
|
||||
* Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
|
||||
* is not allowed. num_iovas will be set to the total number of iovas and
|
||||
* the allowed_iovas[] will be filled in as space permits.
|
||||
*
|
||||
* The allowed ranges are dependent on the HW path the DMA operation takes, and
|
||||
* can change during the lifetime of the IOAS. A fresh empty IOAS will have a
|
||||
* full range, and each attached device will narrow the ranges based on that
|
||||
* device's HW restrictions. Detaching a device can widen the ranges. Userspace
|
||||
* should query ranges after every attach/detach to know what IOVAs are valid
|
||||
* for mapping.
|
||||
*
|
||||
* On input num_iovas is the length of the allowed_iovas array. On output it is
|
||||
* the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
|
||||
* num_iovas to the required value if num_iovas is too small. In this case the
|
||||
* caller should allocate a larger output array and re-issue the ioctl.
|
||||
*
|
||||
* out_iova_alignment returns the minimum IOVA alignment that can be given
|
||||
* to IOMMU_IOAS_MAP/COPY. IOVA's must satisfy::
|
||||
*
|
||||
* starting_iova % out_iova_alignment == 0
|
||||
* (starting_iova + length) % out_iova_alignment == 0
|
||||
*
|
||||
* out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
|
||||
* be higher than the system PAGE_SIZE.
|
||||
*/
|
||||
struct iommu_ioas_iova_ranges {
|
||||
__u32 size;
|
||||
__u32 ioas_id;
|
||||
__u32 num_iovas;
|
||||
__u32 __reserved;
|
||||
__aligned_u64 allowed_iovas;
|
||||
__aligned_u64 out_iova_alignment;
|
||||
};
|
||||
#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
|
||||
|
||||
/**
|
||||
* struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
|
||||
* @size: sizeof(struct iommu_ioas_allow_iovas)
|
||||
* @ioas_id: IOAS ID to allow IOVAs from
|
||||
* @num_iovas: Input/Output total number of ranges in the IOAS
|
||||
* @__reserved: Must be 0
|
||||
* @allowed_iovas: Pointer to array of struct iommu_iova_range
|
||||
*
|
||||
* Ensure a range of IOVAs are always available for allocation. If this call
|
||||
* succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
|
||||
* that are narrower than the ranges provided here. This call will fail if
|
||||
* IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
|
||||
*
|
||||
* When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
|
||||
* devices are attached the IOVA will narrow based on the device restrictions.
|
||||
* When an allowed range is specified any narrowing will be refused, ie device
|
||||
* attachment can fail if the device requires limiting within the allowed range.
|
||||
*
|
||||
* Automatic IOVA allocation is also impacted by this call. MAP will only
|
||||
* allocate within the allowed IOVAs if they are present.
|
||||
*
|
||||
* This call replaces the entire allowed list with the given list.
|
||||
*/
|
||||
struct iommu_ioas_allow_iovas {
|
||||
__u32 size;
|
||||
__u32 ioas_id;
|
||||
__u32 num_iovas;
|
||||
__u32 __reserved;
|
||||
__aligned_u64 allowed_iovas;
|
||||
};
|
||||
#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)
|
||||
|
||||
/**
|
||||
* enum iommufd_ioas_map_flags - Flags for map and copy
|
||||
* @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
|
||||
* IOVA to place the mapping at
|
||||
* @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
|
||||
* @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
|
||||
*/
|
||||
enum iommufd_ioas_map_flags {
|
||||
IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
|
||||
IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
|
||||
IOMMU_IOAS_MAP_READABLE = 1 << 2,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
|
||||
* @size: sizeof(struct iommu_ioas_map)
|
||||
* @flags: Combination of enum iommufd_ioas_map_flags
|
||||
* @ioas_id: IOAS ID to change the mapping of
|
||||
* @__reserved: Must be 0
|
||||
* @user_va: Userspace pointer to start mapping from
|
||||
* @length: Number of bytes to map
|
||||
* @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
|
||||
* then this must be provided as input.
|
||||
*
|
||||
* Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
|
||||
* mapping will be established at iova, otherwise a suitable location based on
|
||||
* the reserved and allowed lists will be automatically selected and returned in
|
||||
* iova.
|
||||
*
|
||||
* If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
|
||||
* be unused, existing IOVA cannot be replaced.
|
||||
*/
|
||||
struct iommu_ioas_map {
|
||||
__u32 size;
|
||||
__u32 flags;
|
||||
__u32 ioas_id;
|
||||
__u32 __reserved;
|
||||
__aligned_u64 user_va;
|
||||
__aligned_u64 length;
|
||||
__aligned_u64 iova;
|
||||
};
|
||||
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
|
||||
|
||||
/**
|
||||
* struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
|
||||
* @size: sizeof(struct iommu_ioas_copy)
|
||||
* @flags: Combination of enum iommufd_ioas_map_flags
|
||||
* @dst_ioas_id: IOAS ID to change the mapping of
|
||||
* @src_ioas_id: IOAS ID to copy from
|
||||
* @length: Number of bytes to copy and map
|
||||
* @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
|
||||
* set then this must be provided as input.
|
||||
* @src_iova: IOVA to start the copy
|
||||
*
|
||||
* Copy an already existing mapping from src_ioas_id and establish it in
|
||||
* dst_ioas_id. The src iova/length must exactly match a range used with
|
||||
* IOMMU_IOAS_MAP.
|
||||
*
|
||||
* This may be used to efficiently clone a subset of an IOAS to another, or as a
|
||||
* kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
|
||||
* establishing equivalent new mappings, as internal resources are shared, and
|
||||
* the kernel will pin the user memory only once.
|
||||
*/
|
||||
struct iommu_ioas_copy {
|
||||
__u32 size;
|
||||
__u32 flags;
|
||||
__u32 dst_ioas_id;
|
||||
__u32 src_ioas_id;
|
||||
__aligned_u64 length;
|
||||
__aligned_u64 dst_iova;
|
||||
__aligned_u64 src_iova;
|
||||
};
|
||||
#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)
|
||||
|
||||
/**
|
||||
* struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
|
||||
* @size: sizeof(struct iommu_ioas_unmap)
|
||||
* @ioas_id: IOAS ID to change the mapping of
|
||||
* @iova: IOVA to start the unmapping at
|
||||
* @length: Number of bytes to unmap, and return back the bytes unmapped
|
||||
*
|
||||
* Unmap an IOVA range. The iova/length must be a superset of a previously
|
||||
* mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
|
||||
* truncating ranges is not allowed. The values 0 to U64_MAX will unmap
|
||||
* everything.
|
||||
*/
|
||||
struct iommu_ioas_unmap {
|
||||
__u32 size;
|
||||
__u32 ioas_id;
|
||||
__aligned_u64 iova;
|
||||
__aligned_u64 length;
|
||||
};
|
||||
#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)
|
||||
|
||||
/**
|
||||
* enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
|
||||
* ioctl(IOMMU_OPTION_HUGE_PAGES)
|
||||
* @IOMMU_OPTION_RLIMIT_MODE:
|
||||
* Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
|
||||
* to invoke this. Value 0 (default) is user based accounting, 1 uses process
|
||||
* based accounting. Global option, object_id must be 0
|
||||
* @IOMMU_OPTION_HUGE_PAGES:
|
||||
* Value 1 (default) allows contiguous pages to be combined when generating
|
||||
* iommu mappings. Value 0 disables combining, everything is mapped to
|
||||
* PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS
|
||||
* option, the object_id must be the IOAS ID.
|
||||
*/
|
||||
enum iommufd_option {
|
||||
IOMMU_OPTION_RLIMIT_MODE = 0,
|
||||
IOMMU_OPTION_HUGE_PAGES = 1,
|
||||
};
|
||||
|
||||
/**
|
||||
* enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and
|
||||
* ioctl(IOMMU_OPTION_OP_GET)
|
||||
* @IOMMU_OPTION_OP_SET: Set the option's value
|
||||
* @IOMMU_OPTION_OP_GET: Get the option's value
|
||||
*/
|
||||
enum iommufd_option_ops {
|
||||
IOMMU_OPTION_OP_SET = 0,
|
||||
IOMMU_OPTION_OP_GET = 1,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct iommu_option - iommu option multiplexer
|
||||
* @size: sizeof(struct iommu_option)
|
||||
* @option_id: One of enum iommufd_option
|
||||
* @op: One of enum iommufd_option_ops
|
||||
* @__reserved: Must be 0
|
||||
* @object_id: ID of the object if required
|
||||
* @val64: Option value to set or value returned on get
|
||||
*
|
||||
* Change a simple option value. This multiplexer allows controlling options
|
||||
* on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
|
||||
* will return the current value.
|
||||
*/
|
||||
struct iommu_option {
|
||||
__u32 size;
|
||||
__u32 option_id;
|
||||
__u16 op;
|
||||
__u16 __reserved;
|
||||
__u32 object_id;
|
||||
__aligned_u64 val64;
|
||||
};
|
||||
#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
|
||||
|
||||
/**
|
||||
* enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
|
||||
* @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
|
||||
* @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
|
||||
* @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
|
||||
*/
|
||||
enum iommufd_vfio_ioas_op {
|
||||
IOMMU_VFIO_IOAS_GET = 0,
|
||||
IOMMU_VFIO_IOAS_SET = 1,
|
||||
IOMMU_VFIO_IOAS_CLEAR = 2,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
|
||||
* @size: sizeof(struct iommu_vfio_ioas)
|
||||
* @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
|
||||
* For IOMMU_VFIO_IOAS_GET will output the IOAS ID
|
||||
* @op: One of enum iommufd_vfio_ioas_op
|
||||
* @__reserved: Must be 0
|
||||
*
|
||||
* The VFIO compatibility support uses a single ioas because VFIO APIs do not
|
||||
* support the ID field. Set or Get the IOAS that VFIO compatibility will use.
|
||||
* When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
|
||||
* compatibility ioas, either by taking what is already set, or auto creating
|
||||
* one. From then on VFIO will continue to use that ioas and is not affected by
|
||||
* this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
|
||||
*/
|
||||
struct iommu_vfio_ioas {
|
||||
__u32 size;
|
||||
__u32 ioas_id;
|
||||
__u16 op;
|
||||
__u16 __reserved;
|
||||
};
|
||||
#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)
|
||||
#endif
|
@ -185,6 +185,7 @@ void free_uid(struct user_struct *up)
|
||||
if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags))
|
||||
free_user(up, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(free_uid);
|
||||
|
||||
struct user_struct *alloc_uid(kuid_t uid)
|
||||
{
|
||||
|
@ -480,6 +480,10 @@ config INTERVAL_TREE
|
||||
|
||||
for more information.
|
||||
|
||||
config INTERVAL_TREE_SPAN_ITER
|
||||
bool
|
||||
depends on INTERVAL_TREE
|
||||
|
||||
config XARRAY_MULTI
|
||||
bool
|
||||
help
|
||||
|
@ -15,3 +15,135 @@ EXPORT_SYMBOL_GPL(interval_tree_insert);
|
||||
EXPORT_SYMBOL_GPL(interval_tree_remove);
|
||||
EXPORT_SYMBOL_GPL(interval_tree_iter_first);
|
||||
EXPORT_SYMBOL_GPL(interval_tree_iter_next);
|
||||
|
||||
#ifdef CONFIG_INTERVAL_TREE_SPAN_ITER
|
||||
/*
|
||||
* Roll nodes[1] into nodes[0] by advancing nodes[1] to the end of a contiguous
|
||||
* span of nodes. This makes nodes[0]->last the end of that contiguous used span of
|
||||
* indexes that started at the original nodes[1]->start. nodes[1] is now the
|
||||
* first node starting the next used span. A hole span is between nodes[0]->last
|
||||
* and nodes[1]->start. nodes[1] must be !NULL.
|
||||
*/
|
||||
static void
|
||||
interval_tree_span_iter_next_gap(struct interval_tree_span_iter *state)
|
||||
{
|
||||
struct interval_tree_node *cur = state->nodes[1];
|
||||
|
||||
state->nodes[0] = cur;
|
||||
do {
|
||||
if (cur->last > state->nodes[0]->last)
|
||||
state->nodes[0] = cur;
|
||||
cur = interval_tree_iter_next(cur, state->first_index,
|
||||
state->last_index);
|
||||
} while (cur && (state->nodes[0]->last >= cur->start ||
|
||||
state->nodes[0]->last + 1 == cur->start));
|
||||
state->nodes[1] = cur;
|
||||
}
|
||||
|
||||
void interval_tree_span_iter_first(struct interval_tree_span_iter *iter,
|
||||
struct rb_root_cached *itree,
|
||||
unsigned long first_index,
|
||||
unsigned long last_index)
|
||||
{
|
||||
iter->first_index = first_index;
|
||||
iter->last_index = last_index;
|
||||
iter->nodes[0] = NULL;
|
||||
iter->nodes[1] =
|
||||
interval_tree_iter_first(itree, first_index, last_index);
|
||||
if (!iter->nodes[1]) {
|
||||
/* No nodes intersect the span, whole span is hole */
|
||||
iter->start_hole = first_index;
|
||||
iter->last_hole = last_index;
|
||||
iter->is_hole = 1;
|
||||
return;
|
||||
}
|
||||
if (iter->nodes[1]->start > first_index) {
|
||||
/* Leading hole on first iteration */
|
||||
iter->start_hole = first_index;
|
||||
iter->last_hole = iter->nodes[1]->start - 1;
|
||||
iter->is_hole = 1;
|
||||
interval_tree_span_iter_next_gap(iter);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Starting inside a used */
|
||||
iter->start_used = first_index;
|
||||
iter->is_hole = 0;
|
||||
interval_tree_span_iter_next_gap(iter);
|
||||
iter->last_used = iter->nodes[0]->last;
|
||||
if (iter->last_used >= last_index) {
|
||||
iter->last_used = last_index;
|
||||
iter->nodes[0] = NULL;
|
||||
iter->nodes[1] = NULL;
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(interval_tree_span_iter_first);
|
||||
|
||||
void interval_tree_span_iter_next(struct interval_tree_span_iter *iter)
|
||||
{
|
||||
if (!iter->nodes[0] && !iter->nodes[1]) {
|
||||
iter->is_hole = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
if (iter->is_hole) {
|
||||
iter->start_used = iter->last_hole + 1;
|
||||
iter->last_used = iter->nodes[0]->last;
|
||||
if (iter->last_used >= iter->last_index) {
|
||||
iter->last_used = iter->last_index;
|
||||
iter->nodes[0] = NULL;
|
||||
iter->nodes[1] = NULL;
|
||||
}
|
||||
iter->is_hole = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (!iter->nodes[1]) {
|
||||
/* Trailing hole */
|
||||
iter->start_hole = iter->nodes[0]->last + 1;
|
||||
iter->last_hole = iter->last_index;
|
||||
iter->nodes[0] = NULL;
|
||||
iter->is_hole = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* must have both nodes[0] and [1], interior hole */
|
||||
iter->start_hole = iter->nodes[0]->last + 1;
|
||||
iter->last_hole = iter->nodes[1]->start - 1;
|
||||
iter->is_hole = 1;
|
||||
interval_tree_span_iter_next_gap(iter);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(interval_tree_span_iter_next);
|
||||
|
||||
/*
|
||||
* Advance the iterator index to a specific position. The returned used/hole is
|
||||
* updated to start at new_index. This is faster than calling
|
||||
* interval_tree_span_iter_first() as it can avoid full searches in several
|
||||
* cases where the iterator is already set.
|
||||
*/
|
||||
void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter,
|
||||
struct rb_root_cached *itree,
|
||||
unsigned long new_index)
|
||||
{
|
||||
if (iter->is_hole == -1)
|
||||
return;
|
||||
|
||||
iter->first_index = new_index;
|
||||
if (new_index > iter->last_index) {
|
||||
iter->is_hole = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Rely on the union aliasing hole/used */
|
||||
if (iter->start_hole <= new_index && new_index <= iter->last_hole) {
|
||||
iter->start_hole = new_index;
|
||||
return;
|
||||
}
|
||||
if (new_index == iter->last_hole + 1)
|
||||
interval_tree_span_iter_next(iter);
|
||||
else
|
||||
interval_tree_span_iter_first(iter, itree, new_index,
|
||||
iter->last_index);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(interval_tree_span_iter_advance);
|
||||
#endif
|
||||
|
@ -256,6 +256,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
|
||||
my $doc_inline_end = '^\s*\*/\s*$';
|
||||
my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
|
||||
my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
|
||||
my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*\w+\)\s*;';
|
||||
my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
|
||||
my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
|
||||
|
||||
@ -1960,6 +1961,10 @@ sub process_export_file($) {
|
||||
next if (defined($nosymbol_table{$2}));
|
||||
$function_table{$2} = 1;
|
||||
}
|
||||
if (/$export_symbol_ns/) {
|
||||
next if (defined($nosymbol_table{$2}));
|
||||
$function_table{$2} = 1;
|
||||
}
|
||||
}
|
||||
|
||||
close(IN);
|
||||
@ -2431,12 +2436,12 @@ found on PATH.
|
||||
=item -export
|
||||
|
||||
Only output documentation for the symbols that have been exported using
|
||||
EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE.
|
||||
EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE.
|
||||
|
||||
=item -internal
|
||||
|
||||
Only output documentation for the symbols that have NOT been exported using
|
||||
EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE.
|
||||
EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE.
|
||||
|
||||
=item -function NAME
|
||||
|
||||
@ -2463,8 +2468,7 @@ Do not output DOC: sections.
|
||||
|
||||
=item -export-file FILE
|
||||
|
||||
Specify an additional FILE in which to look for EXPORT_SYMBOL() and
|
||||
EXPORT_SYMBOL_GPL().
|
||||
Specify an additional FILE in which to look for EXPORT_SYMBOL information.
|
||||
|
||||
To be used with -export or -internal.
|
||||
|
||||
|
@ -27,6 +27,7 @@ TARGETS += ftrace
|
||||
TARGETS += futex
|
||||
TARGETS += gpio
|
||||
TARGETS += intel_pstate
|
||||
TARGETS += iommu
|
||||
TARGETS += ipc
|
||||
TARGETS += ir
|
||||
TARGETS += kcmp
|
||||
|
3
tools/testing/selftests/iommu/.gitignore
vendored
Normal file
3
tools/testing/selftests/iommu/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
/iommufd
|
||||
/iommufd_fail_nth
|
12
tools/testing/selftests/iommu/Makefile
Normal file
12
tools/testing/selftests/iommu/Makefile
Normal file
@ -0,0 +1,12 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
CFLAGS += -Wall -O2 -Wno-unused-function
|
||||
CFLAGS += -I../../../../include/uapi/
|
||||
CFLAGS += -I../../../../include/
|
||||
|
||||
CFLAGS += -D_GNU_SOURCE
|
||||
|
||||
TEST_GEN_PROGS :=
|
||||
TEST_GEN_PROGS += iommufd
|
||||
TEST_GEN_PROGS += iommufd_fail_nth
|
||||
|
||||
include ../lib.mk
|
2
tools/testing/selftests/iommu/config
Normal file
2
tools/testing/selftests/iommu/config
Normal file
@ -0,0 +1,2 @@
|
||||
CONFIG_IOMMUFD=y
|
||||
CONFIG_IOMMUFD_TEST=y
|
1654
tools/testing/selftests/iommu/iommufd.c
Normal file
1654
tools/testing/selftests/iommu/iommufd.c
Normal file
File diff suppressed because it is too large
Load Diff
580
tools/testing/selftests/iommu/iommufd_fail_nth.c
Normal file
580
tools/testing/selftests/iommu/iommufd_fail_nth.c
Normal file
@ -0,0 +1,580 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
|
||||
*
|
||||
* These tests are "kernel integrity" tests. They are looking for kernel
|
||||
* WARN/OOPS/KASAN/etc splats triggered by kernel sanitizers & debugging
|
||||
* features. It does not attempt to verify that the system calls are doing what
|
||||
* they are supposed to do.
|
||||
*
|
||||
* The basic philosophy is to run a sequence of calls that will succeed and then
|
||||
* sweep every failure injection point on that call chain to look for
|
||||
* interesting things in error handling.
|
||||
*
|
||||
* This test is best run with:
|
||||
* echo 1 > /proc/sys/kernel/panic_on_warn
|
||||
* If something is actually going wrong.
|
||||
*/
|
||||
#include <fcntl.h>
|
||||
#include <dirent.h>
|
||||
|
||||
#define __EXPORTED_HEADERS__
|
||||
#include <linux/vfio.h>
|
||||
|
||||
#include "iommufd_utils.h"
|
||||
|
||||
static bool have_fault_injection;
|
||||
|
||||
/*
 * Write the string @val (without its NUL terminator) to the existing file @fn,
 * resolved relative to the directory descriptor @dfd.
 *
 * Returns -1 if the file cannot be opened for writing, 0 after the write.
 * A failed or short write trips the assert.
 */
static int writeat(int dfd, const char *fn, const char *val)
{
	size_t want = strlen(val);
	int out_fd = openat(dfd, fn, O_WRONLY);
	ssize_t wrote;

	if (out_fd == -1)
		return -1;

	wrote = write(out_fd, val, want);
	assert(wrote == want);
	close(out_fd);
	return 0;
}
|
||||
|
||||
static __attribute__((constructor)) void setup_buffer(void)
|
||||
{
|
||||
BUFFER_SIZE = 2*1024*1024;
|
||||
|
||||
buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* This sets up fail_injection in a way that is useful for this test.
|
||||
* It does not attempt to restore things back to how they were.
|
||||
*/
|
||||
static __attribute__((constructor)) void setup_fault_injection(void)
|
||||
{
|
||||
DIR *debugfs = opendir("/sys/kernel/debug/");
|
||||
struct dirent *dent;
|
||||
|
||||
if (!debugfs)
|
||||
return;
|
||||
|
||||
/* Allow any allocation call to be fault injected */
|
||||
if (writeat(dirfd(debugfs), "failslab/ignore-gfp-wait", "N"))
|
||||
return;
|
||||
writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-wait", "N");
|
||||
writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-highmem", "N");
|
||||
|
||||
while ((dent = readdir(debugfs))) {
|
||||
char fn[300];
|
||||
|
||||
if (strncmp(dent->d_name, "fail", 4) != 0)
|
||||
continue;
|
||||
|
||||
/* We are looking for kernel splats, quiet down the log */
|
||||
snprintf(fn, sizeof(fn), "%s/verbose", dent->d_name);
|
||||
writeat(dirfd(debugfs), fn, "0");
|
||||
}
|
||||
closedir(debugfs);
|
||||
have_fault_injection = true;
|
||||
}
|
||||
|
||||
/* Per-test bookkeeping for sweeping the fail-nth injection point */
struct fail_nth_state {
	/* fd of this task's /proc fail-nth file, opened by fail_nth_first() */
	int proc_fd;
	/* Value written to fail-nth by the next enable; 0 means no injection */
	unsigned int iteration;
};
|
||||
|
||||
static void fail_nth_first(struct __test_metadata *_metadata,
|
||||
struct fail_nth_state *nth_state)
|
||||
{
|
||||
char buf[300];
|
||||
|
||||
snprintf(buf, sizeof(buf), "/proc/self/task/%u/fail-nth", getpid());
|
||||
nth_state->proc_fd = open(buf, O_RDWR);
|
||||
ASSERT_NE(-1, nth_state->proc_fd);
|
||||
}
|
||||
|
||||
static bool fail_nth_next(struct __test_metadata *_metadata,
|
||||
struct fail_nth_state *nth_state,
|
||||
int test_result)
|
||||
{
|
||||
static const char disable_nth[] = "0";
|
||||
char buf[300];
|
||||
|
||||
/*
|
||||
* This is just an arbitrary limit based on the current kernel
|
||||
* situation. Changes in the kernel can dramtically change the number of
|
||||
* required fault injection sites, so if this hits it doesn't
|
||||
* necessarily mean a test failure, just that the limit has to be made
|
||||
* bigger.
|
||||
*/
|
||||
ASSERT_GT(400, nth_state->iteration);
|
||||
if (nth_state->iteration != 0) {
|
||||
ssize_t res;
|
||||
ssize_t res2;
|
||||
|
||||
buf[0] = 0;
|
||||
/*
|
||||
* Annoyingly disabling the nth can also fail. This means
|
||||
* the test passed without triggering failure
|
||||
*/
|
||||
res = pread(nth_state->proc_fd, buf, sizeof(buf), 0);
|
||||
if (res == -1 && errno == EFAULT) {
|
||||
buf[0] = '1';
|
||||
buf[1] = '\n';
|
||||
res = 2;
|
||||
}
|
||||
|
||||
res2 = pwrite(nth_state->proc_fd, disable_nth,
|
||||
ARRAY_SIZE(disable_nth) - 1, 0);
|
||||
if (res2 == -1 && errno == EFAULT) {
|
||||
res2 = pwrite(nth_state->proc_fd, disable_nth,
|
||||
ARRAY_SIZE(disable_nth) - 1, 0);
|
||||
buf[0] = '1';
|
||||
buf[1] = '\n';
|
||||
}
|
||||
ASSERT_EQ(ARRAY_SIZE(disable_nth) - 1, res2);
|
||||
|
||||
/* printf(" nth %u result=%d nth=%u\n", nth_state->iteration,
|
||||
test_result, atoi(buf)); */
|
||||
fflush(stdout);
|
||||
ASSERT_LT(1, res);
|
||||
if (res != 2 || buf[0] != '0' || buf[1] != '\n')
|
||||
return false;
|
||||
} else {
|
||||
/* printf(" nth %u result=%d\n", nth_state->iteration,
|
||||
test_result); */
|
||||
}
|
||||
nth_state->iteration++;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called during the test to start failure injection. It allows the test
|
||||
* to do some setup that has already been swept and thus reduce the required
|
||||
* iterations.
|
||||
*/
|
||||
void __fail_nth_enable(struct __test_metadata *_metadata,
|
||||
struct fail_nth_state *nth_state)
|
||||
{
|
||||
char buf[300];
|
||||
size_t len;
|
||||
|
||||
if (!nth_state->iteration)
|
||||
return;
|
||||
|
||||
len = snprintf(buf, sizeof(buf), "%u", nth_state->iteration);
|
||||
ASSERT_EQ(len, pwrite(nth_state->proc_fd, buf, len, 0));
|
||||
}
|
||||
#define fail_nth_enable() __fail_nth_enable(_metadata, _nth_state)
|
||||
|
||||
/*
 * Define a fixture test whose body is an int-returning function taking a
 * struct fail_nth_state. The body is run once and must return 0, then the
 * fixture is torn down, set up and the body re-run for as long as
 * fail_nth_next() returns true. The result of the last run must be 0.
 */
#define TEST_FAIL_NTH(fixture_name, name)                                      \
	static int test_nth_##name(struct __test_metadata *_metadata,          \
				   FIXTURE_DATA(fixture_name) *self,           \
				   const FIXTURE_VARIANT(fixture_name)         \
					   *variant,                           \
				   struct fail_nth_state *_nth_state);         \
	TEST_F(fixture_name, name)                                             \
	{                                                                      \
		struct fail_nth_state sweep = {};                              \
		int last_rc = 0;                                               \
									       \
		if (!have_fault_injection)                                     \
			SKIP(return,                                           \
				   "fault injection is not enabled in the kernel"); \
		fail_nth_first(_metadata, &sweep);                             \
		ASSERT_EQ(0, test_nth_##name(_metadata, self, variant,         \
					     &sweep));                         \
		while (fail_nth_next(_metadata, &sweep, last_rc)) {            \
			fixture_name##_teardown(_metadata, self, variant);     \
			fixture_name##_setup(_metadata, self, variant);        \
			last_rc = test_nth_##name(_metadata, self,             \
						  variant, &sweep);            \
		}                                                              \
		ASSERT_EQ(0, last_rc);                                         \
	}                                                                      \
	static int test_nth_##name(                                            \
		struct __test_metadata __attribute__((unused)) *_metadata,     \
		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self,      \
		const FIXTURE_VARIANT(fixture_name) __attribute__((unused))    \
			*variant,                                              \
		struct fail_nth_state *_nth_state)
|
||||
|
||||
FIXTURE(basic_fail_nth)
|
||||
{
|
||||
int fd;
|
||||
uint32_t access_id;
|
||||
};
|
||||
|
||||
FIXTURE_SETUP(basic_fail_nth)
|
||||
{
|
||||
self->fd = -1;
|
||||
self->access_id = 0;
|
||||
}
|
||||
|
||||
FIXTURE_TEARDOWN(basic_fail_nth)
|
||||
{
|
||||
int rc;
|
||||
|
||||
if (self->access_id) {
|
||||
/* The access FD holds the iommufd open until it closes */
|
||||
rc = _test_cmd_destroy_access(self->access_id);
|
||||
assert(rc == 0);
|
||||
}
|
||||
teardown_iommufd(self->fd, _metadata);
|
||||
}
|
||||
|
||||
/* Cover ioas.c */
|
||||
TEST_FAIL_NTH(basic_fail_nth, basic)
|
||||
{
|
||||
struct iommu_iova_range ranges[10];
|
||||
uint32_t ioas_id;
|
||||
__u64 iova;
|
||||
|
||||
fail_nth_enable();
|
||||
|
||||
self->fd = open("/dev/iommu", O_RDWR);
|
||||
if (self->fd == -1)
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
|
||||
return -1;
|
||||
|
||||
{
|
||||
struct iommu_ioas_iova_ranges ranges_cmd = {
|
||||
.size = sizeof(ranges_cmd),
|
||||
.num_iovas = ARRAY_SIZE(ranges),
|
||||
.ioas_id = ioas_id,
|
||||
.allowed_iovas = (uintptr_t)ranges,
|
||||
};
|
||||
if (ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd))
|
||||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
struct iommu_ioas_allow_iovas allow_cmd = {
|
||||
.size = sizeof(allow_cmd),
|
||||
.ioas_id = ioas_id,
|
||||
.num_iovas = 1,
|
||||
.allowed_iovas = (uintptr_t)ranges,
|
||||
};
|
||||
|
||||
ranges[0].start = 16*1024;
|
||||
ranges[0].last = BUFFER_SIZE + 16 * 1024 * 600 - 1;
|
||||
if (ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd))
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
|
||||
IOMMU_IOAS_MAP_WRITEABLE |
|
||||
IOMMU_IOAS_MAP_READABLE))
|
||||
return -1;
|
||||
|
||||
{
|
||||
struct iommu_ioas_copy copy_cmd = {
|
||||
.size = sizeof(copy_cmd),
|
||||
.flags = IOMMU_IOAS_MAP_WRITEABLE |
|
||||
IOMMU_IOAS_MAP_READABLE,
|
||||
.dst_ioas_id = ioas_id,
|
||||
.src_ioas_id = ioas_id,
|
||||
.src_iova = iova,
|
||||
.length = sizeof(ranges),
|
||||
};
|
||||
|
||||
if (ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd))
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (_test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE,
|
||||
NULL))
|
||||
return -1;
|
||||
/* Failure path of no IOVA to unmap */
|
||||
_test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* iopt_area_fill_domains() and iopt_area_fill_domain() */
|
||||
TEST_FAIL_NTH(basic_fail_nth, map_domain)
|
||||
{
|
||||
uint32_t ioas_id;
|
||||
__u32 device_id;
|
||||
__u32 hwpt_id;
|
||||
__u64 iova;
|
||||
|
||||
self->fd = open("/dev/iommu", O_RDWR);
|
||||
if (self->fd == -1)
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
|
||||
return -1;
|
||||
|
||||
fail_nth_enable();
|
||||
|
||||
if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
|
||||
IOMMU_IOAS_MAP_WRITEABLE |
|
||||
IOMMU_IOAS_MAP_READABLE))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_destroy(self->fd, device_id))
|
||||
return -1;
|
||||
if (_test_ioctl_destroy(self->fd, hwpt_id))
|
||||
return -1;
|
||||
|
||||
if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id))
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
TEST_FAIL_NTH(basic_fail_nth, map_two_domains)
|
||||
{
|
||||
uint32_t ioas_id;
|
||||
__u32 device_id2;
|
||||
__u32 device_id;
|
||||
__u32 hwpt_id2;
|
||||
__u32 hwpt_id;
|
||||
__u64 iova;
|
||||
|
||||
self->fd = open("/dev/iommu", O_RDWR);
|
||||
if (self->fd == -1)
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
|
||||
return -1;
|
||||
|
||||
if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id))
|
||||
return -1;
|
||||
|
||||
fail_nth_enable();
|
||||
|
||||
if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id2, &hwpt_id2))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
|
||||
IOMMU_IOAS_MAP_WRITEABLE |
|
||||
IOMMU_IOAS_MAP_READABLE))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_destroy(self->fd, device_id))
|
||||
return -1;
|
||||
if (_test_ioctl_destroy(self->fd, hwpt_id))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_destroy(self->fd, device_id2))
|
||||
return -1;
|
||||
if (_test_ioctl_destroy(self->fd, hwpt_id2))
|
||||
return -1;
|
||||
|
||||
if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id))
|
||||
return -1;
|
||||
if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id2, &hwpt_id2))
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
TEST_FAIL_NTH(basic_fail_nth, access_rw)
|
||||
{
|
||||
uint64_t tmp_big[4096];
|
||||
uint32_t ioas_id;
|
||||
uint16_t tmp[32];
|
||||
__u64 iova;
|
||||
|
||||
self->fd = open("/dev/iommu", O_RDWR);
|
||||
if (self->fd == -1)
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
|
||||
IOMMU_IOAS_MAP_WRITEABLE |
|
||||
IOMMU_IOAS_MAP_READABLE))
|
||||
return -1;
|
||||
|
||||
fail_nth_enable();
|
||||
|
||||
if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, 0))
|
||||
return -1;
|
||||
|
||||
{
|
||||
struct iommu_test_cmd access_cmd = {
|
||||
.size = sizeof(access_cmd),
|
||||
.op = IOMMU_TEST_OP_ACCESS_RW,
|
||||
.id = self->access_id,
|
||||
.access_rw = { .iova = iova,
|
||||
.length = sizeof(tmp),
|
||||
.uptr = (uintptr_t)tmp },
|
||||
};
|
||||
|
||||
// READ
|
||||
if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
|
||||
&access_cmd))
|
||||
return -1;
|
||||
|
||||
access_cmd.access_rw.flags = MOCK_ACCESS_RW_WRITE;
|
||||
if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
|
||||
&access_cmd))
|
||||
return -1;
|
||||
|
||||
access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH;
|
||||
if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
|
||||
&access_cmd))
|
||||
return -1;
|
||||
access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH |
|
||||
MOCK_ACCESS_RW_WRITE;
|
||||
if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
|
||||
&access_cmd))
|
||||
return -1;
|
||||
}
|
||||
|
||||
{
|
||||
struct iommu_test_cmd access_cmd = {
|
||||
.size = sizeof(access_cmd),
|
||||
.op = IOMMU_TEST_OP_ACCESS_RW,
|
||||
.id = self->access_id,
|
||||
.access_rw = { .iova = iova,
|
||||
.flags = MOCK_ACCESS_RW_SLOW_PATH,
|
||||
.length = sizeof(tmp_big),
|
||||
.uptr = (uintptr_t)tmp_big },
|
||||
};
|
||||
|
||||
if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
|
||||
&access_cmd))
|
||||
return -1;
|
||||
}
|
||||
if (_test_cmd_destroy_access(self->access_id))
|
||||
return -1;
|
||||
self->access_id = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* pages.c access functions */
|
||||
TEST_FAIL_NTH(basic_fail_nth, access_pin)
|
||||
{
|
||||
uint32_t access_pages_id;
|
||||
uint32_t ioas_id;
|
||||
__u64 iova;
|
||||
|
||||
self->fd = open("/dev/iommu", O_RDWR);
|
||||
if (self->fd == -1)
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
|
||||
IOMMU_IOAS_MAP_WRITEABLE |
|
||||
IOMMU_IOAS_MAP_READABLE))
|
||||
return -1;
|
||||
|
||||
if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id,
|
||||
MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES))
|
||||
return -1;
|
||||
|
||||
fail_nth_enable();
|
||||
|
||||
{
|
||||
struct iommu_test_cmd access_cmd = {
|
||||
.size = sizeof(access_cmd),
|
||||
.op = IOMMU_TEST_OP_ACCESS_PAGES,
|
||||
.id = self->access_id,
|
||||
.access_pages = { .iova = iova,
|
||||
.length = BUFFER_SIZE,
|
||||
.uptr = (uintptr_t)buffer },
|
||||
};
|
||||
|
||||
if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
|
||||
&access_cmd))
|
||||
return -1;
|
||||
access_pages_id = access_cmd.access_pages.out_access_pages_id;
|
||||
}
|
||||
|
||||
if (_test_cmd_destroy_access_pages(self->fd, self->access_id,
|
||||
access_pages_id))
|
||||
return -1;
|
||||
|
||||
if (_test_cmd_destroy_access(self->access_id))
|
||||
return -1;
|
||||
self->access_id = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* iopt_pages_fill_xarray() */
|
||||
TEST_FAIL_NTH(basic_fail_nth, access_pin_domain)
|
||||
{
|
||||
uint32_t access_pages_id;
|
||||
uint32_t ioas_id;
|
||||
__u32 device_id;
|
||||
__u32 hwpt_id;
|
||||
__u64 iova;
|
||||
|
||||
self->fd = open("/dev/iommu", O_RDWR);
|
||||
if (self->fd == -1)
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
|
||||
return -1;
|
||||
|
||||
if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id))
|
||||
return -1;
|
||||
|
||||
if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
|
||||
IOMMU_IOAS_MAP_WRITEABLE |
|
||||
IOMMU_IOAS_MAP_READABLE))
|
||||
return -1;
|
||||
|
||||
if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id,
|
||||
MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES))
|
||||
return -1;
|
||||
|
||||
fail_nth_enable();
|
||||
|
||||
{
|
||||
struct iommu_test_cmd access_cmd = {
|
||||
.size = sizeof(access_cmd),
|
||||
.op = IOMMU_TEST_OP_ACCESS_PAGES,
|
||||
.id = self->access_id,
|
||||
.access_pages = { .iova = iova,
|
||||
.length = BUFFER_SIZE,
|
||||
.uptr = (uintptr_t)buffer },
|
||||
};
|
||||
|
||||
if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
|
||||
&access_cmd))
|
||||
return -1;
|
||||
access_pages_id = access_cmd.access_pages.out_access_pages_id;
|
||||
}
|
||||
|
||||
if (_test_cmd_destroy_access_pages(self->fd, self->access_id,
|
||||
access_pages_id))
|
||||
return -1;
|
||||
|
||||
if (_test_cmd_destroy_access(self->access_id))
|
||||
return -1;
|
||||
self->access_id = 0;
|
||||
|
||||
if (_test_ioctl_destroy(self->fd, device_id))
|
||||
return -1;
|
||||
if (_test_ioctl_destroy(self->fd, hwpt_id))
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Harness-provided macro; presumably emits main() for the tests declared above */
TEST_HARNESS_MAIN
|
278
tools/testing/selftests/iommu/iommufd_utils.h
Normal file
278
tools/testing/selftests/iommu/iommufd_utils.h
Normal file
@ -0,0 +1,278 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0-only */
|
||||
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */
|
||||
#ifndef __SELFTEST_IOMMUFD_UTILS
|
||||
#define __SELFTEST_IOMMUFD_UTILS
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stddef.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "../kselftest_harness.h"
|
||||
#include "../../../../drivers/iommu/iommufd/iommufd_test.h"
|
||||
|
||||
/* Hack to make assertions more readable */
|
||||
/*
 * The argument is discarded: every expansion is plain IOMMU_TEST_CMD. Naming
 * the test op at the call site only documents which op the command carries.
 */
#define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD
|
||||
|
||||
/*
 * User memory handed to the map helpers and refcount-checked in
 * teardown_iommufd(). Not assigned in this header; presumably set up by the
 * including test file.
 */
static void *buffer;
|
||||
/*
 * Length passed alongside the shared test buffer in teardown_iommufd()'s
 * refcount check. Not assigned in this header; presumably set by the
 * including test file.
 */
static unsigned long BUFFER_SIZE;
|
||||
|
||||
/*
 * Ask the kernel, via IOMMU_TEST_OP_MD_CHECK_REFS on self->fd, to check the
 * page refcount for the @_length bytes starting at @_ptr against @_refs, and
 * assert that the ioctl succeeds. Requires `self` in the calling scope.
 *
 * It is not known why a freshly mmap'd anon non-compound page starts out
 * with a ref of 3.
 */
#define check_refs(_ptr, _length, _refs) \
	({ \
		struct iommu_test_cmd test_cmd = { \
			.size = sizeof(test_cmd), \
			.op = IOMMU_TEST_OP_MD_CHECK_REFS, \
			.check_refs = { \
				.length = (_length), \
				.uptr = (uintptr_t)(_ptr), \
				.refs = (_refs), \
			}, \
		}; \
		ASSERT_EQ(0, ioctl(self->fd, \
				   _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS), \
				   &test_cmd)); \
	})
|
||||
|
||||
/*
 * Issue IOMMU_TEST_OP_MOCK_DOMAIN against @ioas_id on @fd.
 *
 * On ioctl failure the ioctl's return value is passed back untouched and
 * nothing is stored. On success the reported device and hw page table IDs
 * are stored through @device_id and @hwpt_id (either may be NULL to discard
 * the value) and 0 is returned.
 */
static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *device_id,
				 __u32 *hwpt_id)
{
	struct iommu_test_cmd req = {
		.size = sizeof(req),
		.op = IOMMU_TEST_OP_MOCK_DOMAIN,
		.id = ioas_id,
		.mock_domain = {},
	};
	int rc = ioctl(fd, IOMMU_TEST_CMD, &req);

	if (rc != 0)
		return rc;

	if (device_id != NULL)
		*device_id = req.mock_domain.out_device_id;
	assert(req.id != 0);
	if (hwpt_id != NULL)
		*hwpt_id = req.mock_domain.out_hwpt_id;
	return 0;
}

/* Assert that a mock domain can be created on self->fd */
#define test_cmd_mock_domain(ioas_id, device_id, hwpt_id) \
	ASSERT_EQ(0, \
		  _test_cmd_mock_domain(self->fd, ioas_id, device_id, hwpt_id))

/* Expect mock domain creation on self->fd to fail with errno == @_errno */
#define test_err_mock_domain(_errno, ioas_id, device_id, hwpt_id) \
	EXPECT_ERRNO(_errno, \
		     _test_cmd_mock_domain(self->fd, ioas_id, device_id, \
					   hwpt_id))
|
||||
|
||||
static int _test_cmd_create_access(int fd, unsigned int ioas_id,
|
||||
__u32 *access_id, unsigned int flags)
|
||||
{
|
||||
struct iommu_test_cmd cmd = {
|
||||
.size = sizeof(cmd),
|
||||
.op = IOMMU_TEST_OP_CREATE_ACCESS,
|
||||
.id = ioas_id,
|
||||
.create_access = { .flags = flags },
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
|
||||
if (ret)
|
||||
return ret;
|
||||
*access_id = cmd.create_access.out_access_fd;
|
||||
return 0;
|
||||
}
|
||||
#define test_cmd_create_access(ioas_id, access_id, flags) \
|
||||
ASSERT_EQ(0, _test_cmd_create_access(self->fd, ioas_id, access_id, \
|
||||
flags))
|
||||
|
||||
/*
 * Destroy an access by closing it: @access_id is handed straight to close()
 * and close()'s return value is passed back.
 */
static int _test_cmd_destroy_access(unsigned int access_id)
{
	int rc = close(access_id);

	return rc;
}

/* Assert that destroying the access succeeds */
#define test_cmd_destroy_access(access_id) \
	ASSERT_EQ(0, _test_cmd_destroy_access(access_id))
|
||||
|
||||
static int _test_cmd_destroy_access_pages(int fd, unsigned int access_id,
|
||||
unsigned int access_pages_id)
|
||||
{
|
||||
struct iommu_test_cmd cmd = {
|
||||
.size = sizeof(cmd),
|
||||
.op = IOMMU_TEST_OP_DESTROY_ACCESS_PAGES,
|
||||
.id = access_id,
|
||||
.destroy_access_pages = { .access_pages_id = access_pages_id },
|
||||
};
|
||||
return ioctl(fd, IOMMU_TEST_CMD, &cmd);
|
||||
}
|
||||
#define test_cmd_destroy_access_pages(access_id, access_pages_id) \
|
||||
ASSERT_EQ(0, _test_cmd_destroy_access_pages(self->fd, access_id, \
|
||||
access_pages_id))
|
||||
#define test_err_destroy_access_pages(_errno, access_id, access_pages_id) \
|
||||
EXPECT_ERRNO(_errno, _test_cmd_destroy_access_pages( \
|
||||
self->fd, access_id, access_pages_id))
|
||||
|
||||
static int _test_ioctl_destroy(int fd, unsigned int id)
|
||||
{
|
||||
struct iommu_destroy cmd = {
|
||||
.size = sizeof(cmd),
|
||||
.id = id,
|
||||
};
|
||||
return ioctl(fd, IOMMU_DESTROY, &cmd);
|
||||
}
|
||||
#define test_ioctl_destroy(id) ASSERT_EQ(0, _test_ioctl_destroy(self->fd, id))
|
||||
|
||||
/*
 * Issue IOMMU_IOAS_ALLOC on @fd.
 *
 * On success the new IOAS ID is stored through @id (which must not be NULL)
 * and 0 is returned. On failure the ioctl's return value is passed back and
 * @id is left untouched.
 */
static int _test_ioctl_ioas_alloc(int fd, __u32 *id)
{
	struct iommu_ioas_alloc req = { .size = sizeof(req) };
	int rc = ioctl(fd, IOMMU_IOAS_ALLOC, &req);

	if (rc == 0)
		*id = req.out_ioas_id;
	return rc;
}

/* Assert that an IOAS can be allocated on self->fd and its ID is non-zero */
#define test_ioctl_ioas_alloc(id) \
	({ \
		ASSERT_EQ(0, _test_ioctl_ioas_alloc(self->fd, id)); \
		ASSERT_NE(0, *(id)); \
	})
|
||||
|
||||
/*
 * Issue IOMMU_IOAS_MAP on @fd, mapping @length bytes at @buffer into
 * @ioas_id with @flags.
 *
 * @iova is in/out and must not be NULL: it is read as the requested IOVA
 * only when IOMMU_IOAS_MAP_FIXED_IOVA is set in @flags, and it is always
 * overwritten with the command's iova field after the ioctl, whether or not
 * the ioctl succeeded. Returns the ioctl result.
 */
static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer,
				size_t length, __u64 *iova, unsigned int flags)
{
	struct iommu_ioas_map req = {
		.size = sizeof(req),
		.flags = flags,
		.ioas_id = ioas_id,
		.user_va = (uintptr_t)buffer,
		.length = length,
		.iova = (flags & IOMMU_IOAS_MAP_FIXED_IOVA) ? *iova : 0,
	};
	int rc = ioctl(fd, IOMMU_IOAS_MAP, &req);

	*iova = req.iova;
	return rc;
}

/* Assert that a read/write mapping into self->ioas_id succeeds */
#define test_ioctl_ioas_map(buffer, length, iova_p) \
	ASSERT_EQ(0, \
		  _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \
				       length, iova_p, \
				       IOMMU_IOAS_MAP_WRITEABLE | \
					       IOMMU_IOAS_MAP_READABLE))

/* Expect a read/write mapping into self->ioas_id to fail with @_errno */
#define test_err_ioctl_ioas_map(_errno, buffer, length, iova_p) \
	EXPECT_ERRNO(_errno, \
		     _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \
					  length, iova_p, \
					  IOMMU_IOAS_MAP_WRITEABLE | \
						  IOMMU_IOAS_MAP_READABLE))

/* Assert that a read/write mapping into the given @ioas_id succeeds */
#define test_ioctl_ioas_map_id(ioas_id, buffer, length, iova_p) \
	ASSERT_EQ(0, \
		  _test_ioctl_ioas_map(self->fd, ioas_id, buffer, length, \
				       iova_p, \
				       IOMMU_IOAS_MAP_WRITEABLE | \
					       IOMMU_IOAS_MAP_READABLE))

/* Assert that a read/write mapping at the fixed IOVA @iova succeeds */
#define test_ioctl_ioas_map_fixed(buffer, length, iova) \
	({ \
		__u64 __iova = iova; \
		ASSERT_EQ(0, \
			  _test_ioctl_ioas_map(self->fd, self->ioas_id, \
					       buffer, length, &__iova, \
					       IOMMU_IOAS_MAP_FIXED_IOVA | \
						       IOMMU_IOAS_MAP_WRITEABLE | \
						       IOMMU_IOAS_MAP_READABLE)); \
	})

/* Expect a read/write mapping at the fixed IOVA @iova to fail with @_errno */
#define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova) \
	({ \
		__u64 __iova = iova; \
		EXPECT_ERRNO(_errno, \
			     _test_ioctl_ioas_map(self->fd, self->ioas_id, \
						  buffer, length, &__iova, \
						  IOMMU_IOAS_MAP_FIXED_IOVA | \
							  IOMMU_IOAS_MAP_WRITEABLE | \
							  IOMMU_IOAS_MAP_READABLE)); \
	})
|
||||
|
||||
static int _test_ioctl_ioas_unmap(int fd, unsigned int ioas_id, uint64_t iova,
|
||||
size_t length, uint64_t *out_len)
|
||||
{
|
||||
struct iommu_ioas_unmap cmd = {
|
||||
.size = sizeof(cmd),
|
||||
.ioas_id = ioas_id,
|
||||
.iova = iova,
|
||||
.length = length,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = ioctl(fd, IOMMU_IOAS_UNMAP, &cmd);
|
||||
if (out_len)
|
||||
*out_len = cmd.length;
|
||||
return ret;
|
||||
}
|
||||
#define test_ioctl_ioas_unmap(iova, length) \
|
||||
ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, iova, \
|
||||
length, NULL))
|
||||
|
||||
#define test_ioctl_ioas_unmap_id(ioas_id, iova, length) \
|
||||
ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, length, \
|
||||
NULL))
|
||||
|
||||
#define test_err_ioctl_ioas_unmap(_errno, iova, length) \
|
||||
EXPECT_ERRNO(_errno, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, \
|
||||
iova, length, NULL))
|
||||
|
||||
static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit)
|
||||
{
|
||||
struct iommu_test_cmd memlimit_cmd = {
|
||||
.size = sizeof(memlimit_cmd),
|
||||
.op = IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT,
|
||||
.memory_limit = { .limit = limit },
|
||||
};
|
||||
|
||||
return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT),
|
||||
&memlimit_cmd);
|
||||
}
|
||||
|
||||
#define test_ioctl_set_temp_memory_limit(limit) \
|
||||
ASSERT_EQ(0, _test_ioctl_set_temp_memory_limit(self->fd, limit))
|
||||
|
||||
#define test_ioctl_set_default_memory_limit() \
|
||||
test_ioctl_set_temp_memory_limit(65536)
|
||||
|
||||
static void teardown_iommufd(int fd, struct __test_metadata *_metadata)
|
||||
{
|
||||
struct iommu_test_cmd test_cmd = {
|
||||
.size = sizeof(test_cmd),
|
||||
.op = IOMMU_TEST_OP_MD_CHECK_REFS,
|
||||
.check_refs = { .length = BUFFER_SIZE,
|
||||
.uptr = (uintptr_t)buffer },
|
||||
};
|
||||
|
||||
if (fd == -1)
|
||||
return;
|
||||
|
||||
EXPECT_EQ(0, close(fd));
|
||||
|
||||
fd = open("/dev/iommu", O_RDWR);
|
||||
EXPECT_NE(-1, fd);
|
||||
EXPECT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS),
|
||||
&test_cmd));
|
||||
EXPECT_EQ(0, close(fd));
|
||||
}
|
||||
|
||||
/*
 * Check a call that is supposed to fail: first ASSERT that @cmd evaluates to
 * -1, then EXPECT errno to equal @expected_errno. @cmd is evaluated exactly
 * once by this macro's own text.
 */
#define EXPECT_ERRNO(expected_errno, cmd) \
	({ \
		ASSERT_EQ(-1, cmd); \
		EXPECT_EQ(expected_errno, errno); \
	})
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user