Merge branches 'iommu/fixes', 'arm/rockchip', 'arm/smmu', 'x86/vt-d', 'x86/amd', 'virtio' and 'core' into next

This commit is contained in:
Joerg Roedel 2021-06-25 15:23:25 +02:00
56 changed files with 2175 additions and 784 deletions

View File

@ -301,6 +301,9 @@
allowed anymore to lift isolation
requirements as needed. This option
does not override iommu=pt
force_enable - Force enable the IOMMU on platforms known
to be buggy with IOMMU enabled. Use this
option with care.
amd_iommu_dump= [HW,X86-64]
Enable AMD IOMMU driver option to dump the ACPI table
@ -1987,7 +1990,7 @@
forcing Dual Address Cycle for PCI cards supporting
greater than 32-bit addressing.
iommu.strict= [ARM64] Configure TLB invalidation behaviour
iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour
Format: { "0" | "1" }
0 - Lazy mode.
Request that DMA unmap operations use deferred
@ -1998,6 +2001,10 @@
1 - Strict mode (default).
DMA unmap operations invalidate IOMMU hardware TLBs
synchronously.
Note: on x86, the default behaviour depends on the
equivalent driver-specific parameters, but a strict
mode explicitly specified by either method takes
precedence.
iommu.passthrough=
[ARM64, X86] Configure DMA to bypass the IOMMU by default.

View File

@ -92,6 +92,24 @@ Optional properties:
tagging DMA transactions with an address space identifier. By default,
this is 0, which means that the device only has one address space.
- dma-can-stall: When present, the master can wait for a transaction to
complete for an indefinite amount of time. Upon translation fault some
IOMMUs, instead of aborting the translation immediately, may first
notify the driver and keep the transaction in flight. This allows the OS
to inspect the fault and, for example, make physical pages resident
before updating the mappings and completing the transaction. Such IOMMU
accepts a limited number of simultaneous stalled transactions before
having to either put back-pressure on the master, or abort new faulting
transactions.
Firmware has to opt in to stalling, because most buses and masters don't
support it. In particular it isn't compatible with PCI, where
transactions have to complete before a time limit. More generally it
won't work in systems and masters that haven't been designed for
stalling. For example the OS, in order to handle a stalled transaction,
may attempt to retrieve pages from secondary storage in a stalled
domain, leading to a deadlock.
Notes:
======

View File

@ -1,38 +0,0 @@
Rockchip IOMMU
==============
A Rockchip DRM iommu translates io virtual addresses to physical addresses for
its master device. Each slave device is bound to a single master device, and
shares its clocks, power domain and irq.
Required properties:
- compatible : Should be "rockchip,iommu"
- reg : Address space for the configuration registers
- interrupts : Interrupt specifier for the IOMMU instance
- interrupt-names : Interrupt name for the IOMMU instance
- #iommu-cells : Should be <0>. This indicates the iommu is a
"single-master" device, and needs no additional information
to associate with its master device. See:
Documentation/devicetree/bindings/iommu/iommu.txt
- clocks : A list of clocks required for the IOMMU to be accessible by
the host CPU.
- clock-names : Should contain the following:
"iface" - Main peripheral bus clock (PCLK/HCL) (required)
"aclk" - AXI bus clock (required)
Optional properties:
- rockchip,disable-mmu-reset : Don't use the mmu reset operation.
Some mmu instances may produce unexpected results
when the reset operation is used.
Example:
vopl_mmu: iommu@ff940300 {
compatible = "rockchip,iommu";
reg = <0xff940300 0x100>;
interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "vopl_mmu";
clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
clock-names = "aclk", "iface";
#iommu-cells = <0>;
};

View File

@ -0,0 +1,85 @@
# SPDX-License-Identifier: GPL-2.0-only
%YAML 1.2
---
$id: http://devicetree.org/schemas/iommu/rockchip,iommu.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Rockchip IOMMU
maintainers:
- Heiko Stuebner <heiko@sntech.de>
description: |+
A Rockchip DRM iommu translates io virtual addresses to physical addresses for
its master device. Each slave device is bound to a single master device and
shares its clocks, power domain and irq.
For information on assigning IOMMU controller to its peripheral devices,
see generic IOMMU bindings.
properties:
compatible:
enum:
- rockchip,iommu
- rockchip,rk3568-iommu
reg:
items:
- description: configuration registers for MMU instance 0
- description: configuration registers for MMU instance 1
minItems: 1
maxItems: 2
interrupts:
items:
- description: interrupt for MMU instance 0
- description: interrupt for MMU instance 1
minItems: 1
maxItems: 2
clocks:
items:
- description: Core clock
- description: Interface clock
clock-names:
items:
- const: aclk
- const: iface
"#iommu-cells":
const: 0
power-domains:
maxItems: 1
rockchip,disable-mmu-reset:
$ref: /schemas/types.yaml#/definitions/flag
description: |
Do not use the mmu reset operation.
Some mmu instances may produce unexpected results
when the reset operation is used.
required:
- compatible
- reg
- interrupts
- clocks
- clock-names
- "#iommu-cells"
additionalProperties: false
examples:
- |
#include <dt-bindings/clock/rk3399-cru.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
vopl_mmu: iommu@ff940300 {
compatible = "rockchip,iommu";
reg = <0xff940300 0x100>;
interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&cru ACLK_VOP1>, <&cru HCLK_VOP1>;
clock-names = "aclk", "iface";
#iommu-cells = <0>;
};

View File

@ -431,6 +431,14 @@ W: https://01.org/linux-acpi
B: https://bugzilla.kernel.org
F: drivers/acpi/acpi_video.c
ACPI VIOT DRIVER
M: Jean-Philippe Brucker <jean-philippe@linaro.org>
L: linux-acpi@vger.kernel.org
L: iommu@lists.linux-foundation.org
S: Maintained
F: drivers/acpi/viot.c
F: include/linux/acpi_viot.h
ACPI WMI DRIVER
L: platform-driver-x86@vger.kernel.org
S: Orphan

View File

@ -1136,7 +1136,7 @@
};
adreno_smmu: iommu@b40000 {
compatible = "qcom,msm8996-smmu-v2", "qcom,smmu-v2";
compatible = "qcom,msm8996-smmu-v2", "qcom,adreno-smmu", "qcom,smmu-v2";
reg = <0x00b40000 0x10000>;
#global-interrupts = <1>;

View File

@ -50,7 +50,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev->dma_coherent = coherent;
if (iommu)
iommu_setup_dma_ops(dev, dma_base, size);
iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
#ifdef CONFIG_XEN
if (xen_swiotlb_detect())

View File

@ -526,6 +526,9 @@ endif
source "drivers/acpi/pmic/Kconfig"
config ACPI_VIOT
bool
endif # ACPI
config X86_PM_TIMER

View File

@ -118,3 +118,5 @@ video-objs += acpi_video.o video_detect.o
obj-y += dptf/
obj-$(CONFIG_ARM64) += arm64/
obj-$(CONFIG_ACPI_VIOT) += viot.o

View File

@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_ACPI_IORT) += iort.o
obj-$(CONFIG_ACPI_GTDT) += gtdt.o
obj-y += dma.o

50
drivers/acpi/arm64/dma.c Normal file
View File

@ -0,0 +1,50 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/device.h>
#include <linux/dma-direct.h>
/*
 * acpi_arch_dma_setup() - derive DMA base, size and masks for an ACPI device
 * @dev: device to configure
 * @dma_addr: receives the DMA base address that was looked up (0 if none)
 * @dma_size: receives the DMA range size that was looked up or defaulted
 *
 * The range is taken from acpi_dma_get_range() and, when that reports
 * -ENODEV, from iort_dma_get_ranges(). When a lookup succeeds the device
 * masks and bus_dma_limit are clamped to the range.
 */
void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
{
	u64 base = 0, span, offset = 0;
	int err;

	/*
	 * If @dev is expected to be DMA-capable then the bus code that created
	 * it should have initialised its dma_mask pointer by this point. The
	 * legacy behaviour of falling back to the coherent mask is kept, but
	 * it is reported loudly.
	 */
	if (!dev->dma_mask) {
		dev_warn(dev, "DMA mask not set\n");
		dev->dma_mask = &dev->coherent_dma_mask;
	}

	/*
	 * Default size: mask + 1, except that a full 64-bit mask would wrap
	 * to 0, in which case max() keeps the mask itself. With no coherent
	 * mask at all, fall back to 4GiB.
	 */
	span = dev->coherent_dma_mask ?
		max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1) :
		1ULL << 32;

	err = acpi_dma_get_range(dev, &base, &offset, &span);
	if (err == -ENODEV)
		err = iort_dma_get_ranges(dev, &span);

	if (err == 0) {
		/* Clamp coherent and streaming masks to the firmware range. */
		u64 last = base + span - 1;
		u64 limit = DMA_BIT_MASK(ilog2(last) + 1);

		dev->bus_dma_limit = last;
		dev->coherent_dma_mask = min(dev->coherent_dma_mask, limit);
		*dev->dma_mask = min(*dev->dma_mask, limit);
	}

	*dma_addr = base;
	*dma_size = span;

	/* A failure here is only reported through the debug message. */
	err = dma_direct_set_offset(dev, base + offset, base, span);

	dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, err ? " failed!" : "");
}

View File

@ -806,23 +806,6 @@ static struct acpi_iort_node *iort_get_msi_resv_iommu(struct device *dev)
return NULL;
}
/* Return the iommu_ops recorded in @dev's fwspec, or NULL if there are none. */
static inline const struct iommu_ops *iort_fwspec_iommu_ops(struct device *dev)
{
	struct iommu_fwspec *spec = dev_iommu_fwspec_get(dev);

	if (!spec)
		return NULL;

	/* A NULL ops pointer is returned as-is, i.e. as NULL. */
	return spec->ops;
}
/*
 * Call iommu_probe_device() for @dev when it sits on a bus and is not yet
 * mapped by an IOMMU. Returns 0 when nothing had to be done.
 */
static inline int iort_add_device_replay(struct device *dev)
{
	if (!dev->bus || device_iommu_mapped(dev))
		return 0;

	return iommu_probe_device(dev);
}
/**
* iort_iommu_msi_get_resv_regions - Reserved region driver helper
* @dev: Device from iommu_get_resv_regions()
@ -900,18 +883,6 @@ static inline bool iort_iommu_driver_enabled(u8 type)
}
}
/*
 * Initialise @dev's IOMMU fwspec for @fwnode/@ops and, on success, add
 * @streamid to it. Returns 0 or the first error encountered.
 */
static int arm_smmu_iort_xlate(struct device *dev, u32 streamid,
			       struct fwnode_handle *fwnode,
			       const struct iommu_ops *ops)
{
	int err;

	err = iommu_fwspec_init(dev, fwnode, ops);
	if (err)
		return err;

	return iommu_fwspec_add_ids(dev, &streamid, 1);
}
static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node)
{
struct acpi_iort_root_complex *pci_rc;
@ -946,7 +917,7 @@ static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node,
return iort_iommu_driver_enabled(node->type) ?
-EPROBE_DEFER : -ENODEV;
return arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops);
return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode, ops);
}
struct iort_pci_alias_info {
@ -968,13 +939,15 @@ static int iort_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
static void iort_named_component_init(struct device *dev,
struct acpi_iort_node *node)
{
struct property_entry props[2] = {};
struct property_entry props[3] = {};
struct acpi_iort_named_component *nc;
nc = (struct acpi_iort_named_component *)node->node_data;
props[0] = PROPERTY_ENTRY_U32("pasid-num-bits",
FIELD_GET(ACPI_IORT_NC_PASID_BITS,
nc->node_flags));
if (nc->node_flags & ACPI_IORT_NC_STALL_SUPPORTED)
props[1] = PROPERTY_ENTRY_BOOL("dma-can-stall");
if (device_add_properties(dev, props))
dev_warn(dev, "Could not add device properties\n");
@ -1020,24 +993,13 @@ static int iort_nc_iommu_map_id(struct device *dev,
* @dev: device to configure
* @id_in: optional input id const value pointer
*
* Returns: iommu_ops pointer on configuration success
* NULL on configuration failure
* Returns: 0 on success, <0 on failure
*/
const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
const u32 *id_in)
int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
{
struct acpi_iort_node *node;
const struct iommu_ops *ops;
int err = -ENODEV;
/*
* If we already translated the fwspec there
* is nothing left to do, return the iommu_ops.
*/
ops = iort_fwspec_iommu_ops(dev);
if (ops)
return ops;
if (dev_is_pci(dev)) {
struct iommu_fwspec *fwspec;
struct pci_bus *bus = to_pci_dev(dev)->bus;
@ -1046,7 +1008,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
iort_match_node_callback, &bus->dev);
if (!node)
return NULL;
return -ENODEV;
info.node = node;
err = pci_for_each_dma_alias(to_pci_dev(dev),
@ -1059,7 +1021,7 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
node = iort_scan_node(ACPI_IORT_NODE_NAMED_COMPONENT,
iort_match_node_callback, dev);
if (!node)
return NULL;
return -ENODEV;
err = id_in ? iort_nc_iommu_map_id(dev, node, id_in) :
iort_nc_iommu_map(dev, node);
@ -1068,32 +1030,14 @@ const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
iort_named_component_init(dev, node);
}
/*
* If we have reason to believe the IOMMU driver missed the initial
* add_device callback for dev, replay it to get things in order.
*/
if (!err) {
ops = iort_fwspec_iommu_ops(dev);
err = iort_add_device_replay(dev);
}
/* Ignore all other errors apart from EPROBE_DEFER */
if (err == -EPROBE_DEFER) {
ops = ERR_PTR(err);
} else if (err) {
dev_dbg(dev, "Adding to IOMMU failed: %d\n", err);
ops = NULL;
}
return ops;
return err;
}
#else
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
{ return 0; }
const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
const u32 *input_id)
{ return NULL; }
int iort_iommu_configure_id(struct device *dev, const u32 *input_id)
{ return -ENODEV; }
#endif
static int nc_dma_get_range(struct device *dev, u64 *size)
@ -1144,56 +1088,18 @@ static int rc_dma_get_range(struct device *dev, u64 *size)
}
/**
* iort_dma_setup() - Set-up device DMA parameters.
* iort_dma_get_ranges() - Look up DMA addressing limit for the device
* @dev: device to lookup
* @size: DMA range size result pointer
*
* @dev: device to configure
* @dma_addr: device DMA address result pointer
* @dma_size: DMA range size result pointer
* Return: 0 on success, an error otherwise.
*/
void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
int iort_dma_get_ranges(struct device *dev, u64 *size)
{
u64 end, mask, dmaaddr = 0, size = 0, offset = 0;
int ret;
/*
* If @dev is expected to be DMA-capable then the bus code that created
* it should have initialised its dma_mask pointer by this point. For
* now, we'll continue the legacy behaviour of coercing it to the
* coherent mask if not, but we'll no longer do so quietly.
*/
if (!dev->dma_mask) {
dev_warn(dev, "DMA mask not set\n");
dev->dma_mask = &dev->coherent_dma_mask;
}
if (dev->coherent_dma_mask)
size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
if (dev_is_pci(dev))
return rc_dma_get_range(dev, size);
else
size = 1ULL << 32;
ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
if (ret == -ENODEV)
ret = dev_is_pci(dev) ? rc_dma_get_range(dev, &size)
: nc_dma_get_range(dev, &size);
if (!ret) {
/*
* Limit coherent and dma mask based on size retrieved from
* firmware.
*/
end = dmaaddr + size - 1;
mask = DMA_BIT_MASK(ilog2(end) + 1);
dev->bus_dma_limit = end;
dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask);
*dev->dma_mask = min(*dev->dma_mask, mask);
}
*dma_addr = dmaaddr;
*dma_size = size;
ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size);
dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : "");
return nc_dma_get_range(dev, size);
}
static void __init acpi_iort_register_irq(int hwirq, const char *name,

View File

@ -27,6 +27,7 @@
#include <linux/dmi.h>
#endif
#include <linux/acpi_iort.h>
#include <linux/acpi_viot.h>
#include <linux/pci.h>
#include <acpi/apei.h>
#include <linux/suspend.h>
@ -1334,6 +1335,7 @@ static int __init acpi_init(void)
acpi_wakeup_device_init();
acpi_debugger_init();
acpi_setup_sb_notify_handler();
acpi_viot_init();
return 0;
}

View File

@ -9,6 +9,8 @@
#include <linux/kernel.h>
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/acpi_viot.h>
#include <linux/iommu.h>
#include <linux/signal.h>
#include <linux/kthread.h>
#include <linux/dmi.h>
@ -1520,6 +1522,78 @@ int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
return ret >= 0 ? 0 : ret;
}
#ifdef CONFIG_IOMMU_API
/*
 * acpi_iommu_fwspec_init() - set up @dev's IOMMU fwspec with a single ID
 * @dev: device whose fwspec is initialised
 * @id: ID added to the fwspec once it is initialised
 * @fwnode: firmware node passed to iommu_fwspec_init()
 * @ops: IOMMU ops passed to iommu_fwspec_init()
 *
 * Return: 0 on success, otherwise the error from iommu_fwspec_init() or
 * iommu_fwspec_add_ids().
 */
int acpi_iommu_fwspec_init(struct device *dev, u32 id,
			   struct fwnode_handle *fwnode,
			   const struct iommu_ops *ops)
{
	int err;

	err = iommu_fwspec_init(dev, fwnode, ops);
	if (err)
		return err;

	return iommu_fwspec_add_ids(dev, &id, 1);
}
/* Return the iommu_ops stored in @dev's fwspec, or NULL without a fwspec. */
static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev)
{
	struct iommu_fwspec *spec = dev_iommu_fwspec_get(dev);

	if (!spec)
		return NULL;

	return spec->ops;
}
/*
 * Configure the IOMMU for @dev, trying IORT first and VIOT second.
 *
 * Returns the device's iommu_ops on success, ERR_PTR(-EPROBE_DEFER) when
 * probing must be retried later, and NULL for every other failure.
 */
static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
						       const u32 *id_in)
{
	const struct iommu_ops *known = acpi_iommu_fwspec_ops(dev);
	int ret;

	/* The fwspec was already translated: nothing left to do. */
	if (known)
		return known;

	/* VIOT is consulted only when IORT failed for good (not a deferral). */
	ret = iort_iommu_configure_id(dev, id_in);
	if (ret != 0 && ret != -EPROBE_DEFER)
		ret = viot_iommu_configure(dev);

	/*
	 * If we have reason to believe the IOMMU driver missed the initial
	 * iommu_probe_device() call for dev, replay it to get things in order.
	 */
	if (ret == 0 && dev->bus && !device_iommu_mapped(dev))
		ret = iommu_probe_device(dev);

	/* Only EPROBE_DEFER is propagated; other errors are just logged. */
	if (ret == -EPROBE_DEFER)
		return ERR_PTR(ret);

	if (ret) {
		dev_dbg(dev, "Adding to IOMMU failed: %d\n", ret);
		return NULL;
	}

	return acpi_iommu_fwspec_ops(dev);
}
#else /* !CONFIG_IOMMU_API */
/*
 * Stub used when CONFIG_IOMMU_API is not set: no fwspec can be initialised,
 * so always report -ENODEV.
 */
int acpi_iommu_fwspec_init(struct device *dev, u32 id,
struct fwnode_handle *fwnode,
const struct iommu_ops *ops)
{
return -ENODEV;
}
/*
 * Stub used when CONFIG_IOMMU_API is not set: there are never any IOMMU ops
 * for the device, so always return NULL.
 */
static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev,
const u32 *id_in)
{
return NULL;
}
#endif /* !CONFIG_IOMMU_API */
/**
* acpi_dma_configure_id - Set-up DMA configuration for the device.
* @dev: The pointer to the device
@ -1537,9 +1611,9 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,
return 0;
}
iort_dma_setup(dev, &dma_addr, &size);
acpi_arch_dma_setup(dev, &dma_addr, &size);
iommu = iort_iommu_configure_id(dev, input_id);
iommu = acpi_iommu_configure_id(dev, input_id);
if (PTR_ERR(iommu) == -EPROBE_DEFER)
return -EPROBE_DEFER;

366
drivers/acpi/viot.c Normal file
View File

@ -0,0 +1,366 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Virtual I/O topology
*
* The Virtual I/O Translation Table (VIOT) describes the topology of
* para-virtual IOMMUs and the endpoints they manage. The OS uses it to
* initialize devices in the right order, preventing endpoints from issuing DMA
* before their IOMMU is ready.
*
* When binding a driver to a device, before calling the device driver's probe()
* method, the driver infrastructure calls dma_configure(). At that point the
* VIOT driver looks for an IOMMU associated to the device in the VIOT table.
* If an IOMMU exists and has been initialized, the VIOT driver initializes the
* device's IOMMU fwspec, allowing the DMA infrastructure to invoke the IOMMU
* ops when the device driver configures DMA mappings. If an IOMMU exists and
* hasn't yet been initialized, VIOT returns -EPROBE_DEFER to postpone probing
* the device until the IOMMU is available.
*/
#define pr_fmt(fmt) "ACPI: VIOT: " fmt
#include <linux/acpi_viot.h>
#include <linux/dma-iommu.h>
#include <linux/fwnode.h>
#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
/* One IOMMU node discovered in the VIOT table; linked on viot_iommus. */
struct viot_iommu {
/* Node offset within the table */
unsigned int offset;
/* Firmware node of the IOMMU device, set by viot_get_*_iommu_fwnode() */
struct fwnode_handle *fwnode;
/* Entry in the viot_iommus list */
struct list_head list;
};
/*
 * One endpoint node from the VIOT table: either a PCI range (linked on
 * viot_pci_ranges) or a single MMIO endpoint (linked on viot_mmio_endpoints).
 */
struct viot_endpoint {
union {
/* PCI range */
struct {
u16 segment_start;
u16 segment_end;
u16 bdf_start;
u16 bdf_end;
};
/* MMIO */
u64 address;
};
/* First endpoint ID of a PCI range, or the endpoint ID of an MMIO node */
u32 endpoint_id;
/* IOMMU returned by viot_get_iommu() for the node's output_node */
struct viot_iommu *viommu;
/* Entry in viot_pci_ranges or viot_mmio_endpoints */
struct list_head list;
};
/* VIOT table pointer, assigned in acpi_viot_init() */
static struct acpi_table_viot *viot;
/* All struct viot_iommu instances created by viot_get_iommu() */
static LIST_HEAD(viot_iommus);
/* Endpoints parsed from ACPI_VIOT_NODE_PCI_RANGE nodes */
static LIST_HEAD(viot_pci_ranges);
/* Endpoints parsed from ACPI_VIOT_NODE_MMIO nodes */
static LIST_HEAD(viot_mmio_endpoints);
/*
 * viot_check_bounds() - validate that a VIOT node lies inside the table
 * @hdr: node header to check
 *
 * The node area starts at the larger of sizeof(*viot) and viot->node_offset
 * and ends at viot->header.length. Both the node header and the whole node,
 * as described by hdr->length, must fit inside it. Callers read type-specific
 * fields after only comparing hdr->length with the structure size, so the
 * claimed length has to be verified against the table end here.
 *
 * Return: 0 if the node is usable, -EOVERFLOW if it extends outside the
 * table, -EINVAL if it is smaller than a node header.
 */
static int __init viot_check_bounds(const struct acpi_viot_header *hdr)
{
	struct acpi_viot_header *start, *end, *hdr_end;

	start = ACPI_ADD_PTR(struct acpi_viot_header, viot,
			     max_t(size_t, sizeof(*viot), viot->node_offset));
	end = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->header.length);
	hdr_end = ACPI_ADD_PTR(struct acpi_viot_header, hdr, sizeof(*hdr));

	if (hdr < start || hdr_end > end) {
		pr_err(FW_BUG "Node pointer overflows\n");
		return -EOVERFLOW;
	}
	if (hdr->length < sizeof(*hdr)) {
		pr_err(FW_BUG "Empty node\n");
		return -EINVAL;
	}
	/*
	 * hdr_end <= end was established above, so the distance to the end of
	 * the table is positive and the subtraction cannot underflow.
	 */
	if (hdr->length > (size_t)((const u8 *)end - (const u8 *)hdr)) {
		pr_err(FW_BUG "Node overflows table\n");
		return -EOVERFLOW;
	}
	return 0;
}
/*
 * Look up the PCI device that implements an IOMMU and record its fwnode in
 * @viommu. The device is located by @segment, the bus number extracted from
 * @bdf and the low 8 bits of @bdf.
 *
 * Returns 0 on success, -ENODEV when the device does not exist and -ENOMEM
 * when a fwnode had to be allocated but could not be.
 */
static int __init viot_get_pci_iommu_fwnode(struct viot_iommu *viommu,
					    u16 segment, u16 bdf)
{
	struct pci_dev *iommu_pdev;
	int ret = 0;

	iommu_pdev = pci_get_domain_bus_and_slot(segment, PCI_BUS_NUM(bdf),
						 bdf & 0xff);
	if (!iommu_pdev) {
		pr_err("Could not find PCI IOMMU\n");
		return -ENODEV;
	}

	if (!iommu_pdev->dev.fwnode) {
		/*
		 * PCI devices aren't necessarily described by ACPI. Create a
		 * fwnode so the IOMMU subsystem can identify this device.
		 */
		struct fwnode_handle *fresh = acpi_alloc_fwnode_static();

		if (!fresh) {
			ret = -ENOMEM;
			goto out_put;
		}
		set_primary_fwnode(&iommu_pdev->dev, fresh);
	}
	viommu->fwnode = iommu_pdev->dev.fwnode;

out_put:
	/* Drop the reference taken by the lookup on every path. */
	pci_dev_put(iommu_pdev);
	return ret;
}
/*
 * Find the ACPI device that consumes the memory resource at @address and
 * record its fwnode in @viommu. Returns 0 on success, -EINVAL if no ACPI
 * device claims that address.
 */
static int __init viot_get_mmio_iommu_fwnode(struct viot_iommu *viommu,
					     u64 address)
{
	/* Single-byte memory resource used purely as a lookup key. */
	struct resource probe = {
		.flags	= IORESOURCE_MEM,
		.start	= address,
		.end	= address,
	};
	struct acpi_device *consumer = acpi_resource_consumer(&probe);

	if (consumer) {
		viommu->fwnode = &consumer->fwnode;
		return 0;
	}

	pr_err("Could not find MMIO IOMMU\n");
	return -EINVAL;
}
/*
 * Return the viot_iommu describing the IOMMU node at @offset in the VIOT
 * table, creating and registering it on first use.
 *
 * Returns NULL when the node is out of bounds, too short for its type, of an
 * unknown type, when its device cannot be found, or on allocation failure.
 */
static struct viot_iommu * __init viot_get_iommu(unsigned int offset)
{
	struct acpi_viot_header *hdr = ACPI_ADD_PTR(struct acpi_viot_header,
						    viot, offset);
	union {
		struct acpi_viot_virtio_iommu_pci pci;
		struct acpi_viot_virtio_iommu_mmio mmio;
	} *node = (void *)hdr;
	struct viot_iommu *viommu;
	/* Stays -EINVAL for unknown node types and undersized nodes. */
	int ret = -EINVAL;

	/* Reuse an IOMMU that was already instantiated for this offset. */
	list_for_each_entry(viommu, &viot_iommus, list) {
		if (viommu->offset == offset)
			return viommu;
	}

	if (viot_check_bounds(hdr))
		return NULL;

	viommu = kzalloc(sizeof(*viommu), GFP_KERNEL);
	if (!viommu)
		return NULL;

	viommu->offset = offset;

	if (hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI) {
		if (hdr->length >= sizeof(node->pci))
			ret = viot_get_pci_iommu_fwnode(viommu,
							node->pci.segment,
							node->pci.bdf);
	} else if (hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO) {
		if (hdr->length >= sizeof(node->mmio))
			ret = viot_get_mmio_iommu_fwnode(viommu,
							 node->mmio.base_address);
	}

	if (ret) {
		kfree(viommu);
		return NULL;
	}

	list_add(&viommu->list, &viot_iommus);
	return viommu;
}
/*
 * Parse one VIOT node. IOMMU nodes are skipped here (they are instantiated
 * on demand by viot_get_iommu()); PCI range and MMIO nodes become
 * viot_endpoint entries on the matching list.
 *
 * Returns 0 when parsing may continue (including for unsupported nodes and
 * endpoints without a usable IOMMU), -EINVAL for out-of-bounds or undersized
 * nodes, and -ENOMEM on allocation failure.
 */
static int __init viot_parse_node(const struct acpi_viot_header *hdr)
{
	union {
		struct acpi_viot_mmio mmio;
		struct acpi_viot_pci_range pci;
	} *node = (void *)hdr;
	struct viot_endpoint *ep;
	struct list_head *dest;
	int ret;

	if (viot_check_bounds(hdr))
		return -EINVAL;

	if (hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_PCI ||
	    hdr->type == ACPI_VIOT_NODE_VIRTIO_IOMMU_MMIO)
		return 0;

	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
	if (!ep)
		return -ENOMEM;

	if (hdr->type == ACPI_VIOT_NODE_PCI_RANGE) {
		if (hdr->length < sizeof(node->pci)) {
			pr_err(FW_BUG "Invalid PCI node size\n");
			ret = -EINVAL;
			goto out_free;
		}

		ep->segment_start = node->pci.segment_start;
		ep->segment_end = node->pci.segment_end;
		ep->bdf_start = node->pci.bdf_start;
		ep->bdf_end = node->pci.bdf_end;
		ep->endpoint_id = node->pci.endpoint_start;
		ep->viommu = viot_get_iommu(node->pci.output_node);
		dest = &viot_pci_ranges;
	} else if (hdr->type == ACPI_VIOT_NODE_MMIO) {
		if (hdr->length < sizeof(node->mmio)) {
			pr_err(FW_BUG "Invalid MMIO node size\n");
			ret = -EINVAL;
			goto out_free;
		}

		ep->address = node->mmio.base_address;
		ep->endpoint_id = node->mmio.endpoint;
		ep->viommu = viot_get_iommu(node->mmio.output_node);
		dest = &viot_mmio_endpoints;
	} else {
		pr_warn("Unsupported node %x\n", hdr->type);
		ret = 0;
		goto out_free;
	}

	if (ep->viommu) {
		list_add(&ep->list, dest);
		return 0;
	}

	pr_warn("No IOMMU node found\n");
	/*
	 * A future version of the table may use the node for other
	 * purposes. Keep parsing.
	 */
	ret = 0;

out_free:
	kfree(ep);
	return ret;
}
/**
 * acpi_viot_init - Parse the VIOT table
 *
 * Parse the VIOT table, prepare the list of endpoints to be used during DMA
 * setup of devices. Parsing stops at the first node that fails to parse;
 * endpoints recorded before that point are kept. The table reference taken
 * by acpi_get_table() is released on every path once parsing is over.
 */
void __init acpi_viot_init(void)
{
	int i;
	acpi_status status;
	struct acpi_table_header *hdr;
	struct acpi_viot_header *node;

	status = acpi_get_table(ACPI_SIG_VIOT, 0, &hdr);
	if (ACPI_FAILURE(status)) {
		/* A missing table is the normal case and is not reported. */
		if (status != AE_NOT_FOUND) {
			const char *msg = acpi_format_exception(status);

			pr_err("Failed to get table, %s\n", msg);
		}
		return;
	}

	viot = (void *)hdr;

	node = ACPI_ADD_PTR(struct acpi_viot_header, viot, viot->node_offset);
	for (i = 0; i < viot->node_count; i++) {
		/*
		 * Stop on error, but fall through to acpi_put_table() so the
		 * table reference is not leaked.
		 */
		if (viot_parse_node(node))
			break;

		node = ACPI_ADD_PTR(struct acpi_viot_header, node,
				    node->length);
	}

	acpi_put_table(hdr);
}
/*
 * Attach endpoint @dev, identified by @epid, to @viommu by initialising its
 * IOMMU fwspec.
 *
 * Returns -ENODEV without an IOMMU, -EINVAL when @dev is the IOMMU itself,
 * and -EPROBE_DEFER (or -ENODEV when CONFIG_VIRTIO_IOMMU is disabled) when
 * no IOMMU ops are registered yet for the IOMMU's fwnode.
 */
static int viot_dev_iommu_init(struct device *dev, struct viot_iommu *viommu,
			       u32 epid)
{
	const struct iommu_ops *iommu_ops;

	if (!viommu)
		return -ENODEV;

	/* The IOMMU does not translate its own DMA */
	if (dev->fwnode == viommu->fwnode)
		return -EINVAL;

	iommu_ops = iommu_ops_from_fwnode(viommu->fwnode);
	if (iommu_ops)
		return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode,
					      iommu_ops);

	if (IS_ENABLED(CONFIG_VIRTIO_IOMMU))
		return -EPROBE_DEFER;

	return -ENODEV;
}
/*
 * pci_for_each_dma_alias() callback.
 * @pdev: device owning the alias; not necessarily the device being configured
 * @dev_id: alias requester ID, matched against the VIOT PCI ranges
 * @data: the struct device whose DMA is being configured
 *
 * The fwspec must be initialised on the device being configured (@data), not
 * on @pdev, otherwise the alias device would receive the endpoint ID instead.
 *
 * Returns the result of viot_dev_iommu_init() for the first matching range,
 * or -ENODEV when no range covers the alias.
 */
static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data)
{
	u32 epid;
	struct viot_endpoint *ep;
	struct device *aliased_dev = data;
	u32 domain_nr = pci_domain_nr(pdev->bus);

	list_for_each_entry(ep, &viot_pci_ranges, list) {
		if (domain_nr >= ep->segment_start &&
		    domain_nr <= ep->segment_end &&
		    dev_id >= ep->bdf_start &&
		    dev_id <= ep->bdf_end) {
			/* Endpoint ID = range base + offset within the range */
			epid = ((domain_nr - ep->segment_start) << 16) +
				dev_id - ep->bdf_start + ep->endpoint_id;

			/*
			 * If we found a PCI range managed by the viommu, we're
			 * the one that has to request ACS.
			 */
			pci_request_acs();

			return viot_dev_iommu_init(aliased_dev, ep->viommu,
						   epid);
		}
	}
	return -ENODEV;
}

/*
 * Match a platform device against the VIOT MMIO endpoints using the start of
 * its first memory resource. Returns -ENODEV when the device has no memory
 * resource or no endpoint matches.
 */
static int viot_mmio_dev_iommu_init(struct platform_device *pdev)
{
	struct resource *mem;
	struct viot_endpoint *ep;

	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	if (!mem)
		return -ENODEV;

	list_for_each_entry(ep, &viot_mmio_endpoints, list) {
		if (ep->address == mem->start)
			return viot_dev_iommu_init(&pdev->dev, ep->viommu,
						   ep->endpoint_id);
	}
	return -ENODEV;
}

/**
 * viot_iommu_configure - Setup IOMMU ops for an endpoint described by VIOT
 * @dev: the endpoint
 *
 * For PCI devices, @dev is handed to the alias callback so that every alias
 * initialises the fwspec of @dev itself.
 *
 * Return: 0 on success, <0 on failure
 */
int viot_iommu_configure(struct device *dev)
{
	if (dev_is_pci(dev))
		return pci_for_each_dma_alias(to_pci_dev(dev),
					      viot_pci_dev_iommu_init, dev);
	else if (dev_is_platform(dev))
		return viot_mmio_dev_iommu_init(to_platform_device(dev));
	return -ENODEV;
}

View File

@ -400,9 +400,11 @@ config HYPERV_IOMMU
config VIRTIO_IOMMU
tristate "Virtio IOMMU driver"
depends on VIRTIO
depends on ARM64
depends on (ARM64 || X86)
select IOMMU_API
select IOMMU_DMA
select INTERVAL_TREE
select ACPI_VIOT if ACPI
help
Para-virtualised IOMMU driver with virtio.

View File

@ -11,8 +11,6 @@
#include "amd_iommu_types.h"
extern int amd_iommu_init_dma_ops(void);
extern int amd_iommu_init_passthrough(void);
extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_apply_erratum_63(u16 devid);

View File

@ -153,7 +153,8 @@ int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
static bool amd_iommu_detected;
static bool __initdata amd_iommu_disabled;
static bool amd_iommu_disabled __initdata;
static bool amd_iommu_force_enable __initdata;
static int amd_iommu_target_ivhd_type;
u16 amd_iommu_last_bdf; /* largest PCI device id we have
@ -231,7 +232,6 @@ enum iommu_init_state {
IOMMU_ENABLED,
IOMMU_PCI_INIT,
IOMMU_INTERRUPTS_EN,
IOMMU_DMA_OPS,
IOMMU_INITIALIZED,
IOMMU_NOT_FOUND,
IOMMU_INIT_ERROR,
@ -1908,8 +1908,8 @@ static void print_iommu_info(void)
pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
pci_info(pdev, "Extended features (%#llx):",
iommu->features);
pr_info("Extended features (%#llx):", iommu->features);
for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
if (iommu_feature(iommu, (1ULL << i)))
pr_cont(" %s", feat_str[i]);
@ -2817,7 +2817,7 @@ out:
return ret;
}
static bool detect_ivrs(void)
static bool __init detect_ivrs(void)
{
struct acpi_table_header *ivrs_base;
acpi_status status;
@ -2834,6 +2834,9 @@ static bool detect_ivrs(void)
acpi_put_table(ivrs_base);
if (amd_iommu_force_enable)
goto out;
/* Don't use IOMMU if there is Stoney Ridge graphics */
for (i = 0; i < 32; i++) {
u32 pci_id;
@ -2845,6 +2848,7 @@ static bool detect_ivrs(void)
}
}
out:
/* Make sure ACS will be enabled during PCI probe */
pci_request_acs();
@ -2895,10 +2899,6 @@ static int __init state_next(void)
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_INTERRUPTS_EN;
break;
case IOMMU_INTERRUPTS_EN:
ret = amd_iommu_init_dma_ops();
init_state = ret ? IOMMU_INIT_ERROR : IOMMU_DMA_OPS;
break;
case IOMMU_DMA_OPS:
init_state = IOMMU_INITIALIZED;
break;
case IOMMU_INITIALIZED:
@ -3100,6 +3100,8 @@ static int __init parse_amd_iommu_options(char *str)
for (; *str; ++str) {
if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
if (strncmp(str, "force_enable", 12) == 0)
amd_iommu_force_enable = true;
if (strncmp(str, "off", 3) == 0)
amd_iommu_disabled = true;
if (strncmp(str, "force_isolation", 15) == 0)

View File

@ -30,7 +30,6 @@
#include <linux/msi.h>
#include <linux/irqdomain.h>
#include <linux/percpu.h>
#include <linux/iova.h>
#include <linux/io-pgtable.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
@ -1713,7 +1712,7 @@ static void amd_iommu_probe_finalize(struct device *dev)
/* Domains are initialized for this device - have a look what we ended up with */
domain = iommu_get_domain_for_dev(dev);
if (domain->type == IOMMU_DOMAIN_DMA)
iommu_setup_dma_ops(dev, IOVA_START_PFN << PAGE_SHIFT, 0);
iommu_setup_dma_ops(dev, 0, U64_MAX);
else
set_dma_ops(dev, NULL);
}
@ -1773,13 +1772,22 @@ void amd_iommu_domain_update(struct protection_domain *domain)
amd_iommu_domain_flush_complete(domain);
}
/*
 * Decide whether swiotlb is needed, log the configured unmap flush mode and
 * pass that mode on to the IOMMU core.
 */
static void __init amd_iommu_init_dma_ops(void)
{
	/* swiotlb is enabled for default passthrough or a non-zero sme_me_mask. */
	if (iommu_default_passthrough() || sme_me_mask)
		swiotlb = 1;
	else
		swiotlb = 0;

	if (!amd_iommu_unmap_flush)
		pr_info("Lazy IO/TLB flushing enabled\n");
	else
		pr_info("IO/TLB flush on unmap enabled\n");

	iommu_set_dma_strict(amd_iommu_unmap_flush);
}
int __init amd_iommu_init_api(void)
{
int ret, err = 0;
int err;
ret = iova_cache_get();
if (ret)
return ret;
amd_iommu_init_dma_ops();
err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
if (err)
@ -1796,19 +1804,6 @@ int __init amd_iommu_init_api(void)
return 0;
}
/*
 * Decide whether swiotlb is needed, log the configured unmap flush mode and
 * pass that mode on to the IOMMU core. Always returns 0.
 */
int __init amd_iommu_init_dma_ops(void)
{
	/* swiotlb is enabled for default passthrough or a non-zero sme_me_mask. */
	if (iommu_default_passthrough() || sme_me_mask)
		swiotlb = 1;
	else
		swiotlb = 0;

	if (!amd_iommu_unmap_flush)
		pr_info("Lazy IO/TLB flushing enabled\n");
	else
		pr_info("IO/TLB flush on unmap enabled\n");

	iommu_set_dma_strict(amd_iommu_unmap_flush);

	return 0;
}
/*****************************************************************************
*
* The following functions belong to the exported interface of AMD IOMMU

View File

@ -435,9 +435,13 @@ bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
return true;
}
static bool arm_smmu_iopf_supported(struct arm_smmu_master *master)
bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
{
return false;
/* We're not keeping track of SIDs in fault events */
if (master->num_streams != 1)
return false;
return master->stall_enabled;
}
bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
@ -445,8 +449,8 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master)
if (!(master->smmu->features & ARM_SMMU_FEAT_SVA))
return false;
/* SSID and IOPF support are mandatory for the moment */
return master->ssid_bits && arm_smmu_iopf_supported(master);
/* SSID support is mandatory for the moment */
return master->ssid_bits;
}
bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
@ -459,13 +463,55 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master)
return enabled;
}
/*
 * Hook @master's device into the SMMU event-queue IOPF machinery.
 *
 * Returns 0 when IOPF is unsupported for this master (nothing to do) or was
 * set up, -EINVAL when IOPF is supported but has not been enabled, or the
 * error from queue/handler registration.
 */
static int arm_smmu_master_sva_enable_iopf(struct arm_smmu_master *master)
{
	struct device *dev = master->dev;
	int err;

	/*
	 * Drivers for devices supporting PRI or stall should enable IOPF first.
	 * Others have device-specific fault handlers and don't need IOPF.
	 */
	if (!arm_smmu_master_iopf_supported(master))
		return 0;

	if (!master->iopf_enabled)
		return -EINVAL;

	err = iopf_queue_add_device(master->smmu->evtq.iopf, dev);
	if (err)
		return err;

	/* Undo the queue registration if the fault handler cannot be set. */
	err = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
	if (err)
		iopf_queue_remove_device(master->smmu->evtq.iopf, dev);

	return err;
}
/*
 * Undo arm_smmu_master_sva_enable_iopf(): drop the fault handler, then remove
 * the device from the IOPF queue. Does nothing when IOPF is not enabled.
 */
static void arm_smmu_master_sva_disable_iopf(struct arm_smmu_master *master)
{
	if (master->iopf_enabled) {
		iommu_unregister_device_fault_handler(master->dev);
		iopf_queue_remove_device(master->smmu->evtq.iopf, master->dev);
	}
}
int arm_smmu_master_enable_sva(struct arm_smmu_master *master)
{
int ret;
mutex_lock(&sva_lock);
master->sva_enabled = true;
ret = arm_smmu_master_sva_enable_iopf(master);
if (!ret)
master->sva_enabled = true;
mutex_unlock(&sva_lock);
return 0;
return ret;
}
int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
@ -476,6 +522,7 @@ int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
mutex_unlock(&sva_lock);
return -EBUSY;
}
arm_smmu_master_sva_disable_iopf(master);
master->sva_enabled = false;
mutex_unlock(&sva_lock);

View File

@ -23,7 +23,6 @@
#include <linux/msi.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
@ -32,6 +31,7 @@
#include <linux/amba/bus.h>
#include "arm-smmu-v3.h"
#include "../../iommu-sva-lib.h"
static bool disable_bypass = true;
module_param(disable_bypass, bool, 0444);
@ -313,6 +313,11 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
}
cmd[1] |= FIELD_PREP(CMDQ_PRI_1_RESP, ent->pri.resp);
break;
case CMDQ_OP_RESUME:
cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_SID, ent->resume.sid);
cmd[0] |= FIELD_PREP(CMDQ_RESUME_0_RESP, ent->resume.resp);
cmd[1] |= FIELD_PREP(CMDQ_RESUME_1_STAG, ent->resume.stag);
break;
case CMDQ_OP_CMD_SYNC:
if (ent->sync.msiaddr) {
cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_IRQ);
@ -352,7 +357,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
static const char *cerror_str[] = {
static const char * const cerror_str[] = {
[CMDQ_ERR_CERROR_NONE_IDX] = "No error",
[CMDQ_ERR_CERROR_ILL_IDX] = "Illegal command",
[CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
@ -876,6 +881,44 @@ static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}
/*
 * Complete a stalled transaction for @dev by queueing a CMD_RESUME.
 *
 * The command targets the master's first stream ID and uses @resp->grpid as
 * the stall tag. IOMMU_PAGE_RESP_SUCCESS makes the transaction retry, while
 * IOMMU_PAGE_RESP_INVALID and IOMMU_PAGE_RESP_FAILURE abort it.
 *
 * Returns 0 once the command has been issued, -ENODEV when stalling is not
 * enabled for the master, or -EINVAL for an unknown response code. The
 * fault event argument is not used.
 */
static int arm_smmu_page_response(struct device *dev,
				  struct iommu_fault_event *unused,
				  struct iommu_page_response *resp)
{
	struct arm_smmu_cmdq_ent cmd = {0};
	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
	int sid = master->streams[0].id;

	/* Stall is the only response mechanism handled here */
	if (!master->stall_enabled)
		return -ENODEV;

	switch (resp->code) {
	case IOMMU_PAGE_RESP_SUCCESS:
		cmd.resume.resp = CMDQ_RESUME_0_RESP_RETRY;
		break;
	case IOMMU_PAGE_RESP_INVALID:
	case IOMMU_PAGE_RESP_FAILURE:
		cmd.resume.resp = CMDQ_RESUME_0_RESP_ABORT;
		break;
	default:
		return -EINVAL;
	}

	cmd.opcode	= CMDQ_OP_RESUME;
	cmd.resume.sid	= sid;
	cmd.resume.stag	= resp->grpid;

	arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	/*
	 * Don't send a SYNC, it doesn't do anything for RESUME or PRI_RESP.
	 * RESUME consumption guarantees that the stalled transaction will be
	 * terminated... at some point in the future. PRI_RESP is fire and
	 * forget.
	 */

	return 0;
}
/* Context descriptor manipulation functions */
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
{
@ -986,7 +1029,6 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
u64 val;
bool cd_live;
__le64 *cdptr;
struct arm_smmu_device *smmu = smmu_domain->smmu;
if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
return -E2BIG;
@ -1031,8 +1073,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
CTXDESC_CD_0_V;
/* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
if (smmu_domain->stall_enabled)
val |= CTXDESC_CD_0_S;
}
@ -1276,7 +1317,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
FIELD_PREP(STRTAB_STE_1_STRW, strw));
if (smmu->features & ARM_SMMU_FEAT_STALLS &&
!(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
!master->stall_enabled)
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
@ -1353,7 +1394,6 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid)
return 0;
}
__maybe_unused
static struct arm_smmu_master *
arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
{
@ -1377,18 +1417,118 @@ arm_smmu_find_master(struct arm_smmu_device *smmu, u32 sid)
}
/* IRQ and event handlers */
/*
 * Turn one event queue record into an iommu_fault_event and report it
 * against the master that owns the record's stream ID.
 *
 * Only translation, address size, access and permission faults are handled;
 * any other event ID yields -EOPNOTSUPP. Stage-2 faults yield -EFAULT.
 * Records with the STALL bit set are reported as page requests (last page
 * of group, stall tag as group ID); all others as unrecoverable DMA faults.
 * If a page request is not accepted by iommu_report_device_fault(), the
 * stalled access is answered with IOMMU_PAGE_RESP_FAILURE.
 *
 * Returns 0 on success, -EINVAL when no master matches the stream ID, or
 * the error from iommu_report_device_fault(). The master lookup and the
 * report run under smmu->streams_mutex.
 */
static int arm_smmu_handle_evt(struct arm_smmu_device *smmu, u64 *evt)
{
	struct iommu_fault_event fault_evt = { };
	struct iommu_fault *flt = &fault_evt.fault;
	struct arm_smmu_master *master;
	bool ssid_valid = evt[0] & EVTQ_0_SSV;
	u32 sid = FIELD_GET(EVTQ_0_SID, evt[0]);
	u32 ssid = FIELD_GET(EVTQ_0_SSID, evt[0]);
	u32 reason;
	u32 perm;
	u64 addr;
	int ret;

	switch (FIELD_GET(EVTQ_0_ID, evt[0])) {
	case EVT_ID_TRANSLATION_FAULT:
		reason = IOMMU_FAULT_REASON_PTE_FETCH;
		break;
	case EVT_ID_ADDR_SIZE_FAULT:
		reason = IOMMU_FAULT_REASON_OOR_ADDRESS;
		break;
	case EVT_ID_ACCESS_FAULT:
		reason = IOMMU_FAULT_REASON_ACCESS;
		break;
	case EVT_ID_PERMISSION_FAULT:
		reason = IOMMU_FAULT_REASON_PERMISSION;
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* Stage-2 is always pinned at the moment */
	if (evt[1] & EVTQ_1_S2)
		return -EFAULT;

	/* Build the permission mask describing the faulting access */
	perm = (evt[1] & EVTQ_1_RnW) ? IOMMU_FAULT_PERM_READ :
				       IOMMU_FAULT_PERM_WRITE;
	if (evt[1] & EVTQ_1_InD)
		perm |= IOMMU_FAULT_PERM_EXEC;
	if (evt[1] & EVTQ_1_PnU)
		perm |= IOMMU_FAULT_PERM_PRIV;

	addr = FIELD_GET(EVTQ_2_ADDR, evt[2]);

	if (evt[1] & EVTQ_1_STALL) {
		/* Stalled transaction: can be resumed through a page response */
		flt->type = IOMMU_FAULT_PAGE_REQ;
		flt->prm = (struct iommu_fault_page_request) {
			.flags = IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE,
			.grpid = FIELD_GET(EVTQ_1_STAG, evt[1]),
			.perm = perm,
			.addr = addr,
		};

		if (ssid_valid) {
			flt->prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
			flt->prm.pasid = ssid;
		}
	} else {
		flt->type = IOMMU_FAULT_DMA_UNRECOV;
		flt->event = (struct iommu_fault_unrecoverable) {
			.reason = reason,
			.flags = IOMMU_FAULT_UNRECOV_ADDR_VALID,
			.perm = perm,
			.addr = addr,
		};

		if (ssid_valid) {
			flt->event.flags |= IOMMU_FAULT_UNRECOV_PASID_VALID;
			flt->event.pasid = ssid;
		}
	}

	mutex_lock(&smmu->streams_mutex);
	master = arm_smmu_find_master(smmu, sid);
	if (master) {
		ret = iommu_report_device_fault(master->dev, &fault_evt);
		if (ret && flt->type == IOMMU_FAULT_PAGE_REQ) {
			/* Nobody cared, abort the access */
			struct iommu_page_response resp = {
				.pasid		= flt->prm.pasid,
				.grpid		= flt->prm.grpid,
				.code		= IOMMU_PAGE_RESP_FAILURE,
			};

			arm_smmu_page_response(master->dev, &fault_evt, &resp);
		}
	} else {
		ret = -EINVAL;
	}
	mutex_unlock(&smmu->streams_mutex);

	return ret;
}
static irqreturn_t arm_smmu_evtq_thread(int irq, void *dev)
{
int i;
int i, ret;
struct arm_smmu_device *smmu = dev;
struct arm_smmu_queue *q = &smmu->evtq.q;
struct arm_smmu_ll_queue *llq = &q->llq;
static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
u64 evt[EVTQ_ENT_DWORDS];
do {
while (!queue_remove_raw(q, evt)) {
u8 id = FIELD_GET(EVTQ_0_ID, evt[0]);
ret = arm_smmu_handle_evt(smmu, evt);
if (!ret || !__ratelimit(&rs))
continue;
dev_info(smmu->dev, "event 0x%02x received:\n", id);
for (i = 0; i < ARRAY_SIZE(evt); ++i)
dev_info(smmu->dev, "\t0x%016llx\n",
@ -1923,6 +2063,8 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
cfg->s1cdmax = master->ssid_bits;
smmu_domain->stall_enabled = master->stall_enabled;
ret = arm_smmu_alloc_cd_tables(smmu_domain);
if (ret)
goto out_free_asid;
@ -2270,6 +2412,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
ret = -EINVAL;
goto out_unlock;
} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
smmu_domain->stall_enabled != master->stall_enabled) {
dev_err(dev, "cannot attach to stall-%s domain\n",
smmu_domain->stall_enabled ? "enabled" : "disabled");
ret = -EINVAL;
goto out_unlock;
}
master->domain = smmu_domain;
@ -2508,6 +2656,11 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
master->ssid_bits = min_t(u8, master->ssid_bits,
CTXDESC_LINEAR_CDMAX);
if ((smmu->features & ARM_SMMU_FEAT_STALLS &&
device_property_read_bool(dev, "dma-can-stall")) ||
smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
master->stall_enabled = true;
return &smmu->iommu;
err_free_master:
@ -2525,7 +2678,8 @@ static void arm_smmu_release_device(struct device *dev)
return;
master = dev_iommu_priv_get(dev);
WARN_ON(arm_smmu_master_sva_enabled(master));
if (WARN_ON(arm_smmu_master_sva_enabled(master)))
iopf_queue_remove_device(master->smmu->evtq.iopf, dev);
arm_smmu_detach_dev(master);
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
@ -2595,6 +2749,8 @@ static bool arm_smmu_dev_has_feature(struct device *dev,
return false;
switch (feat) {
case IOMMU_DEV_FEAT_IOPF:
return arm_smmu_master_iopf_supported(master);
case IOMMU_DEV_FEAT_SVA:
return arm_smmu_master_sva_supported(master);
default:
@ -2611,6 +2767,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev,
return false;
switch (feat) {
case IOMMU_DEV_FEAT_IOPF:
return master->iopf_enabled;
case IOMMU_DEV_FEAT_SVA:
return arm_smmu_master_sva_enabled(master);
default:
@ -2621,6 +2779,8 @@ static bool arm_smmu_dev_feature_enabled(struct device *dev,
static int arm_smmu_dev_enable_feature(struct device *dev,
enum iommu_dev_features feat)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
if (!arm_smmu_dev_has_feature(dev, feat))
return -ENODEV;
@ -2628,8 +2788,11 @@ static int arm_smmu_dev_enable_feature(struct device *dev,
return -EBUSY;
switch (feat) {
case IOMMU_DEV_FEAT_IOPF:
master->iopf_enabled = true;
return 0;
case IOMMU_DEV_FEAT_SVA:
return arm_smmu_master_enable_sva(dev_iommu_priv_get(dev));
return arm_smmu_master_enable_sva(master);
default:
return -EINVAL;
}
@ -2638,12 +2801,19 @@ static int arm_smmu_dev_enable_feature(struct device *dev,
static int arm_smmu_dev_disable_feature(struct device *dev,
enum iommu_dev_features feat)
{
struct arm_smmu_master *master = dev_iommu_priv_get(dev);
if (!arm_smmu_dev_feature_enabled(dev, feat))
return -EINVAL;
switch (feat) {
case IOMMU_DEV_FEAT_IOPF:
if (master->sva_enabled)
return -EBUSY;
master->iopf_enabled = false;
return 0;
case IOMMU_DEV_FEAT_SVA:
return arm_smmu_master_disable_sva(dev_iommu_priv_get(dev));
return arm_smmu_master_disable_sva(master);
default:
return -EINVAL;
}
@ -2673,6 +2843,7 @@ static struct iommu_ops arm_smmu_ops = {
.sva_bind = arm_smmu_sva_bind,
.sva_unbind = arm_smmu_sva_unbind,
.sva_get_pasid = arm_smmu_sva_get_pasid,
.page_response = arm_smmu_page_response,
.pgsize_bitmap = -1UL, /* Restricted during device attach */
.owner = THIS_MODULE,
};
@ -2771,6 +2942,13 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
if (ret)
return ret;
if ((smmu->features & ARM_SMMU_FEAT_SVA) &&
(smmu->features & ARM_SMMU_FEAT_STALLS)) {
smmu->evtq.iopf = iopf_queue_alloc(dev_name(smmu->dev));
if (!smmu->evtq.iopf)
return -ENOMEM;
}
/* priq */
if (!(smmu->features & ARM_SMMU_FEAT_PRI))
return 0;
@ -2788,10 +2966,8 @@ static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu)
void *strtab = smmu->strtab_cfg.strtab;
cfg->l1_desc = devm_kzalloc(smmu->dev, size, GFP_KERNEL);
if (!cfg->l1_desc) {
dev_err(smmu->dev, "failed to allocate l1 stream table desc\n");
if (!cfg->l1_desc)
return -ENOMEM;
}
for (i = 0; i < cfg->num_l1_ents; ++i) {
arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]);
@ -3582,10 +3758,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
bool bypass;
smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
if (!smmu) {
dev_err(dev, "failed to allocate arm_smmu_device\n");
if (!smmu)
return -ENOMEM;
}
smmu->dev = dev;
if (dev->of_node) {
@ -3669,10 +3843,20 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
ret = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
if (ret) {
dev_err(dev, "Failed to register iommu\n");
return ret;
goto err_sysfs_remove;
}
return arm_smmu_set_bus_ops(&arm_smmu_ops);
ret = arm_smmu_set_bus_ops(&arm_smmu_ops);
if (ret)
goto err_unregister_device;
return 0;
err_unregister_device:
iommu_device_unregister(&smmu->iommu);
err_sysfs_remove:
iommu_device_sysfs_remove(&smmu->iommu);
return ret;
}
static int arm_smmu_device_remove(struct platform_device *pdev)
@ -3683,6 +3867,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
iommu_device_unregister(&smmu->iommu);
iommu_device_sysfs_remove(&smmu->iommu);
arm_smmu_device_disable(smmu);
iopf_queue_free(smmu->evtq.iopf);
return 0;
}

View File

@ -184,6 +184,7 @@
#else
#define Q_MAX_SZ_SHIFT (PAGE_SHIFT + MAX_ORDER - 1)
#endif
#define Q_MIN_SZ_SHIFT (PAGE_SHIFT)
/*
* Stream table.
@ -354,6 +355,13 @@
#define CMDQ_PRI_1_GRPID GENMASK_ULL(8, 0)
#define CMDQ_PRI_1_RESP GENMASK_ULL(13, 12)
#define CMDQ_RESUME_0_RESP_TERM 0UL
#define CMDQ_RESUME_0_RESP_RETRY 1UL
#define CMDQ_RESUME_0_RESP_ABORT 2UL
#define CMDQ_RESUME_0_RESP GENMASK_ULL(13, 12)
#define CMDQ_RESUME_0_SID GENMASK_ULL(63, 32)
#define CMDQ_RESUME_1_STAG GENMASK_ULL(15, 0)
#define CMDQ_SYNC_0_CS GENMASK_ULL(13, 12)
#define CMDQ_SYNC_0_CS_NONE 0
#define CMDQ_SYNC_0_CS_IRQ 1
@ -366,14 +374,33 @@
/* Event queue */
#define EVTQ_ENT_SZ_SHIFT 5
#define EVTQ_ENT_DWORDS ((1 << EVTQ_ENT_SZ_SHIFT) >> 3)
#define EVTQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
#define EVTQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - EVTQ_ENT_SZ_SHIFT)
#define EVTQ_0_ID GENMASK_ULL(7, 0)
#define EVT_ID_TRANSLATION_FAULT 0x10
#define EVT_ID_ADDR_SIZE_FAULT 0x11
#define EVT_ID_ACCESS_FAULT 0x12
#define EVT_ID_PERMISSION_FAULT 0x13
#define EVTQ_0_SSV (1UL << 11)
#define EVTQ_0_SSID GENMASK_ULL(31, 12)
#define EVTQ_0_SID GENMASK_ULL(63, 32)
#define EVTQ_1_STAG GENMASK_ULL(15, 0)
#define EVTQ_1_STALL (1UL << 31)
#define EVTQ_1_PnU (1UL << 33)
#define EVTQ_1_InD (1UL << 34)
#define EVTQ_1_RnW (1UL << 35)
#define EVTQ_1_S2 (1UL << 39)
#define EVTQ_1_CLASS GENMASK_ULL(41, 40)
#define EVTQ_1_TT_READ (1UL << 44)
#define EVTQ_2_ADDR GENMASK_ULL(63, 0)
#define EVTQ_3_IPA GENMASK_ULL(51, 12)
/* PRI queue */
#define PRIQ_ENT_SZ_SHIFT 4
#define PRIQ_ENT_DWORDS ((1 << PRIQ_ENT_SZ_SHIFT) >> 3)
#define PRIQ_MAX_SZ_SHIFT (Q_MAX_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
#define PRIQ_MAX_SZ_SHIFT (Q_MIN_SZ_SHIFT - PRIQ_ENT_SZ_SHIFT)
#define PRIQ_0_SID GENMASK_ULL(31, 0)
#define PRIQ_0_SSID GENMASK_ULL(51, 32)
@ -462,6 +489,13 @@ struct arm_smmu_cmdq_ent {
enum pri_resp resp;
} pri;
#define CMDQ_OP_RESUME 0x44
struct {
u32 sid;
u16 stag;
u8 resp;
} resume;
#define CMDQ_OP_CMD_SYNC 0x46
struct {
u64 msiaddr;
@ -520,6 +554,7 @@ struct arm_smmu_cmdq_batch {
struct arm_smmu_evtq {
struct arm_smmu_queue q;
struct iopf_queue *iopf;
u32 max_stalls;
};
@ -657,7 +692,9 @@ struct arm_smmu_master {
struct arm_smmu_stream *streams;
unsigned int num_streams;
bool ats_enabled;
bool stall_enabled;
bool sva_enabled;
bool iopf_enabled;
struct list_head bonds;
unsigned int ssid_bits;
};
@ -675,6 +712,7 @@ struct arm_smmu_domain {
struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
bool stall_enabled;
atomic_t nr_ats_masters;
enum arm_smmu_domain_stage stage;
@ -716,6 +754,7 @@ bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
void *drvdata);
void arm_smmu_sva_unbind(struct iommu_sva *handle);
@ -747,6 +786,11 @@ static inline int arm_smmu_master_disable_sva(struct arm_smmu_master *master)
return -ENODEV;
}
static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master)
{
return false;
}
static inline struct iommu_sva *
arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{

View File

@ -3,6 +3,7 @@
* Copyright (c) 2019, The Linux Foundation. All rights reserved.
*/
#include <linux/acpi.h>
#include <linux/adreno-smmu-priv.h>
#include <linux/of_device.h>
#include <linux/qcom_scm.h>
@ -130,6 +131,16 @@ static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_doma
return __arm_smmu_alloc_bitmap(smmu->context_map, start, count);
}
/*
 * Report whether the TTBR1 page-table quirk may be used on this SMMU.
 * Returns false only for devices compatible with "qcom,msm8996-smmu-v2".
 */
static bool qcom_adreno_can_do_ttbr1(struct arm_smmu_device *smmu)
{
	const struct device_node *node = smmu->dev->of_node;

	return !of_device_is_compatible(node, "qcom,msm8996-smmu-v2");
}
static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
struct io_pgtable_cfg *pgtbl_cfg, struct device *dev)
{
@ -144,7 +155,8 @@ static int qcom_adreno_smmu_init_context(struct arm_smmu_domain *smmu_domain,
* be AARCH64 stage 1 but double check because the arm-smmu code assumes
* that is the case when the TTBR1 quirk is enabled
*/
if ((smmu_domain->stage == ARM_SMMU_DOMAIN_S1) &&
if (qcom_adreno_can_do_ttbr1(smmu_domain->smmu) &&
(smmu_domain->stage == ARM_SMMU_DOMAIN_S1) &&
(smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64))
pgtbl_cfg->quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
@ -166,6 +178,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,mdss" },
{ .compatible = "qcom,sc7180-mdss" },
{ .compatible = "qcom,sc7180-mss-pil" },
{ .compatible = "qcom,sc7280-mdss" },
{ .compatible = "qcom,sc8180x-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
@ -330,24 +343,48 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,msm8998-smmu-v2" },
{ .compatible = "qcom,sc7180-smmu-500" },
{ .compatible = "qcom,sc7280-smmu-500" },
{ .compatible = "qcom,sc8180x-smmu-500" },
{ .compatible = "qcom,sdm630-smmu-v2" },
{ .compatible = "qcom,sdm845-smmu-500" },
{ .compatible = "qcom,sm6125-smmu-500" },
{ .compatible = "qcom,sm8150-smmu-500" },
{ .compatible = "qcom,sm8250-smmu-500" },
{ .compatible = "qcom,sm8350-smmu-500" },
{ }
};
#ifdef CONFIG_ACPI
static struct acpi_platform_list qcom_acpi_platlist[] = {
{ "LENOVO", "CB-01 ", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
{ "QCOM ", "QCOMEDK2", 0x8180, ACPI_SIG_IORT, equal, "QCOM SMMU" },
{ }
};
#endif
struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
{
const struct device_node *np = smmu->dev->of_node;
if (of_match_node(qcom_smmu_impl_of_match, np))
return qcom_smmu_create(smmu, &qcom_smmu_impl);
#ifdef CONFIG_ACPI
if (np == NULL) {
/* Match platform for ACPI boot */
if (acpi_match_platform_list(qcom_acpi_platlist) >= 0)
return qcom_smmu_create(smmu, &qcom_smmu_impl);
}
#endif
/*
* Do not change this order of implementation, i.e., first adreno
* smmu impl and then apss smmu since we can have both implementing
* arm,mmu-500 in which case we will miss setting adreno smmu specific
* features if the order is changed.
*/
if (of_device_is_compatible(np, "qcom,adreno-smmu"))
return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl);
if (of_match_node(qcom_smmu_impl_of_match, np))
return qcom_smmu_create(smmu, &qcom_smmu_impl);
return smmu;
}

View File

@ -31,7 +31,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@ -74,7 +73,7 @@ static bool using_legacy_binding, using_generic_binding;
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
if (pm_runtime_enabled(smmu->dev))
return pm_runtime_get_sync(smmu->dev);
return pm_runtime_resume_and_get(smmu->dev);
return 0;
}
@ -1271,6 +1270,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
u64 phys;
unsigned long va, flags;
int ret, idx = cfg->cbndx;
phys_addr_t addr = 0;
ret = arm_smmu_rpm_get(smmu);
if (ret < 0)
@ -1290,6 +1290,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
dev_err(dev,
"iova to phys timed out on %pad. Falling back to software table walk.\n",
&iova);
arm_smmu_rpm_put(smmu);
return ops->iova_to_phys(ops, iova);
}
@ -1298,12 +1299,14 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
if (phys & ARM_SMMU_CB_PAR_F) {
dev_err(dev, "translation fault!\n");
dev_err(dev, "PAR = 0x%llx\n", phys);
return 0;
goto out;
}
addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
out:
arm_smmu_rpm_put(smmu);
return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
return addr;
}
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
@ -1450,6 +1453,18 @@ static void arm_smmu_release_device(struct device *dev)
iommu_fwspec_free(dev);
}
/*
 * Forward the probe_finalize step for @dev to the SMMU implementation hook,
 * when the SMMU has an implementation that provides one.
 */
static void arm_smmu_probe_finalize(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
	struct arm_smmu_device *smmu = cfg->smmu;

	/* The hook is optional; nothing to do without it */
	if (!smmu->impl || !smmu->impl->probe_finalize)
		return;

	smmu->impl->probe_finalize(smmu, dev);
}
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
@ -1569,6 +1584,7 @@ static struct iommu_ops arm_smmu_ops = {
.iova_to_phys = arm_smmu_iova_to_phys,
.probe_device = arm_smmu_probe_device,
.release_device = arm_smmu_release_device,
.probe_finalize = arm_smmu_probe_finalize,
.device_group = arm_smmu_device_group,
.enable_nesting = arm_smmu_enable_nesting,
.set_pgtable_quirks = arm_smmu_set_pgtable_quirks,
@ -2164,7 +2180,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
if (err) {
dev_err(dev, "Failed to register iommu\n");
return err;
goto err_sysfs_remove;
}
platform_set_drvdata(pdev, smmu);
@ -2187,10 +2203,19 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
* any device which might need it, so we want the bus ops in place
* ready to handle default domain setup as soon as any SMMU exists.
*/
if (!using_legacy_binding)
return arm_smmu_bus_init(&arm_smmu_ops);
if (!using_legacy_binding) {
err = arm_smmu_bus_init(&arm_smmu_ops);
if (err)
goto err_unregister_device;
}
return 0;
err_unregister_device:
iommu_device_unregister(&smmu->iommu);
err_sysfs_remove:
iommu_device_sysfs_remove(&smmu->iommu);
return err;
}
static int arm_smmu_device_remove(struct platform_device *pdev)

View File

@ -439,6 +439,7 @@ struct arm_smmu_impl {
struct device *dev, int start);
void (*write_s2cr)(struct arm_smmu_device *smmu, int idx);
void (*write_sctlr)(struct arm_smmu_device *smmu, int idx, u32 reg);
void (*probe_finalize)(struct arm_smmu_device *smmu, struct device *dev);
};
#define INVALID_SMENDX -1

View File

@ -25,7 +25,6 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/pm_runtime.h>
@ -850,10 +849,12 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
ret = iommu_device_register(&qcom_iommu->iommu, &qcom_iommu_ops, dev);
if (ret) {
dev_err(dev, "Failed to register iommu\n");
return ret;
goto err_sysfs_remove;
}
bus_set_iommu(&platform_bus_type, &qcom_iommu_ops);
ret = bus_set_iommu(&platform_bus_type, &qcom_iommu_ops);
if (ret)
goto err_unregister_device;
if (qcom_iommu->local_base) {
pm_runtime_get_sync(dev);
@ -862,6 +863,13 @@ static int qcom_iommu_device_probe(struct platform_device *pdev)
}
return 0;
err_unregister_device:
iommu_device_unregister(&qcom_iommu->iommu);
err_sysfs_remove:
iommu_device_sysfs_remove(&qcom_iommu->iommu);
return ret;
}
static int qcom_iommu_device_remove(struct platform_device *pdev)

View File

@ -243,9 +243,11 @@ resv_iova:
lo = iova_pfn(iovad, start);
hi = iova_pfn(iovad, end);
reserve_iova(iovad, lo, hi);
} else {
} else if (end < start) {
/* dma_ranges list should be sorted */
dev_err(&dev->dev, "Failed to reserve IOVA\n");
dev_err(&dev->dev,
"Failed to reserve IOVA [%pa-%pa]\n",
&start, &end);
return -EINVAL;
}
@ -319,16 +321,16 @@ static bool dev_is_untrusted(struct device *dev)
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
* @base: IOVA at which the mappable address space starts
* @size: Size of IOVA space
* @limit: Last address of the IOVA space
* @dev: Device the domain is being initialised for
*
* @base and @size should be exact multiples of IOMMU page granularity to
* @base and @limit + 1 should be exact multiples of IOMMU page granularity to
* avoid rounding surprises. If necessary, we reserve the page at address 0
* to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
* any change which could make prior IOVAs invalid will fail.
*/
static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
u64 size, struct device *dev)
dma_addr_t limit, struct device *dev)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
unsigned long order, base_pfn;
@ -346,7 +348,7 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
/* Check the domain allows at least some access to the device... */
if (domain->geometry.force_aperture) {
if (base > domain->geometry.aperture_end ||
base + size <= domain->geometry.aperture_start) {
limit < domain->geometry.aperture_start) {
pr_warn("specified DMA range outside IOMMU capability\n");
return -EFAULT;
}
@ -1308,7 +1310,7 @@ static const struct dma_map_ops iommu_dma_ops = {
* The IOMMU core code allocates the default DMA domain, which the underlying
* IOMMU driver needs to support via the dma-iommu layer.
*/
void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size)
void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
@ -1320,7 +1322,7 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size)
* underlying IOMMU driver needs to support via the dma-iommu layer.
*/
if (domain->type == IOMMU_DOMAIN_DMA) {
if (iommu_dma_init_domain(domain, dma_base, size, dev))
if (iommu_dma_init_domain(domain, dma_base, dma_limit, dev))
goto out_err;
dev->dma_ops = &iommu_dma_ops;
}
@ -1330,6 +1332,7 @@ out_err:
pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
dev_name(dev));
}
EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
phys_addr_t msi_addr, struct iommu_domain *domain)

View File

@ -17,7 +17,6 @@
#include <linux/kmemleak.h>
#include <linux/list.h>
#include <linux/of.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

View File

@ -3,6 +3,9 @@
config DMAR_TABLE
bool
config DMAR_PERF
bool
config INTEL_IOMMU
bool "Support for Intel IOMMU using DMA Remapping Devices"
depends on PCI_MSI && ACPI && (X86 || IA64)
@ -14,6 +17,7 @@ config INTEL_IOMMU
select SWIOTLB
select IOASID
select IOMMU_DMA
select PCI_ATS
help
DMA remapping (DMAR) devices support enables independent address
translations for Direct Memory Access (DMA) from devices.
@ -24,6 +28,7 @@ config INTEL_IOMMU
config INTEL_IOMMU_DEBUGFS
bool "Export Intel IOMMU internals in Debugfs"
depends on INTEL_IOMMU && IOMMU_DEBUGFS
select DMAR_PERF
help
!!!WARNING!!!
@ -41,6 +46,7 @@ config INTEL_IOMMU_SVM
select PCI_PRI
select MMU_NOTIFIER
select IOASID
select IOMMU_SVA_LIB
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by

View File

@ -2,6 +2,7 @@
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iommu.o pasid.o
obj-$(CONFIG_DMAR_TABLE) += trace.o cap_audit.o
obj-$(CONFIG_DMAR_PERF) += perf.o
obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += svm.o
obj-$(CONFIG_IRQ_REMAP) += irq_remapping.o

View File

@ -16,6 +16,7 @@
#include <asm/irq_remapping.h>
#include "pasid.h"
#include "perf.h"
struct tbl_walk {
u16 bus;
@ -31,6 +32,9 @@ struct iommu_regset {
const char *regs;
};
#define DEBUG_BUFFER_SIZE 1024
static char debug_buf[DEBUG_BUFFER_SIZE];
#define IOMMU_REGSET_ENTRY(_reg_) \
{ DMAR_##_reg_##_REG, __stringify(_reg_) }
@ -538,6 +542,111 @@ static int ir_translation_struct_show(struct seq_file *m, void *unused)
DEFINE_SHOW_ATTRIBUTE(ir_translation_struct);
#endif
/*
 * Print the latency report for a single IOMMU to @m: a header line with the
 * IOMMU name and register base address, followed by the snapshot produced
 * by dmar_latency_snapshot() in the shared debug_buf, or a failure message
 * when the snapshot returns a negative value.
 */
static void latency_show_one(struct seq_file *m, struct intel_iommu *iommu,
			     struct dmar_drhd_unit *drhd)
{
	int len;

	seq_printf(m, "IOMMU: %s Register Base Address: %llx\n",
		   iommu->name, drhd->reg_base_addr);

	len = dmar_latency_snapshot(iommu, debug_buf, DEBUG_BUFFER_SIZE);
	if (len >= 0)
		seq_puts(m, debug_buf);
	else
		seq_puts(m, "Failed to get latency snapshot");

	seq_puts(m, "\n");
}
static int latency_show(struct seq_file *m, void *v)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
rcu_read_lock();
for_each_active_iommu(iommu, drhd)
latency_show_one(m, iommu, drhd);
rcu_read_unlock();
return 0;
}
/*
 * open() handler for the latency debugfs file: binds the file to
 * latency_show() through single_open(), with no private data.
 */
static int dmar_perf_latency_open(struct inode *inode, struct file *filp)
{
return single_open(filp, latency_show, NULL);
}
/*
 * write() handler for the latency debugfs file.
 *
 * At most 63 bytes of user input are parsed as an integer (base detected by
 * kstrtoint()). The value selects the action applied to every active IOMMU:
 *   0 - disable IOTLB, DEVTLB, IEC and PRQ latency sampling
 *   1 - enable IOTLB invalidation latency sampling
 *   2 - enable DEVTLB invalidation latency sampling
 *   3 - enable IEC invalidation latency sampling
 *   4 - enable PRQ latency sampling
 *
 * Returns the number of bytes consumed, -EFAULT when the user buffer cannot
 * be copied, or -EINVAL when the input is not an integer in the range above.
 */
static ssize_t dmar_perf_latency_write(struct file *filp,
				       const char __user *ubuf,
				       size_t cnt, loff_t *ppos)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;
	char buf[64];
	int counting;
	int type = 0;

	/* Keep one byte free for the terminating NUL */
	if (cnt > sizeof(buf) - 1)
		cnt = sizeof(buf) - 1;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	if (kstrtoint(buf, 0, &counting))
		return -EINVAL;

	/* Validate the request before entering the RCU section */
	switch (counting) {
	case 0:
		break;
	case 1:
		type = DMAR_LATENCY_INV_IOTLB;
		break;
	case 2:
		type = DMAR_LATENCY_INV_DEVTLB;
		break;
	case 3:
		type = DMAR_LATENCY_INV_IEC;
		break;
	case 4:
		type = DMAR_LATENCY_PRQ;
		break;
	default:
		return -EINVAL;
	}

	rcu_read_lock();
	for_each_active_iommu(iommu, drhd) {
		if (counting) {
			dmar_latency_enable(iommu, type);
		} else {
			dmar_latency_disable(iommu, DMAR_LATENCY_INV_IOTLB);
			dmar_latency_disable(iommu, DMAR_LATENCY_INV_DEVTLB);
			dmar_latency_disable(iommu, DMAR_LATENCY_INV_IEC);
			dmar_latency_disable(iommu, DMAR_LATENCY_PRQ);
		}
	}
	rcu_read_unlock();

	*ppos += cnt;
	return cnt;
}
/*
 * File operations for the latency debugfs file: reads go through the
 * seq_file helpers set up by dmar_perf_latency_open(), writes are handled
 * by dmar_perf_latency_write().
 */
static const struct file_operations dmar_perf_latency_fops = {
.open = dmar_perf_latency_open,
.write = dmar_perf_latency_write,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
void __init intel_iommu_debugfs_init(void)
{
struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
@ -556,4 +665,6 @@ void __init intel_iommu_debugfs_init(void)
debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
NULL, &ir_translation_struct_fops);
#endif
debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug,
NULL, &dmar_perf_latency_fops);
}

View File

@ -34,6 +34,7 @@
#include <trace/events/intel_iommu.h>
#include "../irq_remapping.h"
#include "perf.h"
typedef int (*dmar_res_handler_t)(struct acpi_dmar_header *, void *);
struct dmar_res_callback {
@ -1342,15 +1343,33 @@ int qi_submit_sync(struct intel_iommu *iommu, struct qi_desc *desc,
unsigned int count, unsigned long options)
{
struct q_inval *qi = iommu->qi;
s64 devtlb_start_ktime = 0;
s64 iotlb_start_ktime = 0;
s64 iec_start_ktime = 0;
struct qi_desc wait_desc;
int wait_index, index;
unsigned long flags;
int offset, shift;
int rc, i;
u64 type;
if (!qi)
return 0;
type = desc->qw0 & GENMASK_ULL(3, 0);
if ((type == QI_IOTLB_TYPE || type == QI_EIOTLB_TYPE) &&
dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IOTLB))
iotlb_start_ktime = ktime_to_ns(ktime_get());
if ((type == QI_DIOTLB_TYPE || type == QI_DEIOTLB_TYPE) &&
dmar_latency_enabled(iommu, DMAR_LATENCY_INV_DEVTLB))
devtlb_start_ktime = ktime_to_ns(ktime_get());
if (type == QI_IEC_TYPE &&
dmar_latency_enabled(iommu, DMAR_LATENCY_INV_IEC))
iec_start_ktime = ktime_to_ns(ktime_get());
restart:
rc = 0;
@ -1425,6 +1444,18 @@ restart:
if (rc == -EAGAIN)
goto restart;
if (iotlb_start_ktime)
dmar_latency_update(iommu, DMAR_LATENCY_INV_IOTLB,
ktime_to_ns(ktime_get()) - iotlb_start_ktime);
if (devtlb_start_ktime)
dmar_latency_update(iommu, DMAR_LATENCY_INV_DEVTLB,
ktime_to_ns(ktime_get()) - devtlb_start_ktime);
if (iec_start_ktime)
dmar_latency_update(iommu, DMAR_LATENCY_INV_IEC,
ktime_to_ns(ktime_get()) - iec_start_ktime);
return rc;
}
@ -1913,16 +1944,23 @@ static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
reason = dmar_get_fault_reason(fault_reason, &fault_type);
if (fault_type == INTR_REMAP)
pr_err("[INTR-REMAP] Request device [%02x:%02x.%d] fault index %llx [fault reason %02d] %s\n",
source_id >> 8, PCI_SLOT(source_id & 0xFF),
PCI_FUNC(source_id & 0xFF), addr >> 48,
fault_reason, reason);
else
pr_err("[%s] Request device [%02x:%02x.%d] PASID %x fault addr %llx [fault reason %02d] %s\n",
pr_err("[INTR-REMAP] Request device [0x%02x:0x%02x.%d] fault index 0x%llx [fault reason 0x%02x] %s\n",
source_id >> 8, PCI_SLOT(source_id & 0xFF),
PCI_FUNC(source_id & 0xFF), addr >> 48,
fault_reason, reason);
else if (pasid == INVALID_IOASID)
pr_err("[%s NO_PASID] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
type ? "DMA Read" : "DMA Write",
source_id >> 8, PCI_SLOT(source_id & 0xFF),
PCI_FUNC(source_id & 0xFF), pasid, addr,
PCI_FUNC(source_id & 0xFF), addr,
fault_reason, reason);
else
pr_err("[%s PASID 0x%x] Request device [0x%02x:0x%02x.%d] fault addr 0x%llx [fault reason 0x%02x] %s\n",
type ? "DMA Read" : "DMA Write", pasid,
source_id >> 8, PCI_SLOT(source_id & 0xFF),
PCI_FUNC(source_id & 0xFF), addr,
fault_reason, reason);
return 0;
}
@ -1989,7 +2027,7 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
if (!ratelimited)
/* Using pasid -1 if pasid is not present */
dmar_fault_do_one(iommu, type, fault_reason,
pasid_present ? pasid : -1,
pasid_present ? pasid : INVALID_IOASID,
source_id, guest_addr);
fault_index++;

View File

@ -46,6 +46,7 @@
#include <asm/iommu.h>
#include "../irq_remapping.h"
#include "../iommu-sva-lib.h"
#include "pasid.h"
#include "cap_audit.h"
@ -564,7 +565,7 @@ static inline int domain_pfn_supported(struct dmar_domain *domain,
static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
{
unsigned long sagaw;
int agaw = -1;
int agaw;
sagaw = cap_sagaw(iommu->cap);
for (agaw = width_to_agaw(max_gaw);
@ -625,12 +626,12 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
bool found = false;
int i;
domain->iommu_coherency = 1;
domain->iommu_coherency = true;
for_each_domain_iommu(i, domain) {
found = true;
if (!iommu_paging_structure_coherency(g_iommus[i])) {
domain->iommu_coherency = 0;
domain->iommu_coherency = false;
break;
}
}
@ -641,18 +642,18 @@ static void domain_update_iommu_coherency(struct dmar_domain *domain)
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
if (!iommu_paging_structure_coherency(iommu)) {
domain->iommu_coherency = 0;
domain->iommu_coherency = false;
break;
}
}
rcu_read_unlock();
}
static int domain_update_iommu_snooping(struct intel_iommu *skip)
static bool domain_update_iommu_snooping(struct intel_iommu *skip)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
int ret = 1;
bool ret = true;
rcu_read_lock();
for_each_active_iommu(iommu, drhd) {
@ -665,7 +666,7 @@ static int domain_update_iommu_snooping(struct intel_iommu *skip)
*/
if (!sm_supported(iommu) &&
!ecap_sc_support(iommu->ecap)) {
ret = 0;
ret = false;
break;
}
}
@ -682,9 +683,8 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain,
struct intel_iommu *iommu;
int mask = 0x3;
if (!intel_iommu_superpage) {
if (!intel_iommu_superpage)
return 0;
}
/* set iommu_superpage to the smallest common denominator */
rcu_read_lock();
@ -1919,7 +1919,6 @@ static int domain_attach_iommu(struct dmar_domain *domain,
assert_spin_locked(&iommu->lock);
domain->iommu_refcnt[iommu->seq_id] += 1;
domain->iommu_count += 1;
if (domain->iommu_refcnt[iommu->seq_id] == 1) {
ndomains = cap_ndoms(iommu->cap);
num = find_first_zero_bit(iommu->domain_ids, ndomains);
@ -1927,7 +1926,6 @@ static int domain_attach_iommu(struct dmar_domain *domain,
if (num >= ndomains) {
pr_err("%s: No free domain ids\n", iommu->name);
domain->iommu_refcnt[iommu->seq_id] -= 1;
domain->iommu_count -= 1;
return -ENOSPC;
}
@ -1943,16 +1941,15 @@ static int domain_attach_iommu(struct dmar_domain *domain,
return 0;
}
static int domain_detach_iommu(struct dmar_domain *domain,
struct intel_iommu *iommu)
static void domain_detach_iommu(struct dmar_domain *domain,
struct intel_iommu *iommu)
{
int num, count;
int num;
assert_spin_locked(&device_domain_lock);
assert_spin_locked(&iommu->lock);
domain->iommu_refcnt[iommu->seq_id] -= 1;
count = --domain->iommu_count;
if (domain->iommu_refcnt[iommu->seq_id] == 0) {
num = domain->iommu_did[iommu->seq_id];
clear_bit(num, iommu->domain_ids);
@ -1961,8 +1958,6 @@ static int domain_detach_iommu(struct dmar_domain *domain,
domain_update_iommu_cap(domain);
domain->iommu_did[iommu->seq_id] = 0;
}
return count;
}
static inline int guestwidth_to_adjustwidth(int gaw)
@ -4138,62 +4133,56 @@ static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev)
return container_of(iommu_dev, struct intel_iommu, iommu);
}
static ssize_t intel_iommu_show_version(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t version_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
u32 ver = readl(iommu->reg + DMAR_VER_REG);
return sprintf(buf, "%d:%d\n",
DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
}
static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
static DEVICE_ATTR_RO(version);
static ssize_t intel_iommu_show_address(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t address_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%llx\n", iommu->reg_phys);
}
static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
static DEVICE_ATTR_RO(address);
static ssize_t intel_iommu_show_cap(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t cap_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%llx\n", iommu->cap);
}
static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
static DEVICE_ATTR_RO(cap);
static ssize_t intel_iommu_show_ecap(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t ecap_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%llx\n", iommu->ecap);
}
static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
static DEVICE_ATTR_RO(ecap);
static ssize_t intel_iommu_show_ndoms(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t domains_supported_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
}
static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
static DEVICE_ATTR_RO(domains_supported);
static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
struct device_attribute *attr,
char *buf)
static ssize_t domains_used_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct intel_iommu *iommu = dev_to_intel_iommu(dev);
return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
cap_ndoms(iommu->cap)));
}
static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
static DEVICE_ATTR_RO(domains_used);
static struct attribute *intel_iommu_attrs[] = {
&dev_attr_version.attr,
@ -4511,13 +4500,13 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
adjust_width = guestwidth_to_adjustwidth(guest_width);
domain->agaw = width_to_agaw(adjust_width);
domain->iommu_coherency = 0;
domain->iommu_snooping = 0;
domain->iommu_coherency = false;
domain->iommu_snooping = false;
domain->iommu_superpage = 0;
domain->max_addr = 0;
/* always allocate the top pgd */
domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
domain->pgd = alloc_pgtable_page(domain->nid);
if (!domain->pgd)
return -ENOMEM;
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
@ -4757,6 +4746,13 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
if (!iommu)
return -ENODEV;
if ((dmar_domain->flags & DOMAIN_FLAG_NESTING_MODE) &&
!ecap_nest(iommu->ecap)) {
dev_err(dev, "%s: iommu not support nested translation\n",
iommu->name);
return -EINVAL;
}
/* check if this iommu agaw is sufficient for max mapped address */
addr_width = agaw_to_width(iommu->agaw);
if (addr_width > cap_mgaw(iommu->cap))
@ -4778,8 +4774,7 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
pte = dmar_domain->pgd;
if (dma_pte_present(pte)) {
dmar_domain->pgd = (struct dma_pte *)
phys_to_virt(dma_pte_addr(pte));
dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
free_pgtable_page(pte);
}
dmar_domain->agaw--;
@ -5129,7 +5124,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
static bool intel_iommu_capable(enum iommu_cap cap)
{
if (cap == IOMMU_CAP_CACHE_COHERENCY)
return domain_update_iommu_snooping(NULL) == 1;
return domain_update_iommu_snooping(NULL);
if (cap == IOMMU_CAP_INTR_REMAP)
return irq_remapping_enabled == 1;
@ -5165,13 +5160,10 @@ static void intel_iommu_release_device(struct device *dev)
static void intel_iommu_probe_finalize(struct device *dev)
{
dma_addr_t base = IOVA_START_PFN << VTD_PAGE_SHIFT;
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
if (domain && domain->type == IOMMU_DOMAIN_DMA)
iommu_setup_dma_ops(dev, base,
__DOMAIN_MAX_ADDR(dmar_domain->gaw) - base);
iommu_setup_dma_ops(dev, 0, U64_MAX);
else
set_dma_ops(dev, NULL);
}
@ -5331,6 +5323,48 @@ static int intel_iommu_disable_auxd(struct device *dev)
return 0;
}
/*
 * Enable shared virtual addressing for @dev.
 *
 * Checks that the device has domain info, that its IOMMU is flagged
 * VTD_FLAG_SVM_CAPABLE, that PASID can be enabled and that PASID, PRI and
 * ATS are all enabled on the device. It then adds the device to the IOMMU's
 * I/O page fault queue and registers iommu_queue_iopf() as its fault
 * handler.
 *
 * Returns 0 on success, -EINVAL/-ENODEV when a precondition is not met, or
 * the error from the iopf queue / fault handler registration.
 */
static int intel_iommu_enable_sva(struct device *dev)
{
	struct device_domain_info *info = get_domain_info(dev);
	struct intel_iommu *iommu;
	int ret;

	if (!info || dmar_disabled)
		return -EINVAL;

	iommu = info->iommu;
	if (!iommu)
		return -EINVAL;

	if (!(iommu->flags & VTD_FLAG_SVM_CAPABLE))
		return -ENODEV;

	if (intel_iommu_enable_pasid(iommu, dev))
		return -ENODEV;

	if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
		return -EINVAL;

	ret = iopf_queue_add_device(iommu->iopf_queue, dev);
	if (ret)
		return ret;

	ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf, dev);
	if (ret) {
		/*
		 * Do not leave the device on the iopf queue when the
		 * feature as a whole failed to enable.
		 */
		iopf_queue_remove_device(iommu->iopf_queue, dev);
	}

	return ret;
}
static int intel_iommu_disable_sva(struct device *dev)
{
struct device_domain_info *info = get_domain_info(dev);
struct intel_iommu *iommu = info->iommu;
int ret;
ret = iommu_unregister_device_fault_handler(dev);
if (!ret)
ret = iopf_queue_remove_device(iommu->iopf_queue, dev);
return ret;
}
/*
* A PCI express designated vendor specific extended capability is defined
* in the section 3.7 of Intel scalable I/O virtualization technical spec
@ -5392,35 +5426,37 @@ intel_iommu_dev_has_feat(struct device *dev, enum iommu_dev_features feat)
static int
intel_iommu_dev_enable_feat(struct device *dev, enum iommu_dev_features feat)
{
if (feat == IOMMU_DEV_FEAT_AUX)
switch (feat) {
case IOMMU_DEV_FEAT_AUX:
return intel_iommu_enable_auxd(dev);
if (feat == IOMMU_DEV_FEAT_IOPF)
case IOMMU_DEV_FEAT_IOPF:
return intel_iommu_dev_has_feat(dev, feat) ? 0 : -ENODEV;
if (feat == IOMMU_DEV_FEAT_SVA) {
struct device_domain_info *info = get_domain_info(dev);
case IOMMU_DEV_FEAT_SVA:
return intel_iommu_enable_sva(dev);
if (!info)
return -EINVAL;
if (!info->pasid_enabled || !info->pri_enabled || !info->ats_enabled)
return -EINVAL;
if (info->iommu->flags & VTD_FLAG_SVM_CAPABLE)
return 0;
default:
return -ENODEV;
}
return -ENODEV;
}
static int
intel_iommu_dev_disable_feat(struct device *dev, enum iommu_dev_features feat)
{
if (feat == IOMMU_DEV_FEAT_AUX)
switch (feat) {
case IOMMU_DEV_FEAT_AUX:
return intel_iommu_disable_auxd(dev);
return -ENODEV;
case IOMMU_DEV_FEAT_IOPF:
return 0;
case IOMMU_DEV_FEAT_SVA:
return intel_iommu_disable_sva(dev);
default:
return -ENODEV;
}
}
static bool
@ -5457,7 +5493,7 @@ intel_iommu_enable_nesting(struct iommu_domain *domain)
int ret = -ENODEV;
spin_lock_irqsave(&device_domain_lock, flags);
if (nested_mode_support() && list_empty(&dmar_domain->devices)) {
if (list_empty(&dmar_domain->devices)) {
dmar_domain->flags |= DOMAIN_FLAG_NESTING_MODE;
dmar_domain->flags &= ~DOMAIN_FLAG_USE_FIRST_LEVEL;
ret = 0;

View File

@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/**
/*
* intel-pasid.c - PASID idr, table and entry manipulation
*
* Copyright (C) 2018 Intel Corporation

166
drivers/iommu/intel/perf.c Normal file
View File

@ -0,0 +1,166 @@
// SPDX-License-Identifier: GPL-2.0
/*
* perf.c - performance monitor
*
* Copyright (C) 2021 Intel Corporation
*
* Author: Lu Baolu <baolu.lu@linux.intel.com>
* Fenghua Yu <fenghua.yu@intel.com>
*/
#include <linux/spinlock.h>
#include <linux/intel-iommu.h>
#include "perf.h"
static DEFINE_SPINLOCK(latency_lock);
bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
{
struct latency_statistic *lstat = iommu->perf_statistic;
return lstat && lstat[type].enabled;
}
/*
 * Switch on latency sampling of @type for @iommu.
 *
 * The per-IOMMU statistics array (DMAR_LATENCY_NUM entries) is allocated on
 * first use while holding latency_lock, hence GFP_ATOMIC. The minimum
 * counter starts at UINT_MAX so that the first sample replaces it.
 *
 * Returns 0 when sampling is (or already was) enabled, -ENOMEM when the
 * array cannot be allocated, and -EBUSY when the type turned out to be
 * enabled once the lock was taken.
 */
int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
{
	struct latency_statistic *stats;
	unsigned long irqflags;
	int rc = -EBUSY;

	if (dmar_latency_enabled(iommu, type))
		return 0;

	spin_lock_irqsave(&latency_lock, irqflags);

	stats = iommu->perf_statistic;
	if (!stats) {
		stats = kzalloc(sizeof(*stats) * DMAR_LATENCY_NUM,
				GFP_ATOMIC);
		iommu->perf_statistic = stats;
	}

	if (!stats) {
		rc = -ENOMEM;
	} else if (!stats[type].enabled) {
		stats[type].enabled = true;
		stats[type].counter[COUNTS_MIN] = UINT_MAX;
		rc = 0;
	}

	spin_unlock_irqrestore(&latency_lock, irqflags);

	return rc;
}
/*
 * Switch off latency sampling of @type for @iommu and reset its counters.
 * Does nothing when the type is not currently enabled.
 */
void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
{
	struct latency_statistic *lstat = iommu->perf_statistic;
	unsigned long flags;

	if (!dmar_latency_enabled(iommu, type))
		return;

	spin_lock_irqsave(&latency_lock, flags);
	/*
	 * Clear exactly one entry. Clearing DMAR_LATENCY_NUM entries from
	 * &lstat[type] would run past the end of the array for type > 0
	 * and would wipe every other type for type == 0.
	 */
	memset(&lstat[type], 0, sizeof(*lstat));
	spin_unlock_irqrestore(&latency_lock, flags);
}
/*
 * Account one latency sample of @type for @iommu.
 *
 * @latency is sorted into one of the decade buckets (boundaries 100,
 * 1000, ... 10000000) and folded into the running minimum, maximum, sum
 * and sample count. Callers pass ktime nanosecond deltas. Samples for a
 * type that is not enabled are dropped.
 */
void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
{
	struct latency_statistic *lstat = iommu->perf_statistic;
	struct latency_statistic *stat;
	enum latency_count bucket;
	unsigned long irqflags;

	if (!dmar_latency_enabled(iommu, type))
		return;

	/* Pick the histogram slot; this depends on @latency only. */
	if (latency < 100)
		bucket = COUNTS_10e2;
	else if (latency < 1000)
		bucket = COUNTS_10e3;
	else if (latency < 10000)
		bucket = COUNTS_10e4;
	else if (latency < 100000)
		bucket = COUNTS_10e5;
	else if (latency < 1000000)
		bucket = COUNTS_10e6;
	else if (latency < 10000000)
		bucket = COUNTS_10e7;
	else
		bucket = COUNTS_10e8_plus;

	spin_lock_irqsave(&latency_lock, irqflags);

	stat = &lstat[type];
	stat->counter[bucket]++;

	if (latency < stat->counter[COUNTS_MIN])
		stat->counter[COUNTS_MIN] = latency;
	if (latency > stat->counter[COUNTS_MAX])
		stat->counter[COUNTS_MAX] = latency;

	stat->counter[COUNTS_SUM] += latency;
	stat->samples++;

	spin_unlock_irqrestore(&latency_lock, irqflags);
}
/*
 * Column headers of the latency snapshot, one per enum latency_count
 * value and in the same order. The table is never modified, so both the
 * strings and the pointers are const.
 */
static const char * const latency_counter_names[] = {
	" <0.1us",
	" 0.1us-1us", " 1us-10us", " 10us-100us",
	" 100us-1ms", " 1ms-10ms", " >=10ms",
	" min(us)", " max(us)", " average(us)"
};
/*
 * Row labels of the latency snapshot, one per enum latency_type value and
 * in the same order. The table is never modified, so both the strings and
 * the pointers are const.
 */
static const char * const latency_type_names[] = {
	" inv_iotlb", " inv_devtlb", " inv_iec",
	" svm_prq"
};
/*
 * Render the latency statistics of @iommu as a text table into @str.
 *
 * @iommu: IOMMU whose statistics are dumped.
 * @str:   output buffer; it is zeroed first and always NUL-terminated.
 * @size:  size of @str in bytes.
 *
 * The first line holds the column headers. One further line is emitted
 * per enabled latency type. Histogram buckets are printed as raw counts.
 * Min, max and average are divided by 1000 to match the "(us)" headers.
 *
 * Returns the number of bytes actually written to @str, excluding the
 * terminating NUL. Output that does not fit is silently truncated.
 */
int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
{
	struct latency_statistic *lstat = iommu->perf_statistic;
	unsigned long flags;
	int bytes = 0, i, j;

	memset(str, 0, size);

	/*
	 * scnprintf() returns what was really stored, so 'bytes' can never
	 * exceed 'size' and 'size - bytes' cannot wrap around on truncation.
	 */
	for (i = 0; i < COUNTS_NUM; i++)
		bytes += scnprintf(str + bytes, size - bytes,
				   "%s", latency_counter_names[i]);

	spin_lock_irqsave(&latency_lock, flags);
	for (i = 0; i < DMAR_LATENCY_NUM; i++) {
		if (!dmar_latency_enabled(iommu, i))
			continue;

		bytes += scnprintf(str + bytes, size - bytes,
				   "\n%s", latency_type_names[i]);

		for (j = 0; j < COUNTS_NUM; j++) {
			u64 val = lstat[i].counter[j];

			switch (j) {
			case COUNTS_MIN:
				/* UINT_MAX is the "no sample yet" marker. */
				if (val == UINT_MAX)
					val = 0;
				else
					val = div_u64(val, 1000);
				break;
			case COUNTS_MAX:
				val = div_u64(val, 1000);
				break;
			case COUNTS_SUM:
				/*
				 * The divisor is a 64-bit quantity; div_u64()
				 * would truncate it to 32 bits, giving a wrong
				 * average or even a division by zero once
				 * enough samples have been collected.
				 */
				if (lstat[i].samples)
					val = div64_u64(val,
							lstat[i].samples * 1000);
				else
					val = 0;
				break;
			default:
				break;
			}

			/* val is unsigned, print it as such. */
			bytes += scnprintf(str + bytes, size - bytes,
					   "%12llu", val);
		}
	}
	spin_unlock_irqrestore(&latency_lock, flags);

	return bytes;
}

View File

@ -0,0 +1,73 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* perf.h - performance monitor header
*
* Copyright (C) 2021 Intel Corporation
*
* Author: Lu Baolu <baolu.lu@linux.intel.com>
*/
/*
 * Kinds of operations whose latency can be sampled. Each value indexes one
 * entry of the per-IOMMU latency_statistic array.
 */
enum latency_type {
DMAR_LATENCY_INV_IOTLB = 0, /* QI_IOTLB_TYPE / QI_EIOTLB_TYPE invalidation descriptors */
DMAR_LATENCY_INV_DEVTLB, /* QI_DIOTLB_TYPE / QI_DEIOTLB_TYPE invalidation descriptors */
DMAR_LATENCY_INV_IEC, /* QI_IEC_TYPE invalidation descriptors */
DMAR_LATENCY_PRQ, /* page request (prq) handling */
DMAR_LATENCY_NUM /* number of latency types, not a type itself */
};
/*
 * Indices into latency_statistic.counter[]: seven histogram buckets
 * followed by the running minimum, maximum and sum of all samples.
 */
enum latency_count {
COUNTS_10e2 = 0, /* < 0.1us */
COUNTS_10e3, /* 0.1us ~ 1us */
COUNTS_10e4, /* 1us ~ 10us */
COUNTS_10e5, /* 10us ~ 100us */
COUNTS_10e6, /* 100us ~ 1ms */
COUNTS_10e7, /* 1ms ~ 10ms */
COUNTS_10e8_plus, /* 10ms and above */
COUNTS_MIN, /* smallest sample seen */
COUNTS_MAX, /* largest sample seen */
COUNTS_SUM, /* sum of all samples */
COUNTS_NUM /* number of counters, not a counter itself */
};
/* Latency statistics collected for one latency type of one IOMMU. */
struct latency_statistic {
bool enabled; /* sampling for this type is switched on */
u64 counter[COUNTS_NUM]; /* indexed by enum latency_count */
u64 samples; /* number of samples accounted so far */
};
/*
 * Latency monitoring interface. The real implementations are only
 * declared when CONFIG_DMAR_PERF is set; otherwise the inline stubs below
 * are used.
 */
#ifdef CONFIG_DMAR_PERF
/* Switch sampling of one latency type on or off for an IOMMU. */
int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type);
void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type);
/* Query whether sampling of a latency type is switched on. */
bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type);
/* Account one latency sample for a latency type. */
void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type,
u64 latency);
/* Format the collected statistics as text into str (at most size bytes). */
int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size);
#else
/* Stub: enabling always fails with -EINVAL. */
static inline int
dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
{
return -EINVAL;
}
/* Stub: nothing to disable. */
static inline void
dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
{
}
/* Stub: sampling is never enabled. */
static inline bool
dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
{
return false;
}
/* Stub: samples are discarded. */
static inline void
dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
{
}
/* Stub: writes nothing and reports zero bytes. */
static inline int
dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
{
return 0;
}
#endif /* CONFIG_DMAR_PERF */

View File

@ -17,19 +17,76 @@
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>
#include <trace/events/intel_iommu.h>
#include "pasid.h"
#include "perf.h"
#include "../iommu-sva-lib.h"
static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
#define PRQ_ORDER 0
/* XArray indexed by PASID, holding the private data pointer stored for it. */
static DEFINE_XARRAY_ALLOC(pasid_private_array);
/*
 * Store @priv for @pasid in pasid_private_array.
 *
 * The allocation range is limited to exactly @pasid, so the entry can only
 * land at that index. Allocates with GFP_ATOMIC.
 *
 * Returns the result of xa_alloc(): 0 on success, a negative errno
 * otherwise (presumably also when the index is already occupied; confirm
 * against the XArray documentation).
 */
static int pasid_private_add(ioasid_t pasid, void *priv)
{
return xa_alloc(&pasid_private_array, &pasid, priv,
XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}
/* Drop the entry stored for @pasid from pasid_private_array. */
static void pasid_private_remove(ioasid_t pasid)
{
xa_erase(&pasid_private_array, pasid);
}
/*
 * Look up the private data stored for @pasid in pasid_private_array.
 * Returns whatever xa_load() yields for that index.
 */
static void *pasid_private_find(ioasid_t pasid)
{
return xa_load(&pasid_private_array, pasid);
}
/*
 * Find the device bound to @svm whose source id equals @sid.
 *
 * The svm->devs list is walked inside an RCU read-side section.
 * Returns the first matching entry, or NULL when there is none.
 *
 * NOTE(review): the entry is handed back after rcu_read_unlock(); callers
 * presumably keep it alive by other means - confirm.
 */
static struct intel_svm_dev *
svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid)
{
	struct intel_svm_dev *cur, *match = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(cur, &svm->devs, list) {
		if (cur->sid != sid)
			continue;

		match = cur;
		break;
	}
	rcu_read_unlock();

	return match;
}
/*
 * Find the entry of @svm that belongs to @dev.
 *
 * The svm->devs list is walked inside an RCU read-side section.
 * Returns the first matching entry, or NULL when there is none.
 *
 * NOTE(review): the entry is handed back after rcu_read_unlock(); callers
 * presumably keep it alive by other means - confirm.
 */
static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
	struct intel_svm_dev *cur, *match = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(cur, &svm->devs, list) {
		if (cur->dev != dev)
			continue;

		match = cur;
		break;
	}
	rcu_read_unlock();

	return match;
}
int intel_svm_enable_prq(struct intel_iommu *iommu)
{
struct iopf_queue *iopfq;
struct page *pages;
int irq, ret;
@ -46,13 +103,20 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
iommu->name);
ret = -EINVAL;
err:
free_pages((unsigned long)iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return ret;
goto free_prq;
}
iommu->pr_irq = irq;
snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
"dmar%d-iopfq", iommu->seq_id);
iopfq = iopf_queue_alloc(iommu->iopfq_name);
if (!iopfq) {
pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
ret = -ENOMEM;
goto free_hwirq;
}
iommu->iopf_queue = iopfq;
snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
@ -60,9 +124,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
if (ret) {
pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
iommu->name);
dmar_free_hwirq(irq);
iommu->pr_irq = 0;
goto err;
goto free_iopfq;
}
dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
@ -71,6 +133,18 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
init_completion(&iommu->prq_complete);
return 0;
free_iopfq:
iopf_queue_free(iommu->iopf_queue);
iommu->iopf_queue = NULL;
free_hwirq:
dmar_free_hwirq(irq);
iommu->pr_irq = 0;
free_prq:
free_pages((unsigned long)iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
return ret;
}
int intel_svm_finish_prq(struct intel_iommu *iommu)
@ -85,6 +159,11 @@ int intel_svm_finish_prq(struct intel_iommu *iommu)
iommu->pr_irq = 0;
}
if (iommu->iopf_queue) {
iopf_queue_free(iommu->iopf_queue);
iommu->iopf_queue = NULL;
}
free_pages((unsigned long)iommu->prq, PRQ_ORDER);
iommu->prq = NULL;
@ -204,17 +283,12 @@ static const struct mmu_notifier_ops intel_mmuops = {
};
static DEFINE_MUTEX(pasid_mutex);
static LIST_HEAD(global_svm_list);
#define for_each_svm_dev(sdev, svm, d) \
list_for_each_entry((sdev), &(svm)->devs, list) \
if ((d) != (sdev)->dev) {} else
static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
struct intel_svm **rsvm,
struct intel_svm_dev **rsdev)
{
struct intel_svm_dev *d, *sdev = NULL;
struct intel_svm_dev *sdev = NULL;
struct intel_svm *svm;
/* The caller should hold the pasid_mutex lock */
@ -224,7 +298,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
return -EINVAL;
svm = ioasid_find(NULL, pasid, NULL);
svm = pasid_private_find(pasid);
if (IS_ERR(svm))
return PTR_ERR(svm);
@ -237,15 +311,7 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
*/
if (WARN_ON(list_empty(&svm->devs)))
return -EINVAL;
rcu_read_lock();
list_for_each_entry_rcu(d, &svm->devs, list) {
if (d->dev == dev) {
sdev = d;
break;
}
}
rcu_read_unlock();
sdev = svm_lookup_device_by_dev(svm, dev);
out:
*rsvm = svm;
@ -334,7 +400,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
svm->gpasid = data->gpasid;
svm->flags |= SVM_FLAG_GUEST_PASID;
}
ioasid_set_data(data->hpasid, svm);
pasid_private_add(data->hpasid, svm);
INIT_LIST_HEAD_RCU(&svm->devs);
mmput(svm->mm);
}
@ -388,7 +454,7 @@ int intel_svm_bind_gpasid(struct iommu_domain *domain, struct device *dev,
list_add_rcu(&sdev->list, &svm->devs);
out:
if (!IS_ERR_OR_NULL(svm) && list_empty(&svm->devs)) {
ioasid_set_data(data->hpasid, NULL);
pasid_private_remove(data->hpasid);
kfree(svm);
}
@ -431,7 +497,7 @@ int intel_svm_unbind_gpasid(struct device *dev, u32 pasid)
* the unbind, IOMMU driver will get notified
* and perform cleanup.
*/
ioasid_set_data(pasid, NULL);
pasid_private_remove(pasid);
kfree(svm);
}
}
@ -459,79 +525,81 @@ static void load_pasid(struct mm_struct *mm, u32 pasid)
mutex_unlock(&mm->context.lock);
}
/* Caller must hold pasid_mutex, mm reference */
static int
intel_svm_bind_mm(struct device *dev, unsigned int flags,
struct mm_struct *mm, struct intel_svm_dev **sd)
static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
unsigned int flags)
{
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
struct intel_svm *svm = NULL, *t;
struct device_domain_info *info;
ioasid_t max_pasid = dev_is_pci(dev) ?
pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
}
/* Release the PASID of @mm by handing it to iommu_sva_free_pasid(). */
static void intel_svm_free_pasid(struct mm_struct *mm)
{
iommu_sva_free_pasid(mm);
}
static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
struct device *dev,
struct mm_struct *mm,
unsigned int flags)
{
struct device_domain_info *info = get_domain_info(dev);
unsigned long iflags, sflags;
struct intel_svm_dev *sdev;
unsigned long iflags;
int pasid_max;
int ret;
struct intel_svm *svm;
int ret = 0;
if (!iommu || dmar_disabled)
return -EINVAL;
svm = pasid_private_find(mm->pasid);
if (!svm) {
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm)
return ERR_PTR(-ENOMEM);
if (!intel_svm_capable(iommu))
return -ENOTSUPP;
svm->pasid = mm->pasid;
svm->mm = mm;
svm->flags = flags;
INIT_LIST_HEAD_RCU(&svm->devs);
if (dev_is_pci(dev)) {
pasid_max = pci_max_pasids(to_pci_dev(dev));
if (pasid_max < 0)
return -EINVAL;
} else
pasid_max = 1 << 20;
if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
svm->notifier.ops = &intel_mmuops;
ret = mmu_notifier_register(&svm->notifier, mm);
if (ret) {
kfree(svm);
return ERR_PTR(ret);
}
}
/* Bind supervisor PASID shuld have mm = NULL */
if (flags & SVM_FLAG_SUPERVISOR_MODE) {
if (!ecap_srs(iommu->ecap) || mm) {
pr_err("Supervisor PASID with user provided mm.\n");
return -EINVAL;
ret = pasid_private_add(svm->pasid, svm);
if (ret) {
if (svm->notifier.ops)
mmu_notifier_unregister(&svm->notifier, mm);
kfree(svm);
return ERR_PTR(ret);
}
}
list_for_each_entry(t, &global_svm_list, list) {
if (t->mm != mm)
continue;
svm = t;
if (svm->pasid >= pasid_max) {
dev_warn(dev,
"Limited PASID width. Cannot use existing PASID %d\n",
svm->pasid);
ret = -ENOSPC;
goto out;
}
/* Find the matching device in svm list */
for_each_svm_dev(sdev, svm, dev) {
sdev->users++;
goto success;
}
break;
/* Find the matching device in svm list */
sdev = svm_lookup_device_by_dev(svm, dev);
if (sdev) {
sdev->users++;
goto success;
}
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev) {
ret = -ENOMEM;
goto out;
goto free_svm;
}
sdev->dev = dev;
sdev->iommu = iommu;
ret = intel_iommu_enable_pasid(iommu, dev);
if (ret) {
kfree(sdev);
goto out;
}
info = get_domain_info(dev);
sdev->did = FLPT_DEFAULT_DID;
sdev->sid = PCI_DEVID(info->bus, info->devfn);
sdev->users = 1;
sdev->pasid = svm->pasid;
sdev->sva.dev = dev;
init_rcu_head(&sdev->rcu);
if (info->ats_enabled) {
sdev->dev_iotlb = 1;
sdev->qdep = info->ats_qdep;
@ -539,95 +607,37 @@ intel_svm_bind_mm(struct device *dev, unsigned int flags,
sdev->qdep = 0;
}
/* Finish the setup now we know we're keeping it */
sdev->users = 1;
init_rcu_head(&sdev->rcu);
/* Setup the pasid table: */
sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
PASID_FLAG_SUPERVISOR_MODE : 0;
sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
spin_lock_irqsave(&iommu->lock, iflags);
ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
FLPT_DEFAULT_DID, sflags);
spin_unlock_irqrestore(&iommu->lock, iflags);
if (!svm) {
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm) {
ret = -ENOMEM;
kfree(sdev);
goto out;
}
if (ret)
goto free_sdev;
if (pasid_max > intel_pasid_max_id)
pasid_max = intel_pasid_max_id;
/* The newly allocated pasid is loaded to the mm. */
if (!(flags & SVM_FLAG_SUPERVISOR_MODE) && list_empty(&svm->devs))
load_pasid(mm, svm->pasid);
/* Do not use PASID 0, reserved for RID to PASID */
svm->pasid = ioasid_alloc(NULL, PASID_MIN,
pasid_max - 1, svm);
if (svm->pasid == INVALID_IOASID) {
kfree(svm);
kfree(sdev);
ret = -ENOSPC;
goto out;
}
svm->notifier.ops = &intel_mmuops;
svm->mm = mm;
svm->flags = flags;
INIT_LIST_HEAD_RCU(&svm->devs);
INIT_LIST_HEAD(&svm->list);
ret = -ENOMEM;
if (mm) {
ret = mmu_notifier_register(&svm->notifier, mm);
if (ret) {
ioasid_put(svm->pasid);
kfree(svm);
kfree(sdev);
goto out;
}
}
spin_lock_irqsave(&iommu->lock, iflags);
ret = intel_pasid_setup_first_level(iommu, dev,
mm ? mm->pgd : init_mm.pgd,
svm->pasid, FLPT_DEFAULT_DID,
(mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
(cpu_feature_enabled(X86_FEATURE_LA57) ?
PASID_FLAG_FL5LP : 0));
spin_unlock_irqrestore(&iommu->lock, iflags);
if (ret) {
if (mm)
mmu_notifier_unregister(&svm->notifier, mm);
ioasid_put(svm->pasid);
kfree(svm);
kfree(sdev);
goto out;
}
list_add_tail(&svm->list, &global_svm_list);
if (mm) {
/* The newly allocated pasid is loaded to the mm. */
load_pasid(mm, svm->pasid);
}
} else {
/*
* Binding a new device with existing PASID, need to setup
* the PASID entry.
*/
spin_lock_irqsave(&iommu->lock, iflags);
ret = intel_pasid_setup_first_level(iommu, dev,
mm ? mm->pgd : init_mm.pgd,
svm->pasid, FLPT_DEFAULT_DID,
(mm ? 0 : PASID_FLAG_SUPERVISOR_MODE) |
(cpu_feature_enabled(X86_FEATURE_LA57) ?
PASID_FLAG_FL5LP : 0));
spin_unlock_irqrestore(&iommu->lock, iflags);
if (ret) {
kfree(sdev);
goto out;
}
}
list_add_rcu(&sdev->list, &svm->devs);
success:
sdev->pasid = svm->pasid;
sdev->sva.dev = dev;
if (sd)
*sd = sdev;
ret = 0;
out:
return ret;
return &sdev->sva;
free_sdev:
kfree(sdev);
free_svm:
if (list_empty(&svm->devs)) {
if (svm->notifier.ops)
mmu_notifier_unregister(&svm->notifier, mm);
pasid_private_remove(mm->pasid);
kfree(svm);
}
return ERR_PTR(ret);
}
/* Caller must hold pasid_mutex */
@ -636,6 +646,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
struct intel_svm_dev *sdev;
struct intel_iommu *iommu;
struct intel_svm *svm;
struct mm_struct *mm;
int ret = -EINVAL;
iommu = device_to_iommu(dev, NULL, NULL);
@ -645,6 +656,7 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
if (ret)
goto out;
mm = svm->mm;
if (sdev) {
sdev->users--;
@ -663,13 +675,13 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
ioasid_put(svm->pasid);
if (svm->mm) {
mmu_notifier_unregister(&svm->notifier, svm->mm);
intel_svm_free_pasid(mm);
if (svm->notifier.ops) {
mmu_notifier_unregister(&svm->notifier, mm);
/* Clear mm's pasid. */
load_pasid(svm->mm, PASID_DISABLED);
load_pasid(mm, PASID_DISABLED);
}
list_del(&svm->list);
pasid_private_remove(svm->pasid);
/* We mandate that no page faults may be outstanding
* for the PASID when intel_svm_unbind_mm() is called.
* If that is not obeyed, subtle errors will happen.
@ -714,22 +726,6 @@ struct page_req_dsc {
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x20)
static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
{
unsigned long requested = 0;
if (req->exe_req)
requested |= VM_EXEC;
if (req->rd_req)
requested |= VM_READ;
if (req->wr_req)
requested |= VM_WRITE;
return (requested & ~vma->vm_flags) != 0;
}
static bool is_canonical_address(u64 addr)
{
int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
@ -799,6 +795,8 @@ prq_retry:
goto prq_retry;
}
iopf_queue_flush_dev(dev);
/*
* Perform steps described in VT-d spec CH7.10 to drain page
* requests and responses in hardware.
@ -841,8 +839,8 @@ static int prq_to_iommu_prot(struct page_req_dsc *req)
return prot;
}
static int
intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
struct page_req_dsc *desc)
{
struct iommu_fault_event event;
@ -872,159 +870,136 @@ intel_svm_prq_report(struct device *dev, struct page_req_dsc *desc)
*/
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
memcpy(event.fault.prm.private_data, desc->priv_data,
sizeof(desc->priv_data));
event.fault.prm.private_data[0] = desc->priv_data[0];
event.fault.prm.private_data[1] = desc->priv_data[1];
} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
/*
* If the private data fields are not used by hardware, use it
* to monitor the prq handle latency.
*/
event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
}
return iommu_report_device_fault(dev, &event);
}
static void handle_bad_prq_event(struct intel_iommu *iommu,
struct page_req_dsc *req, int result)
{
struct qi_desc desc;
pr_err("%s: Invalid page request: %08llx %08llx\n",
iommu->name, ((unsigned long long *)req)[0],
((unsigned long long *)req)[1]);
/*
* Per VT-d spec. v3.0 ch7.7, system software must
* respond with page group response if private data
* is present (PDP) or last page in group (LPIG) bit
* is set. This is an additional VT-d feature beyond
* PCI ATS spec.
*/
if (!req->lpig && !req->priv_data_present)
return;
desc.qw0 = QI_PGRP_PASID(req->pasid) |
QI_PGRP_DID(req->rid) |
QI_PGRP_PASID_P(req->pasid_present) |
QI_PGRP_PDP(req->priv_data_present) |
QI_PGRP_RESP_CODE(result) |
QI_PGRP_RESP_TYPE;
desc.qw1 = QI_PGRP_IDX(req->prg_index) |
QI_PGRP_LPIG(req->lpig);
if (req->priv_data_present) {
desc.qw2 = req->priv_data[0];
desc.qw3 = req->priv_data[1];
} else {
desc.qw2 = 0;
desc.qw3 = 0;
}
qi_submit_sync(iommu, &desc, 1, 0);
}
static irqreturn_t prq_event_thread(int irq, void *d)
{
struct intel_svm_dev *sdev = NULL;
struct intel_iommu *iommu = d;
struct intel_svm *svm = NULL;
int head, tail, handled = 0;
unsigned int flags = 0;
struct page_req_dsc *req;
int head, tail, handled;
u64 address;
/* Clear PPR bit before reading head/tail registers, to
* ensure that we get a new interrupt if needed. */
/*
* Clear PPR bit before reading head/tail registers, to ensure that
* we get a new interrupt if needed.
*/
writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
handled = (head != tail);
while (head != tail) {
struct vm_area_struct *vma;
struct page_req_dsc *req;
struct qi_desc resp;
int result;
vm_fault_t ret;
u64 address;
handled = 1;
req = &iommu->prq[head / sizeof(*req)];
result = QI_RESP_INVALID;
address = (u64)req->addr << VTD_PAGE_SHIFT;
if (!req->pasid_present) {
pr_err("%s: Page request without PASID: %08llx %08llx\n",
iommu->name, ((unsigned long long *)req)[0],
((unsigned long long *)req)[1]);
goto no_pasid;
if (unlikely(!req->pasid_present)) {
pr_err("IOMMU: %s: Page request without PASID\n",
iommu->name);
bad_req:
svm = NULL;
sdev = NULL;
handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
goto prq_advance;
}
/* We shall not receive page request for supervisor SVM */
if (req->pm_req && (req->rd_req | req->wr_req)) {
pr_err("Unexpected page request in Privilege Mode");
/* No need to find the matching sdev as for bad_req */
goto no_pasid;
if (unlikely(!is_canonical_address(address))) {
pr_err("IOMMU: %s: Address is not canonical\n",
iommu->name);
goto bad_req;
}
/* DMA read with exec request is not supported. */
if (req->exe_req && req->rd_req) {
pr_err("Execution request not supported\n");
goto no_pasid;
if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
pr_err("IOMMU: %s: Page request in Privilege Mode\n",
iommu->name);
goto bad_req;
}
if (unlikely(req->exe_req && req->rd_req)) {
pr_err("IOMMU: %s: Execution request not supported\n",
iommu->name);
goto bad_req;
}
if (!svm || svm->pasid != req->pasid) {
rcu_read_lock();
svm = ioasid_find(NULL, req->pasid, NULL);
/* It *can't* go away, because the driver is not permitted
/*
* It can't go away, because the driver is not permitted
* to unbind the mm while any page faults are outstanding.
* So we only need RCU to protect the internal idr code. */
rcu_read_unlock();
if (IS_ERR_OR_NULL(svm)) {
pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
iommu->name, req->pasid, ((unsigned long long *)req)[0],
((unsigned long long *)req)[1]);
goto no_pasid;
}
*/
svm = pasid_private_find(req->pasid);
if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE))
goto bad_req;
}
if (!sdev || sdev->sid != req->rid) {
struct intel_svm_dev *t;
sdev = NULL;
rcu_read_lock();
list_for_each_entry_rcu(t, &svm->devs, list) {
if (t->sid == req->rid) {
sdev = t;
break;
}
}
rcu_read_unlock();
sdev = svm_lookup_device_by_sid(svm, req->rid);
if (!sdev)
goto bad_req;
}
/* Since we're using init_mm.pgd directly, we should never take
* any faults on kernel addresses. */
if (!svm->mm)
goto bad_req;
/* If address is not canonical, return invalid response */
if (!is_canonical_address(address))
goto bad_req;
sdev->prq_seq_number++;
/*
* If prq is to be handled outside iommu driver via receiver of
* the fault notifiers, we skip the page response here.
*/
if (svm->flags & SVM_FLAG_GUEST_MODE) {
if (sdev && !intel_svm_prq_report(sdev->dev, req))
goto prq_advance;
else
goto bad_req;
}
if (intel_svm_prq_report(iommu, sdev->dev, req))
handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
/* If the mm is already defunct, don't handle faults. */
if (!mmget_not_zero(svm->mm))
goto bad_req;
mmap_read_lock(svm->mm);
vma = find_extend_vma(svm->mm, address);
if (!vma || address < vma->vm_start)
goto invalid;
if (access_error(vma, req))
goto invalid;
flags = FAULT_FLAG_USER | FAULT_FLAG_REMOTE;
if (req->wr_req)
flags |= FAULT_FLAG_WRITE;
ret = handle_mm_fault(vma, address, flags, NULL);
if (ret & VM_FAULT_ERROR)
goto invalid;
result = QI_RESP_SUCCESS;
invalid:
mmap_read_unlock(svm->mm);
mmput(svm->mm);
bad_req:
/* We get here in the error case where the PASID lookup failed,
and these can be NULL. Do not use them below this point! */
sdev = NULL;
svm = NULL;
no_pasid:
if (req->lpig || req->priv_data_present) {
/*
* Per VT-d spec. v3.0 ch7.7, system software must
* respond with page group response if private data
* is present (PDP) or last page in group (LPIG) bit
* is set. This is an additional VT-d feature beyond
* PCI ATS spec.
*/
resp.qw0 = QI_PGRP_PASID(req->pasid) |
QI_PGRP_DID(req->rid) |
QI_PGRP_PASID_P(req->pasid_present) |
QI_PGRP_PDP(req->priv_data_present) |
QI_PGRP_RESP_CODE(result) |
QI_PGRP_RESP_TYPE;
resp.qw1 = QI_PGRP_IDX(req->prg_index) |
QI_PGRP_LPIG(req->lpig);
resp.qw2 = 0;
resp.qw3 = 0;
if (req->priv_data_present)
memcpy(&resp.qw2, req->priv_data,
sizeof(req->priv_data));
qi_submit_sync(iommu, &resp, 1, 0);
}
trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1,
req->priv_data[0], req->priv_data[1],
sdev->prq_seq_number);
prq_advance:
head = (head + sizeof(*req)) & PRQ_RING_MASK;
}
@ -1041,6 +1016,7 @@ prq_advance:
head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
if (head == tail) {
iopf_queue_discard_partial(iommu->iopf_queue);
writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
iommu->name);
@ -1053,31 +1029,42 @@ prq_advance:
return IRQ_RETVAL(handled);
}
#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
struct iommu_sva *
intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
struct iommu_sva *sva = ERR_PTR(-EINVAL);
struct intel_svm_dev *sdev = NULL;
struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
unsigned int flags = 0;
struct iommu_sva *sva;
int ret;
/*
* TODO: Consolidate with generic iommu-sva bind after it is merged.
* It will require shared SVM data structures, i.e. combine io_mm
* and intel_svm etc.
*/
if (drvdata)
flags = *(unsigned int *)drvdata;
mutex_lock(&pasid_mutex);
ret = intel_svm_bind_mm(dev, flags, mm, &sdev);
if (ret)
sva = ERR_PTR(ret);
else if (sdev)
sva = &sdev->sva;
else
WARN(!sdev, "SVM bind succeeded with no sdev!\n");
if (flags & SVM_FLAG_SUPERVISOR_MODE) {
if (!ecap_srs(iommu->ecap)) {
dev_err(dev, "%s: Supervisor PASID not supported\n",
iommu->name);
return ERR_PTR(-EOPNOTSUPP);
}
if (mm) {
dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
iommu->name);
return ERR_PTR(-EINVAL);
}
mm = &init_mm;
}
mutex_lock(&pasid_mutex);
ret = intel_svm_alloc_pasid(dev, mm, flags);
if (ret) {
mutex_unlock(&pasid_mutex);
return ERR_PTR(ret);
}
sva = intel_svm_bind_mm(iommu, dev, mm, flags);
if (IS_ERR_OR_NULL(sva))
intel_svm_free_pasid(mm);
mutex_unlock(&pasid_mutex);
return sva;
@ -1085,10 +1072,9 @@ intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
void intel_svm_unbind(struct iommu_sva *sva)
{
struct intel_svm_dev *sdev;
struct intel_svm_dev *sdev = to_intel_svm_dev(sva);
mutex_lock(&pasid_mutex);
sdev = to_intel_svm_dev(sva);
intel_svm_unbind_mm(sdev->dev, sdev->pasid);
mutex_unlock(&pasid_mutex);
}
@ -1194,9 +1180,14 @@ int intel_svm_page_response(struct device *dev,
desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
desc.qw2 = 0;
desc.qw3 = 0;
if (private_present)
memcpy(&desc.qw2, prm->private_data,
sizeof(prm->private_data));
if (private_present) {
desc.qw2 = prm->private_data[0];
desc.qw3 = prm->private_data[1];
} else if (prm->private_data[0]) {
dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
ktime_to_ns(ktime_get()) - prm->private_data[0]);
}
qi_submit_sync(iommu, &desc, 1, 0);
}

View File

@ -3059,9 +3059,6 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
int ret, dev_def_dom;
struct device *dev;
if (!group)
return -EINVAL;
mutex_lock(&group->mutex);
if (group->default_domain != group->domain) {

View File

@ -412,12 +412,11 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn)
return NULL;
}
static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
static void remove_iova(struct iova_domain *iovad, struct iova *iova)
{
assert_spin_locked(&iovad->iova_rbtree_lock);
__cached_rbnode_delete_update(iovad, iova);
rb_erase(&iova->node, &iovad->rbroot);
free_iova_mem(iova);
}
/**
@ -452,8 +451,9 @@ __free_iova(struct iova_domain *iovad, struct iova *iova)
unsigned long flags;
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
private_free_iova(iovad, iova);
remove_iova(iovad, iova);
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(__free_iova);
@ -472,10 +472,13 @@ free_iova(struct iova_domain *iovad, unsigned long pfn)
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
iova = private_find_iova(iovad, pfn);
if (iova)
private_free_iova(iovad, iova);
if (!iova) {
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
return;
}
remove_iova(iovad, iova);
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(free_iova);
@ -825,7 +828,8 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
if (WARN_ON(!iova))
continue;
private_free_iova(iovad, iova);
remove_iova(iovad, iova);
free_iova_mem(iova);
}
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

View File

@ -19,7 +19,6 @@
#include <linux/iommu.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>

View File

@ -18,7 +18,6 @@
#include <linux/iommu.h>
#include <linux/clk.h>
#include <linux/err.h>
#include <linux/of_iommu.h>
#include <asm/cacheflush.h>
#include <linux/sizes.h>

View File

@ -19,7 +19,6 @@
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>

View File

@ -22,7 +22,6 @@
#include <linux/list.h>
#include <linux/module.h>
#include <linux/of_address.h>
#include <linux/of_iommu.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>

View File

@ -19,74 +19,6 @@
#define NO_IOMMU 1
/**
 * of_get_dma_window - Parse *dma-window property and returns 0 if found.
 *
 * @dn: device node
 * @prefix: prefix for property name if any
 * @index: index to start to parse
 * @busno: Returns busno if supported. Otherwise pass NULL
 * @addr: Returns address that DMA starts
 * @size: Returns the range that DMA can handle
 *
 * This supports different formats flexibly. "prefix" can be
 * configured if any. "busno" and "index" are optionally
 * specified. Set 0(or NULL) if not used.
 *
 * Windows are parsed in order until the one numbered @index has been
 * read. If the property holds fewer windows than @index, the values of
 * the last window parsed are left in the output parameters and 0 is
 * still returned.
 *
 * Return: 0 on success, -EINVAL on a bad argument, a zero cell count or a
 * property too short for the cells it is supposed to contain, -ENODEV if
 * the property does not exist.
 */
int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
		      unsigned long *busno, dma_addr_t *addr, size_t *size)
{
	const __be32 *dma_window, *end;
	int bytes, cur_index = 0;
	char propname[NAME_MAX], addrname[NAME_MAX], sizename[NAME_MAX];

	if (!dn || !addr || !size)
		return -EINVAL;

	if (!prefix)
		prefix = "";

	snprintf(propname, sizeof(propname), "%sdma-window", prefix);
	snprintf(addrname, sizeof(addrname), "%s#dma-address-cells", prefix);
	snprintf(sizename, sizeof(sizename), "%s#dma-size-cells", prefix);

	dma_window = of_get_property(dn, propname, &bytes);
	if (!dma_window)
		return -ENODEV;
	end = dma_window + bytes / sizeof(*dma_window);

	while (dma_window < end) {
		u32 cells;
		const void *prop;

		/* busno is one cell if supported */
		if (busno)
			*busno = be32_to_cpup(dma_window++);

		prop = of_get_property(dn, addrname, NULL);
		if (!prop)
			prop = of_get_property(dn, "#address-cells", NULL);

		cells = prop ? be32_to_cpup(prop) : of_n_addr_cells(dn);
		/*
		 * The cell count comes from the device tree; never read
		 * more cells than the property still holds.
		 */
		if (!cells || cells > (size_t)(end - dma_window))
			return -EINVAL;
		*addr = of_read_number(dma_window, cells);
		dma_window += cells;

		prop = of_get_property(dn, sizename, NULL);
		cells = prop ? be32_to_cpup(prop) : of_n_size_cells(dn);
		if (!cells || cells > (size_t)(end - dma_window))
			return -EINVAL;
		*size = of_read_number(dma_window, cells);
		dma_window += cells;

		if (cur_index++ == index)
			break;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(of_get_dma_window);
static int of_iommu_xlate(struct device *dev,
struct of_phandle_args *iommu_spec)
{

View File

@ -22,7 +22,6 @@
#include <linux/io.h>
#include <linux/pm_runtime.h>
#include <linux/of.h>
#include <linux/of_iommu.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/regmap.h>

View File

@ -21,7 +21,6 @@
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/of.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
@ -96,6 +95,15 @@ static const char * const rk_iommu_clocks[] = {
"aclk", "iface",
};
/*
 * Hooks and parameters that differ between IOMMU hardware versions. The
 * driver reaches them through the file-scope rk_ops pointer.
 */
struct rk_iommu_ops {
/* Extract the physical address stored in a directory/page table entry */
phys_addr_t (*pt_address)(u32 dte);
/* Build a directory table entry from a page table DMA address */
u32 (*mk_dtentries)(dma_addr_t pt_dma);
/* Build a page table entry from a page address and IOMMU_* prot flags */
u32 (*mk_ptentries)(phys_addr_t page, int prot);
/* Convert a value read from RK_MMU_DTE_ADDR into a physical address */
phys_addr_t (*dte_addr_phys)(u32 addr);
/* Convert the directory table DMA address into the RK_MMU_DTE_ADDR value */
u32 (*dma_addr_dte)(dma_addr_t dt_dma);
/* Mask handed to dma_set_mask_and_coherent() at probe time */
u64 dma_bit_mask;
};
struct rk_iommu {
struct device *dev;
void __iomem **bases;
@ -116,6 +124,7 @@ struct rk_iommudata {
};
static struct device *dma_dev;
static const struct rk_iommu_ops *rk_ops;
static inline void rk_table_flush(struct rk_iommu_domain *dom, dma_addr_t dma,
unsigned int count)
@ -179,6 +188,33 @@ static inline phys_addr_t rk_dte_pt_address(u32 dte)
return (phys_addr_t)dte & RK_DTE_PT_ADDRESS_MASK;
}
/*
 * In v2:
 * 31:12 - PT address bit 31:0
 * 11: 8 - PT address bit 35:32
 *  7: 4 - PT address bit 39:36
 *  3: 1 - Reserved
 *     0 - 1 if PT @ PT address is valid
 */
#define RK_DTE_PT_ADDRESS_MASK_V2 GENMASK_ULL(31, 4)
#define DTE_HI_MASK1	GENMASK(11, 8)
#define DTE_HI_MASK2	GENMASK(7, 4)
#define DTE_HI_SHIFT1	24 /* shift bit 8 to bit 32 */
#define DTE_HI_SHIFT2	32 /* shift bit 4 to bit 36 */
/*
 * Address-side counterparts of DTE_HI_MASK1/2. Each must select the
 * address bits that the matching DTE_HI_SHIFTn moves into the entry field
 * DTE_HI_MASKn: bits 35:32 >> 24 land in 11:8, bits 39:36 >> 32 land in
 * 7:4. Keeping the numbering aligned makes encoding the exact inverse of
 * rk_dte_pt_address_v2().
 */
#define PAGE_DESC_HI_MASK1	GENMASK_ULL(35, 32)
#define PAGE_DESC_HI_MASK2	GENMASK_ULL(39, 36)

/*
 * Decode the page table address stored in a v2 directory table entry.
 *
 * Entry bits 7:4 are moved up to address bits 39:36, entry bits 11:8 to
 * address bits 35:32, and the bits selected by RK_DTE_PT_ADDRESS_MASK are
 * kept in place.
 */
static inline phys_addr_t rk_dte_pt_address_v2(u32 dte)
{
	/* Widen first so that the shifts past bit 31 are not lost. */
	u64 dte_v2 = dte;

	dte_v2 = ((dte_v2 & DTE_HI_MASK2) << DTE_HI_SHIFT2) |
		 ((dte_v2 & DTE_HI_MASK1) << DTE_HI_SHIFT1) |
		 (dte_v2 & RK_DTE_PT_ADDRESS_MASK);

	return (phys_addr_t)dte_v2;
}
static inline bool rk_dte_is_pt_valid(u32 dte)
{
return dte & RK_DTE_PT_VALID;
@ -189,6 +225,15 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma)
return (pt_dma & RK_DTE_PT_ADDRESS_MASK) | RK_DTE_PT_VALID;
}
/*
 * Build a v2 directory table entry from a page table DMA address.
 *
 * The bits selected by RK_DTE_PT_ADDRESS_MASK stay in place, the bits
 * selected by PAGE_DESC_HI_MASK1 are shifted down by DTE_HI_SHIFT1 and the
 * bits selected by PAGE_DESC_HI_MASK2 by DTE_HI_SHIFT2. The combination is
 * limited to RK_DTE_PT_ADDRESS_MASK_V2 and marked valid.
 */
static inline u32 rk_mk_dte_v2(dma_addr_t pt_dma)
{
	dma_addr_t low = pt_dma & RK_DTE_PT_ADDRESS_MASK;
	dma_addr_t hi1 = (pt_dma & PAGE_DESC_HI_MASK1) >> DTE_HI_SHIFT1;
	dma_addr_t hi2 = (pt_dma & PAGE_DESC_HI_MASK2) >> DTE_HI_SHIFT2;
	dma_addr_t packed = low | hi1 | hi2;

	return (packed & RK_DTE_PT_ADDRESS_MASK_V2) | RK_DTE_PT_VALID;
}
/*
* Each PTE has a Page address, some flags and a valid bit:
* +---------------------+---+-------+-+
@ -215,11 +260,6 @@ static inline u32 rk_mk_dte(dma_addr_t pt_dma)
#define RK_PTE_PAGE_READABLE BIT(1)
#define RK_PTE_PAGE_VALID BIT(0)
static inline phys_addr_t rk_pte_page_address(u32 pte)
{
return (phys_addr_t)pte & RK_PTE_PAGE_ADDRESS_MASK;
}
static inline bool rk_pte_is_page_valid(u32 pte)
{
return pte & RK_PTE_PAGE_VALID;
@ -235,6 +275,29 @@ static u32 rk_mk_pte(phys_addr_t page, int prot)
return page | flags | RK_PTE_PAGE_VALID;
}
/*
 * In v2:
 * 31:12 - Page address bit 31:0
 * 11:9 - Page address bit 34:32
 * 8:4 - Page address bit 39:35
 * 3 - Security
 * 2 - Readable
 * 1 - Writable
 * 0 - 1 if Page @ Page address is valid
 *
 * NOTE(review): the address part of the entry is produced by
 * rk_mk_dte_v2(), so the field split actually used is whatever that helper
 * implements; confirm it and the read/write bit positions against the
 * hardware manual.
 */
#define RK_PTE_PAGE_READABLE_V2      BIT(2)
#define RK_PTE_PAGE_WRITABLE_V2      BIT(1)

/*
 * Build a v2 page table entry for @page with the access rights requested
 * in @prot (IOMMU_READ and/or IOMMU_WRITE).
 */
static u32 rk_mk_pte_v2(phys_addr_t page, int prot)
{
	u32 perm = 0;

	if (prot & IOMMU_READ)
		perm |= RK_PTE_PAGE_READABLE_V2;
	if (prot & IOMMU_WRITE)
		perm |= RK_PTE_PAGE_WRITABLE_V2;

	/* rk_mk_dte_v2() packs the address and sets the valid bit. */
	return rk_mk_dte_v2(page) | perm;
}
static u32 rk_mk_pte_invalid(u32 pte)
{
return pte & ~RK_PTE_PAGE_VALID;
@ -448,10 +511,10 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
* and verifying that upper 5 nybbles are read back.
*/
for (i = 0; i < iommu->num_mmu; i++) {
rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, DTE_ADDR_DUMMY);
dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY);
rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
dte_addr = rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR);
if (dte_addr != (DTE_ADDR_DUMMY & RK_DTE_PT_ADDRESS_MASK)) {
if (dte_addr != rk_iommu_read(iommu->bases[i], RK_MMU_DTE_ADDR)) {
dev_err(iommu->dev, "Error during raw reset. MMU_DTE_ADDR is not functioning\n");
return -EFAULT;
}
@ -470,6 +533,31 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu)
return 0;
}
/* Return @addr unchanged, widened to phys_addr_t. */
static inline phys_addr_t rk_dte_addr_phys(u32 addr)
{
	phys_addr_t phys = addr;

	return phys;
}
/* Return @dt_dma unchanged, converted to the u32 return type. */
static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma)
{
	u32 reg = dt_dma;

	return reg;
}
#define DT_HI_MASK GENMASK_ULL(39, 32)
#define DT_SHIFT   28

/*
 * Decode a 32-bit directory table address value into a physical address.
 *
 * This is the inverse of the encoding that moves the address bits selected
 * by DT_HI_MASK down by DT_SHIFT: the field is found at
 * (DT_HI_MASK >> DT_SHIFT) in @addr and is shifted back up by DT_SHIFT.
 * The bits selected by RK_DTE_PT_ADDRESS_MASK are kept in place.
 *
 * @addr is widened to 64 bits before masking: DT_HI_MASK only covers bits
 * above 31, so applying it to the 32-bit value directly always yields 0
 * and the high address bits would be lost.
 */
static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr)
{
	u64 addr64 = addr;

	return (phys_addr_t)(addr64 & RK_DTE_PT_ADDRESS_MASK) |
	       (phys_addr_t)((addr64 & (DT_HI_MASK >> DT_SHIFT)) << DT_SHIFT);
}
/*
 * Encode a directory table DMA address into a 32-bit value: the bits
 * selected by RK_DTE_PT_ADDRESS_MASK stay in place and the bits selected
 * by DT_HI_MASK are moved down by DT_SHIFT.
 */
static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma)
{
	u64 high = (dt_dma & DT_HI_MASK) >> DT_SHIFT;

	return (dt_dma & RK_DTE_PT_ADDRESS_MASK) | high;
}
static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
{
void __iomem *base = iommu->bases[index];
@ -489,7 +577,7 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
page_offset = rk_iova_page_offset(iova);
mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR);
mmu_dte_addr_phys = (phys_addr_t)mmu_dte_addr;
mmu_dte_addr_phys = rk_ops->dte_addr_phys(mmu_dte_addr);
dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index);
dte_addr = phys_to_virt(dte_addr_phys);
@ -498,14 +586,14 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova)
if (!rk_dte_is_pt_valid(dte))
goto print_it;
pte_addr_phys = rk_dte_pt_address(dte) + (pte_index * 4);
pte_addr_phys = rk_ops->pt_address(dte) + (pte_index * 4);
pte_addr = phys_to_virt(pte_addr_phys);
pte = *pte_addr;
if (!rk_pte_is_page_valid(pte))
goto print_it;
page_addr_phys = rk_pte_page_address(pte) + page_offset;
page_addr_phys = rk_ops->pt_address(pte) + page_offset;
page_flags = pte & RK_PTE_PAGE_FLAGS_MASK;
print_it:
@ -601,13 +689,13 @@ static phys_addr_t rk_iommu_iova_to_phys(struct iommu_domain *domain,
if (!rk_dte_is_pt_valid(dte))
goto out;
pt_phys = rk_dte_pt_address(dte);
pt_phys = rk_ops->pt_address(dte);
page_table = (u32 *)phys_to_virt(pt_phys);
pte = page_table[rk_iova_pte_index(iova)];
if (!rk_pte_is_page_valid(pte))
goto out;
phys = rk_pte_page_address(pte) + rk_iova_page_offset(iova);
phys = rk_ops->pt_address(pte) + rk_iova_page_offset(iova);
out:
spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
@ -679,14 +767,13 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain,
return ERR_PTR(-ENOMEM);
}
dte = rk_mk_dte(pt_dma);
dte = rk_ops->mk_dtentries(pt_dma);
*dte_addr = dte;
rk_table_flush(rk_domain, pt_dma, NUM_PT_ENTRIES);
rk_table_flush(rk_domain,
rk_domain->dt_dma + dte_index * sizeof(u32), 1);
done:
pt_phys = rk_dte_pt_address(dte);
pt_phys = rk_ops->pt_address(dte);
return (u32 *)phys_to_virt(pt_phys);
}
@ -728,7 +815,7 @@ static int rk_iommu_map_iova(struct rk_iommu_domain *rk_domain, u32 *pte_addr,
if (rk_pte_is_page_valid(pte))
goto unwind;
pte_addr[pte_count] = rk_mk_pte(paddr, prot);
pte_addr[pte_count] = rk_ops->mk_ptentries(paddr, prot);
paddr += SPAGE_SIZE;
}
@ -750,7 +837,7 @@ unwind:
pte_count * SPAGE_SIZE);
iova += pte_count * SPAGE_SIZE;
page_phys = rk_pte_page_address(pte_addr[pte_count]);
page_phys = rk_ops->pt_address(pte_addr[pte_count]);
pr_err("iova: %pad already mapped to %pa cannot remap to phys: %pa prot: %#x\n",
&iova, &page_phys, &paddr, prot);
@ -785,7 +872,8 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
dte_index = rk_domain->dt[rk_iova_dte_index(iova)];
pte_index = rk_iova_pte_index(iova);
pte_addr = &page_table[pte_index];
pte_dma = rk_dte_pt_address(dte_index) + pte_index * sizeof(u32);
pte_dma = rk_ops->pt_address(dte_index) + pte_index * sizeof(u32);
ret = rk_iommu_map_iova(rk_domain, pte_addr, pte_dma, iova,
paddr, size, prot);
@ -821,7 +909,7 @@ static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
return 0;
}
pt_phys = rk_dte_pt_address(dte);
pt_phys = rk_ops->pt_address(dte);
pte_addr = (u32 *)phys_to_virt(pt_phys) + rk_iova_pte_index(iova);
pte_dma = pt_phys + rk_iova_pte_index(iova) * sizeof(u32);
unmap_size = rk_iommu_unmap_iova(rk_domain, pte_addr, pte_dma, size);
@ -879,7 +967,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu)
for (i = 0; i < iommu->num_mmu; i++) {
rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR,
rk_domain->dt_dma);
rk_ops->dma_addr_dte(rk_domain->dt_dma));
rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
}
@ -1004,8 +1092,6 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
goto err_free_dt;
}
rk_table_flush(rk_domain, rk_domain->dt_dma, NUM_DT_ENTRIES);
spin_lock_init(&rk_domain->iommus_lock);
spin_lock_init(&rk_domain->dt_lock);
INIT_LIST_HEAD(&rk_domain->iommus);
@ -1037,7 +1123,7 @@ static void rk_iommu_domain_free(struct iommu_domain *domain)
for (i = 0; i < NUM_DT_ENTRIES; i++) {
u32 dte = rk_domain->dt[i];
if (rk_dte_is_pt_valid(dte)) {
phys_addr_t pt_phys = rk_dte_pt_address(dte);
phys_addr_t pt_phys = rk_ops->pt_address(dte);
u32 *page_table = phys_to_virt(pt_phys);
dma_unmap_single(dma_dev, pt_phys,
SPAGE_SIZE, DMA_TO_DEVICE);
@ -1127,6 +1213,7 @@ static int rk_iommu_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct rk_iommu *iommu;
struct resource *res;
const struct rk_iommu_ops *ops;
int num_res = pdev->num_resources;
int err, i;
@ -1138,6 +1225,17 @@ static int rk_iommu_probe(struct platform_device *pdev)
iommu->dev = dev;
iommu->num_mmu = 0;
ops = of_device_get_match_data(dev);
if (!rk_ops)
rk_ops = ops;
/*
* That should not happen unless different versions of the
* hardware block are embedded in the same SoC
*/
if (WARN_ON(rk_ops != ops))
return -EINVAL;
iommu->bases = devm_kcalloc(dev, num_res, sizeof(*iommu->bases),
GFP_KERNEL);
if (!iommu->bases)
@ -1226,6 +1324,8 @@ static int rk_iommu_probe(struct platform_device *pdev)
}
}
dma_set_mask_and_coherent(dev, rk_ops->dma_bit_mask);
return 0;
err_remove_sysfs:
iommu_device_sysfs_remove(&iommu->iommu);
@ -1277,8 +1377,31 @@ static const struct dev_pm_ops rk_iommu_pm_ops = {
pm_runtime_force_resume)
};
static struct rk_iommu_ops iommu_data_ops_v1 = {
.pt_address = &rk_dte_pt_address,
.mk_dtentries = &rk_mk_dte,
.mk_ptentries = &rk_mk_pte,
.dte_addr_phys = &rk_dte_addr_phys,
.dma_addr_dte = &rk_dma_addr_dte,
.dma_bit_mask = DMA_BIT_MASK(32),
};
static struct rk_iommu_ops iommu_data_ops_v2 = {
.pt_address = &rk_dte_pt_address_v2,
.mk_dtentries = &rk_mk_dte_v2,
.mk_ptentries = &rk_mk_pte_v2,
.dte_addr_phys = &rk_dte_addr_phys_v2,
.dma_addr_dte = &rk_dma_addr_dte_v2,
.dma_bit_mask = DMA_BIT_MASK(40),
};
static const struct of_device_id rk_iommu_dt_ids[] = {
{ .compatible = "rockchip,iommu" },
{ .compatible = "rockchip,iommu",
.data = &iommu_data_ops_v1,
},
{ .compatible = "rockchip,rk3568-iommu",
.data = &iommu_data_ops_v2,
},
{ /* sentinel */ }
};

View File

@ -10,11 +10,11 @@
#include <linux/amba/bus.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-map-ops.h>
#include <linux/freezer.h>
#include <linux/interval_tree.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/of_iommu.h>
#include <linux/of_platform.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
@ -904,6 +904,15 @@ err_free_dev:
return ERR_PTR(ret);
}
/*
 * probe_finalize callback of viommu_ops.
 *
 * Only does something when CONFIG_ARCH_HAS_SETUP_DMA_OPS is not set: it
 * then resets the device's DMA ops and calls iommu_setup_dma_ops() with a
 * DMA base of 0 and a limit of U64_MAX. With the option set the function
 * body is empty.
 */
static void viommu_probe_finalize(struct device *dev)
{
#ifndef CONFIG_ARCH_HAS_SETUP_DMA_OPS
/* First clear the DMA ops in case we're switching from a DMA domain */
set_dma_ops(dev, NULL);
iommu_setup_dma_ops(dev, 0, U64_MAX);
#endif
}
static void viommu_release_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
@ -940,6 +949,7 @@ static struct iommu_ops viommu_ops = {
.iova_to_phys = viommu_iova_to_phys,
.iotlb_sync = viommu_iotlb_sync,
.probe_device = viommu_probe_device,
.probe_finalize = viommu_probe_finalize,
.release_device = viommu_release_device,
.device_group = viommu_device_group,
.get_resv_regions = viommu_get_resv_regions,

View File

@ -17,7 +17,6 @@
#include <linux/slab.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/of_iommu.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>

View File

@ -588,6 +588,9 @@ struct acpi_pci_root {
bool acpi_dma_supported(struct acpi_device *adev);
enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
int acpi_iommu_fwspec_init(struct device *dev, u32 id,
struct fwnode_handle *fwnode,
const struct iommu_ops *ops);
int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
u64 *size);
int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr,

View File

@ -259,9 +259,12 @@ void acpi_numa_x2apic_affinity_init(struct acpi_srat_x2apic_cpu_affinity *pa);
#ifdef CONFIG_ARM64
void acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa);
void acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size);
#else
static inline void
acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa) { }
static inline void
acpi_arch_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) { }
#endif
int acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma);

View File

@ -34,9 +34,8 @@ struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
void acpi_configure_pmsi_domain(struct device *dev);
int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id);
/* IOMMU interface */
void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
const u32 *id_in);
int iort_dma_get_ranges(struct device *dev, u64 *size);
int iort_iommu_configure_id(struct device *dev, const u32 *id_in);
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
#else
@ -48,11 +47,10 @@ static inline struct irq_domain *iort_get_device_domain(
{ return NULL; }
static inline void acpi_configure_pmsi_domain(struct device *dev) { }
/* IOMMU interface */
static inline void iort_dma_setup(struct device *dev, u64 *dma_addr,
u64 *size) { }
static inline const struct iommu_ops *iort_iommu_configure_id(
struct device *dev, const u32 *id_in)
{ return NULL; }
static inline int iort_dma_get_ranges(struct device *dev, u64 *size)
{ return -ENODEV; }
static inline int iort_iommu_configure_id(struct device *dev, const u32 *id_in)
{ return -ENODEV; }
static inline
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
{ return 0; }

19
include/linux/acpi_viot.h Normal file
View File

@ -0,0 +1,19 @@
/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef __ACPI_VIOT_H__
#define __ACPI_VIOT_H__
#include <linux/acpi.h>
/*
 * Interface to the ACPI VIOT support. The real implementations are only
 * declared when CONFIG_ACPI_VIOT is enabled; otherwise the inline stubs
 * below are used.
 */
#ifdef CONFIG_ACPI_VIOT
void __init acpi_viot_init(void);
int viot_iommu_configure(struct device *dev);
#else
/* Stub: nothing to initialise without CONFIG_ACPI_VIOT. */
static inline void acpi_viot_init(void) {}
/* Stub: always fails with -ENODEV without CONFIG_ACPI_VIOT. */
static inline int viot_iommu_configure(struct device *dev)
{
return -ENODEV;
}
#endif
#endif /* __ACPI_VIOT_H__ */

View File

@ -19,7 +19,7 @@ int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
void iommu_put_dma_cookie(struct iommu_domain *domain);
/* Setup call for arch DMA mapping code */
void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size);
void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
/* The DMA API isn't _quite_ the whole story, though... */
/*
@ -50,7 +50,7 @@ struct msi_msg;
struct device;
static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base,
u64 size)
u64 dma_limit)
{
}

View File

@ -537,7 +537,7 @@ struct context_entry {
struct dmar_domain {
int nid; /* node id */
unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
unsigned int iommu_refcnt[DMAR_UNITS_SUPPORTED];
/* Refcount of devices per iommu */
@ -546,7 +546,10 @@ struct dmar_domain {
* domain ids are 16 bit wide according
* to VT-d spec, section 9.3 */
bool has_iotlb_device;
u8 has_iotlb_device: 1;
u8 iommu_coherency: 1; /* indicate coherency of iommu access */
u8 iommu_snooping: 1; /* indicate snooping control feature */
struct list_head devices; /* all devices' list */
struct list_head subdevices; /* all subdevices' list */
struct iova_domain iovad; /* iova's that belong to this domain */
@ -558,10 +561,6 @@ struct dmar_domain {
int agaw;
int flags; /* flags to find out type of domain */
int iommu_coherency;/* indicate coherency of iommu access */
int iommu_snooping; /* indicate snooping control feature*/
int iommu_count; /* reference count of iommu */
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
@ -606,6 +605,8 @@ struct intel_iommu {
struct completion prq_complete;
struct ioasid_allocator_ops pasid_allocator; /* Custom allocator for PASIDs */
#endif
struct iopf_queue *iopf_queue;
unsigned char iopfq_name[16];
struct q_inval *qi; /* Queued invalidation info */
u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@ -619,6 +620,7 @@ struct intel_iommu {
u32 flags; /* Software defined flags */
struct dmar_drhd_unit *drhd;
void *perf_statistic;
};
/* Per subdevice private data */
@ -776,6 +778,7 @@ struct intel_svm_dev {
struct device *dev;
struct intel_iommu *iommu;
struct iommu_sva sva;
unsigned long prq_seq_number;
u32 pasid;
int users;
u16 did;
@ -791,7 +794,6 @@ struct intel_svm {
u32 pasid;
int gpasid; /* In case that guest PASID is different from host PASID */
struct list_head devs;
struct list_head list;
};
#else
static inline void intel_svm_check(struct intel_iommu *iommu) {}
@ -827,4 +829,32 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
#define intel_iommu_enabled (0)
#endif
/*
 * Format a page request descriptor as human-readable text.
 *
 * @str:  output buffer
 * @size: size of @str in bytes
 * @dw0..@dw3: the four quadwords of the descriptor
 *
 * The text contains the requester id, address, the r/w/x/p/l flag
 * characters ('-' when a flag is clear), the PASID and the page request
 * group index taken from @dw0/@dw1. When bit 9 of @dw0 is set, @dw2 and
 * @dw3 are appended as private data.
 *
 * Returns @str. The output is truncated if @size is too small.
 */
static inline const char *decode_prq_descriptor(char *str, size_t size,
		u64 dw0, u64 dw1, u64 dw2, u64 dw3)
{
	char *buf = str;
	int bytes;

	bytes = snprintf(buf, size,
			 "rid=0x%llx addr=0x%llx %c%c%c%c%c pasid=0x%llx index=0x%llx",
			 FIELD_GET(GENMASK_ULL(31, 16), dw0),
			 FIELD_GET(GENMASK_ULL(63, 12), dw1),
			 dw1 & BIT_ULL(0) ? 'r' : '-',
			 dw1 & BIT_ULL(1) ? 'w' : '-',
			 dw0 & BIT_ULL(52) ? 'x' : '-',
			 dw0 & BIT_ULL(53) ? 'p' : '-',
			 dw1 & BIT_ULL(2) ? 'l' : '-',
			 FIELD_GET(GENMASK_ULL(51, 32), dw0),
			 FIELD_GET(GENMASK_ULL(11, 3), dw1));

	/*
	 * snprintf() reports the length the text would have had without
	 * truncation. If it did not fit there is no room for anything more,
	 * and advancing by that length would move past the buffer and wrap
	 * the remaining size.
	 */
	if (bytes < 0 || (size_t)bytes >= size)
		return str;

	/* Private Data */
	if (dw0 & BIT_ULL(9)) {
		size -= bytes;
		buf += bytes;
		snprintf(buf, size, " private=0x%llx/0x%llx\n", dw2, dw3);
	}

	return str;
}
#endif

View File

@ -2,29 +2,18 @@
#ifndef __OF_IOMMU_H
#define __OF_IOMMU_H
#include <linux/device.h>
#include <linux/iommu.h>
#include <linux/of.h>
struct device;
struct device_node;
struct iommu_ops;
#ifdef CONFIG_OF_IOMMU
extern int of_get_dma_window(struct device_node *dn, const char *prefix,
int index, unsigned long *busno, dma_addr_t *addr,
size_t *size);
extern const struct iommu_ops *of_iommu_configure(struct device *dev,
struct device_node *master_np,
const u32 *id);
#else
static inline int of_get_dma_window(struct device_node *dn, const char *prefix,
int index, unsigned long *busno, dma_addr_t *addr,
size_t *size)
{
return -EINVAL;
}
static inline const struct iommu_ops *of_iommu_configure(struct device *dev,
struct device_node *master_np,
const u32 *id)

View File

@ -15,6 +15,8 @@
#include <linux/tracepoint.h>
#include <linux/intel-iommu.h>
#define MSG_MAX 256
TRACE_EVENT(qi_submit,
TP_PROTO(struct intel_iommu *iommu, u64 qw0, u64 qw1, u64 qw2, u64 qw3),
@ -51,6 +53,41 @@ TRACE_EVENT(qi_submit,
__entry->qw0, __entry->qw1, __entry->qw2, __entry->qw3
)
);
/*
 * prq_report - trace a page request.
 *
 * Records the four descriptor quadwords, a sequence number and the names
 * of the IOMMU and the device. The descriptor is decoded into text by
 * decode_prq_descriptor() when the event is printed, using a scratch
 * buffer of MSG_MAX bytes reserved in the event.
 */
TRACE_EVENT(prq_report,
	TP_PROTO(struct intel_iommu *iommu, struct device *dev,
		 u64 dw0, u64 dw1, u64 dw2, u64 dw3,
		 unsigned long seq),

	TP_ARGS(iommu, dev, dw0, dw1, dw2, dw3, seq),

	TP_STRUCT__entry(
		__field(u64, dw0)
		__field(u64, dw1)
		__field(u64, dw2)
		__field(u64, dw3)
		__field(unsigned long, seq)
		__string(iommu, iommu->name)
		__string(dev, dev_name(dev))
		__dynamic_array(char, buff, MSG_MAX)
	),

	TP_fast_assign(
		__entry->dw0 = dw0;
		__entry->dw1 = dw1;
		__entry->dw2 = dw2;
		__entry->dw3 = dw3;
		__entry->seq = seq;
		__assign_str(iommu, iommu->name);
		__assign_str(dev, dev_name(dev));
	),

	/* seq is an unsigned long, so it is printed with %lu. */
	TP_printk("%s/%s seq# %lu: %s",
		__get_str(iommu), __get_str(dev), __entry->seq,
		decode_prq_descriptor(__get_str(buff), MSG_MAX, __entry->dw0,
				      __entry->dw1, __entry->dw2, __entry->dw3)
	)
);
#endif /* _TRACE_INTEL_IOMMU_H */
/* This part must be outside protection */