diff --git a/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml b/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml index e20016f12017..a8409db4a3e3 100644 --- a/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/allwinner,sun50i-h6-iommu.yaml @@ -17,7 +17,12 @@ properties: The content of the cell is the master ID. compatible: - const: allwinner,sun50i-h6-iommu + oneOf: + - const: allwinner,sun50i-h6-iommu + - const: allwinner,sun50i-h616-iommu + - items: + - const: allwinner,sun55i-a523-iommu + - const: allwinner,sun50i-h616-iommu reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 5c130cf06a21..280b4e49f219 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -86,6 +86,7 @@ properties: - qcom,qcm2290-smmu-500 - qcom,sa8775p-smmu-500 - qcom,sc7280-smmu-500 + - qcom,sc8180x-smmu-500 - qcom,sc8280xp-smmu-500 - qcom,sm6115-smmu-500 - qcom,sm6125-smmu-500 @@ -95,6 +96,7 @@ properties: - qcom,sm8450-smmu-500 - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 + - qcom,x1e80100-smmu-500 - const: qcom,adreno-smmu - const: qcom,smmu-500 - const: arm,mmu-500 @@ -415,6 +417,7 @@ allOf: compatible: contains: enum: + - qcom,sc8180x-smmu-500 - qcom,sm6350-smmu-v2 - qcom,sm7150-smmu-v2 - qcom,sm8150-smmu-500 @@ -520,6 +523,7 @@ allOf: - enum: - qcom,sm8550-smmu-500 - qcom,sm8650-smmu-500 + - qcom,x1e80100-smmu-500 - const: qcom,adreno-smmu - const: qcom,smmu-500 - const: arm,mmu-500 @@ -550,14 +554,12 @@ allOf: - nvidia,smmu-500 - qcom,qdu1000-smmu-500 - qcom,sc7180-smmu-500 - - qcom,sc8180x-smmu-500 - qcom,sdm670-smmu-500 - qcom,sdm845-smmu-500 - qcom,sdx55-smmu-500 - qcom,sdx65-smmu-500 - qcom,sm6350-smmu-500 - qcom,sm6375-smmu-500 - - qcom,x1e80100-smmu-500 then: properties: clock-names: false diff --git a/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt b/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt deleted file mode 100644 index 20236385f26e..000000000000 --- a/Documentation/devicetree/bindings/iommu/msm,iommu-v0.txt +++ /dev/null @@ -1,64 +0,0 @@ -* QCOM IOMMU - -The MSM IOMMU is an implementation compatible with the ARM VMSA short -descriptor page tables. It provides address translation for bus masters outside -of the CPU, each connected to the IOMMU through a port called micro-TLB. - -Required Properties: - - - compatible: Must contain "qcom,apq8064-iommu". - - reg: Base address and size of the IOMMU registers. - - interrupts: Specifiers for the MMU fault interrupts. For instances that - support secure mode two interrupts must be specified, for non-secure and - secure mode, in that order. For instances that don't support secure mode a - single interrupt must be specified. - - #iommu-cells: The number of cells needed to specify the stream id. This - is always 1. - - qcom,ncb: The total number of context banks in the IOMMU. - - clocks : List of clocks to be used during SMMU register access. See - Documentation/devicetree/bindings/clock/clock-bindings.txt - for information about the format. For each clock specified - here, there must be a corresponding entry in clock-names - (see below). - - - clock-names : List of clock names corresponding to the clocks specified in - the "clocks" property (above). - Should be "smmu_pclk" for specifying the interface clock - required for iommu's register accesses. - Should be "smmu_clk" for specifying the functional clock - required by iommu for bus accesses. - -Each bus master connected to an IOMMU must reference the IOMMU in its device -node with the following property: - - - iommus: A reference to the IOMMU in multiple cells. The first cell is a - phandle to the IOMMU and the second cell is the stream id. - A single master device can be connected to more than one iommu - and multiple contexts in each of the iommu. So multiple entries - are required to list all the iommus and the stream ids that the - master is connected to. - -Example: mdp iommu and its bus master - - mdp_port0: iommu@7500000 { - compatible = "qcom,apq8064-iommu"; - #iommu-cells = <1>; - clock-names = - "smmu_pclk", - "smmu_clk"; - clocks = - <&mmcc SMMU_AHB_CLK>, - <&mmcc MDP_AXI_CLK>; - reg = <0x07500000 0x100000>; - interrupts = - , - ; - qcom,ncb = <2>; - }; - - mdp: qcom,mdp@5100000 { - compatible = "qcom,mdp"; - ... - iommus = <&mdp_port0 0 - &mdp_port0 2>; - }; diff --git a/Documentation/devicetree/bindings/iommu/qcom,apq8064-iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,apq8064-iommu.yaml new file mode 100644 index 000000000000..9f83f851e61a --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/qcom,apq8064-iommu.yaml @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- + +$id: http://devicetree.org/schemas/iommu/qcom,apq8064-iommu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm APQ8064 IOMMU + +maintainers: + - David Heidelberg + +description: + The MSM IOMMU is an implementation compatible with the ARM VMSA short + descriptor page tables. It provides address translation for bus masters + outside of the CPU, each connected to the IOMMU through a port called micro-TLB. + +properties: + compatible: + const: qcom,apq8064-iommu + + clocks: + items: + - description: interface clock for register accesses + - description: functional clock for bus accesses + + clock-names: + items: + - const: smmu_pclk + - const: iommu_clk + + reg: + maxItems: 1 + + interrupts: + description: Specifiers for the MMU fault interrupts. + minItems: 1 + items: + - description: non-secure mode interrupt + - description: secure mode interrupt (for instances which supports it) + + "#iommu-cells": + const: 1 + description: Each IOMMU specifier describes a single Stream ID. + + qcom,ncb: + $ref: /schemas/types.yaml#/definitions/uint32 + description: The total number of context banks in the IOMMU. + minimum: 1 + maximum: 4 + +required: + - reg + - interrupts + - clocks + - clock-names + - qcom,ncb + +additionalProperties: false + +examples: + - | + #include + #include + #include + + iommu@7500000 { + compatible = "qcom,apq8064-iommu"; + reg = <0x07500000 0x100000>; + interrupts = , + ; + clocks = <&clk SMMU_AHB_CLK>, + <&clk MDP_AXI_CLK>; + clock-names = "smmu_pclk", + "iommu_clk"; + #iommu-cells = <1>; + qcom,ncb = <2>; + }; diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml index a74eb899c381..571e5746d177 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml @@ -25,6 +25,7 @@ properties: - const: qcom,msm-iommu-v1 - items: - enum: + - qcom,msm8953-iommu - qcom,msm8976-iommu - const: qcom,msm-iommu-v2 diff --git a/Documentation/devicetree/bindings/pci/host-generic-pci.yaml b/Documentation/devicetree/bindings/pci/host-generic-pci.yaml index 3484e0b4b412..bcfbaf5582cc 100644 --- a/Documentation/devicetree/bindings/pci/host-generic-pci.yaml +++ b/Documentation/devicetree/bindings/pci/host-generic-pci.yaml @@ -110,6 +110,12 @@ properties: iommu-map-mask: true msi-parent: true + ats-supported: + description: + Indicates that a PCIe host controller supports ATS, and can handle Memory + Requests with Address Type (AT). + type: boolean + required: - compatible - reg diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index 8a251d71fa6e..274cc7546efc 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -45,7 +45,6 @@ Devices and I/O accelerators/ocxl dma-buf-alloc-exchange gpio/index - iommu iommufd media/index dcdbas diff --git a/Documentation/userspace-api/iommu.rst b/Documentation/userspace-api/iommu.rst deleted file mode 100644 index d3108c1519d5..000000000000 --- a/Documentation/userspace-api/iommu.rst +++ /dev/null @@ -1,209 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 -.. iommu: - -===================================== -IOMMU Userspace API -===================================== - -IOMMU UAPI is used for virtualization cases where communications are -needed between physical and virtual IOMMU drivers. For baremetal -usage, the IOMMU is a system device which does not need to communicate -with userspace directly. - -The primary use cases are guest Shared Virtual Address (SVA) and -guest IO virtual address (IOVA), wherein the vIOMMU implementation -relies on the physical IOMMU and for this reason requires interactions -with the host driver. - -.. contents:: :local: - -Functionalities -=============== -Communications of user and kernel involve both directions. The -supported user-kernel APIs are as follows: - -1. Bind/Unbind guest PASID (e.g. Intel VT-d) -2. Bind/Unbind guest PASID table (e.g. ARM SMMU) -3. Invalidate IOMMU caches upon guest requests -4. Report errors to the guest and serve page requests - -Requirements -============ -The IOMMU UAPIs are generic and extensible to meet the following -requirements: - -1. Emulated and para-virtualised vIOMMUs -2. Multiple vendors (Intel VT-d, ARM SMMU, etc.) -3. Extensions to the UAPI shall not break existing userspace - -Interfaces -========== -Although the data structures defined in IOMMU UAPI are self-contained, -there are no user API functions introduced. Instead, IOMMU UAPI is -designed to work with existing user driver frameworks such as VFIO. - -Extension Rules & Precautions ------------------------------ -When IOMMU UAPI gets extended, the data structures can *only* be -modified in two ways: - -1. Adding new fields by re-purposing the padding[] field. No size change. -2. Adding new union members at the end. May increase the structure sizes. - -No new fields can be added *after* the variable sized union in that it -will break backward compatibility when offset moves. A new flag must -be introduced whenever a change affects the structure using either -method. The IOMMU driver processes the data based on flags which -ensures backward compatibility. - -Version field is only reserved for the unlikely event of UAPI upgrade -at its entirety. - -It's *always* the caller's responsibility to indicate the size of the -structure passed by setting argsz appropriately. -Though at the same time, argsz is user provided data which is not -trusted. The argsz field allows the user app to indicate how much data -it is providing; it's still the kernel's responsibility to validate -whether it's correct and sufficient for the requested operation. - -Compatibility Checking ----------------------- -When IOMMU UAPI extension results in some structure size increase, -IOMMU UAPI code shall handle the following cases: - -1. User and kernel has exact size match -2. An older user with older kernel header (smaller UAPI size) running on a - newer kernel (larger UAPI size) -3. A newer user with newer kernel header (larger UAPI size) running - on an older kernel. -4. A malicious/misbehaving user passing illegal/invalid size but within - range. The data may contain garbage. - -Feature Checking ----------------- -While launching a guest with vIOMMU, it is strongly advised to check -the compatibility upfront, as some subsequent errors happening during -vIOMMU operation, such as cache invalidation failures cannot be nicely -escalated to the guest due to IOMMU specifications. This can lead to -catastrophic failures for the users. - -User applications such as QEMU are expected to import kernel UAPI -headers. Backward compatibility is supported per feature flags. -For example, an older QEMU (with older kernel header) can run on newer -kernel. Newer QEMU (with new kernel header) may refuse to initialize -on an older kernel if new feature flags are not supported by older -kernel. Simply recompiling existing code with newer kernel header should -not be an issue in that only existing flags are used. - -IOMMU vendor driver should report the below features to IOMMU UAPI -consumers (e.g. via VFIO). - -1. IOMMU_NESTING_FEAT_SYSWIDE_PASID -2. IOMMU_NESTING_FEAT_BIND_PGTBL -3. IOMMU_NESTING_FEAT_BIND_PASID_TABLE -4. IOMMU_NESTING_FEAT_CACHE_INVLD -5. IOMMU_NESTING_FEAT_PAGE_REQUEST - -Take VFIO as example, upon request from VFIO userspace (e.g. QEMU), -VFIO kernel code shall query IOMMU vendor driver for the support of -the above features. Query result can then be reported back to the -userspace caller. Details can be found in -Documentation/driver-api/vfio.rst. - - -Data Passing Example with VFIO ------------------------------- -As the ubiquitous userspace driver framework, VFIO is already IOMMU -aware and shares many key concepts such as device model, group, and -protection domain. Other user driver frameworks can also be extended -to support IOMMU UAPI but it is outside the scope of this document. - -In this tight-knit VFIO-IOMMU interface, the ultimate consumer of the -IOMMU UAPI data is the host IOMMU driver. VFIO facilitates user-kernel -transport, capability checking, security, and life cycle management of -process address space ID (PASID). - -VFIO layer conveys the data structures down to the IOMMU driver. It -follows the pattern below:: - - struct { - __u32 argsz; - __u32 flags; - __u8 data[]; - }; - -Here data[] contains the IOMMU UAPI data structures. VFIO has the -freedom to bundle the data as well as parse data size based on its own flags. - -In order to determine the size and feature set of the user data, argsz -and flags (or the equivalent) are also embedded in the IOMMU UAPI data -structures. - -A "__u32 argsz" field is *always* at the beginning of each structure. - -For example: -:: - - struct iommu_cache_invalidate_info { - __u32 argsz; - #define IOMMU_CACHE_INVALIDATE_INFO_VERSION_1 1 - __u32 version; - /* IOMMU paging structure cache */ - #define IOMMU_CACHE_INV_TYPE_IOTLB (1 << 0) /* IOMMU IOTLB */ - #define IOMMU_CACHE_INV_TYPE_DEV_IOTLB (1 << 1) /* Device IOTLB */ - #define IOMMU_CACHE_INV_TYPE_PASID (1 << 2) /* PASID cache */ - #define IOMMU_CACHE_INV_TYPE_NR (3) - __u8 cache; - __u8 granularity; - __u8 padding[6]; - union { - struct iommu_inv_pasid_info pasid_info; - struct iommu_inv_addr_info addr_info; - } granu; - }; - -VFIO is responsible for checking its own argsz and flags. It then -invokes appropriate IOMMU UAPI functions. The user pointers are passed -to the IOMMU layer for further processing. The responsibilities are -divided as follows: - -- Generic IOMMU layer checks argsz range based on UAPI data in the - current kernel version. - -- Generic IOMMU layer checks content of the UAPI data for non-zero - reserved bits in flags, padding fields, and unsupported version. - This is to ensure not breaking userspace in the future when these - fields or flags are used. - -- Vendor IOMMU driver checks argsz based on vendor flags. UAPI data - is consumed based on flags. Vendor driver has access to - unadulterated argsz value in case of vendor specific future - extensions. Currently, it does not perform the copy_from_user() - itself. A __user pointer can be provided in some future scenarios - where there's vendor data outside of the structure definition. - -IOMMU code treats UAPI data in two categories: - -- structure contains vendor data - (Example: iommu_uapi_cache_invalidate()) - -- structure contains only generic data - (Example: iommu_uapi_sva_bind_gpasid()) - - - -Sharing UAPI with in-kernel users ---------------------------------- -For UAPIs that are shared with in-kernel users, a wrapper function is -provided to distinguish the callers. For example, - -Userspace caller :: - - int iommu_uapi_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, - void __user *udata) - -In-kernel caller :: - - int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t ioasid); diff --git a/MAINTAINERS b/MAINTAINERS index b0ae5711828b..ee99476f4b8f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11685,7 +11685,6 @@ L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git F: Documentation/devicetree/bindings/iommu/ -F: Documentation/userspace-api/iommu.rst F: drivers/iommu/ F: include/linux/iommu.h F: include/linux/iova.h diff --git a/arch/arm64/boot/dts/arm/fvp-base-revc.dts b/arch/arm64/boot/dts/arm/fvp-base-revc.dts index 60472d65a355..85f1c15cc65d 100644 --- a/arch/arm64/boot/dts/arm/fvp-base-revc.dts +++ b/arch/arm64/boot/dts/arm/fvp-base-revc.dts @@ -243,6 +243,7 @@ iommu-map = <0x0 &smmu 0x0 0x10000>; dma-coherent; + ats-supported; }; smmu: iommu@2b400000 { diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index c0b1c2c19444..1b39e9ae7ac1 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1221,10 +1221,10 @@ static bool iort_pci_rc_supports_ats(struct acpi_iort_node *node) static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, u32 streamid) { - const struct iommu_ops *ops; struct fwnode_handle *iort_fwnode; - if (!node) + /* If there's no SMMU driver at all, give up now */ + if (!node || !iort_iommu_driver_enabled(node->type)) return -ENODEV; iort_fwnode = iort_get_fwnode(node); @@ -1232,19 +1232,10 @@ static int iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, return -ENODEV; /* - * If the ops look-up fails, this means that either - * the SMMU drivers have not been probed yet or that - * the SMMU drivers are not built in the kernel; - * Depending on whether the SMMU drivers are built-in - * in the kernel or not, defer the IOMMU configuration - * or just abort it. + * If the SMMU drivers are enabled but not loaded/probed + * yet, this will defer. */ - ops = iommu_ops_from_fwnode(iort_fwnode); - if (!ops) - return iort_iommu_driver_enabled(node->type) ? - -EPROBE_DEFER : -ENODEV; - - return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode, ops); + return acpi_iommu_fwspec_init(dev, streamid, iort_fwnode); } struct iort_pci_alias_info { diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 0aa20623525a..59771412686b 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1606,38 +1606,25 @@ int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map) #ifdef CONFIG_IOMMU_API int acpi_iommu_fwspec_init(struct device *dev, u32 id, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops) + struct fwnode_handle *fwnode) { int ret; - ret = iommu_fwspec_init(dev, fwnode, ops); + ret = iommu_fwspec_init(dev, fwnode); if (ret) return ret; return iommu_fwspec_add_ids(dev, &id, 1); } -static inline const struct iommu_ops *acpi_iommu_fwspec_ops(struct device *dev) -{ - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); - - return fwspec ? fwspec->ops : NULL; -} - static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) { int err; - const struct iommu_ops *ops; /* Serialise to make dev->iommu stable under our potential fwspec */ mutex_lock(&iommu_probe_device_lock); - /* - * If we already translated the fwspec there is nothing left to do, - * return the iommu_ops. - */ - ops = acpi_iommu_fwspec_ops(dev); - if (ops) { + /* If we already translated the fwspec there is nothing left to do */ + if (dev_iommu_fwspec_get(dev)) { mutex_unlock(&iommu_probe_device_lock); return 0; } @@ -1654,22 +1641,13 @@ static int acpi_iommu_configure_id(struct device *dev, const u32 *id_in) if (!err && dev->bus) err = iommu_probe_device(dev); - if (err == -EPROBE_DEFER) - return err; - if (err) { - dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); - return err; - } - if (!acpi_iommu_fwspec_ops(dev)) - return -ENODEV; - return 0; + return err; } #else /* !CONFIG_IOMMU_API */ int acpi_iommu_fwspec_init(struct device *dev, u32 id, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops) + struct fwnode_handle *fwnode) { return -ENODEV; } @@ -1703,6 +1681,8 @@ int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, ret = acpi_iommu_configure_id(dev, input_id); if (ret == -EPROBE_DEFER) return -EPROBE_DEFER; + if (ret) + dev_dbg(dev, "Adding to IOMMU failed: %d\n", ret); arch_setup_dma_ops(dev, attr == DEV_DMA_COHERENT); diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c index c8025921c129..2aa69a2fba73 100644 --- a/drivers/acpi/viot.c +++ b/drivers/acpi/viot.c @@ -307,21 +307,14 @@ void __init acpi_viot_init(void) static int viot_dev_iommu_init(struct device *dev, struct viot_iommu *viommu, u32 epid) { - const struct iommu_ops *ops; - - if (!viommu) + if (!viommu || !IS_ENABLED(CONFIG_VIRTIO_IOMMU)) return -ENODEV; /* We're not translating ourself */ if (device_match_fwnode(dev, viommu->fwnode)) return -EINVAL; - ops = iommu_ops_from_fwnode(viommu->fwnode); - if (!ops) - return IS_ENABLED(CONFIG_VIRTIO_IOMMU) ? - -EPROBE_DEFER : -ENODEV; - - return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode, ops); + return acpi_iommu_fwspec_init(dev, epid, viommu->fwnode); } static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data) diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index d5512037c38b..2a94e82316f9 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -407,10 +407,13 @@ struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks) struct msm_iommu *iommu; int ret; - domain = iommu_domain_alloc(dev->bus); - if (!domain) + if (!device_iommu_mapped(dev)) return NULL; + domain = iommu_paging_domain_alloc(dev); + if (IS_ERR(domain)) + return ERR_CAST(domain); + iommu_set_pgtable_quirks(domain, quirks); iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 84e0f41e7dfa..f948b76f984d 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -443,11 +443,11 @@ struct usnic_uiom_pd *usnic_uiom_alloc_pd(struct device *dev) if (!pd) return ERR_PTR(-ENOMEM); - pd->domain = domain = iommu_domain_alloc(dev->bus); - if (!domain) { + pd->domain = domain = iommu_paging_domain_alloc(dev); + if (IS_ERR(domain)) { usnic_err("Failed to allocate IOMMU domain"); kfree(pd); - return ERR_PTR(-ENOMEM); + return ERR_CAST(domain); } iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index c04584be3089..a82f10054aec 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -394,6 +394,7 @@ config ARM_SMMU_V3 select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select GENERIC_MSI_IRQ + select IOMMUFD_DRIVER if IOMMUFD help Support for implementations of the ARM System MMU architecture version 3 providing translation support to a PCIe root complex. diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index 78ac37c5ccc1..664e91c88748 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -158,7 +158,7 @@ static u64 *v2_alloc_pte(int nid, u64 *pgd, unsigned long iova, __npte = set_pgtable_attr(page); /* pte could have been changed somewhere. */ - if (cmpxchg64(pte, __pte, __npte) != __pte) + if (!try_cmpxchg64(pte, &__pte, __npte)) iommu_free_page(page); else if (IOMMU_PTE_PRESENT(__pte)) *updated = true; diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index 014a997753a8..355173d1441d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o -arm_smmu_v3-objs-y += arm-smmu-v3.o -arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o -arm_smmu_v3-objs := $(arm_smmu_v3-objs-y) +arm_smmu_v3-y := arm-smmu-v3.o +arm_smmu_v3-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o obj-$(CONFIG_ARM_SMMU_V3_KUNIT_TEST) += arm-smmu-v3-test.o diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index e490ffb38015..a7c36654dee5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -13,103 +13,31 @@ #include "arm-smmu-v3.h" #include "../../io-pgtable-arm.h" -struct arm_smmu_mmu_notifier { - struct mmu_notifier mn; - struct arm_smmu_ctx_desc *cd; - bool cleared; - refcount_t refs; - struct list_head list; - struct arm_smmu_domain *domain; -}; - -#define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn) - -struct arm_smmu_bond { - struct mm_struct *mm; - struct arm_smmu_mmu_notifier *smmu_mn; - struct list_head list; -}; - -#define sva_to_bond(handle) \ - container_of(handle, struct arm_smmu_bond, sva) - static DEFINE_MUTEX(sva_lock); -static void +static void __maybe_unused arm_smmu_update_s1_domain_cd_entry(struct arm_smmu_domain *smmu_domain) { - struct arm_smmu_master *master; + struct arm_smmu_master_domain *master_domain; struct arm_smmu_cd target_cd; unsigned long flags; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, devices_elm) { + struct arm_smmu_master *master = master_domain->master; struct arm_smmu_cd *cdptr; - /* S1 domains only support RID attachment right now */ - cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID); + cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid); if (WARN_ON(!cdptr)) continue; arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); - arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, + arm_smmu_write_cd_entry(master, master_domain->ssid, cdptr, &target_cd); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); } -/* - * Check if the CPU ASID is available on the SMMU side. If a private context - * descriptor is using it, try to replace it. - */ -static struct arm_smmu_ctx_desc * -arm_smmu_share_asid(struct mm_struct *mm, u16 asid) -{ - int ret; - u32 new_asid; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_device *smmu; - struct arm_smmu_domain *smmu_domain; - - cd = xa_load(&arm_smmu_asid_xa, asid); - if (!cd) - return NULL; - - if (cd->mm) { - if (WARN_ON(cd->mm != mm)) - return ERR_PTR(-EINVAL); - /* All devices bound to this mm use the same cd struct. */ - refcount_inc(&cd->refs); - return cd; - } - - smmu_domain = container_of(cd, struct arm_smmu_domain, cd); - smmu = smmu_domain->smmu; - - ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd, - XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); - if (ret) - return ERR_PTR(-ENOSPC); - /* - * Race with unmap: TLB invalidations will start targeting the new ASID, - * which isn't assigned yet. We'll do an invalidate-all on the old ASID - * later, so it doesn't matter. - */ - cd->asid = new_asid; - /* - * Update ASID and invalidate CD in all associated masters. There will - * be some overlap between use of both ASIDs, until we invalidate the - * TLB. - */ - arm_smmu_update_s1_domain_cd_entry(smmu_domain); - - /* Invalidate TLB entries previously associated with that context */ - arm_smmu_tlb_inv_asid(smmu, asid); - - xa_erase(&arm_smmu_asid_xa, asid); - return NULL; -} - static u64 page_size_to_cd(void) { static_assert(PAGE_SIZE == SZ_4K || PAGE_SIZE == SZ_16K || @@ -187,69 +115,6 @@ void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, } EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_sva_cd); -static struct arm_smmu_ctx_desc *arm_smmu_alloc_shared_cd(struct mm_struct *mm) -{ - u16 asid; - int err = 0; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_ctx_desc *ret = NULL; - - /* Don't free the mm until we release the ASID */ - mmgrab(mm); - - asid = arm64_mm_context_get(mm); - if (!asid) { - err = -ESRCH; - goto out_drop_mm; - } - - cd = kzalloc(sizeof(*cd), GFP_KERNEL); - if (!cd) { - err = -ENOMEM; - goto out_put_context; - } - - refcount_set(&cd->refs, 1); - - mutex_lock(&arm_smmu_asid_lock); - ret = arm_smmu_share_asid(mm, asid); - if (ret) { - mutex_unlock(&arm_smmu_asid_lock); - goto out_free_cd; - } - - err = xa_insert(&arm_smmu_asid_xa, asid, cd, GFP_KERNEL); - mutex_unlock(&arm_smmu_asid_lock); - - if (err) - goto out_free_asid; - - cd->asid = asid; - cd->mm = mm; - - return cd; - -out_free_asid: - arm_smmu_free_asid(cd); -out_free_cd: - kfree(cd); -out_put_context: - arm64_mm_context_put(mm); -out_drop_mm: - mmdrop(mm); - return err < 0 ? ERR_PTR(err) : ret; -} - -static void arm_smmu_free_shared_cd(struct arm_smmu_ctx_desc *cd) -{ - if (arm_smmu_free_asid(cd)) { - /* Unpin ASID */ - arm64_mm_context_put(cd->mm); - mmdrop(cd->mm); - kfree(cd); - } -} - /* * Cloned from the MAX_TLBI_OPS in arch/arm64/include/asm/tlbflush.h, this * is used as a threshold to replace per-page TLBI commands to issue in the @@ -264,8 +129,8 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, unsigned long start, unsigned long end) { - struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; + struct arm_smmu_domain *smmu_domain = + container_of(mn, struct arm_smmu_domain, mmu_notifier); size_t size; /* @@ -282,62 +147,50 @@ static void arm_smmu_mm_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, size = 0; } - if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_BTM)) { - if (!size) - arm_smmu_tlb_inv_asid(smmu_domain->smmu, - smmu_mn->cd->asid); - else - arm_smmu_tlb_inv_range_asid(start, size, - smmu_mn->cd->asid, - PAGE_SIZE, false, - smmu_domain); - } + if (!size) + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + else + arm_smmu_tlb_inv_range_asid(start, size, smmu_domain->cd.asid, + PAGE_SIZE, false, smmu_domain); - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), start, - size); + arm_smmu_atc_inv_domain(smmu_domain, start, size); } static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) { - struct arm_smmu_mmu_notifier *smmu_mn = mn_to_smmu(mn); - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; - struct arm_smmu_master *master; + struct arm_smmu_domain *smmu_domain = + container_of(mn, struct arm_smmu_domain, mmu_notifier); + struct arm_smmu_master_domain *master_domain; unsigned long flags; - mutex_lock(&sva_lock); - if (smmu_mn->cleared) { - mutex_unlock(&sva_lock); - return; - } - /* * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. */ spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + struct arm_smmu_master *master = master_domain->master; struct arm_smmu_cd target; struct arm_smmu_cd *cdptr; - cdptr = arm_smmu_get_cd_ptr(master, mm_get_enqcmd_pasid(mm)); + cdptr = arm_smmu_get_cd_ptr(master, master_domain->ssid); if (WARN_ON(!cdptr)) continue; - arm_smmu_make_sva_cd(&target, master, NULL, smmu_mn->cd->asid); - arm_smmu_write_cd_entry(master, mm_get_enqcmd_pasid(mm), cdptr, + arm_smmu_make_sva_cd(&target, master, NULL, + smmu_domain->cd.asid); + arm_smmu_write_cd_entry(master, master_domain->ssid, cdptr, &target); } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); - arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, 0); - - smmu_mn->cleared = true; - mutex_unlock(&sva_lock); + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + arm_smmu_atc_inv_domain(smmu_domain, 0, 0); } static void arm_smmu_mmu_notifier_free(struct mmu_notifier *mn) { - kfree(mn_to_smmu(mn)); + kfree(container_of(mn, struct arm_smmu_domain, mmu_notifier)); } static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { @@ -346,127 +199,6 @@ static const struct mmu_notifier_ops arm_smmu_mmu_notifier_ops = { .free_notifier = arm_smmu_mmu_notifier_free, }; -/* Allocate or get existing MMU notifier for this {domain, mm} pair */ -static struct arm_smmu_mmu_notifier * -arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, - struct mm_struct *mm) -{ - int ret; - struct arm_smmu_ctx_desc *cd; - struct arm_smmu_mmu_notifier *smmu_mn; - - list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) { - if (smmu_mn->mn.mm == mm) { - refcount_inc(&smmu_mn->refs); - return smmu_mn; - } - } - - cd = arm_smmu_alloc_shared_cd(mm); - if (IS_ERR(cd)) - return ERR_CAST(cd); - - smmu_mn = kzalloc(sizeof(*smmu_mn), GFP_KERNEL); - if (!smmu_mn) { - ret = -ENOMEM; - goto err_free_cd; - } - - refcount_set(&smmu_mn->refs, 1); - smmu_mn->cd = cd; - smmu_mn->domain = smmu_domain; - smmu_mn->mn.ops = &arm_smmu_mmu_notifier_ops; - - ret = mmu_notifier_register(&smmu_mn->mn, mm); - if (ret) { - kfree(smmu_mn); - goto err_free_cd; - } - - list_add(&smmu_mn->list, &smmu_domain->mmu_notifiers); - return smmu_mn; - -err_free_cd: - arm_smmu_free_shared_cd(cd); - return ERR_PTR(ret); -} - -static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) -{ - struct mm_struct *mm = smmu_mn->mn.mm; - struct arm_smmu_ctx_desc *cd = smmu_mn->cd; - struct arm_smmu_domain *smmu_domain = smmu_mn->domain; - - if (!refcount_dec_and_test(&smmu_mn->refs)) - return; - - list_del(&smmu_mn->list); - - /* - * If we went through clear(), we've already invalidated, and no - * new TLB entry can have been formed. - */ - if (!smmu_mn->cleared) { - arm_smmu_tlb_inv_asid(smmu_domain->smmu, cd->asid); - arm_smmu_atc_inv_domain(smmu_domain, mm_get_enqcmd_pasid(mm), 0, - 0); - } - - /* Frees smmu_mn */ - mmu_notifier_put(&smmu_mn->mn); - arm_smmu_free_shared_cd(cd); -} - -static int __arm_smmu_sva_bind(struct device *dev, ioasid_t pasid, - struct mm_struct *mm) -{ - int ret; - struct arm_smmu_cd target; - struct arm_smmu_cd *cdptr; - struct arm_smmu_bond *bond; - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct arm_smmu_domain *smmu_domain; - - if (!(domain->type & __IOMMU_DOMAIN_PAGING)) - return -ENODEV; - smmu_domain = to_smmu_domain(domain); - if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) - return -ENODEV; - - if (!master || !master->sva_enabled) - return -ENODEV; - - bond = kzalloc(sizeof(*bond), GFP_KERNEL); - if (!bond) - return -ENOMEM; - - bond->mm = mm; - - bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); - if (IS_ERR(bond->smmu_mn)) { - ret = PTR_ERR(bond->smmu_mn); - goto err_free_bond; - } - - cdptr = arm_smmu_alloc_cd_ptr(master, mm_get_enqcmd_pasid(mm)); - if (!cdptr) { - ret = -ENOMEM; - goto err_put_notifier; - } - arm_smmu_make_sva_cd(&target, master, mm, bond->smmu_mn->cd->asid); - arm_smmu_write_cd_entry(master, pasid, cdptr, &target); - - list_add(&bond->list, &master->bonds); - return 0; - -err_put_notifier: - arm_smmu_mmu_notifier_put(bond->smmu_mn); -err_free_bond: - kfree(bond); - return ret; -} - bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { unsigned long reg, fld; @@ -583,11 +315,6 @@ int arm_smmu_master_enable_sva(struct arm_smmu_master *master) int arm_smmu_master_disable_sva(struct arm_smmu_master *master) { mutex_lock(&sva_lock); - if (!list_empty(&master->bonds)) { - dev_err(master->dev, "cannot disable SVA, device is bound\n"); - mutex_unlock(&sva_lock); - return -EBUSY; - } arm_smmu_master_sva_disable_iopf(master); master->sva_enabled = false; mutex_unlock(&sva_lock); @@ -604,51 +331,51 @@ void arm_smmu_sva_notifier_synchronize(void) mmu_notifier_synchronize(); } -void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t id) -{ - struct mm_struct *mm = domain->mm; - struct arm_smmu_bond *bond = NULL, *t; - struct arm_smmu_master *master = dev_iommu_priv_get(dev); - - mutex_lock(&sva_lock); - - arm_smmu_clear_cd(master, id); - - list_for_each_entry(t, &master->bonds, list) { - if (t->mm == mm) { - bond = t; - break; - } - } - - if (!WARN_ON(!bond)) { - list_del(&bond->list); - arm_smmu_mmu_notifier_put(bond->smmu_mn); - kfree(bond); - } - mutex_unlock(&sva_lock); -} - static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id) { - int ret = 0; - struct mm_struct *mm = domain->mm; + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_cd target; + int ret; - if (mm_get_enqcmd_pasid(mm) != id) + /* Prevent arm_smmu_mm_release from being called while we are attaching */ + if (!mmget_not_zero(domain->mm)) return -EINVAL; - mutex_lock(&sva_lock); - ret = __arm_smmu_sva_bind(dev, id, mm); - mutex_unlock(&sva_lock); + /* + * This does not need the arm_smmu_asid_lock because SVA domains never + * get reassigned + */ + arm_smmu_make_sva_cd(&target, master, domain->mm, smmu_domain->cd.asid); + ret = arm_smmu_set_pasid(master, smmu_domain, id, &target); + mmput(domain->mm); return ret; } static void arm_smmu_sva_domain_free(struct iommu_domain *domain) { - kfree(domain); + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + /* + * Ensure the ASID is empty in the iommu cache before allowing reuse. + */ + arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_domain->cd.asid); + + /* + * Notice that the arm_smmu_mm_arch_invalidate_secondary_tlbs op can + * still be called/running at this point. We allow the ASID to be + * reused, and if there is a race then it just suffers harmless + * unnecessary invalidation. + */ + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); + + /* + * Actual free is defered to the SRCU callback + * arm_smmu_mmu_notifier_free() + */ + mmu_notifier_put(&smmu_domain->mmu_notifier); } static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { @@ -656,14 +383,38 @@ static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { .free = arm_smmu_sva_domain_free }; -struct iommu_domain *arm_smmu_sva_domain_alloc(void) +struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) { - struct iommu_domain *domain; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_domain *smmu_domain; + u32 asid; + int ret; - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) - return NULL; - domain->ops = &arm_smmu_sva_domain_ops; + smmu_domain = arm_smmu_domain_alloc(); + if (IS_ERR(smmu_domain)) + return ERR_CAST(smmu_domain); + smmu_domain->domain.type = IOMMU_DOMAIN_SVA; + smmu_domain->domain.ops = &arm_smmu_sva_domain_ops; + smmu_domain->smmu = smmu; - return domain; + ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, + XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); + if (ret) + goto err_free; + + smmu_domain->cd.asid = asid; + smmu_domain->mmu_notifier.ops = &arm_smmu_mmu_notifier_ops; + ret = mmu_notifier_register(&smmu_domain->mmu_notifier, mm); + if (ret) + goto err_asid; + + return &smmu_domain->domain; + +err_asid: + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); +err_free: + kfree(smmu_domain); + return ERR_PTR(ret); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c index 315e487fd990..cceb737a7001 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-test.c @@ -144,6 +144,14 @@ static void arm_smmu_v3_test_ste_expect_transition( KUNIT_EXPECT_MEMEQ(test, target->data, cur_copy.data, sizeof(cur_copy)); } +static void arm_smmu_v3_test_ste_expect_non_hitless_transition( + struct kunit *test, const struct arm_smmu_ste *cur, + const struct arm_smmu_ste *target, unsigned int num_syncs_expected) +{ + arm_smmu_v3_test_ste_expect_transition(test, cur, target, + num_syncs_expected, false); +} + static void arm_smmu_v3_test_ste_expect_hitless_transition( struct kunit *test, const struct arm_smmu_ste *cur, const struct arm_smmu_ste *target, unsigned int num_syncs_expected) @@ -155,6 +163,7 @@ static void arm_smmu_v3_test_ste_expect_hitless_transition( static const dma_addr_t fake_cdtab_dma_addr = 0xF0F0F0F0F0F0; static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, + unsigned int s1dss, const dma_addr_t dma_addr) { struct arm_smmu_master master = { @@ -164,7 +173,7 @@ static void arm_smmu_test_make_cdtable_ste(struct arm_smmu_ste *ste, .smmu = &smmu, }; - arm_smmu_make_cdtable_ste(ste, &master); + arm_smmu_make_cdtable_ste(ste, &master, true, s1dss); } static void arm_smmu_v3_write_ste_test_bypass_to_abort(struct kunit *test) @@ -194,7 +203,8 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_abort(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &abort_ste, NUM_EXPECTED_SYNCS(2)); } @@ -203,7 +213,8 @@ static void arm_smmu_v3_write_ste_test_abort_to_cdtable(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &abort_ste, &ste, NUM_EXPECTED_SYNCS(2)); } @@ -212,7 +223,8 @@ static void arm_smmu_v3_write_ste_test_cdtable_to_bypass(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &ste, &bypass_ste, NUM_EXPECTED_SYNCS(3)); } @@ -221,17 +233,59 @@ static void arm_smmu_v3_write_ste_test_bypass_to_cdtable(struct kunit *test) { struct arm_smmu_ste ste; - arm_smmu_test_make_cdtable_ste(&ste, fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); arm_smmu_v3_test_ste_expect_hitless_transition(test, &bypass_ste, &ste, NUM_EXPECTED_SYNCS(3)); } +static void arm_smmu_v3_write_ste_test_cdtable_s1dss_change(struct kunit *test) +{ + struct arm_smmu_ste ste; + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + + /* + * Flipping s1dss on a CD table STE only involves changes to the second + * qword of an STE and can be done in a single write. + */ + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(1)); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &s1dss_bypass, &ste, NUM_EXPECTED_SYNCS(1)); +} + +static void +arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass(struct kunit *test) +{ + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &s1dss_bypass, &bypass_ste, NUM_EXPECTED_SYNCS(2)); +} + +static void +arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass(struct kunit *test) +{ + struct arm_smmu_ste s1dss_bypass; + + arm_smmu_test_make_cdtable_ste(&s1dss_bypass, STRTAB_STE_1_S1DSS_BYPASS, + fake_cdtab_dma_addr); + arm_smmu_v3_test_ste_expect_hitless_transition( + test, &bypass_ste, &s1dss_bypass, NUM_EXPECTED_SYNCS(2)); +} + static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, bool ats_enabled) { struct arm_smmu_master master = { .smmu = &smmu, - .ats_enabled = ats_enabled, }; struct io_pgtable io_pgtable = {}; struct arm_smmu_domain smmu_domain = { @@ -247,7 +301,7 @@ static void arm_smmu_test_make_s2_ste(struct arm_smmu_ste *ste, io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.sl = 3; io_pgtable.cfg.arm_lpae_s2_cfg.vtcr.tsz = 4; - arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain); + arm_smmu_make_s2_domain_ste(ste, &master, &smmu_domain, ats_enabled); } static void arm_smmu_v3_write_ste_test_s2_to_abort(struct kunit *test) @@ -286,6 +340,48 @@ static void arm_smmu_v3_write_ste_test_bypass_to_s2(struct kunit *test) NUM_EXPECTED_SYNCS(2)); } +static void arm_smmu_v3_write_ste_test_s1_to_s2(struct kunit *test) +{ + struct arm_smmu_ste s1_ste; + struct arm_smmu_ste s2_ste; + + arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_s2_ste(&s2_ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &s1_ste, &s2_ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_v3_write_ste_test_s2_to_s1(struct kunit *test) +{ + struct arm_smmu_ste s1_ste; + struct arm_smmu_ste s2_ste; + + arm_smmu_test_make_cdtable_ste(&s1_ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_s2_ste(&s2_ste, true); + arm_smmu_v3_test_ste_expect_hitless_transition(test, &s2_ste, &s1_ste, + NUM_EXPECTED_SYNCS(3)); +} + +static void arm_smmu_v3_write_ste_test_non_hitless(struct kunit *test) +{ + struct arm_smmu_ste ste; + struct arm_smmu_ste ste_2; + + /* + * Although no flow resembles this in practice, one way to force an STE + * update to be non-hitless is to change its CD table pointer as well as + * s1 dss field in the same update. + */ + arm_smmu_test_make_cdtable_ste(&ste, STRTAB_STE_1_S1DSS_SSID0, + fake_cdtab_dma_addr); + arm_smmu_test_make_cdtable_ste(&ste_2, STRTAB_STE_1_S1DSS_BYPASS, + 0x4B4B4b4B4B); + arm_smmu_v3_test_ste_expect_non_hitless_transition( + test, &ste, &ste_2, NUM_EXPECTED_SYNCS(3)); +} + static void arm_smmu_v3_test_cd_expect_transition( struct kunit *test, const struct arm_smmu_cd *cur, const struct arm_smmu_cd *target, unsigned int num_syncs_expected, @@ -439,10 +535,16 @@ static struct kunit_case arm_smmu_v3_test_cases[] = { KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_cdtable), KUNIT_CASE(arm_smmu_v3_write_ste_test_cdtable_to_bypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_cdtable), + KUNIT_CASE(arm_smmu_v3_write_ste_test_cdtable_s1dss_change), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s1dssbypass_to_stebypass), + KUNIT_CASE(arm_smmu_v3_write_ste_test_stebypass_to_s1dssbypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_abort), KUNIT_CASE(arm_smmu_v3_write_ste_test_abort_to_s2), KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_bypass), KUNIT_CASE(arm_smmu_v3_write_ste_test_bypass_to_s2), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s1_to_s2), + KUNIT_CASE(arm_smmu_v3_write_ste_test_s2_to_s1), + KUNIT_CASE(arm_smmu_v3_write_ste_test_non_hitless), KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_clear), KUNIT_CASE(arm_smmu_v3_write_cd_test_s1_change_asid), KUNIT_CASE(arm_smmu_v3_write_cd_test_sva_clear), @@ -465,4 +567,5 @@ static struct kunit_suite arm_smmu_v3_test_module = { kunit_test_suites(&arm_smmu_v3_test_module); MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); +MODULE_DESCRIPTION("KUnit tests for arm-smmu-v3 driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ab415e107054..a31460f9f3d4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "arm-smmu-v3.h" #include "../../dma-iommu.h" @@ -36,6 +37,9 @@ module_param(disable_msipolling, bool, 0444); MODULE_PARM_DESC(disable_msipolling, "Disable MSI-based polling for CMD_SYNC completion."); +static struct iommu_ops arm_smmu_ops; +static struct iommu_dirty_ops arm_smmu_dirty_ops; + enum arm_smmu_msi_index { EVTQ_MSI_INDEX, GERROR_MSI_INDEX, @@ -80,7 +84,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { }; static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_device *smmu); + struct arm_smmu_device *smmu, u32 flags); static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); static void parse_driver_options(struct arm_smmu_device *smmu) @@ -991,6 +995,14 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | STRTAB_STE_1_EATS); used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); + + /* + * See 13.5 Summary of attribute/permission configuration fields + * for the SHCFG behavior. + */ + if (FIELD_GET(STRTAB_STE_1_S1DSS, le64_to_cpu(ent[1])) == + STRTAB_STE_1_S1DSS_BYPASS) + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); } /* S2 translates */ @@ -1211,8 +1223,8 @@ struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES]; } -struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, - u32 ssid) +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, + u32 ssid) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; @@ -1289,6 +1301,8 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, struct arm_smmu_cd *cdptr, const struct arm_smmu_cd *target) { + bool target_valid = target->data[0] & cpu_to_le64(CTXDESC_CD_0_V); + bool cur_valid = cdptr->data[0] & cpu_to_le64(CTXDESC_CD_0_V); struct arm_smmu_cd_writer cd_writer = { .writer = { .ops = &arm_smmu_cd_writer_ops, @@ -1297,6 +1311,13 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, .ssid = ssid, }; + if (ssid != IOMMU_NO_PASID && cur_valid != target_valid) { + if (cur_valid) + master->cd_table.used_ssids--; + else + master->cd_table.used_ssids++; + } + arm_smmu_write_entry(&cd_writer.writer, cdptr->data, target->data); } @@ -1331,6 +1352,12 @@ void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, CTXDESC_CD_0_ASET | FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) ); + + /* To enable dirty flag update, set both Access flag and dirty state update */ + if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_HD) + target->data[0] |= cpu_to_le64(CTXDESC_CD_0_TCR_HA | + CTXDESC_CD_0_TCR_HD); + target->data[1] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.ttbr & CTXDESC_CD_1_TTB0_MASK); target->data[3] = cpu_to_le64(pgtbl_cfg->arm_lpae_s1_cfg.mair); @@ -1430,30 +1457,13 @@ static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) cd_table->cdtab = NULL; } -bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd) -{ - bool free; - struct arm_smmu_ctx_desc *old_cd; - - if (!cd->asid) - return false; - - free = refcount_dec_and_test(&cd->refs); - if (free) { - old_cd = xa_erase(&arm_smmu_asid_xa, cd->asid); - WARN_ON(old_cd != cd); - } - return free; -} - /* Stream table manipulation functions */ -static void -arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) +static void arm_smmu_write_strtab_l1_desc(__le64 *dst, dma_addr_t l2ptr_dma) { u64 val = 0; - val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span); - val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; + val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, STRTAB_SPLIT + 1); + val |= l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK; /* The HW has 64 bit atomicity with stores to the L2 STE table */ WRITE_ONCE(*dst, cpu_to_le64(val)); @@ -1538,7 +1548,8 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_bypass_ste); VISIBLE_IF_KUNIT void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master) + struct arm_smmu_master *master, bool ats_enabled, + unsigned int s1dss) { struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; struct arm_smmu_device *smmu = master->smmu; @@ -1552,7 +1563,7 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax)); target->data[1] = cpu_to_le64( - FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | + FIELD_PREP(STRTAB_STE_1_S1DSS, s1dss) | FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | @@ -1561,7 +1572,12 @@ void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, STRTAB_STE_1_S1STALLD : 0) | FIELD_PREP(STRTAB_STE_1_EATS, - master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + + if ((smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) && + s1dss == STRTAB_STE_1_S1DSS_BYPASS) + target->data[1] |= cpu_to_le64(FIELD_PREP( + STRTAB_STE_1_SHCFG, STRTAB_STE_1_SHCFG_INCOMING)); if (smmu->features & ARM_SMMU_FEAT_E2H) { /* @@ -1591,7 +1607,8 @@ EXPORT_SYMBOL_IF_KUNIT(arm_smmu_make_cdtable_ste); VISIBLE_IF_KUNIT void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain) + struct arm_smmu_domain *smmu_domain, + bool ats_enabled) { struct arm_smmu_s2_cfg *s2_cfg = &smmu_domain->s2_cfg; const struct io_pgtable_cfg *pgtbl_cfg = @@ -1608,7 +1625,7 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, target->data[1] = cpu_to_le64( FIELD_PREP(STRTAB_STE_1_EATS, - master->ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); + ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0)); if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR) target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, @@ -1655,6 +1672,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) { size_t size; void *strtab; + dma_addr_t l2ptr_dma; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; struct arm_smmu_strtab_l1_desc *desc = &cfg->l1_desc[sid >> STRTAB_SPLIT]; @@ -1664,8 +1682,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; - desc->span = STRTAB_SPLIT + 1; - desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &desc->l2ptr_dma, + desc->l2ptr = dmam_alloc_coherent(smmu->dev, size, &l2ptr_dma, GFP_KERNEL); if (!desc->l2ptr) { dev_err(smmu->dev, @@ -1675,7 +1692,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) } arm_smmu_init_initial_stes(desc->l2ptr, 1 << STRTAB_SPLIT); - arm_smmu_write_strtab_l1_desc(strtab, desc); + arm_smmu_write_strtab_l1_desc(strtab, l2ptr_dma); return 0; } @@ -1995,13 +2012,14 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, cmd->atc.size = log2_span; } -static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) +static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, + ioasid_t ssid) { int i; struct arm_smmu_cmdq_ent cmd; struct arm_smmu_cmdq_batch cmds; - arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); + arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd); cmds.num = 0; for (i = 0; i < master->num_streams; i++) { @@ -2012,13 +2030,13 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) return arm_smmu_cmdq_batch_submit(master->smmu, &cmds); } -int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, +int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, unsigned long iova, size_t size) { + struct arm_smmu_master_domain *master_domain; int i; unsigned long flags; struct arm_smmu_cmdq_ent cmd; - struct arm_smmu_master *master; struct arm_smmu_cmdq_batch cmds; if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) @@ -2041,15 +2059,18 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, if (!atomic_read(&smmu_domain->nr_ats_masters)) return 0; - arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); - cmds.num = 0; spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + struct arm_smmu_master *master = master_domain->master; + if (!master->ats_enabled) continue; + arm_smmu_atc_inv_to_cmd(master_domain->ssid, iova, size, &cmd); + for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd); @@ -2081,7 +2102,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } - arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, 0, 0); } static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, @@ -2179,7 +2200,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. */ - arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size); + arm_smmu_atc_inv_domain(smmu_domain, iova, size); } void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, @@ -2220,6 +2241,13 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = { .tlb_add_page = arm_smmu_tlb_inv_page_nosync, }; +static bool arm_smmu_dbm_capable(struct arm_smmu_device *smmu) +{ + u32 features = (ARM_SMMU_FEAT_HD | ARM_SMMU_FEAT_COHERENCY); + + return (smmu->features & features) == features; +} + /* IOMMU API */ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) { @@ -2232,17 +2260,26 @@ static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) case IOMMU_CAP_NOEXEC: case IOMMU_CAP_DEFERRED_FLUSH: return true; + case IOMMU_CAP_DIRTY_TRACKING: + return arm_smmu_dbm_capable(master->smmu); default: return false; } } -static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) +struct arm_smmu_domain *arm_smmu_domain_alloc(void) { + struct arm_smmu_domain *smmu_domain; - if (type == IOMMU_DOMAIN_SVA) - return arm_smmu_sva_domain_alloc(); - return ERR_PTR(-EOPNOTSUPP); + smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); + if (!smmu_domain) + return ERR_PTR(-ENOMEM); + + mutex_init(&smmu_domain->init_mutex); + INIT_LIST_HEAD(&smmu_domain->devices); + spin_lock_init(&smmu_domain->devices_lock); + + return smmu_domain; } static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) @@ -2254,20 +2291,15 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) * We can't really do anything meaningful until we've added a * master. */ - smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL); - if (!smmu_domain) - return ERR_PTR(-ENOMEM); - - mutex_init(&smmu_domain->init_mutex); - INIT_LIST_HEAD(&smmu_domain->devices); - spin_lock_init(&smmu_domain->devices_lock); - INIT_LIST_HEAD(&smmu_domain->mmu_notifiers); + smmu_domain = arm_smmu_domain_alloc(); + if (IS_ERR(smmu_domain)) + return ERR_CAST(smmu_domain); if (dev) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); int ret; - ret = arm_smmu_domain_finalise(smmu_domain, master->smmu); + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, 0); if (ret) { kfree(smmu_domain); return ERR_PTR(ret); @@ -2276,7 +2308,7 @@ static struct iommu_domain *arm_smmu_domain_alloc_paging(struct device *dev) return &smmu_domain->domain; } -static void arm_smmu_domain_free(struct iommu_domain *domain) +static void arm_smmu_domain_free_paging(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -2287,7 +2319,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { /* Prevent SVA from touching the CD while we're freeing it */ mutex_lock(&arm_smmu_asid_lock); - arm_smmu_free_asid(&smmu_domain->cd); + xa_erase(&arm_smmu_asid_xa, smmu_domain->cd.asid); mutex_unlock(&arm_smmu_asid_lock); } else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; @@ -2302,14 +2334,12 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_device *smmu, struct arm_smmu_domain *smmu_domain) { int ret; - u32 asid; + u32 asid = 0; struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; - refcount_set(&cd->refs, 1); - /* Prevent SVA from modifying the ASID until it is written to the CD */ mutex_lock(&arm_smmu_asid_lock); - ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd, + ret = xa_alloc(&arm_smmu_asid_xa, &asid, smmu_domain, XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); cd->asid = (u16)asid; mutex_unlock(&arm_smmu_asid_lock); @@ -2333,15 +2363,15 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_device *smmu, } static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_device *smmu) + struct arm_smmu_device *smmu, u32 flags) { int ret; - unsigned long ias, oas; enum io_pgtable_fmt fmt; struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; int (*finalise_stage_fn)(struct arm_smmu_device *smmu, struct arm_smmu_domain *smmu_domain); + bool enable_dirty = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING; /* Restrict the stage to what we can actually support */ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) @@ -2349,17 +2379,31 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2)) smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + pgtbl_cfg = (struct io_pgtable_cfg) { + .pgsize_bitmap = smmu->pgsize_bitmap, + .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, + .tlb = &arm_smmu_flush_ops, + .iommu_dev = smmu->dev, + }; + switch (smmu_domain->stage) { - case ARM_SMMU_DOMAIN_S1: - ias = (smmu->features & ARM_SMMU_FEAT_VAX) ? 52 : 48; - ias = min_t(unsigned long, ias, VA_BITS); - oas = smmu->ias; + case ARM_SMMU_DOMAIN_S1: { + unsigned long ias = (smmu->features & + ARM_SMMU_FEAT_VAX) ? 52 : 48; + + pgtbl_cfg.ias = min_t(unsigned long, ias, VA_BITS); + pgtbl_cfg.oas = smmu->ias; + if (enable_dirty) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_HD; fmt = ARM_64_LPAE_S1; finalise_stage_fn = arm_smmu_domain_finalise_s1; break; + } case ARM_SMMU_DOMAIN_S2: - ias = smmu->ias; - oas = smmu->oas; + if (enable_dirty) + return -EOPNOTSUPP; + pgtbl_cfg.ias = smmu->ias; + pgtbl_cfg.oas = smmu->oas; fmt = ARM_64_LPAE_S2; finalise_stage_fn = arm_smmu_domain_finalise_s2; break; @@ -2367,15 +2411,6 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, return -EINVAL; } - pgtbl_cfg = (struct io_pgtable_cfg) { - .pgsize_bitmap = smmu->pgsize_bitmap, - .ias = ias, - .oas = oas, - .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, - .tlb = &arm_smmu_flush_ops, - .iommu_dev = smmu->dev, - }; - pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; @@ -2383,6 +2418,8 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, smmu_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap; smmu_domain->domain.geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; smmu_domain->domain.geometry.force_aperture = true; + if (enable_dirty && smmu_domain->stage == ARM_SMMU_DOMAIN_S1) + smmu_domain->domain.dirty_ops = &arm_smmu_dirty_ops; ret = finalise_stage_fn(smmu, smmu_domain); if (ret < 0) { @@ -2420,6 +2457,13 @@ static void arm_smmu_install_ste_for_dev(struct arm_smmu_master *master, int i, j; struct arm_smmu_device *smmu = master->smmu; + master->cd_table.in_ste = + FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(target->data[0])) == + STRTAB_STE_0_CFG_S1_TRANS; + master->ste_ats_enabled = + FIELD_GET(STRTAB_STE_1_EATS, le64_to_cpu(target->data[1])) == + STRTAB_STE_1_EATS_TRANS; + for (i = 0; i < master->num_streams; ++i) { u32 sid = master->streams[i].id; struct arm_smmu_ste *step = @@ -2451,46 +2495,24 @@ static bool arm_smmu_ats_supported(struct arm_smmu_master *master) return dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)); } -static void arm_smmu_enable_ats(struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain) +static void arm_smmu_enable_ats(struct arm_smmu_master *master) { size_t stu; struct pci_dev *pdev; struct arm_smmu_device *smmu = master->smmu; - /* Don't enable ATS at the endpoint if it's not enabled in the STE */ - if (!master->ats_enabled) - return; - /* Smallest Translation Unit: log2 of the smallest supported granule */ stu = __ffs(smmu->pgsize_bitmap); pdev = to_pci_dev(master->dev); - atomic_inc(&smmu_domain->nr_ats_masters); /* * ATC invalidation of PASID 0 causes the entire ATC to be flushed. */ - arm_smmu_atc_inv_master(master); + arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); if (pci_enable_ats(pdev, stu)) dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); } -static void arm_smmu_disable_ats(struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain) -{ - if (!master->ats_enabled) - return; - - pci_disable_ats(to_pci_dev(master->dev)); - /* - * Ensure ATS is disabled at the endpoint before we issue the - * ATC invalidation via the SMMU. - */ - wmb(); - arm_smmu_atc_inv_master(master); - atomic_dec(&smmu_domain->nr_ats_masters); -} - static int arm_smmu_enable_pasid(struct arm_smmu_master *master) { int ret; @@ -2538,56 +2560,216 @@ static void arm_smmu_disable_pasid(struct arm_smmu_master *master) pci_disable_pasid(pdev); } -static void arm_smmu_detach_dev(struct arm_smmu_master *master) +static struct arm_smmu_master_domain * +arm_smmu_find_master_domain(struct arm_smmu_domain *smmu_domain, + struct arm_smmu_master *master, + ioasid_t ssid) { - struct iommu_domain *domain = iommu_get_domain_for_dev(master->dev); - struct arm_smmu_domain *smmu_domain; + struct arm_smmu_master_domain *master_domain; + + lockdep_assert_held(&smmu_domain->devices_lock); + + list_for_each_entry(master_domain, &smmu_domain->devices, + devices_elm) { + if (master_domain->master == master && + master_domain->ssid == ssid) + return master_domain; + } + return NULL; +} + +/* + * If the domain uses the smmu_domain->devices list return the arm_smmu_domain + * structure, otherwise NULL. These domains track attached devices so they can + * issue invalidations. + */ +static struct arm_smmu_domain * +to_smmu_domain_devices(struct iommu_domain *domain) +{ + /* The domain can be NULL only when processing the first attach */ + if (!domain) + return NULL; + if ((domain->type & __IOMMU_DOMAIN_PAGING) || + domain->type == IOMMU_DOMAIN_SVA) + return to_smmu_domain(domain); + return NULL; +} + +static void arm_smmu_remove_master_domain(struct arm_smmu_master *master, + struct iommu_domain *domain, + ioasid_t ssid) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain_devices(domain); + struct arm_smmu_master_domain *master_domain; unsigned long flags; - if (!domain || !(domain->type & __IOMMU_DOMAIN_PAGING)) + if (!smmu_domain) return; - smmu_domain = to_smmu_domain(domain); - arm_smmu_disable_ats(master, smmu_domain); - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_del_init(&master->domain_head); + master_domain = arm_smmu_find_master_domain(smmu_domain, master, ssid); + if (master_domain) { + list_del(&master_domain->devices_elm); + kfree(master_domain); + if (master->ats_enabled) + atomic_dec(&smmu_domain->nr_ats_masters); + } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); +} - master->ats_enabled = false; +struct arm_smmu_attach_state { + /* Inputs */ + struct iommu_domain *old_domain; + struct arm_smmu_master *master; + bool cd_needs_ats; + ioasid_t ssid; + /* Resulting state */ + bool ats_enabled; +}; + +/* + * Start the sequence to attach a domain to a master. The sequence contains three + * steps: + * arm_smmu_attach_prepare() + * arm_smmu_install_ste_for_dev() + * arm_smmu_attach_commit() + * + * If prepare succeeds then the sequence must be completed. The STE installed + * must set the STE.EATS field according to state.ats_enabled. + * + * If the device supports ATS then this determines if EATS should be enabled + * in the STE, and starts sequencing EATS disable if required. + * + * The change of the EATS in the STE and the PCI ATS config space is managed by + * this sequence to be in the right order so that if PCI ATS is enabled then + * STE.ETAS is enabled. + * + * new_domain can be a non-paging domain. In this case ATS will not be enabled, + * and invalidations won't be tracked. + */ +static int arm_smmu_attach_prepare(struct arm_smmu_attach_state *state, + struct iommu_domain *new_domain) +{ + struct arm_smmu_master *master = state->master; + struct arm_smmu_master_domain *master_domain; + struct arm_smmu_domain *smmu_domain = + to_smmu_domain_devices(new_domain); + unsigned long flags; + + /* + * arm_smmu_share_asid() must not see two domains pointing to the same + * arm_smmu_master_domain contents otherwise it could randomly write one + * or the other to the CD. + */ + lockdep_assert_held(&arm_smmu_asid_lock); + + if (smmu_domain || state->cd_needs_ats) { + /* + * The SMMU does not support enabling ATS with bypass/abort. + * When the STE is in bypass (STE.Config[2:0] == 0b100), ATS + * Translation Requests and Translated transactions are denied + * as though ATS is disabled for the stream (STE.EATS == 0b00), + * causing F_BAD_ATS_TREQ and F_TRANSL_FORBIDDEN events + * (IHI0070Ea 5.2 Stream Table Entry). Thus ATS can only be + * enabled if we have arm_smmu_domain, those always have page + * tables. + */ + state->ats_enabled = arm_smmu_ats_supported(master); + } + + if (smmu_domain) { + master_domain = kzalloc(sizeof(*master_domain), GFP_KERNEL); + if (!master_domain) + return -ENOMEM; + master_domain->master = master; + master_domain->ssid = state->ssid; + + /* + * During prepare we want the current smmu_domain and new + * smmu_domain to be in the devices list before we change any + * HW. This ensures that both domains will send ATS + * invalidations to the master until we are done. + * + * It is tempting to make this list only track masters that are + * using ATS, but arm_smmu_share_asid() also uses this to change + * the ASID of a domain, unrelated to ATS. + * + * Notice if we are re-attaching the same domain then the list + * will have two identical entries and commit will remove only + * one of them. + */ + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + if (state->ats_enabled) + atomic_inc(&smmu_domain->nr_ats_masters); + list_add(&master_domain->devices_elm, &smmu_domain->devices); + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + } + + if (!state->ats_enabled && master->ats_enabled) { + pci_disable_ats(to_pci_dev(master->dev)); + /* + * This is probably overkill, but the config write for disabling + * ATS should complete before the STE is configured to generate + * UR to avoid AER noise. + */ + wmb(); + } + return 0; +} + +/* + * Commit is done after the STE/CD are configured with the EATS setting. It + * completes synchronizing the PCI device's ATC and finishes manipulating the + * smmu_domain->devices list. + */ +static void arm_smmu_attach_commit(struct arm_smmu_attach_state *state) +{ + struct arm_smmu_master *master = state->master; + + lockdep_assert_held(&arm_smmu_asid_lock); + + if (state->ats_enabled && !master->ats_enabled) { + arm_smmu_enable_ats(master); + } else if (state->ats_enabled && master->ats_enabled) { + /* + * The translation has changed, flush the ATC. At this point the + * SMMU is translating for the new domain and both the old&new + * domain will issue invalidations. + */ + arm_smmu_atc_inv_master(master, state->ssid); + } else if (!state->ats_enabled && master->ats_enabled) { + /* ATS is being switched off, invalidate the entire ATC */ + arm_smmu_atc_inv_master(master, IOMMU_NO_PASID); + } + master->ats_enabled = state->ats_enabled; + + arm_smmu_remove_master_domain(master, state->old_domain, state->ssid); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) { int ret = 0; - unsigned long flags; struct arm_smmu_ste target; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct arm_smmu_device *smmu; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_attach_state state = { + .old_domain = iommu_get_domain_for_dev(dev), + .ssid = IOMMU_NO_PASID, + }; struct arm_smmu_master *master; struct arm_smmu_cd *cdptr; if (!fwspec) return -ENOENT; - master = dev_iommu_priv_get(dev); + state.master = master = dev_iommu_priv_get(dev); smmu = master->smmu; - /* - * Checking that SVA is disabled ensures that this device isn't bound to - * any mm, and can be safely detached from its old domain. Bonds cannot - * be removed concurrently since we're holding the group mutex. - */ - if (arm_smmu_master_sva_enabled(master)) { - dev_err(dev, "cannot attach - SVA enabled\n"); - return -EBUSY; - } - mutex_lock(&smmu_domain->init_mutex); if (!smmu_domain->smmu) { - ret = arm_smmu_domain_finalise(smmu_domain, smmu); + ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); } else if (smmu_domain->smmu != smmu) ret = -EINVAL; @@ -2599,7 +2781,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); if (!cdptr) return -ENOMEM; - } + } else if (arm_smmu_ssids_in_use(&master->cd_table)) + return -EBUSY; /* * Prevent arm_smmu_share_asid() from trying to change the ASID @@ -2609,13 +2792,11 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) */ mutex_lock(&arm_smmu_asid_lock); - arm_smmu_detach_dev(master); - - master->ats_enabled = arm_smmu_ats_supported(master); - - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_add(&master->domain_head, &smmu_domain->devices); - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + ret = arm_smmu_attach_prepare(&state, domain); + if (ret) { + mutex_unlock(&arm_smmu_asid_lock); + return ret; + } switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: { @@ -2624,29 +2805,172 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); arm_smmu_write_cd_entry(master, IOMMU_NO_PASID, cdptr, &target_cd); - arm_smmu_make_cdtable_ste(&target, master); + arm_smmu_make_cdtable_ste(&target, master, state.ats_enabled, + STRTAB_STE_1_S1DSS_SSID0); arm_smmu_install_ste_for_dev(master, &target); break; } case ARM_SMMU_DOMAIN_S2: - arm_smmu_make_s2_domain_ste(&target, master, smmu_domain); + arm_smmu_make_s2_domain_ste(&target, master, smmu_domain, + state.ats_enabled); arm_smmu_install_ste_for_dev(master, &target); arm_smmu_clear_cd(master, IOMMU_NO_PASID); break; } - arm_smmu_enable_ats(master, smmu_domain); + arm_smmu_attach_commit(&state); mutex_unlock(&arm_smmu_asid_lock); return 0; } -static int arm_smmu_attach_dev_ste(struct device *dev, - struct arm_smmu_ste *ste) +static int arm_smmu_s1_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_cd target_cd; + int ret = 0; + + mutex_lock(&smmu_domain->init_mutex); + if (!smmu_domain->smmu) + ret = arm_smmu_domain_finalise(smmu_domain, smmu, 0); + else if (smmu_domain->smmu != smmu) + ret = -EINVAL; + mutex_unlock(&smmu_domain->init_mutex); + if (ret) + return ret; + + if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) + return -EINVAL; + + /* + * We can read cd.asid outside the lock because arm_smmu_set_pasid() + * will fix it + */ + arm_smmu_make_s1_cd(&target_cd, master, smmu_domain); + return arm_smmu_set_pasid(master, to_smmu_domain(domain), id, + &target_cd); +} + +static void arm_smmu_update_ste(struct arm_smmu_master *master, + struct iommu_domain *sid_domain, + bool ats_enabled) +{ + unsigned int s1dss = STRTAB_STE_1_S1DSS_TERMINATE; + struct arm_smmu_ste ste; + + if (master->cd_table.in_ste && master->ste_ats_enabled == ats_enabled) + return; + + if (sid_domain->type == IOMMU_DOMAIN_IDENTITY) + s1dss = STRTAB_STE_1_S1DSS_BYPASS; + else + WARN_ON(sid_domain->type != IOMMU_DOMAIN_BLOCKED); + + /* + * Change the STE into a cdtable one with SID IDENTITY/BLOCKED behavior + * using s1dss if necessary. If the cd_table is already installed then + * the S1DSS is correct and this will just update the EATS. Otherwise it + * installs the entire thing. This will be hitless. + */ + arm_smmu_make_cdtable_ste(&ste, master, ats_enabled, s1dss); + arm_smmu_install_ste_for_dev(master, &ste); +} + +int arm_smmu_set_pasid(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, ioasid_t pasid, + struct arm_smmu_cd *cd) +{ + struct iommu_domain *sid_domain = iommu_get_domain_for_dev(master->dev); + struct arm_smmu_attach_state state = { + .master = master, + /* + * For now the core code prevents calling this when a domain is + * already attached, no need to set old_domain. + */ + .ssid = pasid, + }; + struct arm_smmu_cd *cdptr; + int ret; + + /* The core code validates pasid */ + + if (smmu_domain->smmu != master->smmu) + return -EINVAL; + + if (!master->cd_table.in_ste && + sid_domain->type != IOMMU_DOMAIN_IDENTITY && + sid_domain->type != IOMMU_DOMAIN_BLOCKED) + return -EINVAL; + + cdptr = arm_smmu_alloc_cd_ptr(master, pasid); + if (!cdptr) + return -ENOMEM; + + mutex_lock(&arm_smmu_asid_lock); + ret = arm_smmu_attach_prepare(&state, &smmu_domain->domain); + if (ret) + goto out_unlock; + + /* + * We don't want to obtain to the asid_lock too early, so fix up the + * caller set ASID under the lock in case it changed. + */ + cd->data[0] &= ~cpu_to_le64(CTXDESC_CD_0_ASID); + cd->data[0] |= cpu_to_le64( + FIELD_PREP(CTXDESC_CD_0_ASID, smmu_domain->cd.asid)); + + arm_smmu_write_cd_entry(master, pasid, cdptr, cd); + arm_smmu_update_ste(master, sid_domain, state.ats_enabled); + + arm_smmu_attach_commit(&state); + +out_unlock: + mutex_unlock(&arm_smmu_asid_lock); + return ret; +} + +static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, + struct iommu_domain *domain) { struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_domain *smmu_domain; - if (arm_smmu_master_sva_enabled(master)) - return -EBUSY; + smmu_domain = to_smmu_domain(domain); + + mutex_lock(&arm_smmu_asid_lock); + arm_smmu_clear_cd(master, pasid); + if (master->ats_enabled) + arm_smmu_atc_inv_master(master, pasid); + arm_smmu_remove_master_domain(master, &smmu_domain->domain, pasid); + mutex_unlock(&arm_smmu_asid_lock); + + /* + * When the last user of the CD table goes away downgrade the STE back + * to a non-cd_table one. + */ + if (!arm_smmu_ssids_in_use(&master->cd_table)) { + struct iommu_domain *sid_domain = + iommu_get_domain_for_dev(master->dev); + + if (sid_domain->type == IOMMU_DOMAIN_IDENTITY || + sid_domain->type == IOMMU_DOMAIN_BLOCKED) + sid_domain->ops->attach_dev(sid_domain, dev); + } +} + +static void arm_smmu_attach_dev_ste(struct iommu_domain *domain, + struct device *dev, + struct arm_smmu_ste *ste, + unsigned int s1dss) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + struct arm_smmu_attach_state state = { + .master = master, + .old_domain = iommu_get_domain_for_dev(dev), + .ssid = IOMMU_NO_PASID, + }; /* * Do not allow any ASID to be changed while are working on the STE, @@ -2655,15 +2979,23 @@ static int arm_smmu_attach_dev_ste(struct device *dev, mutex_lock(&arm_smmu_asid_lock); /* - * The SMMU does not support enabling ATS with bypass/abort. When the - * STE is in bypass (STE.Config[2:0] == 0b100), ATS Translation Requests - * and Translated transactions are denied as though ATS is disabled for - * the stream (STE.EATS == 0b00), causing F_BAD_ATS_TREQ and - * F_TRANSL_FORBIDDEN events (IHI0070Ea 5.2 Stream Table Entry). + * If the CD table is not in use we can use the provided STE, otherwise + * we use a cdtable STE with the provided S1DSS. */ - arm_smmu_detach_dev(master); - + if (arm_smmu_ssids_in_use(&master->cd_table)) { + /* + * If a CD table has to be present then we need to run with ATS + * on even though the RID will fail ATS queries with UR. This is + * because we have no idea what the PASID's need. + */ + state.cd_needs_ats = true; + arm_smmu_attach_prepare(&state, domain); + arm_smmu_make_cdtable_ste(ste, master, state.ats_enabled, s1dss); + } else { + arm_smmu_attach_prepare(&state, domain); + } arm_smmu_install_ste_for_dev(master, ste); + arm_smmu_attach_commit(&state); mutex_unlock(&arm_smmu_asid_lock); /* @@ -2672,7 +3004,6 @@ static int arm_smmu_attach_dev_ste(struct device *dev, * descriptor from arm_smmu_share_asid(). */ arm_smmu_clear_cd(master, IOMMU_NO_PASID); - return 0; } static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, @@ -2682,7 +3013,8 @@ static int arm_smmu_attach_dev_identity(struct iommu_domain *domain, struct arm_smmu_master *master = dev_iommu_priv_get(dev); arm_smmu_make_bypass_ste(master->smmu, &ste); - return arm_smmu_attach_dev_ste(dev, &ste); + arm_smmu_attach_dev_ste(domain, dev, &ste, STRTAB_STE_1_S1DSS_BYPASS); + return 0; } static const struct iommu_domain_ops arm_smmu_identity_ops = { @@ -2700,7 +3032,9 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain, struct arm_smmu_ste ste; arm_smmu_make_abort_ste(&ste); - return arm_smmu_attach_dev_ste(dev, &ste); + arm_smmu_attach_dev_ste(domain, dev, &ste, + STRTAB_STE_1_S1DSS_TERMINATE); + return 0; } static const struct iommu_domain_ops arm_smmu_blocked_ops = { @@ -2712,6 +3046,37 @@ static struct iommu_domain arm_smmu_blocked_domain = { .ops = &arm_smmu_blocked_ops, }; +static struct iommu_domain * +arm_smmu_domain_alloc_user(struct device *dev, u32 flags, + struct iommu_domain *parent, + const struct iommu_user_data *user_data) +{ + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + const u32 PAGING_FLAGS = IOMMU_HWPT_ALLOC_DIRTY_TRACKING; + struct arm_smmu_domain *smmu_domain; + int ret; + + if (flags & ~PAGING_FLAGS) + return ERR_PTR(-EOPNOTSUPP); + if (parent || user_data) + return ERR_PTR(-EOPNOTSUPP); + + smmu_domain = arm_smmu_domain_alloc(); + if (!smmu_domain) + return ERR_PTR(-ENOMEM); + + smmu_domain->domain.type = IOMMU_DOMAIN_UNMANAGED; + smmu_domain->domain.ops = arm_smmu_ops.default_domain_ops; + ret = arm_smmu_domain_finalise(smmu_domain, master->smmu, flags); + if (ret) + goto err_free; + return &smmu_domain->domain; + +err_free: + kfree(smmu_domain); + return ERR_PTR(ret); +} + static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t pgsize, size_t pgcount, int prot, gfp_t gfp, size_t *mapped) @@ -2882,8 +3247,6 @@ static void arm_smmu_remove_master(struct arm_smmu_master *master) kfree(master->streams); } -static struct iommu_ops arm_smmu_ops; - static struct iommu_device *arm_smmu_probe_device(struct device *dev) { int ret; @@ -2904,8 +3267,6 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev) master->dev = dev; master->smmu = smmu; - INIT_LIST_HEAD(&master->bonds); - INIT_LIST_HEAD(&master->domain_head); dev_iommu_priv_set(dev, master); ret = arm_smmu_insert_master(smmu, master); @@ -2961,6 +3322,27 @@ static void arm_smmu_release_device(struct device *dev) kfree(master); } +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + + return ops->read_and_clear_dirty(ops, iova, size, flags, dirty); +} + +static int arm_smmu_set_dirty_tracking(struct iommu_domain *domain, + bool enabled) +{ + /* + * Always enabled and the dirty bitmap is cleared prior to + * set_dirty_tracking(). + */ + return 0; +} + static struct iommu_group *arm_smmu_device_group(struct device *dev) { struct iommu_group *group; @@ -3087,18 +3469,13 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } -static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid, - struct iommu_domain *domain) -{ - arm_smmu_sva_remove_dev_pasid(domain, dev, pasid); -} - static struct iommu_ops arm_smmu_ops = { .identity_domain = &arm_smmu_identity_domain, .blocked_domain = &arm_smmu_blocked_domain, .capable = arm_smmu_capable, - .domain_alloc = arm_smmu_domain_alloc, .domain_alloc_paging = arm_smmu_domain_alloc_paging, + .domain_alloc_sva = arm_smmu_sva_domain_alloc, + .domain_alloc_user = arm_smmu_domain_alloc_user, .probe_device = arm_smmu_probe_device, .release_device = arm_smmu_release_device, .device_group = arm_smmu_device_group, @@ -3113,16 +3490,22 @@ static struct iommu_ops arm_smmu_ops = { .owner = THIS_MODULE, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = arm_smmu_attach_dev, + .set_dev_pasid = arm_smmu_s1_set_dev_pasid, .map_pages = arm_smmu_map_pages, .unmap_pages = arm_smmu_unmap_pages, .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .enable_nesting = arm_smmu_enable_nesting, - .free = arm_smmu_domain_free, + .free = arm_smmu_domain_free_paging, } }; +static struct iommu_dirty_ops arm_smmu_dirty_ops = { + .read_and_clear_dirty = arm_smmu_read_and_clear_dirty, + .set_dirty_tracking = arm_smmu_set_dirty_tracking, +}; + /* Probing and initialisation functions */ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, struct arm_smmu_queue *q, @@ -3221,25 +3604,6 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) PRIQ_ENT_DWORDS, "priq"); } -static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) -{ - unsigned int i; - struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; - void *strtab = smmu->strtab_cfg.strtab; - - cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, - sizeof(*cfg->l1_desc), GFP_KERNEL); - if (!cfg->l1_desc) - return -ENOMEM; - - for (i = 0; i < cfg->num_l1_ents; ++i) { - arm_smmu_write_strtab_l1_desc(strtab, &cfg->l1_desc[i]); - strtab += STRTAB_L1_DESC_DWORDS << 3; - } - - return 0; -} - static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) { void *strtab; @@ -3275,7 +3639,12 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); cfg->strtab_base_cfg = reg; - return arm_smmu_init_l1_strtab(smmu); + cfg->l1_desc = devm_kcalloc(smmu->dev, cfg->num_l1_ents, + sizeof(*cfg->l1_desc), GFP_KERNEL); + if (!cfg->l1_desc) + return -ENOMEM; + + return 0; } static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu) @@ -3698,6 +4067,28 @@ static void arm_smmu_device_iidr_probe(struct arm_smmu_device *smmu) } } +static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg) +{ + u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD); + u32 hw_features = 0; + + switch (FIELD_GET(IDR0_HTTU, reg)) { + case IDR0_HTTU_ACCESS_DIRTY: + hw_features |= ARM_SMMU_FEAT_HD; + fallthrough; + case IDR0_HTTU_ACCESS: + hw_features |= ARM_SMMU_FEAT_HA; + } + + if (smmu->dev->of_node) + smmu->features |= hw_features; + else if (hw_features != fw_features) + /* ACPI IORT sets the HTTU bits */ + dev_warn(smmu->dev, + "IDR0.HTTU features(0x%x) overridden by FW configuration (0x%x)\n", + hw_features, fw_features); +} + static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) { u32 reg; @@ -3758,6 +4149,8 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) smmu->features |= ARM_SMMU_FEAT_E2H; } + arm_smmu_get_httu(smmu, reg); + /* * The coherency feature as set by FW is used in preference to the ID * register, but warn on mismatch. @@ -3953,6 +4346,14 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY; + switch (FIELD_GET(ACPI_IORT_SMMU_V3_HTTU_OVERRIDE, iort_smmu->flags)) { + case IDR0_HTTU_ACCESS_DIRTY: + smmu->features |= ARM_SMMU_FEAT_HD; + fallthrough; + case IDR0_HTTU_ACCESS: + smmu->features |= ARM_SMMU_FEAT_HA; + } + return 0; } #else diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 1242a086c9f9..14bca41a981b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -33,6 +33,9 @@ #define IDR0_ASID16 (1 << 12) #define IDR0_ATS (1 << 10) #define IDR0_HYP (1 << 9) +#define IDR0_HTTU GENMASK(7, 6) +#define IDR0_HTTU_ACCESS 1 +#define IDR0_HTTU_ACCESS_DIRTY 2 #define IDR0_COHACC (1 << 4) #define IDR0_TTF GENMASK(3, 2) #define IDR0_TTF_AARCH64 2 @@ -301,6 +304,9 @@ struct arm_smmu_cd { #define CTXDESC_CD_0_TCR_IPS GENMASK_ULL(34, 32) #define CTXDESC_CD_0_TCR_TBI0 (1ULL << 38) +#define CTXDESC_CD_0_TCR_HA (1UL << 43) +#define CTXDESC_CD_0_TCR_HD (1UL << 42) + #define CTXDESC_CD_0_AA64 (1UL << 41) #define CTXDESC_CD_0_S (1UL << 44) #define CTXDESC_CD_0_R (1UL << 45) @@ -579,17 +585,11 @@ struct arm_smmu_priq { /* High-level stream table and context descriptor structures */ struct arm_smmu_strtab_l1_desc { - u8 span; - struct arm_smmu_ste *l2ptr; - dma_addr_t l2ptr_dma; }; struct arm_smmu_ctx_desc { u16 asid; - - refcount_t refs; - struct mm_struct *mm; }; struct arm_smmu_l1_ctx_desc { @@ -602,11 +602,19 @@ struct arm_smmu_ctx_desc_cfg { dma_addr_t cdtab_dma; struct arm_smmu_l1_ctx_desc *l1_desc; unsigned int num_l1_ents; + unsigned int used_ssids; + u8 in_ste; u8 s1fmt; /* log2 of the maximum number of CDs supported by this table */ u8 s1cdmax; }; +/* True if the cd table has SSIDS > 0 in use. */ +static inline bool arm_smmu_ssids_in_use(struct arm_smmu_ctx_desc_cfg *cd_table) +{ + return cd_table->used_ssids; +} + struct arm_smmu_s2_cfg { u16 vmid; }; @@ -648,6 +656,8 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_E2H (1 << 18) #define ARM_SMMU_FEAT_NESTING (1 << 19) #define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20) +#define ARM_SMMU_FEAT_HA (1 << 21) +#define ARM_SMMU_FEAT_HD (1 << 22) u32 features; #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -696,16 +706,15 @@ struct arm_smmu_stream { struct arm_smmu_master { struct arm_smmu_device *smmu; struct device *dev; - struct list_head domain_head; struct arm_smmu_stream *streams; /* Locked by the iommu core using the group mutex */ struct arm_smmu_ctx_desc_cfg cd_table; unsigned int num_streams; - bool ats_enabled; + bool ats_enabled : 1; + bool ste_ats_enabled : 1; bool stall_enabled; bool sva_enabled; bool iopf_enabled; - struct list_head bonds; unsigned int ssid_bits; }; @@ -730,10 +739,11 @@ struct arm_smmu_domain { struct iommu_domain domain; + /* List of struct arm_smmu_master_domain */ struct list_head devices; spinlock_t devices_lock; - struct list_head mmu_notifiers; + struct mmu_notifier mmu_notifier; }; /* The following are exposed for testing purposes. */ @@ -757,15 +767,23 @@ void arm_smmu_make_abort_ste(struct arm_smmu_ste *target); void arm_smmu_make_bypass_ste(struct arm_smmu_device *smmu, struct arm_smmu_ste *target); void arm_smmu_make_cdtable_ste(struct arm_smmu_ste *target, - struct arm_smmu_master *master); + struct arm_smmu_master *master, bool ats_enabled, + unsigned int s1dss); void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target, struct arm_smmu_master *master, - struct arm_smmu_domain *smmu_domain); + struct arm_smmu_domain *smmu_domain, + bool ats_enabled); void arm_smmu_make_sva_cd(struct arm_smmu_cd *target, struct arm_smmu_master *master, struct mm_struct *mm, u16 asid); #endif +struct arm_smmu_master_domain { + struct list_head devices_elm; + struct arm_smmu_master *master; + ioasid_t ssid; +}; + static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); @@ -774,11 +792,11 @@ static inline struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; +struct arm_smmu_domain *arm_smmu_domain_alloc(void); + void arm_smmu_clear_cd(struct arm_smmu_master *master, ioasid_t ssid); struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid); -struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, - u32 ssid); void arm_smmu_make_s1_cd(struct arm_smmu_cd *target, struct arm_smmu_master *master, struct arm_smmu_domain *smmu_domain); @@ -786,12 +804,15 @@ void arm_smmu_write_cd_entry(struct arm_smmu_master *master, int ssid, struct arm_smmu_cd *cdptr, const struct arm_smmu_cd *target); +int arm_smmu_set_pasid(struct arm_smmu_master *master, + struct arm_smmu_domain *smmu_domain, ioasid_t pasid, + struct arm_smmu_cd *cd); + void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, size_t granule, bool leaf, struct arm_smmu_domain *smmu_domain); -bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd); -int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, +int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, unsigned long iova, size_t size); #ifdef CONFIG_ARM_SMMU_V3_SVA @@ -802,9 +823,8 @@ int arm_smmu_master_enable_sva(struct arm_smmu_master *master); int arm_smmu_master_disable_sva(struct arm_smmu_master *master); bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); void arm_smmu_sva_notifier_synchronize(void); -struct iommu_domain *arm_smmu_sva_domain_alloc(void); -void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, - struct device *dev, ioasid_t id); +struct iommu_domain *arm_smmu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); #else /* CONFIG_ARM_SMMU_V3_SVA */ static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { @@ -838,10 +858,7 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master static inline void arm_smmu_sva_notifier_synchronize(void) {} -static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void) -{ - return NULL; -} +#define arm_smmu_sva_domain_alloc NULL static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, struct device *dev, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c index 957d988b6d83..4b2994b6126d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c @@ -200,7 +200,7 @@ static irqreturn_t nvidia_smmu_context_fault_bank(int irq, void __iomem *cb_base = nvidia_smmu_page(smmu, inst, smmu->numpage + idx); fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index 552199cbd9e2..548783f3f8e8 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -141,7 +141,7 @@ static int qcom_tbu_halt(struct qcom_tbu *tbu, struct arm_smmu_domain *smmu_doma writel_relaxed(val, tbu->base + DEBUG_SID_HALT_REG); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if ((fsr & ARM_SMMU_FSR_FAULT) && (fsr & ARM_SMMU_FSR_SS)) { + if ((fsr & ARM_SMMU_CB_FSR_FAULT) && (fsr & ARM_SMMU_CB_FSR_SS)) { u32 sctlr_orig, sctlr; /* @@ -298,7 +298,7 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, sctlr); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (fsr & ARM_SMMU_FSR_FAULT) { + if (fsr & ARM_SMMU_CB_FSR_FAULT) { /* Clear pending interrupts */ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); @@ -306,7 +306,7 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, * TBU halt takes care of resuming any stalled transcation. * Kept it here for completeness sake. */ - if (fsr & ARM_SMMU_FSR_SS) + if (fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE); } @@ -320,11 +320,11 @@ static phys_addr_t qcom_iova_to_phys(struct arm_smmu_domain *smmu_domain, phys = qcom_tbu_trigger_atos(smmu_domain, tbu, iova, sid); fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (fsr & ARM_SMMU_FSR_FAULT) { + if (fsr & ARM_SMMU_CB_FSR_FAULT) { /* Clear pending interrupts */ arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); - if (fsr & ARM_SMMU_FSR_SS) + if (fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, ARM_SMMU_RESUME_TERMINATE); } @@ -383,68 +383,44 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) struct arm_smmu_domain *smmu_domain = dev; struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; struct arm_smmu_device *smmu = smmu_domain->smmu; - u32 fsr, fsynr, cbfrsynra, resume = 0; + struct arm_smmu_context_fault_info cfi; + u32 resume = 0; int idx = smmu_domain->cfg.cbndx; phys_addr_t phys_soft; - unsigned long iova; int ret, tmp; static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + arm_smmu_read_context_fault_info(smmu, idx, &cfi); + + if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; - fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); - iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); - cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); - if (list_empty(&tbu_list)) { - ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (ret == -ENOSYS) - dev_err_ratelimited(smmu->dev, - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", - fsr, iova, fsynr, cbfrsynra, idx); + arm_smmu_print_context_fault_info(smmu, idx, &cfi); - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); return IRQ_HANDLED; } - phys_soft = ops->iova_to_phys(ops, iova); + phys_soft = ops->iova_to_phys(ops, cfi.iova); - tmp = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + tmp = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); if (!tmp || tmp == -EBUSY) { - dev_dbg(smmu->dev, - "Context fault handled by client: iova=0x%08lx, fsr=0x%x, fsynr=0x%x, cb=%d\n", - iova, fsr, fsynr, idx); - dev_dbg(smmu->dev, "soft iova-to-phys=%pa\n", &phys_soft); ret = IRQ_HANDLED; resume = ARM_SMMU_RESUME_TERMINATE; } else { - phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, iova, fsr); + phys_addr_t phys_atos = qcom_smmu_verify_fault(smmu_domain, cfi.iova, cfi.fsr); if (__ratelimit(&_rs)) { - dev_err(smmu->dev, - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", - fsr, iova, fsynr, cbfrsynra, idx); - dev_err(smmu->dev, - "FSR = %08x [%s%s%s%s%s%s%s%s%s], SID=0x%x\n", - fsr, - (fsr & 0x02) ? "TF " : "", - (fsr & 0x04) ? "AFF " : "", - (fsr & 0x08) ? "PF " : "", - (fsr & 0x10) ? "EF " : "", - (fsr & 0x20) ? "TLBMCF " : "", - (fsr & 0x40) ? "TLBLKF " : "", - (fsr & 0x80) ? "MHF " : "", - (fsr & 0x40000000) ? "SS " : "", - (fsr & 0x80000000) ? "MULTI " : "", - cbfrsynra); + arm_smmu_print_context_fault_info(smmu, idx, &cfi); dev_err(smmu->dev, "soft iova-to-phys=%pa\n", &phys_soft); @@ -478,17 +454,17 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev) */ if (tmp != -EBUSY) { /* Clear the faulting FSR */ - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); /* Retry or terminate any stalled transactions */ - if (fsr & ARM_SMMU_FSR_SS) + if (cfi.fsr & ARM_SMMU_CB_FSR_SS) arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_RESUME, resume); } return ret; } -static int qcom_tbu_probe(struct platform_device *pdev) +int qcom_tbu_probe(struct platform_device *pdev) { struct of_phandle_args args = { .args_count = 2 }; struct device_node *np = pdev->dev.of_node; @@ -530,18 +506,3 @@ static int qcom_tbu_probe(struct platform_device *pdev) return 0; } - -static const struct of_device_id qcom_tbu_of_match[] = { - { .compatible = "qcom,sc7280-tbu" }, - { .compatible = "qcom,sdm845-tbu" }, - { } -}; - -static struct platform_driver qcom_tbu_driver = { - .driver = { - .name = "qcom_tbu", - .of_match_table = qcom_tbu_of_match, - }, - .probe = qcom_tbu_probe, -}; -builtin_platform_driver(qcom_tbu_driver); diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 25f034677f56..36c6b36ad4ff 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include "arm-smmu.h" #include "arm-smmu-qcom.h" @@ -469,7 +471,8 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu, /* Check to make sure qcom_scm has finished probing */ if (!qcom_scm_is_available()) - return ERR_PTR(-EPROBE_DEFER); + return ERR_PTR(dev_err_probe(smmu->dev, -EPROBE_DEFER, + "qcom_scm not ready\n")); qsmmu = devm_krealloc(smmu->dev, smmu, sizeof(*qsmmu), GFP_KERNEL); if (!qsmmu) @@ -561,10 +564,47 @@ static struct acpi_platform_list qcom_acpi_platlist[] = { }; #endif +static int qcom_smmu_tbu_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + int ret; + + if (IS_ENABLED(CONFIG_ARM_SMMU_QCOM_DEBUG)) { + ret = qcom_tbu_probe(pdev); + if (ret) + return ret; + } + + if (dev->pm_domain) { + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + } + + return 0; +} + +static const struct of_device_id qcom_smmu_tbu_of_match[] = { + { .compatible = "qcom,sc7280-tbu" }, + { .compatible = "qcom,sdm845-tbu" }, + { } +}; + +static struct platform_driver qcom_smmu_tbu_driver = { + .driver = { + .name = "qcom_tbu", + .of_match_table = qcom_smmu_tbu_of_match, + }, + .probe = qcom_smmu_tbu_probe, +}; + struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu) { const struct device_node *np = smmu->dev->of_node; const struct of_device_id *match; + static u8 tbu_registered; + + if (!tbu_registered++) + platform_driver_register(&qcom_smmu_tbu_driver); #ifdef CONFIG_ACPI if (np == NULL) { diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h index 9bb3ae7d62da..3c134d1a6277 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h @@ -34,8 +34,10 @@ irqreturn_t qcom_smmu_context_fault(int irq, void *dev); #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu); +int qcom_tbu_probe(struct platform_device *pdev); #else static inline void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { } +static inline int qcom_tbu_probe(struct platform_device *pdev) { return -EINVAL; } #endif #endif /* _ARM_SMMU_QCOM_H */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 87c81f75cf84..723273440c21 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -178,8 +178,7 @@ static int arm_smmu_register_legacy_master(struct device *dev, it.cur_count = 1; } - err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode, - &arm_smmu_ops); + err = iommu_fwspec_init(dev, NULL); if (err) return err; @@ -405,32 +404,72 @@ static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { .tlb_add_page = arm_smmu_tlb_add_page_s2_v1, }; + +void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, + struct arm_smmu_context_fault_info *cfi) +{ + cfi->iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); + cfi->fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); + cfi->fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); + cfi->cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); +} + +void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, + const struct arm_smmu_context_fault_info *cfi) +{ + dev_dbg(smmu->dev, + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", + cfi->fsr, cfi->iova, cfi->fsynr, cfi->cbfrsynra, idx); + + dev_err(smmu->dev, "FSR = %08x [%s%sFormat=%u%s%s%s%s%s%s%s%s], SID=0x%x\n", + cfi->fsr, + (cfi->fsr & ARM_SMMU_CB_FSR_MULTI) ? "MULTI " : "", + (cfi->fsr & ARM_SMMU_CB_FSR_SS) ? "SS " : "", + (u32)FIELD_GET(ARM_SMMU_CB_FSR_FORMAT, cfi->fsr), + (cfi->fsr & ARM_SMMU_CB_FSR_UUT) ? " UUT" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_ASF) ? " ASF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_TLBLKF) ? " TLBLKF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_TLBMCF) ? " TLBMCF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_EF) ? " EF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_PF) ? " PF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_AFF) ? " AFF" : "", + (cfi->fsr & ARM_SMMU_CB_FSR_TF) ? " TF" : "", + cfi->cbfrsynra); + + dev_err(smmu->dev, "FSYNR0 = %08x [S1CBNDX=%u%s%s%s%s%s%s PLVL=%u]\n", + cfi->fsynr, + (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_S1CBNDX, cfi->fsynr), + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_AFR) ? " AFR" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PTWF) ? " PTWF" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_NSATTR) ? " NSATTR" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_IND) ? " IND" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_PNU) ? " PNU" : "", + (cfi->fsynr & ARM_SMMU_CB_FSYNR0_WNR) ? " WNR" : "", + (u32)FIELD_GET(ARM_SMMU_CB_FSYNR0_PLVL, cfi->fsynr)); +} + static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { - u32 fsr, fsynr, cbfrsynra; - unsigned long iova; + struct arm_smmu_context_fault_info cfi; struct arm_smmu_domain *smmu_domain = dev; struct arm_smmu_device *smmu = smmu_domain->smmu; + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); int idx = smmu_domain->cfg.cbndx; int ret; - fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + arm_smmu_read_context_fault_info(smmu, idx, &cfi); + + if (!(cfi.fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; - fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0); - iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR); - cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx)); + ret = report_iommu_fault(&smmu_domain->domain, NULL, cfi.iova, + cfi.fsynr & ARM_SMMU_CB_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); - ret = report_iommu_fault(&smmu_domain->domain, NULL, iova, - fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ); + if (ret == -ENOSYS && __ratelimit(&rs)) + arm_smmu_print_context_fault_info(smmu, idx, &cfi); - if (ret == -ENOSYS) - dev_err_ratelimited(smmu->dev, - "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n", - fsr, iova, fsynr, cbfrsynra, idx); - - arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr); + arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, cfi.fsr); return IRQ_HANDLED; } @@ -1306,7 +1345,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va); reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR; - if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE), + if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_CB_ATSR_ACTIVE), 5, 50)) { spin_unlock_irqrestore(&smmu_domain->cb_lock, flags); dev_err(dev, @@ -1642,7 +1681,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { arm_smmu_write_context_bank(smmu, i); - arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT); + arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_CB_FSR_FAULT); } /* Invalidate the TLB, just in case */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.h b/drivers/iommu/arm/arm-smmu/arm-smmu.h index 4765c6945c34..e2aeb511ae90 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.h +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.h @@ -196,34 +196,42 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_PAR_F BIT(0) #define ARM_SMMU_CB_FSR 0x58 -#define ARM_SMMU_FSR_MULTI BIT(31) -#define ARM_SMMU_FSR_SS BIT(30) -#define ARM_SMMU_FSR_UUT BIT(8) -#define ARM_SMMU_FSR_ASF BIT(7) -#define ARM_SMMU_FSR_TLBLKF BIT(6) -#define ARM_SMMU_FSR_TLBMCF BIT(5) -#define ARM_SMMU_FSR_EF BIT(4) -#define ARM_SMMU_FSR_PF BIT(3) -#define ARM_SMMU_FSR_AFF BIT(2) -#define ARM_SMMU_FSR_TF BIT(1) +#define ARM_SMMU_CB_FSR_MULTI BIT(31) +#define ARM_SMMU_CB_FSR_SS BIT(30) +#define ARM_SMMU_CB_FSR_FORMAT GENMASK(10, 9) +#define ARM_SMMU_CB_FSR_UUT BIT(8) +#define ARM_SMMU_CB_FSR_ASF BIT(7) +#define ARM_SMMU_CB_FSR_TLBLKF BIT(6) +#define ARM_SMMU_CB_FSR_TLBMCF BIT(5) +#define ARM_SMMU_CB_FSR_EF BIT(4) +#define ARM_SMMU_CB_FSR_PF BIT(3) +#define ARM_SMMU_CB_FSR_AFF BIT(2) +#define ARM_SMMU_CB_FSR_TF BIT(1) -#define ARM_SMMU_FSR_IGN (ARM_SMMU_FSR_AFF | \ - ARM_SMMU_FSR_ASF | \ - ARM_SMMU_FSR_TLBMCF | \ - ARM_SMMU_FSR_TLBLKF) +#define ARM_SMMU_CB_FSR_IGN (ARM_SMMU_CB_FSR_AFF | \ + ARM_SMMU_CB_FSR_ASF | \ + ARM_SMMU_CB_FSR_TLBMCF | \ + ARM_SMMU_CB_FSR_TLBLKF) -#define ARM_SMMU_FSR_FAULT (ARM_SMMU_FSR_MULTI | \ - ARM_SMMU_FSR_SS | \ - ARM_SMMU_FSR_UUT | \ - ARM_SMMU_FSR_EF | \ - ARM_SMMU_FSR_PF | \ - ARM_SMMU_FSR_TF | \ - ARM_SMMU_FSR_IGN) +#define ARM_SMMU_CB_FSR_FAULT (ARM_SMMU_CB_FSR_MULTI | \ + ARM_SMMU_CB_FSR_SS | \ + ARM_SMMU_CB_FSR_UUT | \ + ARM_SMMU_CB_FSR_EF | \ + ARM_SMMU_CB_FSR_PF | \ + ARM_SMMU_CB_FSR_TF | \ + ARM_SMMU_CB_FSR_IGN) #define ARM_SMMU_CB_FAR 0x60 #define ARM_SMMU_CB_FSYNR0 0x68 -#define ARM_SMMU_FSYNR0_WNR BIT(4) +#define ARM_SMMU_CB_FSYNR0_PLVL GENMASK(1, 0) +#define ARM_SMMU_CB_FSYNR0_WNR BIT(4) +#define ARM_SMMU_CB_FSYNR0_PNU BIT(5) +#define ARM_SMMU_CB_FSYNR0_IND BIT(6) +#define ARM_SMMU_CB_FSYNR0_NSATTR BIT(8) +#define ARM_SMMU_CB_FSYNR0_PTWF BIT(10) +#define ARM_SMMU_CB_FSYNR0_AFR BIT(11) +#define ARM_SMMU_CB_FSYNR0_S1CBNDX GENMASK(23, 16) #define ARM_SMMU_CB_FSYNR1 0x6c @@ -237,7 +245,7 @@ enum arm_smmu_cbar_type { #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 -#define ARM_SMMU_ATSR_ACTIVE BIT(0) +#define ARM_SMMU_CB_ATSR_ACTIVE BIT(0) #define ARM_SMMU_RESUME_TERMINATE BIT(0) @@ -533,4 +541,17 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu); void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx); int arm_mmu500_reset(struct arm_smmu_device *smmu); +struct arm_smmu_context_fault_info { + unsigned long iova; + u32 fsr; + u32 fsynr; + u32 cbfrsynra; +}; + +void arm_smmu_read_context_fault_info(struct arm_smmu_device *smmu, int idx, + struct arm_smmu_context_fault_info *cfi); + +void arm_smmu_print_context_fault_info(struct arm_smmu_device *smmu, int idx, + const struct arm_smmu_context_fault_info *cfi); + #endif /* _ARM_SMMU_H */ diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index e079bb7a993e..b98a7a598b89 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -194,7 +194,7 @@ static irqreturn_t qcom_iommu_fault(int irq, void *dev) fsr = iommu_readl(ctx, ARM_SMMU_CB_FSR); - if (!(fsr & ARM_SMMU_FSR_FAULT)) + if (!(fsr & ARM_SMMU_CB_FSR_FAULT)) return IRQ_NONE; fsynr = iommu_readl(ctx, ARM_SMMU_CB_FSYNR0); @@ -274,7 +274,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, /* Clear context bank fault address fault status registers */ iommu_writel(ctx, ARM_SMMU_CB_FAR, 0); - iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT); + iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_CB_FSR_FAULT); /* TTBRs */ iommu_writeq(ctx, ARM_SMMU_CB_TTBR0, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 43520e7275cc..18603d63ad3f 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -939,8 +939,7 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, * but an IOMMU which supports smaller pages might not map the whole thing. */ static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev, - size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot, - unsigned long attrs) + size_t size, struct sg_table *sgt, gfp_t gfp, unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; @@ -1014,15 +1013,14 @@ out_free_pages: } static void *iommu_dma_alloc_remap(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot, - unsigned long attrs) + dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { struct page **pages; struct sg_table sgt; void *vaddr; + pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); - pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot, - attrs); + pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, attrs); if (!pages) return NULL; *dma_handle = sgt.sgl->dma_address; @@ -1049,8 +1047,7 @@ static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev, if (!sh) return NULL; - sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp, - PAGE_KERNEL, attrs); + sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp, attrs); if (!sh->pages) { kfree(sh); return NULL; @@ -1619,8 +1616,7 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, if (gfpflags_allow_blocking(gfp) && !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { - return iommu_dma_alloc_remap(dev, size, handle, gfp, - dma_pgprot(dev, PAGE_KERNEL, attrs), attrs); + return iommu_dma_alloc_remap(dev, size, handle, gfp, attrs); } if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && diff --git a/drivers/iommu/intel/cache.c b/drivers/iommu/intel/cache.c index e8418cdd8331..44e92638c0cd 100644 --- a/drivers/iommu/intel/cache.c +++ b/drivers/iommu/intel/cache.c @@ -245,7 +245,8 @@ static unsigned long calculate_psi_aligned_address(unsigned long start, * shared_bits are all equal in both pfn and end_pfn. */ shared_bits = ~(pfn ^ end_pfn) & ~bitmask; - mask = shared_bits ? __ffs(shared_bits) : BITS_PER_LONG; + mask = shared_bits ? __ffs(shared_bits) : MAX_AGAW_PFN_WIDTH; + aligned_pages = 1UL << mask; } *_pages = aligned_pages; diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 304e84949ca7..1c8d3141cb55 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1446,7 +1446,7 @@ restart: */ writel(qi->free_head << shift, iommu->reg + DMAR_IQT_REG); - while (qi->desc_status[wait_index] != QI_DONE) { + while (READ_ONCE(qi->desc_status[wait_index]) != QI_DONE) { /* * We will leave the interrupts disabled, to prevent interrupt * context to queue another cmd while a cmd is already submitted diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index fd11a080380c..9ff8b83c19a3 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -854,7 +854,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE); pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE; if (domain->use_first_level) - pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; + pteval |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; tmp = 0ULL; if (!try_cmpxchg64(&pte->val, &tmp, pteval)) @@ -1359,21 +1359,6 @@ static void iommu_disable_pci_caps(struct device_domain_info *info) } } -static void __iommu_flush_dev_iotlb(struct device_domain_info *info, - u64 addr, unsigned int mask) -{ - u16 sid, qdep; - - if (!info || !info->ats_enabled) - return; - - sid = info->bus << 8 | info->devfn; - qdep = info->ats_qdep; - qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, - qdep, addr, mask); - quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep); -} - static void intel_flush_iotlb_all(struct iommu_domain *domain) { cache_tag_flush_all(to_dmar_domain(domain)); @@ -1872,7 +1857,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP); attr |= DMA_FL_PTE_PRESENT; if (domain->use_first_level) { - attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; + attr |= DMA_FL_PTE_US | DMA_FL_PTE_ACCESS; if (prot & DMA_PTE_WRITE) attr |= DMA_FL_PTE_DIRTY; } @@ -1959,7 +1944,6 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 { struct intel_iommu *iommu = info->iommu; struct context_entry *context; - u16 did_old; spin_lock(&iommu->lock); context = iommu_context_addr(iommu, bus, devfn, 0); @@ -1968,24 +1952,10 @@ static void domain_context_clear_one(struct device_domain_info *info, u8 bus, u8 return; } - did_old = context_domain_id(context); - context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - iommu->flush.flush_context(iommu, - did_old, - (((u16)bus) << 8) | devfn, - DMA_CCMD_MASK_NOBIT, - DMA_CCMD_DEVICE_INVL); - - iommu->flush.flush_iotlb(iommu, - did_old, - 0, - 0, - DMA_TLB_DSI_FLUSH); - - __iommu_flush_dev_iotlb(info, 0, MAX_AGAW_PFN_WIDTH); + intel_context_flush_present(info, context, true); } static int domain_setup_first_level(struct intel_iommu *iommu, @@ -2071,7 +2041,7 @@ static int __init si_domain_init(int hw) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { ret = iommu_domain_identity_map(si_domain, mm_to_dma_pfn_start(start_pfn), - mm_to_dma_pfn_end(end_pfn)); + mm_to_dma_pfn_end(end_pfn-1)); if (ret) return ret; } @@ -2177,17 +2147,6 @@ static bool device_rmrr_is_relaxable(struct device *dev) return false; } -/* - * Return the required default domain type for a specific device. - * - * @dev: the device in query - * @startup: true if this is during early boot - * - * Returns: - * - IOMMU_DOMAIN_DMA: device requires a dynamic mapping domain - * - IOMMU_DOMAIN_IDENTITY: device requires an identical mapping domain - * - 0: both identity and dynamic domains work for this device - */ static int device_def_domain_type(struct device *dev) { if (dev_is_pci(dev)) { @@ -3633,6 +3592,79 @@ static struct iommu_domain blocking_domain = { } }; +static int iommu_superpage_capability(struct intel_iommu *iommu, bool first_stage) +{ + if (!intel_iommu_superpage) + return 0; + + if (first_stage) + return cap_fl1gp_support(iommu->cap) ? 2 : 1; + + return fls(cap_super_page_val(iommu->cap)); +} + +static struct dmar_domain *paging_domain_alloc(struct device *dev, bool first_stage) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct dmar_domain *domain; + int addr_width; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&domain->devices); + INIT_LIST_HEAD(&domain->dev_pasids); + INIT_LIST_HEAD(&domain->cache_tags); + spin_lock_init(&domain->lock); + spin_lock_init(&domain->cache_lock); + xa_init(&domain->iommu_array); + + domain->nid = dev_to_node(dev); + domain->has_iotlb_device = info->ats_enabled; + domain->use_first_level = first_stage; + + /* calculate the address width */ + addr_width = agaw_to_width(iommu->agaw); + if (addr_width > cap_mgaw(iommu->cap)) + addr_width = cap_mgaw(iommu->cap); + domain->gaw = addr_width; + domain->agaw = iommu->agaw; + domain->max_addr = __DOMAIN_MAX_ADDR(addr_width); + + /* iommu memory access coherency */ + domain->iommu_coherency = iommu_paging_structure_coherency(iommu); + + /* pagesize bitmap */ + domain->domain.pgsize_bitmap = SZ_4K; + domain->iommu_superpage = iommu_superpage_capability(iommu, first_stage); + domain->domain.pgsize_bitmap |= domain_super_pgsize_bitmap(domain); + + /* + * IOVA aperture: First-level translation restricts the input-address + * to a canonical address (i.e., address bits 63:N have the same value + * as address bit [N-1], where N is 48-bits with 4-level paging and + * 57-bits with 5-level paging). Hence, skip bit [N-1]. + */ + domain->domain.geometry.force_aperture = true; + domain->domain.geometry.aperture_start = 0; + if (first_stage) + domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1); + else + domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw); + + /* always allocate the top pgd */ + domain->pgd = iommu_alloc_page_node(domain->nid, GFP_KERNEL); + if (!domain->pgd) { + kfree(domain); + return ERR_PTR(-ENOMEM); + } + domain_flush_cache(domain, domain->pgd, PAGE_SIZE); + + return domain; +} + static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) { struct dmar_domain *dmar_domain; @@ -3695,15 +3727,14 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags, if (user_data || (dirty_tracking && !ssads_supported(iommu))) return ERR_PTR(-EOPNOTSUPP); - /* - * domain_alloc_user op needs to fully initialize a domain before - * return, so uses iommu_domain_alloc() here for simple. - */ - domain = iommu_domain_alloc(dev->bus); - if (!domain) - return ERR_PTR(-ENOMEM); - - dmar_domain = to_dmar_domain(domain); + /* Do not use first stage for user domain translation. */ + dmar_domain = paging_domain_alloc(dev, false); + if (IS_ERR(dmar_domain)) + return ERR_CAST(dmar_domain); + domain = &dmar_domain->domain; + domain->type = IOMMU_DOMAIN_UNMANAGED; + domain->owner = &intel_iommu_ops; + domain->ops = intel_iommu_ops.default_domain_ops; if (nested_parent) { dmar_domain->nested_parent = true; @@ -4213,6 +4244,37 @@ static int intel_iommu_enable_sva(struct device *dev) return 0; } +static int context_flip_pri(struct device_domain_info *info, bool enable) +{ + struct intel_iommu *iommu = info->iommu; + u8 bus = info->bus, devfn = info->devfn; + struct context_entry *context; + + spin_lock(&iommu->lock); + if (context_copied(iommu, bus, devfn)) { + spin_unlock(&iommu->lock); + return -EINVAL; + } + + context = iommu_context_addr(iommu, bus, devfn, false); + if (!context || !context_present(context)) { + spin_unlock(&iommu->lock); + return -ENODEV; + } + + if (enable) + context_set_sm_pre(context); + else + context_clear_sm_pre(context); + + if (!ecap_coherent(iommu->ecap)) + clflush_cache_range(context, sizeof(*context)); + intel_context_flush_present(info, context, true); + spin_unlock(&iommu->lock); + + return 0; +} + static int intel_iommu_enable_iopf(struct device *dev) { struct pci_dev *pdev = dev_is_pci(dev) ? to_pci_dev(dev) : NULL; @@ -4242,15 +4304,23 @@ static int intel_iommu_enable_iopf(struct device *dev) if (ret) return ret; + ret = context_flip_pri(info, true); + if (ret) + goto err_remove_device; + ret = pci_enable_pri(pdev, PRQ_DEPTH); - if (ret) { - iopf_queue_remove_device(iommu->iopf_queue, dev); - return ret; - } + if (ret) + goto err_clear_pri; info->pri_enabled = 1; return 0; +err_clear_pri: + context_flip_pri(info, false); +err_remove_device: + iopf_queue_remove_device(iommu->iopf_queue, dev); + + return ret; } static int intel_iommu_disable_iopf(struct device *dev) @@ -4261,6 +4331,15 @@ static int intel_iommu_disable_iopf(struct device *dev) if (!info->pri_enabled) return -EINVAL; + /* Disable new PRI reception: */ + context_flip_pri(info, false); + + /* + * Remove device from fault queue and acknowledge all outstanding + * PRQs to the device: + */ + iopf_queue_remove_device(iommu->iopf_queue, dev); + /* * PCIe spec states that by clearing PRI enable bit, the Page * Request Interface will not issue new page requests, but has @@ -4271,7 +4350,6 @@ static int intel_iommu_disable_iopf(struct device *dev) */ pci_disable_pri(to_pci_dev(dev)); info->pri_enabled = 0; - iopf_queue_remove_device(iommu->iopf_queue, dev); return 0; } diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index eaf015b4353b..b67c14da1240 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -49,7 +49,6 @@ #define DMA_FL_PTE_US BIT_ULL(2) #define DMA_FL_PTE_ACCESS BIT_ULL(5) #define DMA_FL_PTE_DIRTY BIT_ULL(6) -#define DMA_FL_PTE_XD BIT_ULL(63) #define DMA_SL_PTE_DIRTY_BIT 9 #define DMA_SL_PTE_DIRTY BIT_ULL(DMA_SL_PTE_DIRTY_BIT) @@ -831,11 +830,10 @@ static inline void dma_clear_pte(struct dma_pte *pte) static inline u64 dma_pte_addr(struct dma_pte *pte) { #ifdef CONFIG_64BIT - return pte->val & VTD_PAGE_MASK & (~DMA_FL_PTE_XD); + return pte->val & VTD_PAGE_MASK; #else /* Must have a full atomic 64-bit read */ - return __cmpxchg64(&pte->val, 0ULL, 0ULL) & - VTD_PAGE_MASK & (~DMA_FL_PTE_XD); + return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK; #endif } @@ -1047,6 +1045,15 @@ static inline void context_set_sm_pre(struct context_entry *context) context->lo |= BIT_ULL(4); } +/* + * Clear the PRE(Page Request Enable) field of a scalable mode context + * entry. + */ +static inline void context_clear_sm_pre(struct context_entry *context) +{ + context->lo &= ~BIT_ULL(4); +} + /* Returns a number of VTD pages, but aligned to MM page size */ static inline unsigned long aligned_nrpages(unsigned long host_addr, size_t size) { @@ -1145,6 +1152,10 @@ void cache_tag_flush_all(struct dmar_domain *domain); void cache_tag_flush_range_np(struct dmar_domain *domain, unsigned long start, unsigned long end); +void intel_context_flush_present(struct device_domain_info *info, + struct context_entry *context, + bool affect_domains); + #ifdef CONFIG_INTEL_IOMMU_SVM void intel_svm_check(struct intel_iommu *iommu); int intel_svm_enable_prq(struct intel_iommu *iommu); diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index e4a70886678c..e090ca07364b 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -597,8 +597,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (ir_pre_enabled(iommu)) { if (!is_kdump_kernel()) { - pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n", - iommu->name); + pr_info_once("IRQ remapping was enabled on %s but we are not in kdump mode\n", + iommu->name); clear_ir_pre_enabled(iommu); iommu_disable_irq_remapping(iommu); } else if (iommu_load_old_irte(iommu)) diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index abce19e2ad6f..5792c817cefa 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -146,6 +146,8 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) retry: entries = get_pasid_table_from_pde(&dir[dir_index]); if (!entries) { + u64 tmp; + entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC); if (!entries) return NULL; @@ -156,8 +158,9 @@ retry: * clear. However, this entry might be populated by others * while we are preparing it. Use theirs with a retry. */ - if (cmpxchg64(&dir[dir_index].val, 0ULL, - (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { + tmp = 0ULL; + if (!try_cmpxchg64(&dir[dir_index].val, &tmp, + (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { iommu_free_page(entries); goto retry; } @@ -333,7 +336,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu, pasid_set_domain_id(pte, did); pasid_set_address_width(pte, iommu->agaw); pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap)); - pasid_set_nxe(pte); /* Setup Present and PASID Granular Transfer Type: */ pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY); @@ -692,25 +694,7 @@ static void device_pasid_table_teardown(struct device *dev, u8 bus, u8 devfn) context_clear_entry(context); __iommu_flush_cache(iommu, context, sizeof(*context)); spin_unlock(&iommu->lock); - - /* - * Cache invalidation for changes to a scalable-mode context table - * entry. - * - * Section 6.5.3.3 of the VT-d spec: - * - Device-selective context-cache invalidation; - * - Domain-selective PASID-cache invalidation to affected domains - * (can be skipped if all PASID entries were not-present); - * - Domain-selective IOTLB invalidation to affected domains; - * - Global Device-TLB invalidation to affected functions. - * - * The iommu has been parked in the blocking state. All domains have - * been detached from the device or PASID. The PASID and IOTLB caches - * have been invalidated during the domain detach path. - */ - iommu->flush.flush_context(iommu, 0, PCI_DEVID(bus, devfn), - DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); - devtlb_invalidation_with_pasid(iommu, dev, IOMMU_NO_PASID); + intel_context_flush_present(info, context, false); } static int pci_pasid_table_teardown(struct pci_dev *pdev, u16 alias, void *data) @@ -768,8 +752,6 @@ static int context_entry_set_pasid_table(struct context_entry *context, if (info->ats_supported) context_set_sm_dte(context); - if (info->pri_supported) - context_set_sm_pre(context); if (info->pasid_supported) context_set_pasid(context); @@ -872,3 +854,89 @@ int intel_pasid_setup_sm_context(struct device *dev) return pci_for_each_dma_alias(to_pci_dev(dev), pci_pasid_table_setup, dev); } + +/* + * Global Device-TLB invalidation following changes in a context entry which + * was present. + */ +static void __context_flush_dev_iotlb(struct device_domain_info *info) +{ + if (!info->ats_enabled) + return; + + qi_flush_dev_iotlb(info->iommu, PCI_DEVID(info->bus, info->devfn), + info->pfsid, info->ats_qdep, 0, MAX_AGAW_PFN_WIDTH); + + /* + * There is no guarantee that the device DMA is stopped when it reaches + * here. Therefore, always attempt the extra device TLB invalidation + * quirk. The impact on performance is acceptable since this is not a + * performance-critical path. + */ + quirk_extra_dev_tlb_flush(info, 0, MAX_AGAW_PFN_WIDTH, IOMMU_NO_PASID, + info->ats_qdep); +} + +/* + * Cache invalidations after change in a context table entry that was present + * according to the Spec 6.5.3.3 (Guidance to Software for Invalidations). If + * IOMMU is in scalable mode and all PASID table entries of the device were + * non-present, set flush_domains to false. Otherwise, true. + */ +void intel_context_flush_present(struct device_domain_info *info, + struct context_entry *context, + bool flush_domains) +{ + struct intel_iommu *iommu = info->iommu; + u16 did = context_domain_id(context); + struct pasid_entry *pte; + int i; + + /* + * Device-selective context-cache invalidation. The Domain-ID field + * of the Context-cache Invalidate Descriptor is ignored by hardware + * when operating in scalable mode. Therefore the @did value doesn't + * matter in scalable mode. + */ + iommu->flush.flush_context(iommu, did, PCI_DEVID(info->bus, info->devfn), + DMA_CCMD_MASK_NOBIT, DMA_CCMD_DEVICE_INVL); + + /* + * For legacy mode: + * - Domain-selective IOTLB invalidation + * - Global Device-TLB invalidation to all affected functions + */ + if (!sm_supported(iommu)) { + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + __context_flush_dev_iotlb(info); + + return; + } + + /* + * For scalable mode: + * - Domain-selective PASID-cache invalidation to affected domains + * - Domain-selective IOTLB invalidation to affected domains + * - Global Device-TLB invalidation to affected functions + */ + if (flush_domains) { + /* + * If the IOMMU is running in scalable mode and there might + * be potential PASID translations, the caller should hold + * the lock to ensure that context changes and cache flushes + * are atomic. + */ + assert_spin_locked(&iommu->lock); + for (i = 0; i < info->pasid_table->max_pasid; i++) { + pte = intel_pasid_get_entry(info->dev, i); + if (!pte || !pasid_pte_is_present(pte)) + continue; + + did = pasid_get_domain_id(pte); + qi_flush_pasid_cache(iommu, did, QI_PC_ALL_PASIDS, 0); + iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); + } + } + + __context_flush_dev_iotlb(info); +} diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index da9978fef7ac..dde6d3ba5ae0 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -247,16 +247,6 @@ static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value) pasid_set_bits(&pe->val[1], 1 << 23, value << 23); } -/* - * Setup No Execute Enable bit (Bit 133) of a scalable mode PASID - * entry. It is required when XD bit of the first level page table - * entry is about to be set. - */ -static inline void pasid_set_nxe(struct pasid_entry *pe) -{ - pasid_set_bits(&pe->val[2], 1 << 5, 1 << 5); -} - /* * Setup the Page Snoop (PGSNP) field (Bit 88) of a scalable mode * PASID entry. diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 3d23b924cec1..f5d9fd1f45bf 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -76,6 +76,7 @@ #define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63) #define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53) +#define ARM_LPAE_PTE_DBM (((arm_lpae_iopte)1) << 51) #define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10) #define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8) #define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8) @@ -85,7 +86,7 @@ #define ARM_LPAE_PTE_ATTR_LO_MASK (((arm_lpae_iopte)0x3ff) << 2) /* Ignore the contiguous bit for block splitting */ -#define ARM_LPAE_PTE_ATTR_HI_MASK (((arm_lpae_iopte)6) << 52) +#define ARM_LPAE_PTE_ATTR_HI_MASK (ARM_LPAE_PTE_XN | ARM_LPAE_PTE_DBM) #define ARM_LPAE_PTE_ATTR_MASK (ARM_LPAE_PTE_ATTR_LO_MASK | \ ARM_LPAE_PTE_ATTR_HI_MASK) /* Software bit for solving coherency races */ @@ -93,7 +94,11 @@ /* Stage-1 PTE */ #define ARM_LPAE_PTE_AP_UNPRIV (((arm_lpae_iopte)1) << 6) -#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)2) << 6) +#define ARM_LPAE_PTE_AP_RDONLY_BIT 7 +#define ARM_LPAE_PTE_AP_RDONLY (((arm_lpae_iopte)1) << \ + ARM_LPAE_PTE_AP_RDONLY_BIT) +#define ARM_LPAE_PTE_AP_WR_CLEAN_MASK (ARM_LPAE_PTE_AP_RDONLY | \ + ARM_LPAE_PTE_DBM) #define ARM_LPAE_PTE_ATTRINDX_SHIFT 2 #define ARM_LPAE_PTE_nG (((arm_lpae_iopte)1) << 11) @@ -139,6 +144,12 @@ #define iopte_prot(pte) ((pte) & ARM_LPAE_PTE_ATTR_MASK) +#define iopte_writeable_dirty(pte) \ + (((pte) & ARM_LPAE_PTE_AP_WR_CLEAN_MASK) == ARM_LPAE_PTE_DBM) + +#define iopte_set_writeable_clean(ptep) \ + set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)(ptep)) + struct arm_lpae_io_pgtable { struct io_pgtable iop; @@ -160,6 +171,13 @@ static inline bool iopte_leaf(arm_lpae_iopte pte, int lvl, return iopte_type(pte) == ARM_LPAE_PTE_TYPE_BLOCK; } +static inline bool iopte_table(arm_lpae_iopte pte, int lvl) +{ + if (lvl == (ARM_LPAE_MAX_LEVELS - 1)) + return false; + return iopte_type(pte) == ARM_LPAE_PTE_TYPE_TABLE; +} + static arm_lpae_iopte paddr_to_iopte(phys_addr_t paddr, struct arm_lpae_io_pgtable *data) { @@ -422,6 +440,8 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, pte = ARM_LPAE_PTE_nG; if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) pte |= ARM_LPAE_PTE_AP_RDONLY; + else if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_HD) + pte |= ARM_LPAE_PTE_DBM; if (!(prot & IOMMU_PRIV)) pte |= ARM_LPAE_PTE_AP_UNPRIV; } else { @@ -726,6 +746,97 @@ found_translation: return iopte_to_paddr(pte, data) | iova; } +struct io_pgtable_walk_data { + struct iommu_dirty_bitmap *dirty; + unsigned long flags; + u64 addr; + const u64 end; +}; + +static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl); + +static int io_pgtable_visit_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, int lvl) +{ + struct io_pgtable *iop = &data->iop; + arm_lpae_iopte pte = READ_ONCE(*ptep); + + if (iopte_leaf(pte, lvl, iop->fmt)) { + size_t size = ARM_LPAE_BLOCK_SIZE(lvl, data); + + if (iopte_writeable_dirty(pte)) { + iommu_dirty_bitmap_record(walk_data->dirty, + walk_data->addr, size); + if (!(walk_data->flags & IOMMU_DIRTY_NO_CLEAR)) + iopte_set_writeable_clean(ptep); + } + walk_data->addr += size; + return 0; + } + + if (WARN_ON(!iopte_table(pte, lvl))) + return -EINVAL; + + ptep = iopte_deref(pte, data); + return __arm_lpae_iopte_walk_dirty(data, walk_data, ptep, lvl + 1); +} + +static int __arm_lpae_iopte_walk_dirty(struct arm_lpae_io_pgtable *data, + struct io_pgtable_walk_data *walk_data, + arm_lpae_iopte *ptep, + int lvl) +{ + u32 idx; + int max_entries, ret; + + if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS)) + return -EINVAL; + + if (lvl == data->start_level) + max_entries = ARM_LPAE_PGD_SIZE(data) / sizeof(arm_lpae_iopte); + else + max_entries = ARM_LPAE_PTES_PER_TABLE(data); + + for (idx = ARM_LPAE_LVL_IDX(walk_data->addr, lvl, data); + (idx < max_entries) && (walk_data->addr < walk_data->end); ++idx) { + ret = io_pgtable_visit_dirty(data, walk_data, ptep + idx, lvl); + if (ret) + return ret; + } + + return 0; +} + +static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops, + unsigned long iova, size_t size, + unsigned long flags, + struct iommu_dirty_bitmap *dirty) +{ + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &data->iop.cfg; + struct io_pgtable_walk_data walk_data = { + .dirty = dirty, + .flags = flags, + .addr = iova, + .end = iova + size, + }; + arm_lpae_iopte *ptep = data->pgd; + int lvl = data->start_level; + + if (WARN_ON(!size)) + return -EINVAL; + if (WARN_ON((iova + size - 1) & ~(BIT(cfg->ias) - 1))) + return -EINVAL; + if (data->iop.fmt != ARM_64_LPAE_S1) + return -EINVAL; + + return __arm_lpae_iopte_walk_dirty(data, &walk_data, ptep, lvl); +} + static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg) { unsigned long granule, page_sizes; @@ -804,6 +915,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) .map_pages = arm_lpae_map_pages, .unmap_pages = arm_lpae_unmap_pages, .iova_to_phys = arm_lpae_iova_to_phys, + .read_and_clear_dirty = arm_lpae_read_and_clear_dirty, }; return data; @@ -819,7 +931,8 @@ arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie) if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_ARM_TTBR1 | - IO_PGTABLE_QUIRK_ARM_OUTER_WBWA)) + IO_PGTABLE_QUIRK_ARM_OUTER_WBWA | + IO_PGTABLE_QUIRK_ARM_HD)) return NULL; data = arm_lpae_alloc_pgtable(cfg); diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h index c37801c32f33..de5b54eaa8bf 100644 --- a/drivers/iommu/iommu-priv.h +++ b/drivers/iommu/iommu-priv.h @@ -17,6 +17,13 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } +const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); + +static inline const struct iommu_ops *iommu_fwspec_ops(struct iommu_fwspec *fwspec) +{ + return iommu_ops_from_fwnode(fwspec ? fwspec->iommu_fwnode : NULL); +} + int iommu_group_replace_domain(struct iommu_group *group, struct iommu_domain *new_domain); diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 69cde094440e..503c5d23c1ea 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -10,6 +10,8 @@ #include "iommu-priv.h" static DEFINE_MUTEX(iommu_sva_lock); +static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); /* Allocate a PASID for the mm within range (inclusive) */ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct device *dev) @@ -291,8 +293,8 @@ static int iommu_sva_iopf_handler(struct iopf_group *group) return 0; } -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm) +static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) { const struct iommu_ops *ops = dev_iommu_ops(dev); struct iommu_domain *domain; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8484285fbaa8..ed6c5cb60c5a 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -510,7 +510,6 @@ DEFINE_MUTEX(iommu_probe_device_lock); static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops; - struct iommu_fwspec *fwspec; struct iommu_group *group; struct group_device *gdev; int ret; @@ -523,12 +522,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list * be present, and that any of their registered instances has suitable * ops for probing, and thus cheekily co-opt the same mechanism. */ - fwspec = dev_iommu_fwspec_get(dev); - if (fwspec && fwspec->ops) - ops = fwspec->ops; - else - ops = iommu_ops_from_fwnode(NULL); - + ops = iommu_fwspec_ops(dev_iommu_fwspec_get(dev)); if (!ops) return -ENODEV; /* @@ -2016,6 +2010,10 @@ static int __iommu_domain_alloc_dev(struct device *dev, void *data) return 0; } +/* + * The iommu ops in bus has been retired. Do not use this interface in + * new drivers. + */ struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) { const struct iommu_ops *ops = NULL; @@ -2032,6 +2030,22 @@ struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus) } EXPORT_SYMBOL_GPL(iommu_domain_alloc); +/** + * iommu_paging_domain_alloc() - Allocate a paging domain + * @dev: device for which the domain is allocated + * + * Allocate a paging domain which will be managed by a kernel driver. Return + * allocated domain if successful, or a ERR pointer for failure. + */ +struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) +{ + if (!dev_has_iommu(dev)) + return ERR_PTR(-ENODEV); + + return __iommu_domain_alloc(dev_iommu_ops(dev), dev, IOMMU_DOMAIN_UNMANAGED); +} +EXPORT_SYMBOL_GPL(iommu_paging_domain_alloc); + void iommu_domain_free(struct iommu_domain *domain) { if (domain->type == IOMMU_DOMAIN_SVA) @@ -2822,13 +2836,16 @@ const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode return ops; } -int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops) +int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode) { + const struct iommu_ops *ops = iommu_ops_from_fwnode(iommu_fwnode); struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + if (!ops) + return -EPROBE_DEFER; + if (fwspec) - return ops == fwspec->ops ? 0 : -EINVAL; + return ops == iommu_fwspec_ops(fwspec) ? 0 : -EINVAL; if (!dev_iommu_get(dev)) return -ENOMEM; @@ -2838,9 +2855,8 @@ int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, if (!fwspec) return -ENOMEM; - of_node_get(to_of_node(iommu_fwnode)); + fwnode_handle_get(iommu_fwnode); fwspec->iommu_fwnode = iommu_fwnode; - fwspec->ops = ops; dev_iommu_fwspec_set(dev, fwspec); return 0; } diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c index d90f1c30cf8a..aefde4443671 100644 --- a/drivers/iommu/iommufd/hw_pagetable.c +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -119,6 +119,9 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, return ERR_PTR(-EOPNOTSUPP); if (flags & ~valid_flags) return ERR_PTR(-EOPNOTSUPP); + if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) && + !device_iommu_capable(idev->dev, IOMMU_CAP_DIRTY_TRACKING)) + return ERR_PTR(-EOPNOTSUPP); hwpt_paging = __iommufd_object_alloc( ictx, hwpt_paging, IOMMUFD_OBJ_HWPT_PAGING, common.obj); @@ -142,9 +145,10 @@ iommufd_hwpt_paging_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, } hwpt->domain->owner = ops; } else { - hwpt->domain = iommu_domain_alloc(idev->dev->bus); - if (!hwpt->domain) { - rc = -ENOMEM; + hwpt->domain = iommu_paging_domain_alloc(idev->dev); + if (IS_ERR(hwpt->domain)) { + rc = PTR_ERR(hwpt->domain); + hwpt->domain = NULL; goto out_abort; } } diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c index 528f356238b3..117f644a0c5b 100644 --- a/drivers/iommu/iommufd/pages.c +++ b/drivers/iommu/iommufd/pages.c @@ -809,13 +809,14 @@ static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + cur_pages = atomic_long_read(&pages->source_user->locked_vm); do { - cur_pages = atomic_long_read(&pages->source_user->locked_vm); new_pages = cur_pages + npages; if (new_pages > lock_limit) return -ENOMEM; - } while (atomic_long_cmpxchg(&pages->source_user->locked_vm, cur_pages, - new_pages) != cur_pages); + } while (!atomic_long_try_cmpxchg(&pages->source_user->locked_vm, + &cur_pages, new_pages)); return 0; } diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index d59d0ea2fd21..16c6adff3eb7 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -1000,4 +1000,5 @@ void iova_cache_put(void) EXPORT_SYMBOL_GPL(iova_cache_put); MODULE_AUTHOR("Anil S Keshavamurthy "); +MODULE_DESCRIPTION("IOMMU I/O Virtual Address management"); MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index d6e4002200bd..c6ea5b4baff3 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -401,7 +401,6 @@ static const struct iommu_ops mtk_iommu_v1_ops; static int mtk_iommu_v1_create_mapping(struct device *dev, const struct of_phandle_args *args) { - struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); struct mtk_iommu_v1_data *data; struct platform_device *m4updev; struct dma_iommu_mapping *mtk_mapping; @@ -413,14 +412,9 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, return -EINVAL; } - if (!fwspec) { - ret = iommu_fwspec_init(dev, &args->np->fwnode, &mtk_iommu_v1_ops); - if (ret) - return ret; - fwspec = dev_iommu_fwspec_get(dev); - } else if (dev_iommu_fwspec_get(dev)->ops != &mtk_iommu_v1_ops) { - return -EINVAL; - } + ret = iommu_fwspec_init(dev, of_fwnode_handle(args->np)); + if (ret) + return ret; if (!dev_iommu_priv_get(dev)) { /* Get the m4u device */ diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 3afe0b48a48d..78d61da75257 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -17,30 +17,25 @@ #include #include +#include "iommu-priv.h" + static int of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) { const struct iommu_ops *ops; - struct fwnode_handle *fwnode = &iommu_spec->np->fwnode; int ret; - ops = iommu_ops_from_fwnode(fwnode); - if ((ops && !ops->of_xlate) || - !of_device_is_available(iommu_spec->np)) + if (!of_device_is_available(iommu_spec->np)) return -ENODEV; - ret = iommu_fwspec_init(dev, fwnode, ops); + ret = iommu_fwspec_init(dev, of_fwnode_handle(iommu_spec->np)); + if (ret == -EPROBE_DEFER) + return driver_deferred_probe_check_state(dev); if (ret) return ret; - /* - * The otherwise-empty fwspec handily serves to indicate the specific - * IOMMU device we're waiting for, which will be useful if we ever get - * a proper probe-ordering dependency mechanism in future. - */ - if (!ops) - return driver_deferred_probe_check_state(dev); - if (!try_module_get(ops->owner)) + ops = iommu_ops_from_fwnode(&iommu_spec->np->fwnode); + if (!ops->of_xlate || !try_module_get(ops->owner)) return -ENODEV; ret = ops->of_xlate(dev, iommu_spec); @@ -105,6 +100,14 @@ static int of_iommu_configure_device(struct device_node *master_np, of_iommu_configure_dev(master_np, dev); } +static void of_pci_check_device_ats(struct device *dev, struct device_node *np) +{ + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); + + if (fwspec && of_property_read_bool(np, "ats-supported")) + fwspec->flags |= IOMMU_FWSPEC_PCI_RC_ATS; +} + /* * Returns: * 0 on success, an iommu was configured @@ -115,7 +118,6 @@ static int of_iommu_configure_device(struct device_node *master_np, int of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id) { - struct iommu_fwspec *fwspec; int err; if (!master_np) @@ -123,14 +125,9 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, /* Serialise to make dev->iommu stable under our potential fwspec */ mutex_lock(&iommu_probe_device_lock); - fwspec = dev_iommu_fwspec_get(dev); - if (fwspec) { - if (fwspec->ops) { - mutex_unlock(&iommu_probe_device_lock); - return 0; - } - /* In the deferred case, start again from scratch */ - iommu_fwspec_free(dev); + if (dev_iommu_fwspec_get(dev)) { + mutex_unlock(&iommu_probe_device_lock); + return 0; } /* @@ -147,23 +144,21 @@ int of_iommu_configure(struct device *dev, struct device_node *master_np, pci_request_acs(); err = pci_for_each_dma_alias(to_pci_dev(dev), of_pci_iommu_init, &info); + of_pci_check_device_ats(dev, master_np); } else { err = of_iommu_configure_device(master_np, dev, id); } + + if (err) + iommu_fwspec_free(dev); mutex_unlock(&iommu_probe_device_lock); - if (err == -ENODEV || err == -EPROBE_DEFER) - return err; - if (err) - goto err_log; + if (!err && dev->bus) + err = iommu_probe_device(dev); - err = iommu_probe_device(dev); - if (err) - goto err_log; - return 0; + if (err && err != -EPROBE_DEFER) + dev_dbg(dev, "Adding to IOMMU failed: %d\n", err); -err_log: - dev_dbg(dev, "Adding to IOMMU failed: %pe\n", ERR_PTR(err)); return err; } diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index c519b991749d..8d8f11854676 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -452,6 +452,7 @@ static int sun50i_iommu_enable(struct sun50i_iommu *iommu) IOMMU_TLB_PREFETCH_MASTER_ENABLE(3) | IOMMU_TLB_PREFETCH_MASTER_ENABLE(4) | IOMMU_TLB_PREFETCH_MASTER_ENABLE(5)); + iommu_write(iommu, IOMMU_BYPASS_REG, 0); iommu_write(iommu, IOMMU_INT_ENABLE_REG, IOMMU_INT_MASK); iommu_write(iommu, IOMMU_DM_AUT_CTRL_REG(SUN50I_IOMMU_ACI_NONE), IOMMU_DM_AUT_CTRL_RD_UNAVAIL(SUN50I_IOMMU_ACI_NONE, 0) | @@ -601,6 +602,14 @@ static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova, u32 *page_table, *pte_addr; int ret = 0; + /* the IOMMU can only handle 32-bit addresses, both input and output */ + if ((uint64_t)paddr >> 32) { + ret = -EINVAL; + dev_warn_once(iommu->dev, + "attempt to map address beyond 4GB\n"); + goto out; + } + page_table = sun50i_dte_get_page_table(sun50i_domain, iova, gfp); if (IS_ERR(page_table)) { ret = PTR_ERR(page_table); @@ -681,7 +690,8 @@ sun50i_iommu_domain_alloc_paging(struct device *dev) if (!sun50i_domain) return NULL; - sun50i_domain->dt = iommu_alloc_pages(GFP_KERNEL, get_order(DT_SIZE)); + sun50i_domain->dt = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32, + get_order(DT_SIZE)); if (!sun50i_domain->dt) goto err_free_domain; @@ -996,7 +1006,7 @@ static int sun50i_iommu_probe(struct platform_device *pdev) iommu->pt_pool = kmem_cache_create(dev_name(&pdev->dev), PT_SIZE, PT_SIZE, - SLAB_HWCACHE_ALIGN, + SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA32, NULL); if (!iommu->pt_pool) return -ENOMEM; @@ -1057,6 +1067,7 @@ err_free_cache: static const struct of_device_id sun50i_iommu_dt[] = { { .compatible = "allwinner,sun50i-h6-iommu", }, + { .compatible = "allwinner,sun50i-h616-iommu", }, { /* sentinel */ }, }; MODULE_DEVICE_TABLE(of, sun50i_iommu_dt); diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index f86c7ae91814..7f633bb5efef 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -837,7 +837,7 @@ static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, const struct iommu_ops *ops = smmu->iommu.ops; int err; - err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops); + err = iommu_fwspec_init(dev, dev_fwnode(smmu->dev)); if (err < 0) { dev_err(dev, "failed to initialize fwspec: %d\n", err); return err; diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 8530550cf5df..0fe47d51013c 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -1635,10 +1635,10 @@ static int ath10k_fw_init(struct ath10k *ar) ar_snoc->fw.dev = &pdev->dev; - iommu_dom = iommu_domain_alloc(&platform_bus_type); - if (!iommu_dom) { + iommu_dom = iommu_paging_domain_alloc(ar_snoc->fw.dev); + if (IS_ERR(iommu_dom)) { ath10k_err(ar, "failed to allocate iommu domain\n"); - ret = -ENOMEM; + ret = PTR_ERR(iommu_dom); goto err_unregister; } diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index e3ff4786c714..634d385fd9ad 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -1031,10 +1031,10 @@ static int ath11k_ahb_fw_resources_init(struct ath11k_base *ab) ab_ahb->fw.dev = &pdev->dev; - iommu_dom = iommu_domain_alloc(&platform_bus_type); - if (!iommu_dom) { + iommu_dom = iommu_paging_domain_alloc(ab_ahb->fw.dev); + if (IS_ERR(iommu_dom)) { ath11k_err(ab, "failed to allocate iommu domain\n"); - ret = -ENOMEM; + ret = PTR_ERR(iommu_dom); goto err_unregister; } diff --git a/drivers/of/device.c b/drivers/of/device.c index 312c63361211..edf3be197265 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -96,8 +96,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, const struct bus_dma_region *map = NULL; struct device_node *bus_np; u64 mask, end = 0; - bool coherent; - int iommu_ret; + bool coherent, set_map = false; int ret; if (np == dev->of_node) @@ -118,6 +117,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, } else { /* Determine the overall bounds of all DMA regions */ end = dma_range_map_max(map); + set_map = true; } /* @@ -144,7 +144,7 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; /* ...but only set bus limit and range map if we found valid dma-ranges earlier */ - if (!ret) { + if (set_map) { dev->bus_dma_limit = end; dev->dma_range_map = map; } @@ -153,29 +153,21 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, dev_dbg(dev, "device is%sdma coherent\n", coherent ? " " : " not "); - iommu_ret = of_iommu_configure(dev, np, id); - if (iommu_ret == -EPROBE_DEFER) { + ret = of_iommu_configure(dev, np, id); + if (ret == -EPROBE_DEFER) { /* Don't touch range map if it wasn't set from a valid dma-ranges */ - if (!ret) + if (set_map) dev->dma_range_map = NULL; kfree(map); return -EPROBE_DEFER; - } else if (iommu_ret == -ENODEV) { - dev_dbg(dev, "device is not behind an iommu\n"); - } else if (iommu_ret) { - dev_err(dev, "iommu configuration for device failed with %pe\n", - ERR_PTR(iommu_ret)); - - /* - * Historically this routine doesn't fail driver probing - * due to errors in of_iommu_configure() - */ - } else - dev_dbg(dev, "device is behind an iommu\n"); + } + /* Take all other IOMMU errors to mean we'll just carry on without it */ + dev_dbg(dev, "device is%sbehind an iommu\n", + !ret ? " " : " not "); arch_setup_dma_ops(dev, coherent); - if (iommu_ret) + if (ret) of_dma_set_restricted_buffer(dev, np); return 0; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 3a0218171cfa..0960699e7554 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -2135,7 +2135,7 @@ static int vfio_iommu_domain_alloc(struct device *dev, void *data) { struct iommu_domain **domain = data; - *domain = iommu_domain_alloc(dev->bus); + *domain = iommu_paging_domain_alloc(dev); return 1; /* Don't iterate */ } @@ -2192,11 +2192,12 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, * us a representative device for the IOMMU API call. We don't actually * want to iterate beyond the first device (if any). */ - ret = -EIO; iommu_group_for_each_dev(iommu_group, &domain->domain, vfio_iommu_domain_alloc); - if (!domain->domain) + if (IS_ERR(domain->domain)) { + ret = PTR_ERR(domain->domain); goto out_free_domain; + } if (iommu->nesting) { ret = iommu_enable_nesting(domain->domain); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 63a53680a85c..e31ec9ebc4ce 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1312,26 +1312,24 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v) struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; struct device *dma_dev = vdpa_get_dma_dev(vdpa); - const struct bus_type *bus; int ret; /* Device want to do DMA by itself */ if (ops->set_map || ops->dma_map) return 0; - bus = dma_dev->bus; - if (!bus) - return -EFAULT; - if (!device_iommu_capable(dma_dev, IOMMU_CAP_CACHE_COHERENCY)) { dev_warn_once(&v->dev, "Failed to allocate domain, device is not IOMMU cache coherent capable\n"); return -ENOTSUPP; } - v->domain = iommu_domain_alloc(bus); - if (!v->domain) - return -EIO; + v->domain = iommu_paging_domain_alloc(dma_dev); + if (IS_ERR(v->domain)) { + ret = PTR_ERR(v->domain); + v->domain = NULL; + return ret; + } ret = iommu_attach_device(v->domain, dma_dev); if (ret) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 05ca49478537..5d148343628a 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -737,8 +737,7 @@ struct iommu_ops; bool acpi_dma_supported(const struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); int acpi_iommu_fwspec_init(struct device *dev, u32 id, - struct fwnode_handle *fwnode, - const struct iommu_ops *ops); + struct fwnode_handle *fwnode); int acpi_dma_get_range(struct device *dev, const struct bus_dma_region **map); int acpi_dma_configure_id(struct device *dev, enum dev_dma_attr attr, const u32 *input_id); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 86cf1f7ae389..f9a81761bfce 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -85,6 +85,8 @@ struct io_pgtable_cfg { * * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability * attributes set in the TCR for a non-coherent page-table walker. + * + * IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable. */ #define IO_PGTABLE_QUIRK_ARM_NS BIT(0) #define IO_PGTABLE_QUIRK_NO_PERMS BIT(1) @@ -92,6 +94,7 @@ struct io_pgtable_cfg { #define IO_PGTABLE_QUIRK_ARM_MTK_TTBR_EXT BIT(4) #define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5) #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6) + #define IO_PGTABLE_QUIRK_ARM_HD BIT(7) unsigned long quirks; unsigned long pgsize_bitmap; unsigned int ias; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 73bc3aee95a1..4d47f2c33311 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -321,6 +321,9 @@ enum iommu_dev_features { #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; +/* Read but do not clear any dirty bits */ +#define IOMMU_DIRTY_NO_CLEAR (1 << 0) + #ifdef CONFIG_IOMMU_API /** @@ -357,9 +360,6 @@ struct iommu_dirty_bitmap { struct iommu_iotlb_gather *gather; }; -/* Read but do not clear any dirty bits */ -#define IOMMU_DIRTY_NO_CLEAR (1 << 0) - /** * struct iommu_dirty_ops - domain specific dirty tracking operations * @set_dirty_tracking: Enable or Disable dirty tracking on the iommu domain @@ -789,6 +789,7 @@ extern bool iommu_present(const struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); extern bool iommu_group_has_isolated_msi(struct iommu_group *group); extern struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus); +struct iommu_domain *iommu_paging_domain_alloc(struct device *dev); extern void iommu_domain_free(struct iommu_domain *domain); extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); @@ -977,7 +978,6 @@ extern struct iommu_group *generic_single_device_group(struct device *dev); /** * struct iommu_fwspec - per-device IOMMU instance data - * @ops: ops for this device's IOMMU * @iommu_fwnode: firmware handle for this device's IOMMU * @flags: IOMMU_FWSPEC_* flags * @num_ids: number of associated device IDs @@ -988,7 +988,6 @@ extern struct iommu_group *generic_single_device_group(struct device *dev); * consumers. */ struct iommu_fwspec { - const struct iommu_ops *ops; struct fwnode_handle *iommu_fwnode; u32 flags; unsigned int num_ids; @@ -1022,11 +1021,9 @@ struct iommu_mm_data { struct list_head sva_domains; }; -int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops); +int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode); void iommu_fwspec_free(struct device *dev); int iommu_fwspec_add_ids(struct device *dev, const u32 *ids, int num_ids); -const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode); static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev) { @@ -1101,6 +1098,11 @@ static inline struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus return NULL; } +static inline struct iommu_domain *iommu_paging_domain_alloc(struct device *dev) +{ + return ERR_PTR(-ENODEV); +} + static inline void iommu_domain_free(struct iommu_domain *domain) { } @@ -1330,8 +1332,7 @@ static inline void iommu_device_unlink(struct device *dev, struct device *link) } static inline int iommu_fwspec_init(struct device *dev, - struct fwnode_handle *iommu_fwnode, - const struct iommu_ops *ops) + struct fwnode_handle *iommu_fwnode) { return -ENODEV; } @@ -1346,12 +1347,6 @@ static inline int iommu_fwspec_add_ids(struct device *dev, u32 *ids, return -ENODEV; } -static inline -const struct iommu_ops *iommu_ops_from_fwnode(const struct fwnode_handle *fwnode) -{ - return NULL; -} - static inline int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features feat) { @@ -1536,8 +1531,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm); #else static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1562,12 +1555,6 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) } static inline void mm_pasid_drop(struct mm_struct *mm) {} - -static inline struct iommu_domain * -iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) -{ - return NULL; -} #endif /* CONFIG_IOMMU_SVA */ #ifdef CONFIG_IOMMU_IOPF