VFIO updates for v5.10-rc1

- New fsl-mc vfio bus driver supporting userspace drivers of objects
    within NXP's DPAA2 architecture (Diana Craciun)
 
  - Support for exposing zPCI information on s390 (Matthew Rosato)
 
  - Fixes for "detached" VFs on s390 (Matthew Rosato)
 
  - Fixes for pin-pages and dma-rw accesses (Yan Zhao)
 
  - Cleanups and optimize vconfig regen (Zenghui Yu)
 
  - Fix duplicate irq-bypass token registration (Alex Williamson)
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.14 (GNU/Linux)
 
 iQIcBAABAgAGBQJfkcCjAAoJECObm247sIsi2XIP/j7NL4glPrWU37mesz9dd5nx
 SmZhcmxnOqZSQkOCnu+hNFZ9e+tdQjuX+jATOZaYz5l55bLAFmBlBj1Dv8HWaCVI
 mTbJ6xXUwdOvNSxbFH6BIUkJg8otR0iEkefVyJLNlF84FsaDknH4yZxx0vdeczjF
 wTkkk3+4VmH+4klvPIa9v0eL7yeKeFmgls9nQViVE5kDWUF4us/z/oHlVm9wR+mL
 2r3DEjHyz4L2hwVEkhZk7ytR6szdhuhF2l7NoMmaSEXRXjBzJoO6I3P9Y2W4i+su
 MFgTfiQ+OpIfVuiR8GzGev+/SrjWGX0Hvb2sYriKOELjhyedkE2kmxacbqMZ/UE+
 SRAhFf64C1rzJ4g1IW//Gg+9ObIPqlkqU52VDbOZdCED0AquwSyVmdwIUAK6qF+I
 HLOyZXhMI8EZ+w063cS+aKLJIvQTBbfIdMmPZkopVZhwWB3N3BjdvBKA+rPpPoTx
 0DpeUo891+zyeEE4aunUmCB8HFnBPgUa+XZqg2juq9MxjScsqgTzA0WEZg7jV4oj
 tORQrqoAKJgSk9oVL3EvAnr+IJix3ScRTqYymESORkz/lRCk2hFX48qdeW+qiSP8
 W1DHOnivFb1+JzhuZyaRKFWy1mK0EQQWTsE2b2ymPMKJbFhi+pVxaksmeG5x+4Q9
 SAp+Qma8Aj3UtBKcj/S+
 =LDPo
 -----END PGP SIGNATURE-----

Merge tag 'vfio-v5.10-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

 - New fsl-mc vfio bus driver supporting userspace drivers of objects
   within NXP's DPAA2 architecture (Diana Craciun)

 - Support for exposing zPCI information on s390 (Matthew Rosato)

 - Fixes for "detached" VFs on s390 (Matthew Rosato)

 - Fixes for pin-pages and dma-rw accesses (Yan Zhao)

 - Cleanups and optimize vconfig regen (Zenghui Yu)

 - Fix duplicate irq-bypass token registration (Alex Williamson)

* tag 'vfio-v5.10-rc1' of git://github.com/awilliam/linux-vfio: (30 commits)
  vfio iommu type1: Fix memory leak in vfio_iommu_type1_pin_pages
  vfio/pci: Clear token on bypass registration failure
  vfio/fsl-mc: fix the return of the uninitialized variable ret
  vfio/fsl-mc: Fix the dead code in vfio_fsl_mc_set_irq_trigger
  vfio/fsl-mc: Fixed vfio-fsl-mc driver compilation on 32 bit
  MAINTAINERS: Add entry for s390 vfio-pci
  vfio-pci/zdev: Add zPCI capabilities to VFIO_DEVICE_GET_INFO
  vfio/fsl-mc: Add support for device reset
  vfio/fsl-mc: Add read/write support for fsl-mc devices
  vfio/fsl-mc: trigger an interrupt via eventfd
  vfio/fsl-mc: Add irq infrastructure for fsl-mc devices
  vfio/fsl-mc: Added lock support in preparation for interrupt handling
  vfio/fsl-mc: Allow userspace to MMAP fsl-mc device MMIO regions
  vfio/fsl-mc: Implement VFIO_DEVICE_GET_REGION_INFO ioctl call
  vfio/fsl-mc: Implement VFIO_DEVICE_GET_INFO ioctl
  vfio/fsl-mc: Scan DPRC objects on vfio-fsl-mc driver bind
  vfio: Introduce capability definitions for VFIO_DEVICE_GET_INFO
  s390/pci: track whether util_str is valid in the zpci_dev
  s390/pci: stash version in the zpci_dev
  vfio/fsl-mc: Add VFIO framework skeleton for fsl-mc devices
  ...
This commit is contained in:
Linus Torvalds 2020-10-22 13:00:44 -07:00
commit fc996db970
24 changed files with 1330 additions and 20 deletions

View File

@ -15287,6 +15287,14 @@ F: Documentation/s390/vfio-ccw.rst
F: drivers/s390/cio/vfio_ccw*
F: include/uapi/linux/vfio_ccw.h
S390 VFIO-PCI DRIVER
M: Matthew Rosato <mjrosato@linux.ibm.com>
L: linux-s390@vger.kernel.org
L: kvm@vger.kernel.org
S: Supported
F: drivers/vfio/pci/vfio_pci_zdev.c
F: include/uapi/linux/vfio_zdev.h
S390 ZCRYPT DRIVER
M: Harald Freudenberger <freude@linux.ibm.com>
L: linux-s390@vger.kernel.org
@ -18385,6 +18393,12 @@ F: drivers/vfio/
F: include/linux/vfio.h
F: include/uapi/linux/vfio.h
VFIO FSL-MC DRIVER
M: Diana Craciun <diana.craciun@oss.nxp.com>
L: kvm@vger.kernel.org
S: Maintained
F: drivers/vfio/fsl-mc/
VFIO MEDIATED DEVICE DRIVERS
M: Kirti Wankhede <kwankhede@nvidia.com>
L: kvm@vger.kernel.org

View File

@ -132,7 +132,8 @@ struct zpci_dev {
u8 rid_available : 1;
u8 has_hp_slot : 1;
u8 is_physfn : 1;
u8 reserved : 5;
u8 util_str_avail : 1;
u8 reserved : 4;
unsigned int devfn; /* DEVFN part of the RID*/
struct mutex lock;
@ -179,6 +180,7 @@ struct zpci_dev {
atomic64_t mapped_pages;
atomic64_t unmapped_pages;
u8 version;
enum pci_bus_speed max_bus_speed;
struct dentry *debugfs_dev;

View File

@ -135,9 +135,10 @@ void pcibios_bus_add_device(struct pci_dev *pdev)
* With pdev->no_vf_scan the common PCI probing code does not
* perform PF/VF linking.
*/
if (zdev->vfn)
if (zdev->vfn) {
zpci_iov_setup_virtfn(zdev->zbus, pdev, zdev->vfn);
pdev->no_command_memory = 1;
}
}
static int zpci_bus_add_device(struct zpci_bus *zbus, struct zpci_dev *zdev)

View File

@ -102,6 +102,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev,
zdev->msi_addr = response->msia;
zdev->max_msi = response->noi;
zdev->fmb_update = response->mui;
zdev->version = response->version;
switch (response->version) {
case 1:
@ -167,6 +168,7 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
if (response->util_str_avail) {
memcpy(zdev->util_str, response->util_str,
sizeof(zdev->util_str));
zdev->util_str_avail = 1;
}
zdev->mio_capable = response->mio_addr_avail;
for (i = 0; i < PCI_STD_NUM_BARS; i++) {

View File

@ -180,6 +180,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id)
virtfn->device = iov->vf_device;
virtfn->is_virtfn = 1;
virtfn->physfn = pci_dev_get(dev);
virtfn->no_command_memory = 1;
if (id == 0)
pci_read_vf_config_common(virtfn);

View File

@ -47,4 +47,5 @@ menuconfig VFIO_NOIOMMU
source "drivers/vfio/pci/Kconfig"
source "drivers/vfio/platform/Kconfig"
source "drivers/vfio/mdev/Kconfig"
source "drivers/vfio/fsl-mc/Kconfig"
source "virt/lib/Kconfig"

View File

@ -9,3 +9,4 @@ obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
obj-$(CONFIG_VFIO_PCI) += pci/
obj-$(CONFIG_VFIO_PLATFORM) += platform/
obj-$(CONFIG_VFIO_MDEV) += mdev/
obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/

View File

@ -0,0 +1,9 @@
config VFIO_FSL_MC
tristate "VFIO support for QorIQ DPAA2 fsl-mc bus devices"
depends on VFIO && FSL_MC_BUS && EVENTFD
help
Driver to enable support for the VFIO QorIQ DPAA2 fsl-mc
(Management Complex) devices. This is required to passthrough
fsl-mc bus devices using the VFIO framework.
If you don't know what to do here, say N.

View File

@ -0,0 +1,4 @@
# SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
vfio-fsl-mc-y := vfio_fsl_mc.o vfio_fsl_mc_intr.o
obj-$(CONFIG_VFIO_FSL_MC) += vfio-fsl-mc.o

View File

@ -0,0 +1,683 @@
// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
/*
* Copyright 2013-2016 Freescale Semiconductor Inc.
* Copyright 2016-2017,2019-2020 NXP
*/
#include <linux/device.h>
#include <linux/iommu.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/vfio.h>
#include <linux/fsl/mc.h>
#include <linux/delay.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include "vfio_fsl_mc_private.h"
static struct fsl_mc_driver vfio_fsl_mc_driver;
static DEFINE_MUTEX(reflck_lock);
/* Take one more reference on @reflck. */
static void vfio_fsl_mc_reflck_get(struct vfio_fsl_mc_reflck *reflck)
{
	struct kref *ref = &reflck->kref;

	kref_get(ref);
}
/*
 * kref release callback handed to kref_put_mutex() by
 * vfio_fsl_mc_reflck_put(): tears down and frees the reflck object, then
 * drops the global reflck_lock.
 */
static void vfio_fsl_mc_reflck_release(struct kref *kref)
{
	struct vfio_fsl_mc_reflck *reflck;

	reflck = container_of(kref, struct vfio_fsl_mc_reflck, kref);

	mutex_destroy(&reflck->lock);
	kfree(reflck);

	mutex_unlock(&reflck_lock);
}
/*
 * Drop a reference on @reflck; vfio_fsl_mc_reflck_release() is the release
 * callback and reflck_lock is the mutex passed to kref_put_mutex().
 */
static void vfio_fsl_mc_reflck_put(struct vfio_fsl_mc_reflck *reflck)
{
	struct kref *ref = &reflck->kref;

	kref_put_mutex(ref, vfio_fsl_mc_reflck_release, &reflck_lock);
}
/*
 * Allocate a zeroed reflck object with its kref and mutex initialized.
 *
 * Return: the new object, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
static struct vfio_fsl_mc_reflck *vfio_fsl_mc_reflck_alloc(void)
{
	struct vfio_fsl_mc_reflck *new_reflck = kzalloc(sizeof(*new_reflck),
							 GFP_KERNEL);

	if (new_reflck) {
		kref_init(&new_reflck->kref);
		mutex_init(&new_reflck->lock);
		return new_reflck;
	}

	return ERR_PTR(-ENOMEM);
}
/*
 * Attach a reflck object to @vdev, under the global reflck_lock.
 *
 * A DPRC device gets a freshly allocated reflck.  Any other device looks up
 * the VFIO device of its parent and takes a reference on the parent's reflck.
 *
 * Return: 0 on success, the allocation error for a DPRC, or -ENODEV when the
 * parent has no VFIO device, no device data or no reflck.
 */
static int vfio_fsl_mc_reflck_attach(struct vfio_fsl_mc_device *vdev)
{
	struct vfio_fsl_mc_device *cont_vdev;
	struct vfio_device *device;
	int ret = 0;

	mutex_lock(&reflck_lock);

	if (is_fsl_mc_bus_dprc(vdev->mc_dev)) {
		vdev->reflck = vfio_fsl_mc_reflck_alloc();
		ret = PTR_ERR_OR_ZERO(vdev->reflck);
		goto unlock;
	}

	device = vfio_device_get_from_dev(vdev->mc_dev->dev.parent);
	if (!device) {
		ret = -ENODEV;
		goto unlock;
	}

	cont_vdev = vfio_device_data(device);
	if (cont_vdev && cont_vdev->reflck) {
		vfio_fsl_mc_reflck_get(cont_vdev->reflck);
		vdev->reflck = cont_vdev->reflck;
	} else {
		ret = -ENODEV;
	}

	/* The lookup reference on the parent is dropped on every path. */
	vfio_device_put(device);

unlock:
	mutex_unlock(&reflck_lock);
	return ret;
}
static int vfio_fsl_mc_regions_init(struct vfio_fsl_mc_device *vdev)
{
struct fsl_mc_device *mc_dev = vdev->mc_dev;
int count = mc_dev->obj_desc.region_count;
int i;
vdev->regions = kcalloc(count, sizeof(struct vfio_fsl_mc_region),
GFP_KERNEL);
if (!vdev->regions)
return -ENOMEM;
for (i = 0; i < count; i++) {
struct resource *res = &mc_dev->regions[i];
int no_mmap = is_fsl_mc_bus_dprc(mc_dev);
vdev->regions[i].addr = res->start;
vdev->regions[i].size = resource_size(res);
vdev->regions[i].type = mc_dev->regions[i].flags & IORESOURCE_BITS;
/*
* Only regions addressed with PAGE granularity may be
* MMAPed securely.
*/
if (!no_mmap && !(vdev->regions[i].addr & ~PAGE_MASK) &&
!(vdev->regions[i].size & ~PAGE_MASK))
vdev->regions[i].flags |=
VFIO_REGION_INFO_FLAG_MMAP;
vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_READ;
if (!(mc_dev->regions[i].flags & IORESOURCE_READONLY))
vdev->regions[i].flags |= VFIO_REGION_INFO_FLAG_WRITE;
}
return 0;
}
/* Unmap every region's ioaddr and free the vdev->regions[] array. */
static void vfio_fsl_mc_regions_cleanup(struct vfio_fsl_mc_device *vdev)
{
	struct fsl_mc_device *mc_dev = vdev->mc_dev;
	int idx = 0;

	while (idx < mc_dev->obj_desc.region_count) {
		iounmap(vdev->regions[idx].ioaddr);
		idx++;
	}

	kfree(vdev->regions);
}
/*
 * vfio_device_ops.open callback.
 *
 * Pins the module and, on the first open (refcnt == 0), sets up the region
 * array.  refcnt is updated under the shared reflck lock.
 *
 * Return: 0 on success, -ENODEV if the module reference cannot be taken, or
 * the error from vfio_fsl_mc_regions_init().
 */
static int vfio_fsl_mc_open(void *device_data)
{
	struct vfio_fsl_mc_device *vdev = device_data;
	int ret = 0;

	if (!try_module_get(THIS_MODULE))
		return -ENODEV;

	mutex_lock(&vdev->reflck->lock);
	if (vdev->refcnt == 0)
		ret = vfio_fsl_mc_regions_init(vdev);
	if (!ret)
		vdev->refcnt++;
	mutex_unlock(&vdev->reflck->lock);

	/* Undo the module reference when the open did not succeed. */
	if (ret)
		module_put(THIS_MODULE);

	return ret;
}
/*
 * vfio_device_ops.release callback.
 *
 * Drops one open reference under the shared reflck lock.  When the last
 * reference goes away the regions are torn down, the container is reset,
 * the interrupts are cleaned up and the container's irq pool is released.
 * The module reference taken in vfio_fsl_mc_open() is dropped on every call.
 */
static void vfio_fsl_mc_release(void *device_data)
{
	struct vfio_fsl_mc_device *vdev = device_data;
	int ret;

	mutex_lock(&vdev->reflck->lock);

	if (!(--vdev->refcnt)) {
		struct fsl_mc_device *mc_dev = vdev->mc_dev;
		struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev);
		struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev);

		vfio_fsl_mc_regions_cleanup(vdev);

		/* reset the device before cleaning up the interrupts */
		ret = dprc_reset_container(mc_cont->mc_io, 0,
					   mc_cont->mc_handle,
					   mc_cont->obj_desc.id,
					   DPRC_RESET_OPTION_NON_RECURSIVE);

		if (ret) {
			/* Prefix spelled like every other message in this driver. */
			dev_warn(&mc_cont->dev, "VFIO_FSL_MC: reset device has failed (%d)\n",
				 ret);
			WARN_ON(1);
		}

		vfio_fsl_mc_irqs_cleanup(vdev);

		fsl_mc_cleanup_irq_pool(mc_cont);
	}

	mutex_unlock(&vdev->reflck->lock);

	module_put(THIS_MODULE);
}
static long vfio_fsl_mc_ioctl(void *device_data, unsigned int cmd,
unsigned long arg)
{
unsigned long minsz;
struct vfio_fsl_mc_device *vdev = device_data;
struct fsl_mc_device *mc_dev = vdev->mc_dev;
switch (cmd) {
case VFIO_DEVICE_GET_INFO:
{
struct vfio_device_info info;
minsz = offsetofend(struct vfio_device_info, num_irqs);
if (copy_from_user(&info, (void __user *)arg, minsz))
return -EFAULT;
if (info.argsz < minsz)
return -EINVAL;
info.flags = VFIO_DEVICE_FLAGS_FSL_MC;
if (is_fsl_mc_bus_dprc(mc_dev))
info.flags |= VFIO_DEVICE_FLAGS_RESET;
info.num_regions = mc_dev->obj_desc.region_count;
info.num_irqs = mc_dev->obj_desc.irq_count;
return copy_to_user((void __user *)arg, &info, minsz) ?
-EFAULT : 0;
}
case VFIO_DEVICE_GET_REGION_INFO:
{
struct vfio_region_info info;
minsz = offsetofend(struct vfio_region_info, offset);
if (copy_from_user(&info, (void __user *)arg, minsz))
return -EFAULT;
if (info.argsz < minsz)
return -EINVAL;
if (info.index >= mc_dev->obj_desc.region_count)
return -EINVAL;
/* map offset to the physical address */
info.offset = VFIO_FSL_MC_INDEX_TO_OFFSET(info.index);
info.size = vdev->regions[info.index].size;
info.flags = vdev->regions[info.index].flags;
return copy_to_user((void __user *)arg, &info, minsz);
}
case VFIO_DEVICE_GET_IRQ_INFO:
{
struct vfio_irq_info info;
minsz = offsetofend(struct vfio_irq_info, count);
if (copy_from_user(&info, (void __user *)arg, minsz))
return -EFAULT;
if (info.argsz < minsz)
return -EINVAL;
if (info.index >= mc_dev->obj_desc.irq_count)
return -EINVAL;
info.flags = VFIO_IRQ_INFO_EVENTFD;
info.count = 1;
return copy_to_user((void __user *)arg, &info, minsz);
}
case VFIO_DEVICE_SET_IRQS:
{
struct vfio_irq_set hdr;
u8 *data = NULL;
int ret = 0;
size_t data_size = 0;
minsz = offsetofend(struct vfio_irq_set, count);
if (copy_from_user(&hdr, (void __user *)arg, minsz))
return -EFAULT;
ret = vfio_set_irqs_validate_and_prepare(&hdr, mc_dev->obj_desc.irq_count,
mc_dev->obj_desc.irq_count, &data_size);
if (ret)
return ret;
if (data_size) {
data = memdup_user((void __user *)(arg + minsz),
data_size);
if (IS_ERR(data))
return PTR_ERR(data);
}
mutex_lock(&vdev->igate);
ret = vfio_fsl_mc_set_irqs_ioctl(vdev, hdr.flags,
hdr.index, hdr.start,
hdr.count, data);
mutex_unlock(&vdev->igate);
kfree(data);
return ret;
}
case VFIO_DEVICE_RESET:
{
int ret;
struct fsl_mc_device *mc_dev = vdev->mc_dev;
/* reset is supported only for the DPRC */
if (!is_fsl_mc_bus_dprc(mc_dev))
return -ENOTTY;
ret = dprc_reset_container(mc_dev->mc_io, 0,
mc_dev->mc_handle,
mc_dev->obj_desc.id,
DPRC_RESET_OPTION_NON_RECURSIVE);
return ret;
}
default:
return -ENOTTY;
}
}
/*
 * vfio_device_ops.read callback.
 *
 * Only a single access shape is accepted: exactly 64 bytes at offset 0 of a
 * readable region.  The region is ioremap'ed on first use and its eight
 * 64-bit words are read from the highest word down to word 0.
 *
 * Return: @count (64) on success, -EINVAL for a bad region, size or offset,
 * -ENOMEM if the mapping fails, -EFAULT if the copy to @buf fails.
 */
static ssize_t vfio_fsl_mc_read(void *device_data, char __user *buf,
				size_t count, loff_t *ppos)
{
	struct vfio_fsl_mc_device *vdev = device_data;
	struct fsl_mc_device *mc_dev = vdev->mc_dev;
	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
	struct vfio_fsl_mc_region *region;
	u64 data[8];
	int word;

	if (index >= mc_dev->obj_desc.region_count)
		return -EINVAL;

	region = &vdev->regions[index];
	if ((region->flags & VFIO_REGION_INFO_FLAG_READ) == 0)
		return -EINVAL;

	/* Map lazily; the mapping is kept in the region for later accesses. */
	if (region->ioaddr == NULL) {
		region->ioaddr = ioremap(region->addr, region->size);
		if (region->ioaddr == NULL)
			return -ENOMEM;
	}

	if (off != 0 || count != sizeof(data))
		return -EINVAL;

	word = 8;
	while (word-- > 0)
		data[word] = readq(region->ioaddr + word * sizeof(uint64_t));

	if (copy_to_user(buf, data, sizeof(data)))
		return -EFAULT;

	return count;
}
/* Total time to wait for a command to complete, and the polling step. */
#define MC_CMD_COMPLETION_TIMEOUT_MS 5000
#define MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS 500

/*
 * Write an eight-word command into the portal at @ioaddr and poll the header
 * word until its status field is no longer MC_CMD_STATUS_READY.
 *
 * Return: 0 once the status changes, -ETIMEDOUT if it is still
 * MC_CMD_STATUS_READY after MC_CMD_COMPLETION_TIMEOUT_MS.
 */
static int vfio_fsl_mc_send_command(void __iomem *ioaddr, uint64_t *cmd_data)
{
	unsigned long remaining_usecs = MC_CMD_COMPLETION_TIMEOUT_MS * 1000;
	int word = 8;

	/* Write the command parameters (words 7..1) into the portal */
	while (--word >= 1)
		writeq_relaxed(cmd_data[word], ioaddr + word * sizeof(uint64_t));

	/* Write command header in the end */
	writeq(cmd_data[0], ioaddr);

	/* Wait for response before returning to user-space
	 * This can be optimized in future to even prepare response
	 * before returning to user-space and avoid read ioctl.
	 */
	while (remaining_usecs != 0) {
		struct mc_cmd_header *resp_hdr;
		u64 header;

		header = cpu_to_le64(readq_relaxed(ioaddr));
		resp_hdr = (struct mc_cmd_header *)&header;

		if ((enum mc_cmd_status)resp_hdr->status != MC_CMD_STATUS_READY)
			return 0;

		udelay(MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS);
		remaining_usecs -= MC_CMD_COMPLETION_POLLING_MAX_SLEEP_USECS;
	}

	return -ETIMEDOUT;
}
/*
 * vfio_device_ops.write callback.
 *
 * Accepts exactly 64 bytes at offset 0 of a writable region, maps the region
 * on first use, and forwards the eight words to vfio_fsl_mc_send_command().
 *
 * Return: @count (64) on success, -EINVAL for a bad region, size or offset,
 * -ENOMEM if the mapping fails, -EFAULT if @buf cannot be read, or the error
 * from vfio_fsl_mc_send_command().
 */
static ssize_t vfio_fsl_mc_write(void *device_data, const char __user *buf,
				 size_t count, loff_t *ppos)
{
	struct vfio_fsl_mc_device *vdev = device_data;
	struct fsl_mc_device *mc_dev = vdev->mc_dev;
	unsigned int index = VFIO_FSL_MC_OFFSET_TO_INDEX(*ppos);
	loff_t off = *ppos & VFIO_FSL_MC_OFFSET_MASK;
	struct vfio_fsl_mc_region *region;
	u64 data[8];
	int err;

	if (index >= mc_dev->obj_desc.region_count)
		return -EINVAL;

	region = &vdev->regions[index];
	if ((region->flags & VFIO_REGION_INFO_FLAG_WRITE) == 0)
		return -EINVAL;

	/* Map lazily; the mapping is kept in the region for later accesses. */
	if (region->ioaddr == NULL) {
		region->ioaddr = ioremap(region->addr, region->size);
		if (region->ioaddr == NULL)
			return -ENOMEM;
	}

	if (off != 0 || count != sizeof(data))
		return -EINVAL;

	if (copy_from_user(&data, buf, sizeof(data)))
		return -EFAULT;

	err = vfio_fsl_mc_send_command(region->ioaddr, data);

	return err ? err : count;
}
/*
 * Map part of @region into @vma.
 *
 * The page offset inside the region is taken from the low bits of
 * vma->vm_pgoff (below VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT).  The mapping
 * is made non-cached unless the region type carries both
 * FSL_MC_REGION_CACHEABLE and FSL_MC_REGION_SHAREABLE.
 *
 * Return: -EINVAL if the region is smaller than a page or the request runs
 * past its end, otherwise the result of remap_pfn_range().
 */
static int vfio_fsl_mc_mmap_mmio(struct vfio_fsl_mc_region region,
				 struct vm_area_struct *vma)
{
	u64 len = vma->vm_end - vma->vm_start;
	u64 page_off, byte_off;

	page_off = vma->vm_pgoff &
		((1U << (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
	byte_off = page_off << PAGE_SHIFT;

	if (region.size < PAGE_SIZE)
		return -EINVAL;
	if (byte_off + len > region.size)
		return -EINVAL;

	if (!((region.type & FSL_MC_REGION_CACHEABLE) &&
	      (region.type & FSL_MC_REGION_SHAREABLE)))
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	/* Rewrite vm_pgoff to the physical page frame being mapped. */
	vma->vm_pgoff = (region.addr >> PAGE_SHIFT) + page_off;

	return remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
			       len, vma->vm_page_prot);
}
/*
 * vfio_device_ops.mmap callback.
 *
 * Decodes the region index from the upper bits of vma->vm_pgoff, validates
 * the request and hands it to vfio_fsl_mc_mmap_mmio().
 *
 * Return: -EINVAL when the VMA is malformed or not shared, the index is out
 * of range, the region is not mmap-able, or the requested access mode is not
 * permitted by the region flags; otherwise the result of
 * vfio_fsl_mc_mmap_mmio().
 */
static int vfio_fsl_mc_mmap(void *device_data, struct vm_area_struct *vma)
{
	struct vfio_fsl_mc_device *vdev = device_data;
	struct fsl_mc_device *mc_dev = vdev->mc_dev;
	/*
	 * Unsigned on purpose: with a signed index a large vm_pgoff would
	 * turn negative, slip past the range check below and index
	 * vdev->regions[] out of bounds.
	 */
	unsigned int index;

	index = vma->vm_pgoff >> (VFIO_FSL_MC_OFFSET_SHIFT - PAGE_SHIFT);

	if (vma->vm_end < vma->vm_start)
		return -EINVAL;
	if (vma->vm_start & ~PAGE_MASK)
		return -EINVAL;
	if (vma->vm_end & ~PAGE_MASK)
		return -EINVAL;
	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;
	if (index >= mc_dev->obj_desc.region_count)
		return -EINVAL;

	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_MMAP))
		return -EINVAL;

	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_READ)
			&& (vma->vm_flags & VM_READ))
		return -EINVAL;

	if (!(vdev->regions[index].flags & VFIO_REGION_INFO_FLAG_WRITE)
			&& (vma->vm_flags & VM_WRITE))
		return -EINVAL;

	vma->vm_private_data = mc_dev;

	return vfio_fsl_mc_mmap_mmio(vdev->regions[index], vma);
}
/* VFIO device callbacks implemented by this driver. */
static const struct vfio_device_ops vfio_fsl_mc_ops = {
	.name		= "vfio-fsl-mc",
	.open		= vfio_fsl_mc_open,
	.release	= vfio_fsl_mc_release,
	.ioctl		= vfio_fsl_mc_ioctl,
	.read		= vfio_fsl_mc_read,
	.write		= vfio_fsl_mc_write,
	.mmap		= vfio_fsl_mc_mmap,
};
/*
 * fsl-mc bus notifier registered per DPRC in vfio_fsl_mc_init_device().
 *
 * Only events for devices whose parent is this vdev's DPRC are handled:
 *  - BUS_NOTIFY_ADD_DEVICE: set the new device's driver_override to the
 *    vfio-fsl-mc driver name and log whether that succeeded.
 *  - BUS_NOTIFY_BOUND_DRIVER: warn if the device was bound to a driver other
 *    than vfio-fsl-mc.
 *
 * Always returns 0.
 */
static int vfio_fsl_mc_bus_notifier(struct notifier_block *nb,
				    unsigned long action, void *data)
{
	struct vfio_fsl_mc_device *vdev = container_of(nb,
					struct vfio_fsl_mc_device, nb);
	struct device *dev = data;
	struct fsl_mc_device *mc_dev = to_fsl_mc_device(dev);
	struct fsl_mc_device *mc_cont = to_fsl_mc_device(mc_dev->dev.parent);

	if (vdev->mc_dev != mc_cont)
		return 0;

	switch (action) {
	case BUS_NOTIFY_ADD_DEVICE:
		mc_dev->driver_override = kasprintf(GFP_KERNEL, "%s",
						    vfio_fsl_mc_ops.name);
		if (mc_dev->driver_override)
			dev_info(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s\n",
				 dev_name(&mc_cont->dev));
		else
			dev_warn(dev, "VFIO_FSL_MC: Setting driver override for device in dprc %s failed\n",
				 dev_name(&mc_cont->dev));
		break;
	case BUS_NOTIFY_BOUND_DRIVER: {
		struct fsl_mc_driver *mc_drv = to_fsl_mc_driver(dev->driver);

		if (mc_drv && mc_drv != &vfio_fsl_mc_driver)
			dev_warn(dev, "VFIO_FSL_MC: Object %s bound to driver %s while DPRC bound to vfio-fsl-mc\n",
				 dev_name(dev), mc_drv->driver.name);
		break;
	}
	default:
		break;
	}

	return 0;
}
/*
 * Device-type specific part of probe.
 *
 * A non-DPRC device only borrows mc_io from its parent.  A DPRC registers
 * the bus notifier, is set up with dprc_setup() and has its container
 * scanned; each step is undone if a later one fails.
 *
 * Return: 0 on success or the error of the failing step.
 */
static int vfio_fsl_mc_init_device(struct vfio_fsl_mc_device *vdev)
{
	struct fsl_mc_device *mc_dev = vdev->mc_dev;
	int err;

	/* Non-dprc devices share mc_io from parent */
	if (!is_fsl_mc_bus_dprc(mc_dev)) {
		struct fsl_mc_device *parent = to_fsl_mc_device(mc_dev->dev.parent);

		mc_dev->mc_io = parent->mc_io;
		return 0;
	}

	vdev->nb.notifier_call = vfio_fsl_mc_bus_notifier;
	err = bus_register_notifier(&fsl_mc_bus_type, &vdev->nb);
	if (err)
		return err;

	/* open DPRC, allocate a MC portal */
	err = dprc_setup(mc_dev);
	if (err) {
		dev_err(&mc_dev->dev, "VFIO_FSL_MC: Failed to setup DPRC (%d)\n", err);
		goto unregister_nb;
	}

	err = dprc_scan_container(mc_dev, false);
	if (!err)
		return 0;

	dev_err(&mc_dev->dev, "VFIO_FSL_MC: Container scanning failed (%d)\n", err);
	dprc_remove_devices(mc_dev, NULL, 0);
	dprc_cleanup(mc_dev);

unregister_nb:
	bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
	/* vfio_fsl_mc_remove() tests notifier_call before unregistering. */
	vdev->nb.notifier_call = NULL;
	return err;
}
/*
 * fsl_mc_driver.probe callback.
 *
 * Takes the device's IOMMU group, allocates the per-device state, registers
 * the device with VFIO, attaches the shared reflck and runs the
 * device-type specific initialization.  Every step is unwound on failure.
 *
 * Return: 0 on success, -EINVAL if the device has no IOMMU group, -ENOMEM
 * on allocation failure, or the error of the failing step.
 */
static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
{
	struct iommu_group *group;
	struct vfio_fsl_mc_device *vdev;
	struct device *dev = &mc_dev->dev;
	int ret;

	group = vfio_iommu_group_get(dev);
	if (!group) {
		dev_err(dev, "VFIO_FSL_MC: No IOMMU group\n");
		return -EINVAL;
	}

	vdev = devm_kzalloc(dev, sizeof(*vdev), GFP_KERNEL);
	if (!vdev) {
		ret = -ENOMEM;
		goto out_group_put;
	}

	vdev->mc_dev = mc_dev;

	/*
	 * igate is locked by the VFIO_DEVICE_SET_IRQS path of
	 * vfio_fsl_mc_ioctl(), so it has to be usable before the device
	 * and its callbacks are handed to VFIO below.
	 */
	mutex_init(&vdev->igate);

	ret = vfio_add_group_dev(dev, &vfio_fsl_mc_ops, vdev);
	if (ret) {
		dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
		goto out_igate;
	}

	ret = vfio_fsl_mc_reflck_attach(vdev);
	if (ret)
		goto out_group_dev;

	ret = vfio_fsl_mc_init_device(vdev);
	if (ret)
		goto out_reflck;

	return 0;

out_reflck:
	vfio_fsl_mc_reflck_put(vdev->reflck);
out_group_dev:
	vfio_del_group_dev(dev);
out_igate:
	mutex_destroy(&vdev->igate);
out_group_put:
	vfio_iommu_group_put(group, dev);
	return ret;
}
/*
 * fsl_mc_driver.remove callback: unregisters the device from VFIO and undoes
 * what probe set up (igate, reflck reference, DPRC state, bus notifier and
 * IOMMU group reference).
 *
 * Return: 0 on success, -EINVAL if VFIO holds no data for the device.
 */
static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
{
	struct device *dev = &mc_dev->dev;
	struct vfio_fsl_mc_device *vdev = vfio_del_group_dev(dev);

	if (vdev == NULL)
		return -EINVAL;

	mutex_destroy(&vdev->igate);

	vfio_fsl_mc_reflck_put(vdev->reflck);

	if (is_fsl_mc_bus_dprc(mc_dev)) {
		dprc_remove_devices(mc_dev, NULL, 0);
		dprc_cleanup(mc_dev);
	}

	/* notifier_call is only set when the notifier is registered. */
	if (vdev->nb.notifier_call != NULL)
		bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);

	vfio_iommu_group_put(mc_dev->dev.iommu_group, dev);

	return 0;
}
/* fsl-mc bus driver definition; registered in vfio_fsl_mc_driver_init(). */
static struct fsl_mc_driver vfio_fsl_mc_driver = {
	.probe		= vfio_fsl_mc_probe,
	.remove		= vfio_fsl_mc_remove,
	.driver		= {
		.name	= "vfio-fsl-mc",
		.owner	= THIS_MODULE,
	},
};
/* Module entry point: register the driver with the fsl-mc bus. */
static int __init vfio_fsl_mc_driver_init(void)
{
	int ret = fsl_mc_driver_register(&vfio_fsl_mc_driver);

	return ret;
}
/* Module exit point: unregister the driver from the fsl-mc bus. */
static void __exit vfio_fsl_mc_driver_exit(void)
{
	struct fsl_mc_driver *drv = &vfio_fsl_mc_driver;

	fsl_mc_driver_unregister(drv);
}
module_init(vfio_fsl_mc_driver_init);
module_exit(vfio_fsl_mc_driver_exit);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("VFIO for FSL-MC devices - User Level meta-driver");

View File

@ -0,0 +1,194 @@
// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
/*
* Copyright 2013-2016 Freescale Semiconductor Inc.
* Copyright 2019 NXP
*/
#include <linux/vfio.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/eventfd.h>
#include <linux/msi.h>
#include "linux/fsl/mc.h"
#include "vfio_fsl_mc_private.h"
int vfio_fsl_mc_irqs_allocate(struct vfio_fsl_mc_device *vdev)
{
struct fsl_mc_device *mc_dev = vdev->mc_dev;
struct vfio_fsl_mc_irq *mc_irq;
int irq_count;
int ret, i;
/* Device does not support any interrupt */
if (mc_dev->obj_desc.irq_count == 0)
return 0;
/* interrupts were already allocated for this device */
if (vdev->mc_irqs)
return 0;
irq_count = mc_dev->obj_desc.irq_count;
mc_irq = kcalloc(irq_count, sizeof(*mc_irq), GFP_KERNEL);
if (!mc_irq)
return -ENOMEM;
/* Allocate IRQs */
ret = fsl_mc_allocate_irqs(mc_dev);
if (ret) {
kfree(mc_irq);
return ret;
}
for (i = 0; i < irq_count; i++) {
mc_irq[i].count = 1;
mc_irq[i].flags = VFIO_IRQ_INFO_EVENTFD;
}
vdev->mc_irqs = mc_irq;
return 0;
}
/*
 * Interrupt handler: signal the eventfd attached to the interrupt.
 * @arg is the struct vfio_fsl_mc_irq passed to request_irq().
 */
static irqreturn_t vfio_fsl_mc_irq_handler(int irq_num, void *arg)
{
	struct vfio_fsl_mc_irq *mc_irq = arg;
	struct eventfd_ctx *ctx = mc_irq->trigger;

	eventfd_signal(ctx, 1);

	return IRQ_HANDLED;
}
/*
 * Replace the eventfd trigger of interrupt @index.
 *
 * Any existing trigger is torn down first (irq freed, name freed, eventfd
 * reference dropped).  With @fd < 0 that is all; otherwise the eventfd
 * behind @fd becomes the new trigger and the irq is requested with
 * vfio_fsl_mc_irq_handler().
 *
 * Return: 0 on success, -ENOMEM if the irq name cannot be allocated, or the
 * error from eventfd_ctx_fdget() / request_irq().
 */
static int vfio_set_trigger(struct vfio_fsl_mc_device *vdev,
			    int index, int fd)
{
	struct vfio_fsl_mc_irq *irq = &vdev->mc_irqs[index];
	struct eventfd_ctx *ctx;
	int hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq;
	int err;

	if (irq->trigger) {
		free_irq(hwirq, irq);
		kfree(irq->name);
		eventfd_ctx_put(irq->trigger);
		irq->trigger = NULL;
	}

	if (fd < 0) /* Disable only */
		return 0;

	irq->name = kasprintf(GFP_KERNEL, "vfio-irq[%d](%s)",
			      hwirq, dev_name(&vdev->mc_dev->dev));
	if (!irq->name)
		return -ENOMEM;

	ctx = eventfd_ctx_fdget(fd);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto free_name;
	}

	/* The handler reads irq->trigger, so publish it before request_irq(). */
	irq->trigger = ctx;

	err = request_irq(hwirq, vfio_fsl_mc_irq_handler, 0,
			  irq->name, irq);
	if (!err)
		return 0;

	eventfd_ctx_put(ctx);
	irq->trigger = NULL;
free_name:
	kfree(irq->name);
	return err;
}
/*
 * Handle VFIO_IRQ_SET_ACTION_TRIGGER for interrupt @index.
 *
 *  - count == 0 with VFIO_IRQ_SET_DATA_NONE disables the trigger.
 *  - Otherwise only start == 0, count == 1 is accepted.  The container's irq
 *    pool and the device's interrupts are set up on demand, then:
 *      DATA_EVENTFD installs the eventfd given in @data as the trigger;
 *      DATA_NONE fires the current trigger once;
 *      DATA_BOOL fires it once if the byte in @data is non-zero.
 *
 * Return: 0 on success, -EINVAL for an unsupported start/count, or the error
 * from the irq pool / irq allocation / vfio_set_trigger().
 */
static int vfio_fsl_mc_set_irq_trigger(struct vfio_fsl_mc_device *vdev,
				       unsigned int index, unsigned int start,
				       unsigned int count, u32 flags,
				       void *data)
{
	struct fsl_mc_device *mc_dev = vdev->mc_dev;
	int ret, hwirq;
	struct vfio_fsl_mc_irq *irq;
	struct device *cont_dev = fsl_mc_cont_dev(&mc_dev->dev);
	struct fsl_mc_device *mc_cont = to_fsl_mc_device(cont_dev);

	if (!count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
		/*
		 * Nothing was ever configured, so there is nothing to
		 * disable.  vfio_set_trigger() would dereference the
		 * unallocated irq arrays (same guard as in
		 * vfio_fsl_mc_irqs_cleanup()).
		 */
		if (!vdev->mc_irqs)
			return 0;

		return vfio_set_trigger(vdev, index, -1);
	}

	if (start != 0 || count != 1)
		return -EINVAL;

	mutex_lock(&vdev->reflck->lock);
	ret = fsl_mc_populate_irq_pool(mc_cont,
			FSL_MC_IRQ_POOL_MAX_TOTAL_IRQS);
	if (ret)
		goto unlock;

	ret = vfio_fsl_mc_irqs_allocate(vdev);
	if (ret)
		goto unlock;
	mutex_unlock(&vdev->reflck->lock);

	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
		s32 fd = *(s32 *)data;

		return vfio_set_trigger(vdev, index, fd);
	}

	hwirq = vdev->mc_dev->irqs[index]->msi_desc->irq;

	irq = &vdev->mc_irqs[index];

	/*
	 * Loopback triggering: irq->trigger is NULL until an eventfd has
	 * been installed (and again after it is disabled), and the handler
	 * passes it straight to eventfd_signal(), so only call the handler
	 * when a trigger exists.
	 */
	if (flags & VFIO_IRQ_SET_DATA_NONE) {
		if (irq->trigger)
			vfio_fsl_mc_irq_handler(hwirq, irq);

	} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
		u8 trigger = *(u8 *)data;

		if (trigger && irq->trigger)
			vfio_fsl_mc_irq_handler(hwirq, irq);
	}

	return 0;

unlock:
	mutex_unlock(&vdev->reflck->lock);
	return ret;

}
/*
 * Dispatch a VFIO_DEVICE_SET_IRQS request.  Only the
 * VFIO_IRQ_SET_ACTION_TRIGGER action is implemented.
 *
 * Return: the result of vfio_fsl_mc_set_irq_trigger(), or -EINVAL for any
 * other action.
 */
int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
			       u32 flags, unsigned int index,
			       unsigned int start, unsigned int count,
			       void *data)
{
	if (!(flags & VFIO_IRQ_SET_ACTION_TRIGGER))
		return -EINVAL;

	return vfio_fsl_mc_set_irq_trigger(vdev, index, start,
					   count, flags, data);
}
/*
 * Release all interrupt state of the given MC object: disable every
 * trigger, free the fsl-mc interrupts and drop the per-interrupt array.
 */
void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev)
{
	struct fsl_mc_device *mc_dev = vdev->mc_dev;
	int nr_irqs = mc_dev->obj_desc.irq_count;
	int idx = 0;

	/*
	 * Device does not support any interrupt or the interrupts
	 * were not configured
	 */
	if (vdev->mc_irqs == NULL)
		return;

	while (idx < nr_irqs) {
		vfio_set_trigger(vdev, idx, -1);
		idx++;
	}

	fsl_mc_free_irqs(mc_dev);
	kfree(vdev->mc_irqs);
	vdev->mc_irqs = NULL;
}

View File

@ -0,0 +1,55 @@
/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
/*
* Copyright 2013-2016 Freescale Semiconductor Inc.
* Copyright 2016,2019-2020 NXP
*/
#ifndef VFIO_FSL_MC_PRIVATE_H
#define VFIO_FSL_MC_PRIVATE_H
/*
 * A region is selected through the device file offset: the region index sits
 * in the bits at and above VFIO_FSL_MC_OFFSET_SHIFT, the offset inside the
 * region in the bits below it.
 */
#define VFIO_FSL_MC_OFFSET_SHIFT 40
/* Mask selecting the within-region part of a file offset. */
#define VFIO_FSL_MC_OFFSET_MASK (((u64)(1) << VFIO_FSL_MC_OFFSET_SHIFT) - 1)
/* Extract the region index from a file offset. */
#define VFIO_FSL_MC_OFFSET_TO_INDEX(off) ((off) >> VFIO_FSL_MC_OFFSET_SHIFT)
/* File offset at which region 'index' starts. */
#define VFIO_FSL_MC_INDEX_TO_OFFSET(index) \
((u64)(index) << VFIO_FSL_MC_OFFSET_SHIFT)
/* Per-interrupt state; one entry per device interrupt in vdev->mc_irqs[]. */
struct vfio_fsl_mc_irq {
/* VFIO_IRQ_INFO_* flags; set to VFIO_IRQ_INFO_EVENTFD at allocation */
u32 flags;
/* set to 1 at allocation */
u32 count;
/* eventfd signalled by the interrupt handler; NULL when no trigger is set */
struct eventfd_ctx *trigger;
/* kasprintf()-allocated name passed to request_irq() */
char *name;
};
/*
 * Reference-counted lock object.  A DPRC allocates one; the other devices
 * take a reference on the one owned by their parent.
 */
struct vfio_fsl_mc_reflck {
/* reference count; the object is freed when it drops to zero */
struct kref kref;
/* held around refcnt updates in open/release and around irq allocation */
struct mutex lock;
};
/* Description of one device region as exposed to userspace. */
struct vfio_fsl_mc_region {
/* VFIO_REGION_INFO_FLAG_* (READ / WRITE / MMAP) */
u32 flags;
/* resource flags masked with IORESOURCE_BITS */
u32 type;
/* start address of the underlying resource */
u64 addr;
/* size of the underlying resource */
resource_size_t size;
/* ioremap() mapping, created on first read/write; NULL until then */
void __iomem *ioaddr;
};
/* Per-device driver state, registered with VFIO as the device data. */
struct vfio_fsl_mc_device {
/* underlying fsl-mc bus device */
struct fsl_mc_device *mc_dev;
/* fsl-mc bus notifier; only registered for DPRC devices */
struct notifier_block nb;
/* number of opens; updated under reflck->lock */
int refcnt;
/* region array, built on first open and freed on last release */
struct vfio_fsl_mc_region *regions;
/* lock object shared with the parent DPRC */
struct vfio_fsl_mc_reflck *reflck;
/* held around the VFIO_DEVICE_SET_IRQS handling */
struct mutex igate;
/* per-interrupt state array; NULL until interrupts are allocated */
struct vfio_fsl_mc_irq *mc_irqs;
};
extern int vfio_fsl_mc_set_irqs_ioctl(struct vfio_fsl_mc_device *vdev,
u32 flags, unsigned int index,
unsigned int start, unsigned int count,
void *data);
void vfio_fsl_mc_irqs_cleanup(struct vfio_fsl_mc_device *vdev);
#endif /* VFIO_FSL_MC_PRIVATE_H */

View File

@ -45,3 +45,15 @@ config VFIO_PCI_NVLINK2
depends on VFIO_PCI && PPC_POWERNV
help
VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
config VFIO_PCI_ZDEV
bool "VFIO PCI ZPCI device CLP support"
depends on VFIO_PCI && S390
default y
help
Enabling this option exposes VFIO capabilities containing hardware
configuration for zPCI devices. This enables userspace (e.g. QEMU)
to supply proper configuration values instead of hard-coded defaults
for zPCI devices passed through via VFIO on s390.
Say Y here.

View File

@ -3,5 +3,6 @@
vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o

View File

@ -807,15 +807,25 @@ static long vfio_pci_ioctl(void *device_data,
if (cmd == VFIO_DEVICE_GET_INFO) {
struct vfio_device_info info;
struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
unsigned long capsz;
minsz = offsetofend(struct vfio_device_info, num_irqs);
/* For backward compatibility, cannot require this */
capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
if (copy_from_user(&info, (void __user *)arg, minsz))
return -EFAULT;
if (info.argsz < minsz)
return -EINVAL;
if (info.argsz >= capsz) {
minsz = capsz;
info.cap_offset = 0;
}
info.flags = VFIO_DEVICE_FLAGS_PCI;
if (vdev->reset_works)
@ -824,6 +834,33 @@ static long vfio_pci_ioctl(void *device_data,
info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
info.num_irqs = VFIO_PCI_NUM_IRQS;
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
int ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
if (ret && ret != -ENODEV) {
pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
return ret;
}
}
if (caps.size) {
info.flags |= VFIO_DEVICE_FLAGS_CAPS;
if (info.argsz < sizeof(info) + caps.size) {
info.argsz = sizeof(info) + caps.size;
} else {
vfio_info_cap_shift(&caps, sizeof(info));
if (copy_to_user((void __user *)arg +
sizeof(info), caps.buf,
caps.size)) {
kfree(caps.buf);
return -EFAULT;
}
info.cap_offset = sizeof(info);
}
kfree(caps.buf);
}
return copy_to_user((void __user *)arg, &info, minsz) ?
-EFAULT : 0;
@ -1860,7 +1897,6 @@ static const struct vfio_device_ops vfio_pci_ops = {
static int vfio_pci_reflck_attach(struct vfio_pci_device *vdev);
static void vfio_pci_reflck_put(struct vfio_pci_reflck *reflck);
static struct pci_driver vfio_pci_driver;
static int vfio_pci_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)

View File

@ -406,7 +406,7 @@ bool __vfio_pci_memory_enabled(struct vfio_pci_device *vdev)
* PF SR-IOV capability, there's therefore no need to trigger
* faults based on the virtual value.
*/
return pdev->is_virtfn || (cmd & PCI_COMMAND_MEMORY);
return pdev->no_command_memory || (cmd & PCI_COMMAND_MEMORY);
}
/*
@ -467,6 +467,9 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev)
__le32 *vbar;
u64 mask;
if (!vdev->bardirty)
return;
vbar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0];
for (i = 0; i < PCI_STD_NUM_BARS; i++, vbar++) {
@ -520,8 +523,8 @@ static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos,
count = vfio_default_config_read(vdev, pos, count, perm, offset, val);
/* Mask in virtual memory enable for SR-IOV devices */
if (offset == PCI_COMMAND && vdev->pdev->is_virtfn) {
/* Mask in virtual memory enable */
if (offset == PCI_COMMAND && vdev->pdev->no_command_memory) {
u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]);
u32 tmp_val = le32_to_cpu(*val);
@ -589,9 +592,11 @@ static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos,
* shows it disabled (phys_mem/io, then the device has
* undergone some kind of backdoor reset and needs to be
* restored before we allow it to enable the bars.
* SR-IOV devices will trigger this, but we catch them later
* SR-IOV devices will trigger this - for mem enable let's
* catch this now and for io enable it will be caught later
*/
if ((new_mem && virt_mem && !phys_mem) ||
if ((new_mem && virt_mem && !phys_mem &&
!pdev->no_command_memory) ||
(new_io && virt_io && !phys_io) ||
vfio_need_bar_restore(vdev))
vfio_bar_restore(vdev);
@ -1734,12 +1739,14 @@ int vfio_config_init(struct vfio_pci_device *vdev)
vconfig[PCI_INTERRUPT_PIN]);
vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */
}
if (pdev->no_command_memory) {
/*
* VFs do no implement the memory enable bit of the COMMAND
* register therefore we'll not have it set in our initial
* copy of config space after pci_enable_device(). For
* consistency with PFs, set the virtual enable bit here.
* VFs and devices that set pdev->no_command_memory do not
* implement the memory enable bit of the COMMAND register
* therefore we'll not have it set in our initial copy of
* config space after pci_enable_device(). For consistency
* with PFs, set the virtual enable bit here.
*/
*(__le16 *)&vconfig[PCI_COMMAND] |=
cpu_to_le16(PCI_COMMAND_MEMORY);

View File

@ -352,11 +352,13 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
vdev->ctx[vector].producer.token = trigger;
vdev->ctx[vector].producer.irq = irq;
ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
if (unlikely(ret))
if (unlikely(ret)) {
dev_info(&pdev->dev,
"irq bypass producer (token %p) registration fails: %d\n",
vdev->ctx[vector].producer.token, ret);
vdev->ctx[vector].producer.token = NULL;
}
vdev->ctx[vector].trigger = trigger;
return 0;

View File

@ -213,4 +213,16 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
return -ENODEV;
}
#endif
/*
 * zPCI capability reporting.  When CONFIG_VFIO_PCI_ZDEV is not set, the
 * fallback below reports -ENODEV instead of adding any capabilities.
 */
#ifdef CONFIG_VFIO_PCI_ZDEV
int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
				struct vfio_info_cap *caps);
#else
static inline int
vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
			    struct vfio_info_cap *caps)
{
	/* zPCI support not built in: nothing to add. */
	return -ENODEV;
}
#endif
#endif /* VFIO_PCI_PRIVATE_H */

View File

@ -0,0 +1,143 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* VFIO ZPCI devices support
*
* Copyright (C) IBM Corp. 2020. All rights reserved.
* Author(s): Pierre Morel <pmorel@linux.ibm.com>
* Matthew Rosato <mjrosato@linux.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
*/
#include <linux/io.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
#include <linux/vfio_zdev.h>
#include <asm/pci_clp.h>
#include <asm/pci_io.h>
#include "vfio_pci_private.h"
/*
* Add the Base PCI Function information to the device info region.
*/
static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
struct vfio_info_cap *caps)
{
struct vfio_device_info_cap_zpci_base cap = {
.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE,
.header.version = 1,
.start_dma = zdev->start_dma,
.end_dma = zdev->end_dma,
.pchid = zdev->pchid,
.vfn = zdev->vfn,
.fmb_length = zdev->fmb_length,
.pft = zdev->pft,
.gid = zdev->pfgid
};
return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
}
/*
* Add the Base PCI Function Group information to the device info region.
*/
static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
struct vfio_info_cap *caps)
{
struct vfio_device_info_cap_zpci_group cap = {
.header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP,
.header.version = 1,
.dasm = zdev->dma_mask,
.msi_addr = zdev->msi_addr,
.flags = VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH,
.mui = zdev->fmb_update,
.noi = zdev->max_msi,
.maxstbl = ZPCI_MAX_WRITE_SIZE,
.version = zdev->version
};
return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
}
/*
* Add the device utility string to the device info region.
*/
static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
struct vfio_info_cap *caps)
{
struct vfio_device_info_cap_zpci_util *cap;
int cap_size = sizeof(*cap) + CLP_UTIL_STR_LEN;
int ret;
cap = kmalloc(cap_size, GFP_KERNEL);
cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL;
cap->header.version = 1;
cap->size = CLP_UTIL_STR_LEN;
memcpy(cap->util_str, zdev->util_str, cap->size);
ret = vfio_info_add_capability(caps, &cap->header, cap_size);
kfree(cap);
return ret;
}
/*
* Add the function path string to the device info region.
*/
static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
struct vfio_info_cap *caps)
{
struct vfio_device_info_cap_zpci_pfip *cap;
int cap_size = sizeof(*cap) + CLP_PFIP_NR_SEGMENTS;
int ret;
cap = kmalloc(cap_size, GFP_KERNEL);
cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP;
cap->header.version = 1;
cap->size = CLP_PFIP_NR_SEGMENTS;
memcpy(cap->pfip, zdev->pfip, cap->size);
ret = vfio_info_add_capability(caps, &cap->header, cap_size);
kfree(cap);
return ret;
}
/*
 * Populate the VFIO_DEVICE_GET_INFO capability chain with every zPCI
 * capability this driver supports.
 *
 * @vdev: vfio-pci device; its pdev is mapped to a zpci_dev via to_zpci()
 * @caps: capability chain being built
 *
 * The base, group and function-path capabilities are always attempted, in
 * that order; the utility-string capability is added between group and
 * function-path only when zdev->util_str_avail is set.  Processing stops at
 * the first helper that returns non-zero.
 *
 * Returns -ENODEV if no zpci_dev is found for the device, otherwise 0 or the
 * first non-zero value returned by a helper.
 */
int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
				struct vfio_info_cap *caps)
{
	struct zpci_dev *zdev = to_zpci(vdev->pdev);
	int rc;

	if (!zdev)
		return -ENODEV;

	rc = zpci_base_cap(zdev, vdev, caps);
	if (!rc)
		rc = zpci_group_cap(zdev, vdev, caps);
	if (!rc && zdev->util_str_avail)
		rc = zpci_util_cap(zdev, vdev, caps);
	if (!rc)
		rc = zpci_pfip_cap(zdev, vdev, caps);

	return rc;
}

View File

@ -1949,8 +1949,10 @@ int vfio_pin_pages(struct device *dev, unsigned long *user_pfn, int npage,
if (!group)
return -ENODEV;
if (group->dev_counter > 1)
return -EINVAL;
if (group->dev_counter > 1) {
ret = -EINVAL;
goto err_pin_pages;
}
ret = vfio_group_add_container_user(group);
if (ret)
@ -2051,6 +2053,9 @@ int vfio_group_pin_pages(struct vfio_group *group,
if (!group || !user_iova_pfn || !phys_pfn || !npage)
return -EINVAL;
if (group->dev_counter > 1)
return -EINVAL;
if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
return -E2BIG;

View File

@ -693,7 +693,8 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
ret = vfio_add_to_pfn_list(dma, iova, phys_pfn[i]);
if (ret) {
vfio_unpin_page_external(dma, iova, do_accounting);
if (put_pfn(phys_pfn[i], dma->prot) && do_accounting)
vfio_lock_acct(dma, -1, true);
goto pin_unwind;
}
@ -2609,6 +2610,20 @@ static int vfio_iommu_migration_build_caps(struct vfio_iommu *iommu,
return vfio_info_add_capability(caps, &cap_mig.header, sizeof(cap_mig));
}
static int vfio_iommu_dma_avail_build_caps(struct vfio_iommu *iommu,
struct vfio_info_cap *caps)
{
struct vfio_iommu_type1_info_dma_avail cap_dma_avail;
cap_dma_avail.header.id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL;
cap_dma_avail.header.version = 1;
cap_dma_avail.avail = iommu->dma_avail;
return vfio_info_add_capability(caps, &cap_dma_avail.header,
sizeof(cap_dma_avail));
}
static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
unsigned long arg)
{
@ -2641,6 +2656,9 @@ static int vfio_iommu_type1_get_info(struct vfio_iommu *iommu,
ret = vfio_iommu_migration_build_caps(iommu, &caps);
if (!ret)
ret = vfio_iommu_dma_avail_build_caps(iommu, &caps);
if (!ret)
ret = vfio_iommu_iova_build_caps(iommu, &caps);
@ -2933,7 +2951,8 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
* size
*/
bitmap_set(dma->bitmap, offset >> pgshift,
*copied >> pgshift);
((offset + *copied - 1) >> pgshift) -
(offset >> pgshift) + 1);
}
} else
*copied = copy_from_user(data, (void __user *)vaddr,

View File

@ -446,6 +446,7 @@ struct pci_dev {
unsigned int is_probed:1; /* Device probing in progress */
unsigned int link_active_reporting:1;/* Device capable of reporting link active */
unsigned int no_vf_scan:1; /* Don't scan for VFs after IOV enablement */
unsigned int no_command_memory:1; /* No PCI_COMMAND_MEMORY */
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */

View File

@ -201,8 +201,11 @@ struct vfio_device_info {
#define VFIO_DEVICE_FLAGS_AMBA (1 << 3) /* vfio-amba device */
#define VFIO_DEVICE_FLAGS_CCW (1 << 4) /* vfio-ccw device */
#define VFIO_DEVICE_FLAGS_AP (1 << 5) /* vfio-ap device */
#define VFIO_DEVICE_FLAGS_FSL_MC (1 << 6) /* vfio-fsl-mc device */
#define VFIO_DEVICE_FLAGS_CAPS (1 << 7) /* Info supports caps */
__u32 num_regions; /* Max region index + 1 */
__u32 num_irqs; /* Max IRQ index + 1 */
__u32 cap_offset; /* Offset within info struct of first cap */
};
#define VFIO_DEVICE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 7)
@ -218,6 +221,15 @@ struct vfio_device_info {
#define VFIO_DEVICE_API_CCW_STRING "vfio-ccw"
#define VFIO_DEVICE_API_AP_STRING "vfio-ap"
/*
 * The following capabilities are unique to s390 zPCI devices.  Their contents
 * are further defined in vfio_zdev.h.
 */
#define VFIO_DEVICE_INFO_CAP_ZPCI_BASE 1 /* Base PCI Function information */
#define VFIO_DEVICE_INFO_CAP_ZPCI_GROUP 2 /* Base PCI Function Group information */
#define VFIO_DEVICE_INFO_CAP_ZPCI_UTIL 3 /* Utility String */
#define VFIO_DEVICE_INFO_CAP_ZPCI_PFIP 4 /* PCI Function Path */
/**
* VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
* struct vfio_region_info)
@ -462,7 +474,7 @@ struct vfio_region_gfx_edid {
* 5. Resumed
* |--------->|
*
* 0. Default state of VFIO device is _RUNNNG when the user application starts.
* 0. Default state of VFIO device is _RUNNING when the user application starts.
* 1. During normal shutdown of the user application, the user application may
* optionally change the VFIO device state from _RUNNING to _STOP. This
* transition is optional. The vendor driver must support this transition but
@ -1039,6 +1051,21 @@ struct vfio_iommu_type1_info_cap_migration {
__u64 max_dirty_bitmap_size; /* in bytes */
};
/*
 * The DMA available capability reports the current number of
 * simultaneously outstanding DMA mappings that are allowed.
 *
 * The structure below defines version 1 of this capability.
 *
 * avail: specifies the current number of outstanding DMA mappings allowed.
 */
#define VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL 3
struct vfio_iommu_type1_info_dma_avail {
struct vfio_info_cap_header header; /* Generic capability header */
__u32 avail; /* Outstanding DMA mappings currently allowed */
};
#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
/**

View File

@ -0,0 +1,78 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* VFIO Region definitions for ZPCI devices
*
* Copyright IBM Corp. 2020
*
* Author(s): Pierre Morel <pmorel@linux.ibm.com>
* Matthew Rosato <mjrosato@linux.ibm.com>
*/
#ifndef _VFIO_ZDEV_H_
#define _VFIO_ZDEV_H_
#include <linux/types.h>
#include <linux/vfio.h>
/**
 * VFIO_DEVICE_INFO_CAP_ZPCI_BASE - Base PCI Function information
 *
 * This capability provides descriptive information about the associated
 * PCI function.
 */
struct vfio_device_info_cap_zpci_base {
struct vfio_info_cap_header header; /* Generic capability header */
__u64 start_dma; /* Start of available DMA addresses */
__u64 end_dma; /* End of available DMA addresses */
__u16 pchid; /* Physical Channel ID */
__u16 vfn; /* Virtual function number */
__u16 fmb_length; /* Measurement Block Length (in bytes) */
__u8 pft; /* PCI Function Type */
__u8 gid; /* PCI function group ID */
};
/**
 * VFIO_DEVICE_INFO_CAP_ZPCI_GROUP - Base PCI Function Group information
 *
 * This capability provides descriptive information about the group of PCI
 * functions that the associated device belongs to.
 */
struct vfio_device_info_cap_zpci_group {
struct vfio_info_cap_header header; /* Generic capability header */
__u64 dasm; /* DMA Address space mask */
__u64 msi_addr; /* MSI address */
__u64 flags; /* VFIO_DEVICE_INFO_ZPCI_FLAG_* values */
#define VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH 1 /* Program-specified TLB refresh */
__u16 mui; /* Measurement Block Update Interval */
__u16 noi; /* Maximum number of MSIs */
__u16 maxstbl; /* Maximum Store Block Length */
__u8 version; /* Supported PCI Version */
};
/**
 * VFIO_DEVICE_INFO_CAP_ZPCI_UTIL - Utility String
 *
 * This capability provides the utility string for the associated device, which
 * is a device identifier string made up of EBCDIC characters. 'size' specifies
 * the length of 'util_str'.
 */
struct vfio_device_info_cap_zpci_util {
struct vfio_info_cap_header header; /* Generic capability header */
__u32 size; /* Length of util_str, in bytes */
__u8 util_str[]; /* Utility string data, 'size' bytes long */
};
/**
 * VFIO_DEVICE_INFO_CAP_ZPCI_PFIP - PCI Function Path
 *
 * This capability provides the PCI function path string, which is an identifier
 * that describes the internal hardware path of the device. 'size' specifies
 * the length of 'pfip'.
 */
struct vfio_device_info_cap_zpci_pfip {
struct vfio_info_cap_header header; /* Generic capability header */
__u32 size; /* Length of pfip, in bytes */
__u8 pfip[]; /* Function path data, 'size' bytes long */
};
#endif