PCI: iommu: iotlb flushing
This patch batches up the flushing of the IOTLB for the DMAR implementation found in Intel VT-d hardware. It works by building a list of to-be-flushed IOTLB entries and a bitmap of which DMAR engines they came from. Once either the high-water mark is exceeded (250 entries by default, adjustable via debugfs) or 10ms have elapsed, the queued iovas are reclaimed and the associated DMAR engines are IOTLB-flushed. This approach recovers 15 to 20% of the performance lost when using the IOMMU for my netperf UDP stream benchmark with small packets. Batching can be disabled with the kernel boot parameter "intel_iommu=strict". Its use does weaken the IOMMU protections a bit, because an unmapped address may remain translatable through a stale IOTLB entry until the deferred flush runs. Signed-off-by: Mark Gross <mgross@linux.intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
This commit is contained in:
parent
0255f543d9
commit
5e0d2a6fc0
@ -847,6 +847,10 @@ and is between 256 and 4096 characters. It is defined in the file
|
|||||||
than 32 bit addressing. The default is to look
|
than 32 bit addressing. The default is to look
|
||||||
for translation below 32 bit and if not available
|
for translation below 32 bit and if not available
|
||||||
then look in the higher range.
|
then look in the higher range.
|
||||||
|
strict [Default Off]
|
||||||
|
With this option on every unmap_single operation will
|
||||||
|
result in a hardware IOTLB flush operation as opposed
|
||||||
|
to batching them for performance.
|
||||||
|
|
||||||
io_delay= [X86-32,X86-64] I/O delay method
|
io_delay= [X86-32,X86-64] I/O delay method
|
||||||
0x80
|
0x80
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
#include <linux/init.h>
|
#include <linux/init.h>
|
||||||
#include <linux/bitmap.h>
|
#include <linux/bitmap.h>
|
||||||
|
#include <linux/debugfs.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/irq.h>
|
#include <linux/irq.h>
|
||||||
#include <linux/interrupt.h>
|
#include <linux/interrupt.h>
|
||||||
@ -31,6 +32,7 @@
|
|||||||
#include <linux/dmar.h>
|
#include <linux/dmar.h>
|
||||||
#include <linux/dma-mapping.h>
|
#include <linux/dma-mapping.h>
|
||||||
#include <linux/mempool.h>
|
#include <linux/mempool.h>
|
||||||
|
#include <linux/timer.h>
|
||||||
#include "iova.h"
|
#include "iova.h"
|
||||||
#include "intel-iommu.h"
|
#include "intel-iommu.h"
|
||||||
#include <asm/proto.h> /* force_iommu in this header in x86-64*/
|
#include <asm/proto.h> /* force_iommu in this header in x86-64*/
|
||||||
@ -51,11 +53,32 @@
|
|||||||
|
|
||||||
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
|
#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
|
||||||
|
|
||||||
|
|
||||||
|
static void flush_unmaps_timeout(unsigned long data);
|
||||||
|
|
||||||
|
DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
|
||||||
|
|
||||||
|
static struct intel_iommu *g_iommus;
|
||||||
|
/* bitmap for indexing intel_iommus */
|
||||||
|
static unsigned long *g_iommus_to_flush;
|
||||||
|
static int g_num_of_iommus;
|
||||||
|
|
||||||
|
static DEFINE_SPINLOCK(async_umap_flush_lock);
|
||||||
|
static LIST_HEAD(unmaps_to_do);
|
||||||
|
|
||||||
|
static int timer_on;
|
||||||
|
static long list_size;
|
||||||
|
static int high_watermark;
|
||||||
|
|
||||||
|
static struct dentry *intel_iommu_debug, *debug;
|
||||||
|
|
||||||
|
|
||||||
static void domain_remove_dev_info(struct dmar_domain *domain);
|
static void domain_remove_dev_info(struct dmar_domain *domain);
|
||||||
|
|
||||||
static int dmar_disabled;
|
static int dmar_disabled;
|
||||||
static int __initdata dmar_map_gfx = 1;
|
static int __initdata dmar_map_gfx = 1;
|
||||||
static int dmar_forcedac;
|
static int dmar_forcedac;
|
||||||
|
static int intel_iommu_strict;
|
||||||
|
|
||||||
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
|
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
|
||||||
static DEFINE_SPINLOCK(device_domain_lock);
|
static DEFINE_SPINLOCK(device_domain_lock);
|
||||||
@ -74,9 +97,13 @@ static int __init intel_iommu_setup(char *str)
|
|||||||
printk(KERN_INFO
|
printk(KERN_INFO
|
||||||
"Intel-IOMMU: disable GFX device mapping\n");
|
"Intel-IOMMU: disable GFX device mapping\n");
|
||||||
} else if (!strncmp(str, "forcedac", 8)) {
|
} else if (!strncmp(str, "forcedac", 8)) {
|
||||||
printk (KERN_INFO
|
printk(KERN_INFO
|
||||||
"Intel-IOMMU: Forcing DAC for PCI devices\n");
|
"Intel-IOMMU: Forcing DAC for PCI devices\n");
|
||||||
dmar_forcedac = 1;
|
dmar_forcedac = 1;
|
||||||
|
} else if (!strncmp(str, "strict", 6)) {
|
||||||
|
printk(KERN_INFO
|
||||||
|
"Intel-IOMMU: disable batched IOTLB flush\n");
|
||||||
|
intel_iommu_strict = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
str += strcspn(str, ",");
|
str += strcspn(str, ",");
|
||||||
@ -966,17 +993,13 @@ static int iommu_init_domains(struct intel_iommu *iommu)
|
|||||||
set_bit(0, iommu->domain_ids);
|
set_bit(0, iommu->domain_ids);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
|
||||||
static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
|
struct dmar_drhd_unit *drhd)
|
||||||
{
|
{
|
||||||
struct intel_iommu *iommu;
|
|
||||||
int ret;
|
int ret;
|
||||||
int map_size;
|
int map_size;
|
||||||
u32 ver;
|
u32 ver;
|
||||||
|
|
||||||
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
|
|
||||||
if (!iommu)
|
|
||||||
return NULL;
|
|
||||||
iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
|
iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
|
||||||
if (!iommu->reg) {
|
if (!iommu->reg) {
|
||||||
printk(KERN_ERR "IOMMU: can't map the region\n");
|
printk(KERN_ERR "IOMMU: can't map the region\n");
|
||||||
@ -1404,7 +1427,7 @@ static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
|
|||||||
int index;
|
int index;
|
||||||
|
|
||||||
while (dev) {
|
while (dev) {
|
||||||
for (index = 0; index < cnt; index ++)
|
for (index = 0; index < cnt; index++)
|
||||||
if (dev == devices[index])
|
if (dev == devices[index])
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@ -1669,7 +1692,7 @@ int __init init_dmars(void)
|
|||||||
struct dmar_rmrr_unit *rmrr;
|
struct dmar_rmrr_unit *rmrr;
|
||||||
struct pci_dev *pdev;
|
struct pci_dev *pdev;
|
||||||
struct intel_iommu *iommu;
|
struct intel_iommu *iommu;
|
||||||
int ret, unit = 0;
|
int nlongs, i, ret, unit = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* for each drhd
|
* for each drhd
|
||||||
@ -1680,7 +1703,35 @@ int __init init_dmars(void)
|
|||||||
for_each_drhd_unit(drhd) {
|
for_each_drhd_unit(drhd) {
|
||||||
if (drhd->ignored)
|
if (drhd->ignored)
|
||||||
continue;
|
continue;
|
||||||
iommu = alloc_iommu(drhd);
|
g_num_of_iommus++;
|
||||||
|
/*
|
||||||
|
* lock not needed as this is only incremented in the single
|
||||||
|
* threaded kernel __init code path all other access are read
|
||||||
|
* only
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
nlongs = BITS_TO_LONGS(g_num_of_iommus);
|
||||||
|
g_iommus_to_flush = kzalloc(nlongs * sizeof(unsigned long), GFP_KERNEL);
|
||||||
|
if (!g_iommus_to_flush) {
|
||||||
|
printk(KERN_ERR "Intel-IOMMU: "
|
||||||
|
"Allocating bitmap array failed\n");
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
|
||||||
|
if (!g_iommus) {
|
||||||
|
kfree(g_iommus_to_flush);
|
||||||
|
ret = -ENOMEM;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
i = 0;
|
||||||
|
for_each_drhd_unit(drhd) {
|
||||||
|
if (drhd->ignored)
|
||||||
|
continue;
|
||||||
|
iommu = alloc_iommu(&g_iommus[i], drhd);
|
||||||
|
i++;
|
||||||
if (!iommu) {
|
if (!iommu) {
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
goto error;
|
goto error;
|
||||||
@ -1713,7 +1764,6 @@ int __init init_dmars(void)
|
|||||||
* endfor
|
* endfor
|
||||||
*/
|
*/
|
||||||
for_each_rmrr_units(rmrr) {
|
for_each_rmrr_units(rmrr) {
|
||||||
int i;
|
|
||||||
for (i = 0; i < rmrr->devices_cnt; i++) {
|
for (i = 0; i < rmrr->devices_cnt; i++) {
|
||||||
pdev = rmrr->devices[i];
|
pdev = rmrr->devices[i];
|
||||||
/* some BIOS lists non-exist devices in DMAR table */
|
/* some BIOS lists non-exist devices in DMAR table */
|
||||||
@ -1769,6 +1819,7 @@ error:
|
|||||||
iommu = drhd->iommu;
|
iommu = drhd->iommu;
|
||||||
free_iommu(iommu);
|
free_iommu(iommu);
|
||||||
}
|
}
|
||||||
|
kfree(g_iommus);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1917,6 +1968,53 @@ error:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void flush_unmaps(void)
|
||||||
|
{
|
||||||
|
struct iova *node, *n;
|
||||||
|
unsigned long flags;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&async_umap_flush_lock, flags);
|
||||||
|
timer_on = 0;
|
||||||
|
|
||||||
|
/* just flush them all */
|
||||||
|
for (i = 0; i < g_num_of_iommus; i++) {
|
||||||
|
if (test_and_clear_bit(i, g_iommus_to_flush))
|
||||||
|
iommu_flush_iotlb_global(&g_iommus[i], 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
list_for_each_entry_safe(node, n, &unmaps_to_do, list) {
|
||||||
|
/* free iova */
|
||||||
|
list_del(&node->list);
|
||||||
|
__free_iova(&((struct dmar_domain *)node->dmar)->iovad, node);
|
||||||
|
|
||||||
|
}
|
||||||
|
list_size = 0;
|
||||||
|
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void flush_unmaps_timeout(unsigned long data)
|
||||||
|
{
|
||||||
|
flush_unmaps();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void add_unmap(struct dmar_domain *dom, struct iova *iova)
|
||||||
|
{
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&async_umap_flush_lock, flags);
|
||||||
|
iova->dmar = dom;
|
||||||
|
list_add(&iova->list, &unmaps_to_do);
|
||||||
|
set_bit((dom->iommu - g_iommus), g_iommus_to_flush);
|
||||||
|
|
||||||
|
if (!timer_on) {
|
||||||
|
mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
|
||||||
|
timer_on = 1;
|
||||||
|
}
|
||||||
|
list_size++;
|
||||||
|
spin_unlock_irqrestore(&async_umap_flush_lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
|
static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
|
||||||
size_t size, int dir)
|
size_t size, int dir)
|
||||||
{
|
{
|
||||||
@ -1944,13 +2042,21 @@ static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
|
|||||||
dma_pte_clear_range(domain, start_addr, start_addr + size);
|
dma_pte_clear_range(domain, start_addr, start_addr + size);
|
||||||
/* free page tables */
|
/* free page tables */
|
||||||
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
|
dma_pte_free_pagetable(domain, start_addr, start_addr + size);
|
||||||
|
if (intel_iommu_strict) {
|
||||||
if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
|
if (iommu_flush_iotlb_psi(domain->iommu,
|
||||||
size >> PAGE_SHIFT_4K, 0))
|
domain->id, start_addr, size >> PAGE_SHIFT_4K, 0))
|
||||||
iommu_flush_write_buffer(domain->iommu);
|
iommu_flush_write_buffer(domain->iommu);
|
||||||
|
/* free iova */
|
||||||
/* free iova */
|
__free_iova(&domain->iovad, iova);
|
||||||
__free_iova(&domain->iovad, iova);
|
} else {
|
||||||
|
add_unmap(domain, iova);
|
||||||
|
/*
|
||||||
|
* queue up the release of the unmap to save the 1/6th of the
|
||||||
|
* cpu used up by the iotlb flush operation...
|
||||||
|
*/
|
||||||
|
if (list_size > high_watermark)
|
||||||
|
flush_unmaps();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void * intel_alloc_coherent(struct device *hwdev, size_t size,
|
static void * intel_alloc_coherent(struct device *hwdev, size_t size,
|
||||||
@ -2274,6 +2380,10 @@ int __init intel_iommu_init(void)
|
|||||||
if (dmar_table_init())
|
if (dmar_table_init())
|
||||||
return -ENODEV;
|
return -ENODEV;
|
||||||
|
|
||||||
|
high_watermark = 250;
|
||||||
|
intel_iommu_debug = debugfs_create_dir("intel_iommu", NULL);
|
||||||
|
debug = debugfs_create_u32("high_watermark", S_IWUGO | S_IRUGO,
|
||||||
|
intel_iommu_debug, &high_watermark);
|
||||||
iommu_init_mempool();
|
iommu_init_mempool();
|
||||||
dmar_init_reserved_ranges();
|
dmar_init_reserved_ranges();
|
||||||
|
|
||||||
@ -2289,6 +2399,7 @@ int __init intel_iommu_init(void)
|
|||||||
printk(KERN_INFO
|
printk(KERN_INFO
|
||||||
"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
|
"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
|
||||||
|
|
||||||
|
init_timer(&unmap_timer);
|
||||||
force_iommu = 1;
|
force_iommu = 1;
|
||||||
dma_ops = &intel_dma_ops;
|
dma_ops = &intel_dma_ops;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -24,6 +24,8 @@ struct iova {
|
|||||||
struct rb_node node;
|
struct rb_node node;
|
||||||
unsigned long pfn_hi; /* IOMMU dish out addr hi */
|
unsigned long pfn_hi; /* IOMMU dish out addr hi */
|
||||||
unsigned long pfn_lo; /* IOMMU dish out addr lo */
|
unsigned long pfn_lo; /* IOMMU dish out addr lo */
|
||||||
|
struct list_head list;
|
||||||
|
void *dmar;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* holds all the iova translations for a domain */
|
/* holds all the iova translations for a domain */
|
||||||
|
Loading…
Reference in New Issue
Block a user