intel-iommu: Fix leaks in pagetable freeing
At best the current code only seems to free the leaf pagetables and the root. If you're unlucky enough to have a large gap (like any QEMU guest with more than 3G of memory), only the first chunk of leaf pagetables are freed (plus the root). This is a massive memory leak. This patch re-writes the pagetable freeing function to use a recursive algorithm and manages to not only free all the pagetables, but does it without any apparent performance loss versus the current broken version. Signed-off-by: Alex Williamson <alex.williamson@redhat.com> Cc: stable@vger.kernel.org Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com> Signed-off-by: Joerg Roedel <joro@8bytes.org>
This commit is contained in:
parent
d4e4ab86bc
commit
3269ee0bd6
@ -890,56 +890,54 @@ static int dma_pte_clear_range(struct dmar_domain *domain,
|
||||
return order;
|
||||
}
|
||||
|
||||
static void dma_pte_free_level(struct dmar_domain *domain, int level,
|
||||
struct dma_pte *pte, unsigned long pfn,
|
||||
unsigned long start_pfn, unsigned long last_pfn)
|
||||
{
|
||||
pfn = max(start_pfn, pfn);
|
||||
pte = &pte[pfn_level_offset(pfn, level)];
|
||||
|
||||
do {
|
||||
unsigned long level_pfn;
|
||||
struct dma_pte *level_pte;
|
||||
|
||||
if (!dma_pte_present(pte) || dma_pte_superpage(pte))
|
||||
goto next;
|
||||
|
||||
level_pfn = pfn & level_mask(level - 1);
|
||||
level_pte = phys_to_virt(dma_pte_addr(pte));
|
||||
|
||||
if (level > 2)
|
||||
dma_pte_free_level(domain, level - 1, level_pte,
|
||||
level_pfn, start_pfn, last_pfn);
|
||||
|
||||
/* If range covers entire pagetable, free it */
|
||||
if (!(start_pfn > level_pfn ||
|
||||
last_pfn < level_pfn + level_size(level))) {
|
||||
dma_clear_pte(pte);
|
||||
domain_flush_cache(domain, pte, sizeof(*pte));
|
||||
free_pgtable_page(level_pte);
|
||||
}
|
||||
next:
|
||||
pfn += level_size(level);
|
||||
} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
|
||||
}
|
||||
|
||||
/* free page table pages. last level pte should already be cleared */
|
||||
static void dma_pte_free_pagetable(struct dmar_domain *domain,
|
||||
unsigned long start_pfn,
|
||||
unsigned long last_pfn)
|
||||
{
|
||||
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
|
||||
struct dma_pte *first_pte, *pte;
|
||||
int total = agaw_to_level(domain->agaw);
|
||||
int level;
|
||||
unsigned long tmp;
|
||||
int large_page = 2;
|
||||
|
||||
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
|
||||
BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
|
||||
BUG_ON(start_pfn > last_pfn);
|
||||
|
||||
/* We don't need lock here; nobody else touches the iova range */
|
||||
level = 2;
|
||||
while (level <= total) {
|
||||
tmp = align_to_level(start_pfn, level);
|
||||
dma_pte_free_level(domain, agaw_to_level(domain->agaw),
|
||||
domain->pgd, 0, start_pfn, last_pfn);
|
||||
|
||||
/* If we can't even clear one PTE at this level, we're done */
|
||||
if (tmp + level_size(level) - 1 > last_pfn)
|
||||
return;
|
||||
|
||||
do {
|
||||
large_page = level;
|
||||
first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
|
||||
if (large_page > level)
|
||||
level = large_page + 1;
|
||||
if (!pte) {
|
||||
tmp = align_to_level(tmp + 1, level + 1);
|
||||
continue;
|
||||
}
|
||||
do {
|
||||
if (dma_pte_present(pte)) {
|
||||
free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
|
||||
dma_clear_pte(pte);
|
||||
}
|
||||
pte++;
|
||||
tmp += level_size(level);
|
||||
} while (!first_pte_in_page(pte) &&
|
||||
tmp + level_size(level) - 1 <= last_pfn);
|
||||
|
||||
domain_flush_cache(domain, first_pte,
|
||||
(void *)pte - (void *)first_pte);
|
||||
|
||||
} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
|
||||
level++;
|
||||
}
|
||||
/* free pgd */
|
||||
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
|
||||
free_pgtable_page(domain->pgd);
|
||||
|
Loading…
Reference in New Issue
Block a user