intel-iommu: Fix leaks in pagetable freeing
At best the current code only seems to free the leaf pagetables and
the root.  If you're unlucky enough to have a large gap (like any
QEMU guest with more than 3G of memory), only the first chunk of leaf
pagetables is freed (plus the root).  This is a massive memory leak.
This patch rewrites the pagetable freeing function to use a recursive
algorithm; it not only frees all the pagetables, but does so with no
apparent performance loss versus the current broken version.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: stable@vger.kernel.org
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Joerg Roedel <joro@8bytes.org>
Alex Williamson authored and Joerg Roedel committed Aug 14, 2013
1 parent d4e4ab8 commit 3269ee0
Showing 1 changed file with 35 additions and 37 deletions.

drivers/iommu/intel-iommu.c
@@ -890,56 +890,54 @@ static int dma_pte_clear_range(struct dmar_domain *domain,
 	return order;
 }
 
+static void dma_pte_free_level(struct dmar_domain *domain, int level,
+			       struct dma_pte *pte, unsigned long pfn,
+			       unsigned long start_pfn, unsigned long last_pfn)
+{
+	pfn = max(start_pfn, pfn);
+	pte = &pte[pfn_level_offset(pfn, level)];
+
+	do {
+		unsigned long level_pfn;
+		struct dma_pte *level_pte;
+
+		if (!dma_pte_present(pte) || dma_pte_superpage(pte))
+			goto next;
+
+		level_pfn = pfn & level_mask(level - 1);
+		level_pte = phys_to_virt(dma_pte_addr(pte));
+
+		if (level > 2)
+			dma_pte_free_level(domain, level - 1, level_pte,
+					   level_pfn, start_pfn, last_pfn);
+
+		/* If range covers entire pagetable, free it */
+		if (!(start_pfn > level_pfn ||
+		      last_pfn < level_pfn + level_size(level))) {
+			dma_clear_pte(pte);
+			domain_flush_cache(domain, pte, sizeof(*pte));
+			free_pgtable_page(level_pte);
+		}
+next:
+		pfn += level_size(level);
+	} while (!first_pte_in_page(++pte) && pfn <= last_pfn);
+}
+
 /* free page table pages. last level pte should already be cleared */
 static void dma_pte_free_pagetable(struct dmar_domain *domain,
 				   unsigned long start_pfn,
 				   unsigned long last_pfn)
 {
 	int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
-	struct dma_pte *first_pte, *pte;
-	int total = agaw_to_level(domain->agaw);
-	int level;
-	unsigned long tmp;
-	int large_page = 2;
 
 	BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
 	BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
 	BUG_ON(start_pfn > last_pfn);
 
 	/* We don't need lock here; nobody else touches the iova range */
-	level = 2;
-	while (level <= total) {
-		tmp = align_to_level(start_pfn, level);
-
-		/* If we can't even clear one PTE at this level, we're done */
-		if (tmp + level_size(level) - 1 > last_pfn)
-			return;
-
-		do {
-			large_page = level;
-			first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
-			if (large_page > level)
-				level = large_page + 1;
-			if (!pte) {
-				tmp = align_to_level(tmp + 1, level + 1);
-				continue;
-			}
-			do {
-				if (dma_pte_present(pte)) {
-					free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
-					dma_clear_pte(pte);
-				}
-				pte++;
-				tmp += level_size(level);
-			} while (!first_pte_in_page(pte) &&
-				 tmp + level_size(level) - 1 <= last_pfn);
-
-			domain_flush_cache(domain, first_pte,
-					   (void *)pte - (void *)first_pte);
-
-		} while (tmp && tmp + level_size(level) - 1 <= last_pfn);
-		level++;
-	}
+	dma_pte_free_level(domain, agaw_to_level(domain->agaw),
+			   domain->pgd, 0, start_pfn, last_pfn);
 
 	/* free pgd */
 	if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
 		free_pgtable_page(domain->pgd);
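For readers who want to see the recursive strategy in isolation, here is a
minimal user-space sketch of the same idea: walk the table top-down, recurse
into each present non-leaf entry that overlaps the requested range, and free a
child table only once the range covers its entire span.  The geometry (3
levels, 8 entries per table) and every identifier in the sketch (struct table,
pt_free_level(), level_span(), map_page()) are invented for illustration and
do not correspond to the VT-d structures or helpers in intel-iommu.c.

/*
 * Toy model of recursive page-table freeing; not kernel code.
 */
#include <stdio.h>
#include <stdlib.h>

#define ENTRIES 8	/* entries per table; VT-d tables hold 512 */
#define LEVELS  3	/* depth of the toy table */

struct table {
	struct table *child[ENTRIES];	/* NULL, or pointer to next level */
};

static int live_tables;			/* tables currently allocated */

static struct table *alloc_table(void)
{
	live_tables++;
	return calloc(1, sizeof(struct table));
}

/* Number of leaf pages covered by one entry at the given level. */
static unsigned long level_span(int level)
{
	unsigned long span = 1;

	while (--level > 0)
		span *= ENTRIES;
	return span;
}

/* Allocate intermediate tables so that leaf page 'pfn' is reachable. */
static void map_page(struct table *root, unsigned long pfn)
{
	struct table *tbl = root;
	int level;

	for (level = LEVELS; level > 1; level--) {
		int idx = (pfn / level_span(level)) % ENTRIES;

		if (!tbl->child[idx])
			tbl->child[idx] = alloc_table();
		tbl = tbl->child[idx];
	}
}

/*
 * Recurse into every present entry overlapping [start, last] and free a
 * child table once the range covers its whole span -- the same shape as
 * dma_pte_free_level() in the patch above.
 */
static void pt_free_level(struct table *tbl, int level, unsigned long pfn,
			  unsigned long start, unsigned long last)
{
	unsigned long span = level_span(level);
	int i;

	for (i = 0; i < ENTRIES; i++, pfn += span) {
		struct table *child = tbl->child[i];

		if (!child || pfn > last || pfn + span - 1 < start)
			continue;

		if (level > 2)	/* child still holds table pointers */
			pt_free_level(child, level - 1, pfn, start, last);

		if (start <= pfn && last >= pfn + span - 1) {
			free(child);
			tbl->child[i] = NULL;
			live_tables--;
		}
	}
}

int main(void)
{
	struct table *root = alloc_table();
	unsigned long pfn;

	/* Two mapped regions with a large gap between them. */
	for (pfn = 0; pfn < 16; pfn++)
		map_page(root, pfn);
	for (pfn = 448; pfn < 464; pfn++)
		map_page(root, pfn);

	printf("tables before free: %d\n", live_tables);	/* 7 */
	pt_free_level(root, LEVELS, 0, 0, ENTRIES * ENTRIES * ENTRIES - 1);
	printf("tables after free:  %d (only the root remains)\n", live_tables);

	free(root);
	return 0;
}

Because every present entry overlapping the range is visited, subtrees on the
far side of a large gap are not skipped, which is exactly the leak the
level-by-level loop removed by this patch suffered from.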
