linux/drivers/gpu/drm/v3d/v3d_mmu.c
Eric Anholt 57692c94dc drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+
This driver will be used to support Mesa on the Broadcom 7268 and 7278
platforms.

V3D 3.3 introduces an MMU, which means we no longer need CMA or vc4's
complicated CL/shader validation scheme.  This massively changes the
GEM behavior, so I've forked off to a new driver.

v2: Mark SUBMIT_CL as needing DRM_AUTH.  coccinelle fixes from kbuild
    test robot. Drop personal git link from MAINTAINERS.  Don't
    double-map dma-buf imported BOs.  Add kerneldoc about needing MMU
    eviction.  Drop prime vmap/unmap stubs.  Delay mmap offset setup
    to mmap time.  Use drm_dev_init instead of _alloc.  Use
    ktime_get() for wait_bo timeouts.  Drop drm_can_sleep() usage,
    since we don't modeset.  Switch page tables back to WC (debug
    change to coherent had slipped in).  Switch
    drm_gem_object_unreference_unlocked() to
    drm_gem_object_put_unlocked().  Simplify overflow mem handling by
    not sharing overflow mem between jobs.
v3: no changes
v4: align submit_cl to 64 bits (review by airlied), check zero flags in
    other ioctls.

Signed-off-by: Eric Anholt <eric@anholt.net>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> (v4)
Acked-by: Dave Airlie <airlied@linux.ie> (v3, requested submit_cl change)
Link: https://patchwork.freedesktop.org/patch/msgid/20180430181058.30181-3-eric@anholt.net
2018-05-03 16:26:30 -07:00

123 lines
3.3 KiB
C

// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2017-2018 Broadcom */
/**
* DOC: Broadcom V3D MMU
*
* The V3D 3.x hardware (compared to VC4) now includes an MMU. It has
* a single level of page tables for the V3D's 4GB address space to
* map to AXI bus addresses, thus it could need up to 4MB of
* physically contiguous memory to store the PTEs.
*
* Because the 4MB of contiguous memory for page tables is precious,
* and switching between them is expensive, we load all BOs into the
* same 4GB address space.
*
* To protect clients from each other, we should use the GMP to
* quickly mask out (at 128kb granularity) what pages are available to
* each client. This is not yet implemented.
*/
#include "v3d_drv.h"
#include "v3d_regs.h"
#define V3D_MMU_PAGE_SHIFT 12
/* Note: All PTEs for the 1MB superpage must be filled with the
* superpage bit set.
*/
#define V3D_PTE_SUPERPAGE BIT(31)
#define V3D_PTE_WRITEABLE BIT(29)
#define V3D_PTE_VALID BIT(28)
static int v3d_mmu_flush_all(struct v3d_dev *v3d)
{
int ret;
/* Make sure that another flush isn't already running when we
* start this one.
*/
ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
V3D_MMU_CTL_TLB_CLEARING), 100);
if (ret)
dev_err(v3d->dev, "TLB clear wait idle pre-wait failed\n");
V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) |
V3D_MMU_CTL_TLB_CLEAR);
V3D_WRITE(V3D_MMUC_CONTROL,
V3D_MMUC_CONTROL_FLUSH |
V3D_MMUC_CONTROL_ENABLE);
ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
V3D_MMU_CTL_TLB_CLEARING), 100);
if (ret) {
dev_err(v3d->dev, "TLB clear wait idle failed\n");
return ret;
}
ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) &
V3D_MMUC_CONTROL_FLUSHING), 100);
if (ret)
dev_err(v3d->dev, "MMUC flush wait idle failed\n");
return ret;
}
int v3d_mmu_set_page_table(struct v3d_dev *v3d)
{
V3D_WRITE(V3D_MMU_PT_PA_BASE, v3d->pt_paddr >> V3D_MMU_PAGE_SHIFT);
V3D_WRITE(V3D_MMU_CTL,
V3D_MMU_CTL_ENABLE |
V3D_MMU_CTL_PT_INVALID |
V3D_MMU_CTL_PT_INVALID_ABORT |
V3D_MMU_CTL_WRITE_VIOLATION_ABORT |
V3D_MMU_CTL_CAP_EXCEEDED_ABORT);
V3D_WRITE(V3D_MMU_ILLEGAL_ADDR,
(v3d->mmu_scratch_paddr >> V3D_MMU_PAGE_SHIFT) |
V3D_MMU_ILLEGAL_ADDR_ENABLE);
V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_ENABLE);
return v3d_mmu_flush_all(v3d);
}
void v3d_mmu_insert_ptes(struct v3d_bo *bo)
{
struct v3d_dev *v3d = to_v3d_dev(bo->base.dev);
u32 page = bo->node.start;
u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID;
unsigned int count;
struct scatterlist *sgl;
for_each_sg(bo->sgt->sgl, sgl, bo->sgt->nents, count) {
u32 page_address = sg_dma_address(sgl) >> V3D_MMU_PAGE_SHIFT;
u32 pte = page_prot | page_address;
u32 i;
BUG_ON(page_address + (sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT) >=
BIT(24));
for (i = 0; i < sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT; i++)
v3d->pt[page++] = pte + i;
}
WARN_ON_ONCE(page - bo->node.start !=
bo->base.size >> V3D_MMU_PAGE_SHIFT);
if (v3d_mmu_flush_all(v3d))
dev_err(v3d->dev, "MMU flush timeout\n");
}
void v3d_mmu_remove_ptes(struct v3d_bo *bo)
{
struct v3d_dev *v3d = to_v3d_dev(bo->base.dev);
u32 npages = bo->base.size >> V3D_MMU_PAGE_SHIFT;
u32 page;
for (page = bo->node.start; page < bo->node.start + npages; page++)
v3d->pt[page] = 0;
if (v3d_mmu_flush_all(v3d))
dev_err(v3d->dev, "MMU flush timeout\n");
}