drm/ttm: Provide a generic LRU walker helper

Provide a generic LRU walker in TTM, in the spirit of drm_gem_lru_scan()
but building on the restartable TTM LRU functionality.

The LRU walker optionally supports locking objects as part of
a ww mutex locking transaction, to mimic to some extent the
current functionality in ttm. However any -EDEADLK return
is converted to -ENOSPC and then to -ENOMEM before reaching
the driver, so that the driver will need to back off and possibly retry
without being able to keep the ticket.

v3:
- Move the helper to core ttm.
- Remove the drm_exec usage from it for now, it will be
  reintroduced later in the series.
v4:
- Handle the -EALREADY case if ticketlocking.
v6:
- Some cleanup and added code comments (Matthew Brost)
- Clarified the ticketlock in the commit message (Matthew Brost)
v7:
- Use s64 rather than long for the target and progress
  (Christian König)
- Update documentation to not encourage using pages as a
  progress measure. (Christian König)
- Remove cond_resched(). (Christian König)

Cc: Christian König <christian.koenig@amd.com>
Cc: Somalapuram Amaranath <Amaranath.Somalapuram@amd.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: <dri-devel@lists.freedesktop.org>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Matthew Brost <matthew.brost@intel.com> #v6
Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-6-thomas.hellstrom@linux.intel.com
Signed-off-by: Christian König <christian.koenig@amd.com>
This commit is contained in:
Thomas Hellström 2024-07-05 17:32:03 +02:00 committed by Christian König
parent 4c44f89c5d
commit da966b82bf
2 changed files with 186 additions and 0 deletions

View File

@ -768,3 +768,154 @@ error_destroy_tt:
ttm_tt_destroy(bo->bdev, ttm);
return ret;
}
/*
 * Try to take @bo's reservation lock without sleeping.
 *
 * Returns true when the bo can be treated as locked. That is the case either
 * because the trylock succeeded, in which case *@needs_unlock is set to true
 * so that the caller releases the lock afterwards, or because the bo shares
 * the reservation object named by the operation context and that context
 * allows evicting such objects. In the latter case the lock is asserted to be
 * held already and *@needs_unlock is left false.
 *
 * Returns false, with *@needs_unlock false, when the bo could not be locked.
 */
static bool ttm_lru_walk_trylock(struct ttm_lru_walk *walk,
				 struct ttm_buffer_object *bo,
				 bool *needs_unlock)
{
	struct dma_resv *resv = bo->base.resv;
	struct ttm_operation_ctx *ctx = walk->ctx;

	*needs_unlock = dma_resv_trylock(resv);
	if (*needs_unlock)
		return true;

	/* Not ours to take unless it is the context's own reservation. */
	if (resv != ctx->resv || !ctx->allow_res_evict)
		return false;

	dma_resv_assert_held(resv);
	return true;
}
/*
 * Lock @bo's reservation object as part of the walk's ww acquire ticket,
 * sleeping if necessary (interruptibly when the operation context asks for
 * it).
 *
 * On success, 0 is returned, *@needs_unlock is set to true and the walk's
 * ticket is cleared. -EDEADLK from the lock is reported as -ENOSPC; any other
 * error is passed through unchanged and *@needs_unlock is not touched.
 */
static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk,
				   struct ttm_buffer_object *bo,
				   bool *needs_unlock)
{
	struct dma_resv *resv = bo->base.resv;
	int err;

	err = walk->ctx->interruptible ?
		dma_resv_lock_interruptible(resv, walk->ticket) :
		dma_resv_lock(resv, walk->ticket);

	/* Caller needs to exit the ww transaction. */
	if (err == -EDEADLK)
		return -ENOSPC;

	if (err)
		return err;

	*needs_unlock = true;

	/*
	 * Only a single ticketlock per loop. Ticketlocks are prone to return
	 * -EDEADLK, which makes the eviction fail, so once we have waited for
	 * one ticketlock the remainder of this walk goes back to trylocking.
	 */
	walk->ticket = NULL;

	return 0;
}
/*
 * Release @bo's reservation lock, but only if the walk itself took it
 * (@locked is the needs_unlock flag produced by the lock helpers).
 */
static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked)
{
	if (!locked)
		return;

	dma_resv_unlock(bo->base.resv);
}
/**
 * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on
 * valid items.
 * @walk: Describes the walk and the actions taken.
 * @bdev: The TTM device.
 * @man: The struct ttm_resource_manager whose LRU lists we're walking.
 * @target: The end condition for the walk.
 *
 * The LRU lists of @man are walked, and for each struct ttm_resource
 * encountered, the corresponding ttm_buffer_object is locked and a reference
 * is taken on it, after which the LRU lock is dropped. If the object can only
 * be locked with a sleeping ticket lock, the LRU lock is dropped before
 * locking. In that case it is re-checked, once the lock is held, that the
 * buffer object still has a resource of the memory type observed under the
 * LRU lock; if not, the object is skipped.
 *
 * With a locked object, @walk->ops->process_bo() is called, and after that
 * the bo is unlocked, the reference dropped and the next struct ttm_resource
 * is processed. Here, the walker relies on TTM's restartable LRU list
 * implementation.
 *
 * Typically @walk->ops->process_bo() would return the number of pages evicted,
 * swapped or shrunken, so that when the running total reaches or exceeds
 * @target, or when the LRU list has been walked in full, iteration is
 * terminated. It is also terminated on error; -EBUSY and -EALREADY are not
 * treated as errors but as "object skipped". Note that the meaning of @target
 * is defined by the caller and need not be a number of pages.
 *
 * Note that the way dma_resv individualization is done, locking needs to be
 * done either with the LRU lock held (trylocking only) or with a reference on
 * the object.
 *
 * Return: The progress made towards target or negative error code on error.
 */
s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
			   struct ttm_resource_manager *man, s64 target)
{
	struct ttm_resource_cursor cursor;
	struct ttm_resource *res;
	s64 progress = 0;

	spin_lock(&bdev->lru_lock);
	ttm_resource_manager_for_each_res(man, &cursor, res) {
		struct ttm_buffer_object *bo = res->bo;
		bool needs_unlock = false;
		bool have_lock;
		s64 ret = 0;
		int mem_type;

		/*
		 * Trylock before taking a reference on the bo: doing it the
		 * other way around would force us to drop the lru lock just
		 * to put the bo again when the trylock fails.
		 */
		have_lock = ttm_lru_walk_trylock(walk, bo, &needs_unlock);
		if (!have_lock) {
			bool may_ticketlock = walk->ticket &&
					      !walk->ctx->no_wait_gpu &&
					      !walk->trylock_only;

			if (!may_ticketlock)
				continue;
		}

		if (!ttm_bo_get_unless_zero(bo)) {
			ttm_lru_walk_unlock(bo, needs_unlock);
			continue;
		}

		/* Remember the memory type while the LRU lock protects @res. */
		mem_type = res->mem_type;
		spin_unlock(&bdev->lru_lock);

		if (!have_lock)
			ret = ttm_lru_walk_ticketlock(walk, bo, &needs_unlock);

		/*
		 * Between releasing the lru lock and obtaining the ticketlock
		 * the bo may have switched resource, and also memory type,
		 * since the resource may have been freed and allocated again
		 * with a different memory type. In that case, just skip it.
		 */
		if (!ret && bo->resource && bo->resource->mem_type == mem_type)
			ret = walk->ops->process_bo(walk, bo);

		ttm_lru_walk_unlock(bo, needs_unlock);
		ttm_bo_put(bo);

		/* A skipped object contributes nothing and is not an error. */
		if (ret == -EBUSY || ret == -EALREADY)
			ret = 0;

		if (ret < 0)
			progress = ret;
		else
			progress += ret;

		/* The cursor is advanced and finalized under the LRU lock. */
		spin_lock(&bdev->lru_lock);
		if (progress < 0 || progress >= target)
			break;
	}
	ttm_resource_cursor_fini_locked(&cursor);
	spin_unlock(&bdev->lru_lock);

	return progress;
}

View File

@ -194,6 +194,41 @@ struct ttm_operation_ctx {
uint64_t bytes_moved;
};
/* Forward declaration: process_bo() below takes a pointer to the walk. */
struct ttm_lru_walk;
/**
* struct ttm_lru_walk_ops - Operations for a LRU walk.
*/
struct ttm_lru_walk_ops {
/**
* process_bo - Process this bo.
* @walk: struct ttm_lru_walk describing the walk.
* @bo: A locked and referenced buffer object.
*
* Called by ttm_lru_walk_for_evict() for each buffer object it managed
* to lock and reference. The walker unlocks the bo (if it took the lock
* itself) and drops the reference after this callback returns.
*
* Return: Negative error code on error, User-defined positive value
* (typically, but not always, size of the processed bo) on success.
* On success, the returned values are summed by the walk and the
* walk exits when its target is met.
* 0 also indicates success, -EBUSY means this bo was skipped.
* The walker treats -EALREADY the same way as -EBUSY. Any other
* negative value terminates the walk and is returned to its caller.
*/
s64 (*process_bo)(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo);
};
/**
* struct ttm_lru_walk - Structure describing a LRU walk.
*/
struct ttm_lru_walk {
/** @ops: Pointer to the ops structure. */
const struct ttm_lru_walk_ops *ops;
/**
* @ctx: Pointer to the struct ttm_operation_ctx. The walker reads its
* interruptible, no_wait_gpu, resv and allow_res_evict members to decide
* how buffer objects may be locked.
*/
struct ttm_operation_ctx *ctx;
/**
* @ticket: The struct ww_acquire_ctx if any. When set, and neither
* @trylock_only nor ctx->no_wait_gpu is set, the walker falls back to a
* sleeping ticket lock for an object whose trylock failed. The walker
* sets this to NULL after the first successful ticket lock, so the
* remainder of the walk only uses trylocks.
*/
struct ww_acquire_ctx *ticket;
/** @trylock_only: Only use trylock for locking. */
bool trylock_only;
};
s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev,
struct ttm_resource_manager *man, s64 target);
/**
* ttm_bo_get - reference a struct ttm_buffer_object
*