Merge tag 'drm-intel-gt-next-2021-01-14' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

UAPI Changes:
- Deprecate I915_PMU_LAST and optimize state tracking (Tvrtko)

  Avoid relying on the last-item ABI marker in i915_drm.h and add a
  comment marking it as deprecated.
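
  As a rough illustration of the hazard (hypothetical enum, not the
  actual i915_drm.h contents): anything derived from a trailing "last"
  marker changes value whenever new items are appended, so userspace
  compiled against an older header silently disagrees with a newer
  kernel.

    /* Hypothetical example only -- not the real i915 PMU event list. */
    enum example_pmu_event {
            EXAMPLE_PMU_EVENT_FOO,
            EXAMPLE_PMU_EVENT_BAR,
            /* new events get appended here over time */
            EXAMPLE_PMU_EVENT_LAST, /* deprecated: shifts with every addition */
    };

    /* Unsafe: bakes today's EXAMPLE_PMU_EVENT_LAST into the binary. */
    static int supported_events[EXAMPLE_PMU_EVENT_LAST];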

Cross-subsystem Changes:

Core Changes:

Driver Changes:

- Restore clear residuals security mitigations for Ivybridge and
  Baytrail (Chris)
- Close #1858: Allow sysadmin to choose applied GPU security mitigations
  through i915.mitigations=... similar to CPU (Chris); see the usage note
  after this list
- Fix for #2024: GPU hangs on HSW GT1 (Chris)
- Fix for #2707: Driver hang when editing UVs in Blender (Chris, Ville)
- Fix for #2797: False positive GuC loading error message (Chris)
- Fix for #2859: Missing GuC firmware for older Cometlakes (Chris)
- Lessen probability of GPU hang due to DMAR faults [reason 7,
  next page table ptr is invalid] on Tigerlake (Chris)
- Fix REVID macros for TGL to fetch correct stepping (Aditya)
- Limit frequency drop to RPe on parking (Chris, Edward)
- Limit W/A 1406941453 to TGL, RKL and DG1 (Swathi)
- Make W/A 22010271021 permanent on DG1 (Lucas)
- Implement W/A 16011163337 to prevent a HS/DS hang on DG1 (Swathi)
- Only disable preemption on gen8 render engines (Chris)
- Disable arbitration around Braswell's PDP updates (Chris)
- Disable arbitration on no-preempt requests (Chris)
- Check for arbitration after writing start seqno before busywaiting (Chris)
- Retain default context state across shrinking (Venkata, CQ)
- Fix mismatch between misplaced vma check and vma insert for 32-bit
  addressing userspaces (Chris, CQ)
- Propagate error for vmap() failure instead of a kernel NULL deref (Chris)
- Propagate error from cancelled submit due to context closure
  immediately (Chris)
- Fix RCU race on HWSP tracking per request (Chris)
- Clear CMD parser shadow and GPU reloc batches (Matt A)

- Populate logical context during first pin (Maarten)
- Optimistically prune dma-resv from the shrinker (Chris)
- Fix for virtual engine ownership race (Chris)
- Remove timeslice suppression to restore fairness for virtual engines (Chris)
- Rearrange IVB/HSW workarounds properly between GT and engine (Chris)
- Taint the reset mutex with the shrinker (Chris)
- Replace direct submit with direct call to tasklet (Chris)
- Multiple corrections to virtual engine dequeue and breadcrumbs code (Chris)
- Avoid wakeref from potentially hard IRQ context in PMU (Tvrtko)
- Use raw clock for RC6 time estimation in PMU (Tvrtko)
- Differentiate OOM failures from invalid map types (Chris)
- Fix Gen9 to have 64 MOCS entries similar to Gen11 (Chris)
- Ignore repeated attempts to suspend request flow across reset (Chris)
- Remove livelock from "do_idle_maps" VT-d W/A (Chris)
- Cancel the preemption timeout early in case engine reset fails (Chris)
- Code flow optimization in the scheduling code (Chris)
- Clear the execlists timers upon reset (Chris)
- Drain the breadcrumbs just once (Chris, Matt A)
- Track the overall GT awake/busy time (Chris)
- Tweak submission tasklet flushing to avoid starvation (Chris)
- Track timelines created using the HWSP to restore on resume (Chris)
- Use cmpxchg64 for 32b compatibility in active tracking (Chris)
- Prefer recycling an idle GGTT fence to avoid GPU wait (Chris)

- Restructure GT code organization for clearer split between GuC
  and execlists (Chris, Daniele, John, Matt A)
- Remove GuC code that will remain unused by new interfaces (Matt B)
- Restructure the CS timestamp clocks code to be local to the GT (Chris)
- Fix error return paths in perf code (Zhang)
- Replace idr_init() by idr_init_base() in perf (Deepak)
- Fix shmem_pin_map error path (Colin)
- Drop redundant free_work worker for GEM contexts (Chris, Mika)
- Increase readability and understandability of intel_workarounds.c (Lucas)
- Defer enabling the breadcrumb interrupt to after submission (Chris)
- Deal with buddy alloc block sizes beyond 4G (Venkata, Chris)
- Encode fence specific waitqueue behaviour into the wait.flags (Chris)
- Don't cancel the breadcrumb interrupt shadow too early (Chris)
- Cancel submitted requests upon context reset (Chris)
- Use correct locks in GuC code (Tvrtko)
- Prevent use of engine->wa_ctx after error (Chris, Matt R)

- Fix build warning on 32-bit (Arnd)
- Avoid memory leak if platform would have more than 16 W/A (Tvrtko)
- Avoid unnecessary #if CONFIG_PM in PMU code (Chris, Tvrtko)
- Improve debugging output (Chris, Tvrtko, Matt R)
- Make file local variables static (Jani)
- Avoid uint*_t types in i915 (Jani)
- Selftest improvements (Chris, Matt A, Dan)
- Documentation fixes (Chris, Jose)
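
  Illustrative usage note for the i915.mitigations= switch above (the
  exact token list comes from the module parameter description; "auto"
  and "off" are shown here as assumed examples):

    i915.mitigations=auto   - keep the default mitigations (e.g. the
                              Ivybridge/Baytrail clear-residuals batch)
    i915.mitigations=off    - opt out entirely, mirroring the CPU-side
                              mitigations= boot parameter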

Signed-off-by: Dave Airlie <airlied@redhat.com>

# Conflicts:
#	drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
#	drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h
#	drivers/gpu/drm/i915/gt/intel_lrc.c
#	drivers/gpu/drm/i915/gvt/mmio_context.h
#	drivers/gpu/drm/i915/i915_drv.h
From: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210114152232.GA21588@jlahtine-mobl.ger.corp.intel.com
130 files changed, 12996 insertions(+), 11577 deletions(-)


@@ -428,7 +428,7 @@ User Batchbuffer Execution
 Logical Rings, Logical Ring Contexts and Execlists
 --------------------------------------------------
-.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_lrc.c
+.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_execlists_submission.c
    :doc: Logical Rings, Logical Ring Contexts and Execlists
 Global GTT views


@@ -38,6 +38,7 @@ i915-y += i915_drv.o \
	  i915_config.o \
	  i915_irq.o \
	  i915_getparam.o \
+	  i915_mitigations.o \
	  i915_params.o \
	  i915_pci.o \
	  i915_scatterlist.o \
@@ -58,6 +59,7 @@ i915-y += i915_drv.o \
 # core library code
 i915-y += \
+	dma_resv_utils.o \
	i915_memcpy.o \
	i915_mm.o \
	i915_sw_fence.o \
@@ -82,6 +84,7 @@ gt-y += \
	gt/gen6_engine_cs.o \
	gt/gen6_ppgtt.o \
	gt/gen7_renderclear.o \
+	gt/gen8_engine_cs.o \
	gt/gen8_ppgtt.o \
	gt/intel_breadcrumbs.o \
	gt/intel_context.o \
@@ -91,6 +94,7 @@ gt-y += \
	gt/intel_engine_heartbeat.o \
	gt/intel_engine_pm.o \
	gt/intel_engine_user.o \
+	gt/intel_execlists_submission.o \
	gt/intel_ggtt.o \
	gt/intel_ggtt_fencing.o \
	gt/intel_gt.o \
@@ -106,6 +110,7 @@ gt-y += \
	gt/intel_mocs.o \
	gt/intel_ppgtt.o \
	gt/intel_rc6.o \
+	gt/intel_region_lmem.o \
	gt/intel_renderstate.o \
	gt/intel_reset.o \
	gt/intel_ring.o \
@@ -166,7 +171,6 @@ i915-y += \
	i915_scheduler.o \
	i915_trace_points.o \
	i915_vma.o \
-	intel_region_lmem.o \
	intel_wopcm.o
 # general-purpose microcontroller (GuC) support


@@ -29,6 +29,7 @@
 #include <drm/drm_fourcc.h>
 #include "gem/i915_gem_pm.h"
+#include "gt/intel_gpu_commands.h"
 #include "gt/intel_ring.h"
 #include "i915_drv.h"


@@ -0,0 +1,17 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2020 Intel Corporation
*/
#include <linux/dma-resv.h>
#include "dma_resv_utils.h"
void dma_resv_prune(struct dma_resv *resv)
{
if (dma_resv_trylock(resv)) {
if (dma_resv_test_signaled_rcu(resv, true))
dma_resv_add_excl_fence(resv, NULL);
dma_resv_unlock(resv);
}
}


@@ -0,0 +1,13 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2020 Intel Corporation
*/
#ifndef DMA_RESV_UTILS_H
#define DMA_RESV_UTILS_H
struct dma_resv;
void dma_resv_prune(struct dma_resv *resv);
#endif /* DMA_RESV_UTILS_H */


@@ -72,6 +72,8 @@
 #include "gt/intel_context_param.h"
 #include "gt/intel_engine_heartbeat.h"
 #include "gt/intel_engine_user.h"
+#include "gt/intel_execlists_submission.h" /* virtual_engine */
+#include "gt/intel_gpu_commands.h"
 #include "gt/intel_ring.h"
 #include "i915_gem_context.h"
@@ -333,13 +335,12 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx)
	return e;
 }
-static void i915_gem_context_free(struct i915_gem_context *ctx)
+void i915_gem_context_release(struct kref *ref)
 {
-	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
-	spin_lock(&ctx->i915->gem.contexts.lock);
-	list_del(&ctx->link);
-	spin_unlock(&ctx->i915->gem.contexts.lock);
+	struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
+	trace_i915_context_free(ctx);
+	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
	mutex_destroy(&ctx->engines_mutex);
	mutex_destroy(&ctx->lut_mutex);
@@ -353,37 +354,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
	kfree_rcu(ctx, rcu);
 }
-static void contexts_free_all(struct llist_node *list)
-{
-	struct i915_gem_context *ctx, *cn;
-	llist_for_each_entry_safe(ctx, cn, list, free_link)
-		i915_gem_context_free(ctx);
-}
-static void contexts_flush_free(struct i915_gem_contexts *gc)
-{
-	contexts_free_all(llist_del_all(&gc->free_list));
-}
-static void contexts_free_worker(struct work_struct *work)
-{
-	struct i915_gem_contexts *gc =
-		container_of(work, typeof(*gc), free_work);
-	contexts_flush_free(gc);
-}
-void i915_gem_context_release(struct kref *ref)
-{
-	struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
-	struct i915_gem_contexts *gc = &ctx->i915->gem.contexts;
-	trace_i915_context_free(ctx);
-	if (llist_add(&ctx->free_link, &gc->free_list))
-		schedule_work(&gc->free_work);
-}
 static inline struct i915_gem_engines *
 __context_engines_static(const struct i915_gem_context *ctx)
 {
@@ -453,6 +423,9 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce)
	struct intel_engine_cs *engine = NULL;
	struct i915_request *rq;
+	if (intel_context_has_inflight(ce))
+		return intel_context_inflight(ce);
	if (!ce->timeline)
		return NULL;
@@ -632,6 +605,10 @@ static void context_close(struct i915_gem_context *ctx)
	 */
	lut_close(ctx);
+	spin_lock(&ctx->i915->gem.contexts.lock);
+	list_del(&ctx->link);
+	spin_unlock(&ctx->i915->gem.contexts.lock);
	mutex_unlock(&ctx->mutex);
	/*
@@ -849,9 +826,6 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags)
	    !HAS_EXECLISTS(i915))
		return ERR_PTR(-EINVAL);
-	/* Reap the stale contexts */
-	contexts_flush_free(&i915->gem.contexts);
	ctx = __create_context(i915);
	if (IS_ERR(ctx))
		return ctx;
@@ -896,23 +870,11 @@ static void init_contexts(struct i915_gem_contexts *gc)
 {
	spin_lock_init(&gc->lock);
	INIT_LIST_HEAD(&gc->list);
-	INIT_WORK(&gc->free_work, contexts_free_worker);
-	init_llist_head(&gc->free_list);
 }
 void i915_gem_init__contexts(struct drm_i915_private *i915)
 {
	init_contexts(&i915->gem.contexts);
-	drm_dbg(&i915->drm, "%s context support initialized\n",
-		DRIVER_CAPS(i915)->has_logical_contexts ?
-		"logical" : "fake");
-}
-void i915_gem_driver_release__contexts(struct drm_i915_private *i915)
-{
-	flush_work(&i915->gem.contexts.free_work);
-	rcu_barrier(); /* and flush the left over RCU frees */
 }
 static int gem_context_register(struct i915_gem_context *ctx,
@@ -988,7 +950,6 @@ err:
 void i915_gem_context_close(struct drm_file *file)
 {
	struct drm_i915_file_private *file_priv = file->driver_priv;
-	struct drm_i915_private *i915 = file_priv->dev_priv;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	unsigned long idx;
@@ -1000,8 +961,6 @@ void i915_gem_context_close(struct drm_file *file)
	xa_for_each(&file_priv->vm_xa, idx, vm)
		i915_vm_put(vm);
	xa_destroy(&file_priv->vm_xa);
-	contexts_flush_free(&i915->gem.contexts);
 }
 int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,


@@ -110,7 +110,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
 /* i915_gem_context.c */
 void i915_gem_init__contexts(struct drm_i915_private *i915);
-void i915_gem_driver_release__contexts(struct drm_i915_private *i915);
 int i915_gem_context_open(struct drm_i915_private *i915,
			  struct drm_file *file);


@@ -108,7 +108,6 @@ struct i915_gem_context {
	/** link: place with &drm_i915_private.context_list */
	struct list_head link;
-	struct llist_node free_link;
	/**
	 * @ref: reference count


@@ -15,6 +15,7 @@
 #include "gem/i915_gem_ioctls.h"
 #include "gt/intel_context.h"
+#include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_buffer_pool.h"
 #include "gt/intel_gt_pm.h"
@@ -534,8 +535,6 @@ eb_add_vma(struct i915_execbuffer *eb,
	struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
	struct eb_vma *ev = &eb->vma[i];
-	GEM_BUG_ON(i915_vma_is_closed(vma));
	ev->vma = vma;
	ev->exec = entry;
	ev->flags = entry->flags;
@@ -1046,7 +1045,7 @@ static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cach
	GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32));
	cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END;
-	__i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1));
+	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);
	intel_gt_chipset_flush(cache->rq->engine->gt);
@@ -1296,6 +1295,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
		goto err_pool;
	}
+	memset32(cmd, 0, pool->obj->base.size / sizeof(u32));
	batch = i915_vma_instance(pool->obj, vma->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
@@ -2533,6 +2534,9 @@ static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
 {
	int err;
+	if (intel_context_nopreempt(eb->context))
+		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
	err = eb_move_to_gpu(eb);
	if (err)
		return err;
@@ -2573,15 +2577,12 @@ static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch)
		return err;
	}
-	if (intel_context_nopreempt(eb->context))
-		__set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags);
	return 0;
 }
 static int num_vcs_engines(const struct drm_i915_private *i915)
 {
-	return hweight64(VDBOX_MASK(&i915->gt));
+	return hweight_long(VDBOX_MASK(&i915->gt));
 }
 /*


@@ -6,6 +6,7 @@
 #include "i915_drv.h"
 #include "gt/intel_context.h"
 #include "gt/intel_engine_pm.h"
+#include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_buffer_pool.h"
 #include "gt/intel_ring.h"


@@ -281,7 +281,7 @@ static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj,
		/* Too big for stack -- allocate temporary array instead */
		pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
		if (!pages)
-			return NULL;
+			return ERR_PTR(-ENOMEM);
	}
	i = 0;
@@ -290,7 +290,8 @@ static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj,
	vaddr = vmap(pages, n_pages, 0, pgprot);
	if (pages != stack)
		kvfree(pages);
-	return vaddr;
+	return vaddr ?: ERR_PTR(-ENOMEM);
 }
 static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
@@ -305,13 +306,13 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
	void *vaddr;
	if (type != I915_MAP_WC)
-		return NULL;
+		return ERR_PTR(-ENODEV);
	if (n_pfn > ARRAY_SIZE(stack)) {
		/* Too big for stack -- allocate temporary array instead */
		pfns = kvmalloc_array(n_pfn, sizeof(*pfns), GFP_KERNEL);
		if (!pfns)
-			return NULL;
+			return ERR_PTR(-ENOMEM);
	}
	i = 0;
@@ -320,7 +321,8 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
	vaddr = vmap_pfn(pfns, n_pfn, pgprot_writecombine(PAGE_KERNEL_IO));
	if (pfns != stack)
		kvfree(pfns);
-	return vaddr;
+	return vaddr ?: ERR_PTR(-ENOMEM);
 }
 /* get, pin, and map the pages of the object into kernel space */
@@ -349,8 +351,10 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
		GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
		err = ____i915_gem_object_get_pages(obj);
-		if (err)
-			goto err_unlock;
+		if (err) {
+			ptr = ERR_PTR(err);
+			goto out_unlock;
+		}
		smp_mb__before_atomic();
	}
@@ -362,7 +366,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
	if (ptr && has_type != type) {
		if (pinned) {
-			err = -EBUSY;
+			ptr = ERR_PTR(-EBUSY);
			goto err_unpin;
		}
@@ -374,15 +378,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
	if (!ptr) {
		if (GEM_WARN_ON(type == I915_MAP_WC &&
				!static_cpu_has(X86_FEATURE_PAT)))
-			ptr = NULL;
+			ptr = ERR_PTR(-ENODEV);
		else if (i915_gem_object_has_struct_page(obj))
			ptr = i915_gem_object_map_page(obj, type);
		else
			ptr = i915_gem_object_map_pfn(obj, type);
-		if (!ptr) {
-			err = -ENOMEM;
+		if (IS_ERR(ptr))
			goto err_unpin;
-		}
		obj->mm.mapping = page_pack_bits(ptr, type);
	}
@@ -393,8 +395,6 @@ out_unlock:
 err_unpin:
	atomic_dec(&obj->mm.pages_pin_count);
-err_unlock:
-	ptr = ERR_PTR(err);
	goto out_unlock;
 }


@@ -22,6 +22,7 @@ i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj,
 int
 i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj)
 {
+	const u64 max_segment = i915_sg_segment_size();
	struct intel_memory_region *mem = obj->mm.region;
	struct list_head *blocks = &obj->mm.blocks;
	resource_size_t size = obj->base.size;
@@ -37,7 +38,7 @@ i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj)
	if (!st)
		return -ENOMEM;
-	if (sg_alloc_table(st, size >> ilog2(mem->mm.chunk_size), GFP_KERNEL)) {
+	if (sg_alloc_table(st, size >> PAGE_SHIFT, GFP_KERNEL)) {
		kfree(st);
		return -ENOMEM;
	}
@@ -64,27 +65,30 @@ i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj)
			   i915_buddy_block_size(&mem->mm, block));
		offset = i915_buddy_block_offset(block);
-		GEM_BUG_ON(overflows_type(block_size, sg->length));
-		if (offset != prev_end ||
-		    add_overflows_t(typeof(sg->length), sg->length, block_size)) {
+		while (block_size) {
+			u64 len;
+			if (offset != prev_end || sg->length >= max_segment) {
				if (st->nents) {
					sg_page_sizes |= sg->length;
					sg = __sg_next(sg);
				}
				sg_dma_address(sg) = mem->region.start + offset;
-				sg_dma_len(sg) = block_size;
-				sg->length = block_size;
+				sg_dma_len(sg) = 0;
+				sg->length = 0;
				st->nents++;
-		} else {
-			sg->length += block_size;
-			sg_dma_len(sg) += block_size;
-		}
-		prev_end = offset + block_size;
+			}
+			len = min(block_size, max_segment - sg->length);
+			sg->length += len;
+			sg_dma_len(sg) += len;
+			offset += len;
+			block_size -= len;
+			prev_end = offset;
+		}
	}
	sg_page_sizes |= sg->length;


@@ -15,6 +15,7 @@
 #include "gt/intel_gt_requests.h"
+#include "dma_resv_utils.h"
 #include "i915_trace.h"
 static bool swap_available(void)
@@ -209,6 +210,8 @@ i915_gem_shrink(struct drm_i915_private *i915,
				mutex_unlock(&obj->mm.lock);
			}
+			dma_resv_prune(obj->base.resv);
			scanned += obj->base.size >> PAGE_SHIFT;
			i915_gem_object_put(obj);


@@ -608,11 +608,10 @@ i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
	struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen);
	GEM_BUG_ON(!stolen);
+	i915_gem_object_release_memory_region(obj);
	i915_gem_stolen_remove_node(i915, stolen);
	kfree(stolen);
-	i915_gem_object_release_memory_region(obj);
 }
 static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {


@@ -9,6 +9,7 @@
 #include "gt/intel_engine.h"
+#include "dma_resv_utils.h"
 #include "i915_gem_ioctls.h"
 #include "i915_gem_object.h"
@@ -84,11 +85,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv,
	 * Opportunistically prune the fences iff we know they have *all* been
	 * signaled.
	 */
-	if (prune_fences && dma_resv_trylock(resv)) {
-		if (dma_resv_test_signaled_rcu(resv, true))
-			dma_resv_add_excl_fence(resv, NULL);
-		dma_resv_unlock(resv);
-	}
+	if (prune_fences)
+		dma_resv_prune(resv);
	return timeout;
 }


@@ -27,7 +27,7 @@ static void huge_free_pages(struct drm_i915_gem_object *obj,
 static int huge_get_pages(struct drm_i915_gem_object *obj)
 {
-#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
+#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL)
	const unsigned long nreal = obj->scratch / PAGE_SIZE;
	const unsigned long npages = obj->base.size / PAGE_SIZE;
	struct scatterlist *sg, *src, *end;


@@ -368,6 +368,27 @@ static int igt_check_page_sizes(struct i915_vma *vma)
		err = -EINVAL;
	}
+	/*
+	 * The dma-api is like a box of chocolates when it comes to the
+	 * alignment of dma addresses, however for LMEM we have total control
+	 * and so can guarantee alignment, likewise when we allocate our blocks
+	 * they should appear in descending order, and if we know that we align
+	 * to the largest page size for the GTT address, we should be able to
+	 * assert that if we see 2M physical pages then we should also get 2M
+	 * GTT pages. If we don't then something might be wrong in our
+	 * construction of the backing pages.
+	 *
+	 * Maintaining alignment is required to utilise huge pages in the ppGGT.
+	 */
+	if (i915_gem_object_is_lmem(obj) &&
+	    IS_ALIGNED(vma->node.start, SZ_2M) &&
+	    vma->page_sizes.sg & SZ_2M &&
+	    vma->page_sizes.gtt < SZ_2M) {
+		pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n",
+		       vma->page_sizes.sg, vma->page_sizes.gtt);
+		err = -EINVAL;
+	}
	if (obj->mm.page_sizes.gtt) {
		pr_err("obj->page_sizes.gtt(%u) should never be set\n",
		       obj->mm.page_sizes.gtt);
@@ -1333,6 +1354,7 @@ static int igt_ppgtt_sanity_check(void *arg)
		unsigned int flags;
	} backends[] = {
		{ igt_create_system, 0, },
+		{ igt_create_local, 0, },
		{ igt_create_local, I915_BO_ALLOC_CONTIGUOUS, },
	};
	struct {


@@ -20,13 +20,11 @@ static int __igt_client_fill(struct intel_engine_cs *engine)
 {
	struct intel_context *ce = engine->kernel_context;
	struct drm_i915_gem_object *obj;
-	struct rnd_state prng;
+	I915_RND_STATE(prng);
	IGT_TIMEOUT(end);
	u32 *vaddr;
	int err = 0;
-	prandom_seed_state(&prng, i915_selftest.random_seed);
	intel_engine_pm_get(engine);
	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;


@@ -7,6 +7,7 @@
 #include <linux/prime_numbers.h>
 #include "gt/intel_engine_pm.h"
+#include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_ring.h"


@@ -7,6 +7,7 @@
 #include <linux/prime_numbers.h>
 #include "gt/intel_engine_pm.h"
+#include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gem/i915_gem_region.h"


@@ -9,6 +9,7 @@
 #include "gem/i915_gem_context.h"
 #include "gem/i915_gem_pm.h"
 #include "gt/intel_context.h"
+#include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt.h"
 #include "i915_vma.h"
 #include "i915_drv.h"


@@ -11,6 +11,7 @@
 #include "i915_drv.h"
 #include "intel_gt.h"
 #include "intel_gt_clock_utils.h"
+#include "intel_gt_pm.h"
 #include "intel_llc.h"
 #include "intel_rc6.h"
 #include "intel_rps.h"
@@ -403,34 +404,34 @@ static int frequency_show(struct seq_file *m, void *unused)
	seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
	seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
	seq_printf(m, "CAGF: %dMHz\n", cagf);
-	seq_printf(m, "RP CUR UP EI: %d (%dns)\n",
+	seq_printf(m, "RP CUR UP EI: %d (%lldns)\n",
		   rpcurupei,
		   intel_gt_pm_interval_to_ns(gt, rpcurupei));
-	seq_printf(m, "RP CUR UP: %d (%dns)\n",
+	seq_printf(m, "RP CUR UP: %d (%lldns)\n",
		   rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup));
-	seq_printf(m, "RP PREV UP: %d (%dns)\n",
+	seq_printf(m, "RP PREV UP: %d (%lldns)\n",
		   rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup));
	seq_printf(m, "Up threshold: %d%%\n",
		   rps->power.up_threshold);
-	seq_printf(m, "RP UP EI: %d (%dns)\n",
+	seq_printf(m, "RP UP EI: %d (%lldns)\n",
		   rpupei, intel_gt_pm_interval_to_ns(gt, rpupei));
-	seq_printf(m, "RP UP THRESHOLD: %d (%dns)\n",
+	seq_printf(m, "RP UP THRESHOLD: %d (%lldns)\n",
		   rpupt, intel_gt_pm_interval_to_ns(gt, rpupt));
-	seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n",
+	seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n",
		   rpcurdownei,
		   intel_gt_pm_interval_to_ns(gt, rpcurdownei));
-	seq_printf(m, "RP CUR DOWN: %d (%dns)\n",
+	seq_printf(m, "RP CUR DOWN: %d (%lldns)\n",
		   rpcurdown,
		   intel_gt_pm_interval_to_ns(gt, rpcurdown));
-	seq_printf(m, "RP PREV DOWN: %d (%dns)\n",
+	seq_printf(m, "RP PREV DOWN: %d (%lldns)\n",
		   rpprevdown,
		   intel_gt_pm_interval_to_ns(gt, rpprevdown));
	seq_printf(m, "Down threshold: %d%%\n",
		   rps->power.down_threshold);
-	seq_printf(m, "RP DOWN EI: %d (%dns)\n",
+	seq_printf(m, "RP DOWN EI: %d (%lldns)\n",
		   rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei));
-	seq_printf(m, "RP DOWN THRESHOLD: %d (%dns)\n",
+	seq_printf(m, "RP DOWN THRESHOLD: %d (%lldns)\n",
		   rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt));
	max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 :
@@ -558,7 +559,9 @@ static int rps_boost_show(struct seq_file *m, void *data)
	seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps)));
	seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps)));
-	seq_printf(m, "GPU busy? %s\n", yesno(gt->awake));
+	seq_printf(m, "GPU busy? %s, %llums\n",
+		   yesno(gt->awake),
+		   ktime_to_ms(intel_gt_get_awake_time(gt)));
	seq_printf(m, "Boosts outstanding? %d\n",
		   atomic_read(&rps->num_waiters));
	seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
@@ -575,7 +578,7 @@ static int rps_boost_show(struct seq_file *m, void *data)
		   intel_gpu_freq(rps, rps->efficient_freq),
		   intel_gpu_freq(rps, rps->boost_freq));
-	seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts));
+	seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts));
	if (INTEL_GEN(i915) >= 6 && intel_rps_is_active(rps)) {
		struct intel_uncore *uncore = gt->uncore;


@@ -7,8 +7,6 @@
 #include "i915_drv.h"
 #include "intel_gpu_commands.h"
-#define MAX_URB_ENTRIES 64
-#define STATE_SIZE (4 * 1024)
 #define GT3_INLINE_DATA_DELAYS 0x1E00
 #define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS))
@@ -34,38 +32,59 @@ struct batch_chunk {
 };
 struct batch_vals {
-	u32 max_primitives;
-	u32 max_urb_entries;
-	u32 cmd_size;
-	u32 state_size;
+	u32 max_threads;
	u32 state_start;
-	u32 batch_size;
+	u32 surface_start;
	u32 surface_height;
	u32 surface_width;
-	u32 scratch_size;
-	u32 max_size;
+	u32 size;
 };
+static inline int num_primitives(const struct batch_vals *bv)
+{
+	/*
+	 * We need to saturate the GPU with work in order to dispatch
+	 * a shader on every HW thread, and clear the thread-local registers.
+	 * In short, we have to dispatch work faster than the shaders can
+	 * run in order to fill the EU and occupy each HW thread.
+	 */
+	return bv->max_threads;
+}
 static void
 batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv)
 {
	if (IS_HASWELL(i915)) {
-		bv->max_primitives = 280;
-		bv->max_urb_entries = MAX_URB_ENTRIES;
+		switch (INTEL_INFO(i915)->gt) {
+		default:
+		case 1:
+			bv->max_threads = 70;
+			break;
+		case 2:
+			bv->max_threads = 140;
+			break;
+		case 3:
+			bv->max_threads = 280;
+			break;
+		}
		bv->surface_height = 16 * 16;
		bv->surface_width = 32 * 2 * 16;
	} else {
-		bv->max_primitives = 128;
-		bv->max_urb_entries = MAX_URB_ENTRIES / 2;
+		switch (INTEL_INFO(i915)->gt) {
+		default:
+		case 1: /* including vlv */
+			bv->max_threads = 36;
+			break;
+		case 2:
+			bv->max_threads = 128;
+			break;
+		}
		bv->surface_height = 16 * 8;
		bv->surface_width = 32 * 16;
	}
-	bv->cmd_size = bv->max_primitives * 4096;
-	bv->state_size = STATE_SIZE;
-	bv->state_start = bv->cmd_size;
-	bv->batch_size = bv->cmd_size + bv->state_size;
-	bv->scratch_size = bv->surface_height * bv->surface_width;
-	bv->max_size = bv->batch_size + bv->scratch_size;
+	bv->state_start = round_up(SZ_1K + num_primitives(bv) * 64, SZ_4K);
+	bv->surface_start = bv->state_start + SZ_4K;
+	bv->size = bv->surface_start + bv->surface_height * bv->surface_width;
 }
 static void batch_init(struct batch_chunk *bc,
@@ -155,7 +174,8 @@ static u32
 gen7_fill_binding_table(struct batch_chunk *state,
			const struct batch_vals *bv)
 {
-	u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv);
+	u32 surface_start =
+		gen7_fill_surface_state(state, bv->surface_start, bv);
	u32 *cs = batch_alloc_items(state, 32, 8);
	u32 offset = batch_offset(state, cs);
@@ -214,9 +234,9 @@ static void
 gen7_emit_state_base_address(struct batch_chunk *batch,
			     u32 surface_state_base)
 {
-	u32 *cs = batch_alloc_items(batch, 0, 12);
-	*cs++ = STATE_BASE_ADDRESS | (12 - 2);
+	u32 *cs = batch_alloc_items(batch, 0, 10);
+	*cs++ = STATE_BASE_ADDRESS | (10 - 2);
	/* general */
	*cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY;
	/* surface */
@@ -233,8 +253,6 @@ gen7_emit_state_base_address(struct batch_chunk *batch,
	*cs++ = BASE_ADDRESS_MODIFY;
	*cs++ = 0;
	*cs++ = BASE_ADDRESS_MODIFY;
-	*cs++ = 0;
-	*cs++ = 0;
	batch_advance(batch, cs);
 }
@@ -244,8 +262,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch,
		    u32 urb_size, u32 curbe_size,
		    u32 mode)
 {
-	u32 urb_entries = bv->max_urb_entries;
-	u32 threads = bv->max_primitives - 1;
+	u32 threads = bv->max_threads - 1;
	u32 *cs = batch_alloc_items(batch, 32, 8);
	*cs++ = MEDIA_VFE_STATE | (8 - 2);
@@ -254,7 +271,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch,
	*cs++ = 0;
	/* number of threads & urb entries for GPGPU vs Media Mode */
-	*cs++ = threads << 16 | urb_entries << 8 | mode << 2;
+	*cs++ = threads << 16 | 1 << 8 | mode << 2;
	*cs++ = 0;
@@ -293,17 +310,12 @@ gen7_emit_media_object(struct batch_chunk *batch,
 {
	unsigned int x_offset = (media_object_index % 16) * 64;
	unsigned int y_offset = (media_object_index / 16) * 16;
-	unsigned int inline_data_size;
-	unsigned int media_batch_size;
-	unsigned int i;
+	unsigned int pkt = 6 + 3;
	u32 *cs;
-	inline_data_size = 112 * 8;
-	media_batch_size = inline_data_size + 6;
-	cs = batch_alloc_items(batch, 8, media_batch_size);
-	*cs++ = MEDIA_OBJECT | (media_batch_size - 2);
+	cs = batch_alloc_items(batch, 8, pkt);
+	*cs++ = MEDIA_OBJECT | (pkt - 2);
	/* interface descriptor offset */
	*cs++ = 0;
@@ -317,25 +329,44 @@ gen7_emit_media_object(struct batch_chunk *batch,
	*cs++ = 0;
	/* inline */
-	*cs++ = (y_offset << 16) | (x_offset);
+	*cs++ = y_offset << 16 | x_offset;
	*cs++ = 0;
	*cs++ = GT3_INLINE_DATA_DELAYS;
-	for (i = 3; i < inline_data_size; i++)
-		*cs++ = 0;
	batch_advance(batch, cs);
 }
 static void gen7_emit_pipeline_flush(struct batch_chunk *batch)
 {
-	u32 *cs = batch_alloc_items(batch, 0, 5);
-	*cs++ = GFX_OP_PIPE_CONTROL(5);
-	*cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE |
-		PIPE_CONTROL_GLOBAL_GTT_IVB;
+	u32 *cs = batch_alloc_items(batch, 0, 4);
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+		PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+		PIPE_CONTROL_DC_FLUSH_ENABLE |
+		PIPE_CONTROL_CS_STALL;
	*cs++ = 0;
	*cs++ = 0;
+	batch_advance(batch, cs);
+}
+static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch)
+{
+	u32 *cs = batch_alloc_items(batch, 0, 8);
+	/* ivb: Stall before STATE_CACHE_INVALIDATE */
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD |
+		PIPE_CONTROL_CS_STALL;
	*cs++ = 0;
+	*cs++ = 0;
+	*cs++ = GFX_OP_PIPE_CONTROL(4);
+	*cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+	*cs++ = 0;
+	*cs++ = 0;
	batch_advance(batch, cs);
 }
@@ -344,34 +375,34 @@ static void emit_batch(struct i915_vma * const vma,
		       const struct batch_vals *bv)
 {
	struct drm_i915_private *i915 = vma->vm->i915;
-	unsigned int desc_count = 64;
-	const u32 urb_size = 112;
+	const unsigned int desc_count = 1;
+	const unsigned int urb_size = 1;
	struct batch_chunk cmds, state;
-	u32 interface_descriptor;
+	u32 descriptors;
	unsigned int i;
-	batch_init(&cmds, vma, start, 0, bv->cmd_size);
-	batch_init(&state, vma, start, bv->state_start, bv->state_size);
+	batch_init(&cmds, vma, start, 0, bv->state_start);
+	batch_init(&state, vma, start, bv->state_start, SZ_4K);
-	interface_descriptor =
-		gen7_fill_interface_descriptor(&state, bv,
+	descriptors = gen7_fill_interface_descriptor(&state, bv,
					       IS_HASWELL(i915) ?
					       &cb_kernel_hsw :
					       &cb_kernel_ivb,
					       desc_count);
-	gen7_emit_pipeline_flush(&cmds);
+	gen7_emit_pipeline_invalidate(&cmds);
	batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
	batch_add(&cmds, MI_NOOP);
-	gen7_emit_state_base_address(&cmds, interface_descriptor);
+	gen7_emit_pipeline_invalidate(&cmds);
	gen7_emit_pipeline_flush(&cmds);
+	gen7_emit_state_base_address(&cmds, descriptors);
+	gen7_emit_pipeline_invalidate(&cmds);
	gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0);
+	gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count);
-	gen7_emit_interface_descriptor_load(&cmds,
-					    interface_descriptor,
-					    desc_count);
-	for (i = 0; i < bv->max_primitives; i++)
+	for (i = 0; i < num_primitives(bv); i++)
		gen7_emit_media_object(&cmds, i);
	batch_add(&cmds, MI_BATCH_BUFFER_END);
@@ -385,15 +416,15 @@ int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine,
	batch_get_defaults(engine->i915, &bv);
	if (!vma)
-		return bv.max_size;
+		return bv.size;
-	GEM_BUG_ON(vma->obj->base.size < bv.max_size);
+	GEM_BUG_ON(vma->obj->base.size < bv.size);
	batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
	if (IS_ERR(batch))
		return PTR_ERR(batch);
-	emit_batch(vma, memset(batch, 0, bv.max_size), &bv);
+	emit_batch(vma, memset(batch, 0, bv.size), &bv);
	i915_gem_object_flush_map(vma->obj);
	__i915_gem_object_release_map(vma->obj);


@@ -0,0 +1,633 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2014 Intel Corporation
*/
#include "gen8_engine_cs.h"
#include "i915_drv.h"
#include "intel_lrc.h"
#include "intel_gpu_commands.h"
#include "intel_ring.h"
int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
bool vf_flush_wa = false, dc_flush_wa = false;
u32 *cs, flags = 0;
int len;
flags |= PIPE_CONTROL_CS_STALL;
if (mode & EMIT_FLUSH) {
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
}
if (mode & EMIT_INVALIDATE) {
flags |= PIPE_CONTROL_TLB_INVALIDATE;
flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
/*
* On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
* pipe control.
*/
if (IS_GEN(rq->engine->i915, 9))
vf_flush_wa = true;
/* WaForGAMHang:kbl */
if (IS_KBL_GT_REVID(rq->engine->i915, 0, KBL_REVID_B0))
dc_flush_wa = true;
}
len = 6;
if (vf_flush_wa)
len += 6;
if (dc_flush_wa)
len += 12;
cs = intel_ring_begin(rq, len);
if (IS_ERR(cs))
return PTR_ERR(cs);
if (vf_flush_wa)
cs = gen8_emit_pipe_control(cs, 0, 0);
if (dc_flush_wa)
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE,
0);
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
if (dc_flush_wa)
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0);
intel_ring_advance(rq, cs);
return 0;
}
int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
u32 cmd, *cs;
cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
cmd = MI_FLUSH_DW + 1;
/*
* We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
* wrt the contents of the write cache being flushed to memory
* (and thus being coherent from the CPU).
*/
cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
if (mode & EMIT_INVALIDATE) {
cmd |= MI_INVALIDATE_TLB;
if (rq->engine->class == VIDEO_DECODE_CLASS)
cmd |= MI_INVALIDATE_BSD;
}
*cs++ = cmd;
*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
intel_ring_advance(rq, cs);
return 0;
}
int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
if (mode & EMIT_FLUSH) {
u32 *cs;
u32 flags = 0;
flags |= PIPE_CONTROL_CS_STALL;
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(rq, cs);
}
if (mode & EMIT_INVALIDATE) {
u32 *cs;
u32 flags = 0;
flags |= PIPE_CONTROL_CS_STALL;
flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TLB_INVALIDATE;
flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(rq, cs);
}
return 0;
}
static u32 preparser_disable(bool state)
{
return MI_ARB_CHECK | 1 << 8 | state;
}
static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine)
{
static const i915_reg_t vd[] = {
GEN12_VD0_AUX_NV,
GEN12_VD1_AUX_NV,
GEN12_VD2_AUX_NV,
GEN12_VD3_AUX_NV,
};
static const i915_reg_t ve[] = {
GEN12_VE0_AUX_NV,
GEN12_VE1_AUX_NV,
};
if (engine->class == VIDEO_DECODE_CLASS)
return vd[engine->instance];
if (engine->class == VIDEO_ENHANCEMENT_CLASS)
return ve[engine->instance];
GEM_BUG_ON("unknown aux_inv reg\n");
return INVALID_MMIO_REG;
}
static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs)
{
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(inv_reg);
*cs++ = AUX_INV;
*cs++ = MI_NOOP;
return cs;
}
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
if (mode & EMIT_FLUSH) {
u32 flags = 0;
u32 *cs;
flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
flags |= PIPE_CONTROL_FLUSH_L3;
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl */
flags |= PIPE_CONTROL_DEPTH_STALL;
flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_CS_STALL;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
cs = gen12_emit_pipe_control(cs,
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
flags, LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(rq, cs);
}
if (mode & EMIT_INVALIDATE) {
u32 flags = 0;
u32 *cs;
flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TLB_INVALIDATE;
flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STORE_DATA_INDEX;
flags |= PIPE_CONTROL_QW_WRITE;
flags |= PIPE_CONTROL_CS_STALL;
cs = intel_ring_begin(rq, 8 + 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
/*
* Prevent the pre-parser from skipping past the TLB
* invalidate and loading a stale page for the batch
* buffer / request payload.
*/
*cs++ = preparser_disable(true);
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
/* hsdes: 1809175790 */
cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs);
*cs++ = preparser_disable(false);
intel_ring_advance(rq, cs);
}
return 0;
}
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
intel_engine_mask_t aux_inv = 0;
u32 cmd, *cs;
cmd = 4;
if (mode & EMIT_INVALIDATE)
cmd += 2;
if (mode & EMIT_INVALIDATE)
aux_inv = rq->engine->mask & ~BIT(BCS0);
if (aux_inv)
cmd += 2 * hweight8(aux_inv) + 2;
cs = intel_ring_begin(rq, cmd);
if (IS_ERR(cs))
return PTR_ERR(cs);
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(true);
cmd = MI_FLUSH_DW + 1;
/*
* We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
* wrt the contents of the write cache being flushed to memory
* (and thus being coherent from the CPU).
*/
cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
if (mode & EMIT_INVALIDATE) {
cmd |= MI_INVALIDATE_TLB;
if (rq->engine->class == VIDEO_DECODE_CLASS)
cmd |= MI_INVALIDATE_BSD;
}
*cs++ = cmd;
*cs++ = LRC_PPHWSP_SCRATCH_ADDR;
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
if (aux_inv) { /* hsdes: 1809175790 */
struct intel_engine_cs *engine;
unsigned int tmp;
*cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv));
for_each_engine_masked(engine, rq->engine->gt,
aux_inv, tmp) {
*cs++ = i915_mmio_reg_offset(aux_inv_reg(engine));
*cs++ = AUX_INV;
}
*cs++ = MI_NOOP;
}
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(false);
intel_ring_advance(rq, cs);
return 0;
}
static inline u32 preempt_address(struct intel_engine_cs *engine)
{
return (i915_ggtt_offset(engine->status_page.vma) +
I915_GEM_HWS_PREEMPT_ADDR);
}
static u32 hwsp_offset(const struct i915_request *rq)
{
const struct intel_timeline_cacheline *cl;
/* Before the request is executed, the timeline/cachline is fixed */
cl = rcu_dereference_protected(rq->hwsp_cacheline, 1);
if (cl)
return cl->ggtt_offset;
return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset;
}
int gen8_emit_init_breadcrumb(struct i915_request *rq)
{
u32 *cs;
GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq));
if (!i915_request_timeline(rq)->has_initial_breadcrumb)
return 0;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = hwsp_offset(rq);
*cs++ = 0;
*cs++ = rq->fence.seqno - 1;
/*
* Check if we have been preempted before we even get started.
*
* After this point i915_request_started() reports true, even if
* we get preempted and so are no longer running.
*
* i915_request_started() is used during preemption processing
* to decide if the request is currently inside the user payload
* or spinning on a kernel semaphore (or earlier). For no-preemption
* requests, we do allow preemption on the semaphore before the user
* payload, but do not allow preemption once the request is started.
*
* i915_request_started() is similarly used during GPU hangs to
* determine if the user's payload was guilty, and if so, the
* request is banned. Before the request is started, it is assumed
* to be unharmed and an innocent victim of another's hang.
*/
*cs++ = MI_NOOP;
*cs++ = MI_ARB_CHECK;
intel_ring_advance(rq, cs);
/* Record the updated position of the request's payload */
rq->infix = intel_ring_offset(rq, cs);
__set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags);
return 0;
}
int gen8_emit_bb_start_noarb(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
u32 *cs;
cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
/*
* WaDisableCtxRestoreArbitration:bdw,chv
*
* We don't need to perform MI_ARB_ENABLE as often as we do (in
* particular all the gen that do not need the w/a at all!), if we
* took care to make sure that on every switch into this context
* (both ordinary and for preemption) that arbitrartion was enabled
* we would be fine. However, for gen8 there is another w/a that
* requires us to not preempt inside GPGPU execution, so we keep
* arbitration disabled for gen8 batches. Arbitration will be
* re-enabled before we close the request
* (engine->emit_fini_breadcrumb).
*/
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
/* FIXME(BDW+): Address space and security selectors. */
*cs++ = MI_BATCH_BUFFER_START_GEN8 |
(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
intel_ring_advance(rq, cs);
return 0;
}
int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags)
{
u32 *cs;
if (unlikely(i915_request_has_nopreempt(rq)))
return gen8_emit_bb_start_noarb(rq, offset, len, flags);
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
*cs++ = MI_BATCH_BUFFER_START_GEN8 |
(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
*cs++ = lower_32_bits(offset);
*cs++ = upper_32_bits(offset);
*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
*cs++ = MI_NOOP;
intel_ring_advance(rq, cs);
return 0;
}
static void assert_request_valid(struct i915_request *rq)
{
struct intel_ring *ring __maybe_unused = rq->ring;
/* Can we unwind this request without appearing to go forwards? */
GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0);
}
/*
* Reserve space for 2 NOOPs at the end of each request to be
* used as a workaround for not being allowed to do lite
* restore with HEAD==TAIL (WaIdleLiteRestore).
*/
static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs)
{
/* Ensure there's always at least one preemption point per-request. */
*cs++ = MI_ARB_CHECK;
*cs++ = MI_NOOP;
rq->wa_tail = intel_ring_offset(rq, cs);
/* Check that entire request is less than half the ring */
assert_request_valid(rq);
return cs;
}
static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
*cs++ = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
*cs++ = 0;
*cs++ = preempt_address(rq->engine);
*cs++ = 0;
return cs;
}
static __always_inline u32*
gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
if (intel_engine_has_semaphores(rq->engine))
cs = emit_preempt_busywait(rq, cs);
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
return gen8_emit_wa_tail(rq, cs);
}
static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
{
return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
}
u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs));
}
u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
cs = gen8_emit_pipe_control(cs,
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE,
0);
/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
cs = gen8_emit_ggtt_write_rcs(cs,
rq->fence.seqno,
hwsp_offset(rq),
PIPE_CONTROL_FLUSH_ENABLE |
PIPE_CONTROL_CS_STALL);
return gen8_emit_fini_breadcrumb_tail(rq, cs);
}
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
cs = gen8_emit_ggtt_write_rcs(cs,
rq->fence.seqno,
hwsp_offset(rq),
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
return gen8_emit_fini_breadcrumb_tail(rq, cs);
}
/*
* Note that the CS instruction pre-parser will not stall on the breadcrumb
* flush and will continue pre-fetching the instructions after it before the
* memory sync is completed. On pre-gen12 HW, the pre-parser will stop at
* BB_START/END instructions, so, even though we might pre-fetch the pre-amble
* of the next request before the memory has been flushed, we're guaranteed that
* we won't access the batch itself too early.
* However, on gen12+ the parser can pre-fetch across the BB_START/END commands,
* so, if the current request is modifying an instruction in the next request on
* the same intel_context, we might pre-fetch and then execute the pre-update
* instruction. To avoid this, the users of self-modifying code should either
* disable the parser around the code emitting the memory writes, via a new flag
* added to MI_ARB_CHECK, or emit the writes from a different intel_context. For
* the in-kernel use-cases we've opted to use a separate context, see
* reloc_gpu() as an example.
* All the above applies only to the instructions themselves. Non-inline data
* used by the instructions is not pre-fetched.
*/
static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs)
{
*cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */
*cs++ = MI_SEMAPHORE_WAIT_TOKEN |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_EQ_SDD;
*cs++ = 0;
*cs++ = preempt_address(rq->engine);
*cs++ = 0;
*cs++ = 0;
return cs;
}
static __always_inline u32*
gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs)
{
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
if (intel_engine_has_semaphores(rq->engine))
cs = gen12_emit_preempt_busywait(rq, cs);
rq->tail = intel_ring_offset(rq, cs);
assert_ring_tail_valid(rq->ring, rq->tail);
return gen8_emit_wa_tail(rq, cs);
}
u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
{
/* XXX Stalling flush before seqno write; post-sync not */
cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0));
return gen12_emit_fini_breadcrumb_tail(rq, cs);
}
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
{
cs = gen12_emit_ggtt_write_rcs(cs,
rq->fence.seqno,
hwsp_offset(rq),
PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_TILE_CACHE_FLUSH |
PIPE_CONTROL_FLUSH_L3 |
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
/* Wa_1409600907:tgl */
PIPE_CONTROL_DEPTH_STALL |
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
return gen12_emit_fini_breadcrumb_tail(rq, cs);
}



@@ -0,0 +1,127 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2014 Intel Corporation
*/
#ifndef __GEN8_ENGINE_CS_H__
#define __GEN8_ENGINE_CS_H__
#include <linux/string.h>
#include <linux/types.h>
#include "i915_gem.h" /* GEM_BUG_ON */
#include "intel_gpu_commands.h"
struct i915_request;
int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode);
int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode);
int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode);
int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode);
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode);
int gen8_emit_init_breadcrumb(struct i915_request *rq);
int gen8_emit_bb_start_noarb(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
int gen8_emit_bb_start(struct i915_request *rq,
u64 offset, u32 len,
const unsigned int flags);
u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs);
u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
static inline u32 *
__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
batch[1] = flags1;
batch[2] = offset;
return batch + 6;
}
static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
return __gen8_emit_pipe_control(batch, 0, flags, offset);
}
static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
}
static inline u32 *
__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1)
{
*cs++ = GFX_OP_PIPE_CONTROL(6) | flags0;
*cs++ = flags1 | PIPE_CONTROL_QW_WRITE;
*cs++ = offset;
*cs++ = 0;
*cs++ = value;
*cs++ = 0; /* We're thrashing one extra dword. */
return cs;
}
static inline u32*
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
/* We're using qword write, offset should be aligned to 8 bytes. */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
return __gen8_emit_write_rcs(cs,
value,
gtt_offset,
0,
flags | PIPE_CONTROL_GLOBAL_GTT_IVB);
}
static inline u32*
gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
{
/* We're using qword write, offset should be aligned to 8 bytes. */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
return __gen8_emit_write_rcs(cs,
value,
gtt_offset,
flags0,
flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB);
}
static inline u32 *
__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
*cs++ = (MI_FLUSH_DW + 1) | flags;
*cs++ = gtt_offset;
*cs++ = 0;
*cs++ = value;
return cs;
}
static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
GEM_BUG_ON(gtt_offset & (1 << 5));
/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
return __gen8_emit_flush_dw(cs,
value,
gtt_offset | MI_FLUSH_DW_USE_GTT,
flags | MI_FLUSH_DW_OP_STOREDW);
}
#endif /* __GEN8_ENGINE_CS_H__ */
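
/*
 * Editor's usage sketch (illustration only, not part of the change; the
 * function name is hypothetical and intel_ring.h is assumed to be included
 * by the caller): ring space is reserved with intel_ring_begin() and then
 * filled with one of the helpers above, e.g. a standalone PIPE_CONTROL
 * flush on a render request.
 */
static inline int illustrate_emit_rcs_flush(struct i915_request *rq)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 6); /* GFX_OP_PIPE_CONTROL(6) occupies six dwords */
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cs = gen8_emit_pipe_control(cs,
				    PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
				    PIPE_CONTROL_CS_STALL,
				    0);

	intel_ring_advance(rq, cs);
	return 0;
}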


@@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b,
 	return true;
 }
 
-static inline bool __request_completed(const struct i915_request *rq)
-{
-	return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
-}
-
 __maybe_unused static bool
 check_signal_order(struct intel_context *ce, struct i915_request *rq)
 {
@@ -192,18 +187,6 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
 		intel_engine_add_retire(b->irq_engine, tl);
 }
 
-static bool __signal_request(struct i915_request *rq)
-{
-	GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
-
-	if (!__dma_fence_signal(&rq->fence)) {
-		i915_request_put(rq);
-		return false;
-	}
-
-	return true;
-}
-
 static struct llist_node *
 slist_add(struct llist_node *node, struct llist_node *head)
 {
@@ -251,13 +234,14 @@ static void signal_irq_work(struct irq_work *work)
 		intel_breadcrumbs_disarm_irq(b);
 
 	rcu_read_lock();
+	atomic_inc(&b->signaler_active);
 	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
 		struct i915_request *rq;
 
 		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
 			bool release;
 
-			if (!__request_completed(rq))
+			if (!__i915_request_is_complete(rq))
 				break;
 
 			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
@@ -273,17 +257,20 @@ static void signal_irq_work(struct irq_work *work)
 			list_del_rcu(&rq->signal_link);
 			release = remove_signaling_context(b, ce);
 			spin_unlock(&ce->signal_lock);
 
-			if (__signal_request(rq))
-				/* We own signal_node now, xfer to local list */
-				signal = slist_add(&rq->signal_node, signal);
-
 			if (release) {
+				if (intel_timeline_is_last(ce->timeline, rq))
 					add_retire(b, ce->timeline);
 				intel_context_put(ce);
 			}
+
+			if (__dma_fence_signal(&rq->fence))
+				/* We own signal_node now, xfer to local list */
+				signal = slist_add(&rq->signal_node, signal);
+			else
+				i915_request_put(rq);
 		}
 	}
+	atomic_dec(&b->signaler_active);
 	rcu_read_unlock();
 
 	llist_for_each_safe(signal, sn, signal) {
@@ -342,17 +329,19 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
 	spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
-void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
+void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
 {
-	/* Kick the work once more to drain the signalers */
+	if (!READ_ONCE(b->irq_armed))
+		return;
+
+	/* Kick the work once more to drain the signalers, and disarm the irq */
 	irq_work_sync(&b->irq_work);
-	while (unlikely(READ_ONCE(b->irq_armed))) {
+	while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) {
 		local_irq_disable();
 		signal_irq_work(&b->irq_work);
 		local_irq_enable();
 		cond_resched();
 	}
-	GEM_BUG_ON(!list_empty(&b->signalers));
 }
 
 void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
@@ -363,6 +352,17 @@ void intel_breadcrumbs_free(struct intel_breadcrumbs *b)
 	kfree(b);
 }
 
+static void irq_signal_request(struct i915_request *rq,
+			       struct intel_breadcrumbs *b)
+{
+	if (!__dma_fence_signal(&rq->fence))
+		return;
+
+	i915_request_get(rq);
+	if (llist_add(&rq->signal_node, &b->signaled_requests))
+		irq_work_queue(&b->irq_work);
+}
+
 static void insert_breadcrumb(struct i915_request *rq)
 {
 	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
@@ -372,17 +372,13 @@ static void insert_breadcrumb(struct i915_request *rq)
 	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
 		return;
 
-	i915_request_get(rq);
-
 	/*
 	 * If the request is already completed, we can transfer it
 	 * straight onto a signaled list, and queue the irq worker for
 	 * its signal completion.
 	 */
-	if (__request_completed(rq)) {
-		if (__signal_request(rq) &&
-		    llist_add(&rq->signal_node, &b->signaled_requests))
-			irq_work_queue(&b->irq_work);
+	if (__i915_request_is_complete(rq)) {
+		irq_signal_request(rq, b);
 		return;
 	}
 
@@ -413,6 +409,8 @@ static void insert_breadcrumb(struct i915_request *rq)
 			break;
 		}
 	}
+
+	i915_request_get(rq);
 	list_add_rcu(&rq->signal_link, pos);
 	GEM_BUG_ON(!check_signal_order(ce, rq));
 	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
@@ -453,22 +451,60 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq)
 
 void i915_request_cancel_breadcrumb(struct i915_request *rq)
 {
+	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
 	struct intel_context *ce = rq->context;
+	unsigned long flags;
 	bool release;
 
 	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
 		return;
 
-	spin_lock(&ce->signal_lock);
+	spin_lock_irqsave(&ce->signal_lock, flags);
 	list_del_rcu(&rq->signal_link);
-	release = remove_signaling_context(rq->engine->breadcrumbs, ce);
-	spin_unlock(&ce->signal_lock);
+	release = remove_signaling_context(b, ce);
+	spin_unlock_irqrestore(&ce->signal_lock, flags);
 	if (release)
 		intel_context_put(ce);
 
+	if (__i915_request_is_complete(rq))
+		irq_signal_request(rq, b);
+
 	i915_request_put(rq);
 }
 
+void intel_context_remove_breadcrumbs(struct intel_context *ce,
+				      struct intel_breadcrumbs *b)
+{
+	struct i915_request *rq, *rn;
+	bool release = false;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ce->signal_lock, flags);
+
+	if (list_empty(&ce->signals))
+		goto unlock;
+
+	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
+		GEM_BUG_ON(!__i915_request_is_complete(rq));
+		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
+					&rq->fence.flags))
+			continue;
+
+		list_del_rcu(&rq->signal_link);
+		irq_signal_request(rq, b);
+		i915_request_put(rq);
+	}
+	release = remove_signaling_context(b, ce);
+
+unlock:
+	spin_unlock_irqrestore(&ce->signal_lock, flags);
+	if (release)
+		intel_context_put(ce);
+
+	while (atomic_read(&b->signaler_active))
+		cpu_relax();
+}
+
 static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
 {
 	struct intel_context *ce;


@@ -6,6 +6,7 @@
 #ifndef __INTEL_BREADCRUMBS__
 #define __INTEL_BREADCRUMBS__
 
+#include <linux/atomic.h>
 #include <linux/irq_work.h>
 
 #include "intel_engine_types.h"
@@ -19,7 +20,18 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine);
 void intel_breadcrumbs_free(struct intel_breadcrumbs *b);
 
 void intel_breadcrumbs_reset(struct intel_breadcrumbs *b);
-void intel_breadcrumbs_park(struct intel_breadcrumbs *b);
+void __intel_breadcrumbs_park(struct intel_breadcrumbs *b);
+
+static inline void intel_breadcrumbs_unpark(struct intel_breadcrumbs *b)
+{
+	atomic_inc(&b->active);
+}
+
+static inline void intel_breadcrumbs_park(struct intel_breadcrumbs *b)
+{
+	if (atomic_dec_and_test(&b->active))
+		__intel_breadcrumbs_park(b);
+}
 
 static inline void
 intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
@@ -33,4 +45,7 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
 bool i915_request_enable_breadcrumb(struct i915_request *request);
 void i915_request_cancel_breadcrumb(struct i915_request *request);
 
+void intel_context_remove_breadcrumbs(struct intel_context *ce,
+				      struct intel_breadcrumbs *b);
+
 #endif /* __INTEL_BREADCRUMBS__ */
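
With the park/unpark inlines above, breadcrumb signaling becomes reference counted rather than tied directly to engine parking. A minimal sketch of the intended bracket, assuming engine power management remains the primary user (the helper name is hypothetical):

static inline void illustrate_breadcrumbs_pm_bracket(struct intel_engine_cs *engine)
{
	/* taken when the engine is unparked: keeps the signaler usable */
	intel_breadcrumbs_unpark(engine->breadcrumbs);

	/* ... requests are submitted and signal_irq_work() runs ... */

	/* dropped when the engine parks; the last user drains and disarms the irq */
	intel_breadcrumbs_park(engine->breadcrumbs);
}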


@@ -29,17 +29,20 @@
  * the overhead of waking that client is much preferred.
  */
 struct intel_breadcrumbs {
-	/* Not all breadcrumbs are attached to physical HW */
-	struct intel_engine_cs *irq_engine;
+	atomic_t active;
 
 	spinlock_t signalers_lock; /* protects the list of signalers */
 	struct list_head signalers;
 	struct llist_head signaled_requests;
+	atomic_t signaler_active;
 
 	spinlock_t irq_lock; /* protects the interrupt from hardirq context */
 	struct irq_work irq_work; /* for use from inside irq_lock */
 	unsigned int irq_enabled;
 	bool irq_armed;
+
+	/* Not all breadcrumbs are attached to physical HW */
+	struct intel_engine_cs *irq_engine;
 };
 
 #endif /* __INTEL_BREADCRUMBS_TYPES__ */


@@ -191,6 +191,11 @@ static inline bool intel_context_is_closed(const struct intel_context *ce)
 	return test_bit(CONTEXT_CLOSED_BIT, &ce->flags);
 }
 
+static inline bool intel_context_has_inflight(const struct intel_context *ce)
+{
+	return test_bit(COPS_HAS_INFLIGHT_BIT, &ce->ops->flags);
+}
+
 static inline bool intel_context_use_semaphores(const struct intel_context *ce)
 {
 	return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
@@ -248,16 +253,14 @@ intel_context_clear_nopreempt(struct intel_context *ce)
 
 static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
 {
-	const u32 period =
-		RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+	const u32 period = ce->engine->gt->clock_period_ns;
 
 	return READ_ONCE(ce->runtime.total) * period;
 }
 
 static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce)
 {
-	const u32 period =
-		RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns;
+	const u32 period = ce->engine->gt->clock_period_ns;
 
 	return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period);
 }
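
Since the context runtime is accumulated in CS timestamp ticks, the change above only swaps where the tick period comes from; the conversion itself stays a simple multiply. A worked example, assuming a 12.5 MHz timestamp clock (80 ns period) and a hypothetical helper name:

static inline u64 illustrate_runtime_ns(u64 ticks, u32 clock_period_ns)
{
	/* e.g. 1,000,000 ticks * 80ns/tick = 80,000,000ns = 80ms */
	return ticks * clock_period_ns;
}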


@@ -9,7 +9,6 @@
 #include "intel_engine_pm.h"
 #include "intel_gpu_commands.h"
 #include "intel_lrc.h"
-#include "intel_lrc_reg.h"
 #include "intel_ring.h"
 #include "intel_sseu.h"


@@ -30,6 +30,10 @@ struct intel_context;
struct intel_ring; struct intel_ring;
struct intel_context_ops { struct intel_context_ops {
unsigned long flags;
#define COPS_HAS_INFLIGHT_BIT 0
#define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT)
int (*alloc)(struct intel_context *ce); int (*alloc)(struct intel_context *ce);
int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
@@ -58,8 +62,12 @@ struct intel_context {
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
struct intel_engine_cs *inflight; struct intel_engine_cs *inflight;
#define intel_context_inflight(ce) ptr_mask_bits(READ_ONCE((ce)->inflight), 2) #define __intel_context_inflight(engine) ptr_mask_bits(engine, 3)
#define intel_context_inflight_count(ce) ptr_unmask_bits(READ_ONCE((ce)->inflight), 2) #define __intel_context_inflight_count(engine) ptr_unmask_bits(engine, 3)
#define intel_context_inflight(ce) \
__intel_context_inflight(READ_ONCE((ce)->inflight))
#define intel_context_inflight_count(ce) \
__intel_context_inflight_count(READ_ONCE((ce)->inflight))
struct i915_address_space *vm; struct i915_address_space *vm;
struct i915_gem_context __rcu *gem_context; struct i915_gem_context __rcu *gem_context;
@@ -81,12 +89,13 @@ struct intel_context {
unsigned long flags; unsigned long flags;
#define CONTEXT_BARRIER_BIT 0 #define CONTEXT_BARRIER_BIT 0
#define CONTEXT_ALLOC_BIT 1 #define CONTEXT_ALLOC_BIT 1
#define CONTEXT_VALID_BIT 2 #define CONTEXT_INIT_BIT 2
#define CONTEXT_CLOSED_BIT 3 #define CONTEXT_VALID_BIT 3
#define CONTEXT_USE_SEMAPHORES 4 #define CONTEXT_CLOSED_BIT 4
#define CONTEXT_BANNED 5 #define CONTEXT_USE_SEMAPHORES 5
#define CONTEXT_FORCE_SINGLE_SUBMISSION 6 #define CONTEXT_BANNED 6
#define CONTEXT_NOPREEMPT 7 #define CONTEXT_FORCE_SINGLE_SUBMISSION 7
#define CONTEXT_NOPREEMPT 8
u32 *lrc_reg_state; u32 *lrc_reg_state;
union { union {


@@ -15,7 +15,6 @@
 #include "i915_selftest.h"
 #include "gt/intel_timeline.h"
 #include "intel_engine_types.h"
-#include "intel_gpu_commands.h"
 #include "intel_workarounds.h"
 
 struct drm_printer;
@@ -223,91 +222,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine,
void intel_engine_init_execlists(struct intel_engine_cs *engine); void intel_engine_init_execlists(struct intel_engine_cs *engine);
static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
batch[1] = flags1;
batch[2] = offset;
return batch + 6;
}
static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
{
return __gen8_emit_pipe_control(batch, 0, flags, offset);
}
static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
{
return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
}
static inline u32 *
__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1)
{
*cs++ = GFX_OP_PIPE_CONTROL(6) | flags0;
*cs++ = flags1 | PIPE_CONTROL_QW_WRITE;
*cs++ = offset;
*cs++ = 0;
*cs++ = value;
*cs++ = 0; /* We're thrashing one extra dword. */
return cs;
}
static inline u32*
gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
/* We're using qword write, offset should be aligned to 8 bytes. */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
return __gen8_emit_write_rcs(cs,
value,
gtt_offset,
0,
flags | PIPE_CONTROL_GLOBAL_GTT_IVB);
}
static inline u32*
gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1)
{
/* We're using qword write, offset should be aligned to 8 bytes. */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
return __gen8_emit_write_rcs(cs,
value,
gtt_offset,
flags0,
flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB);
}
static inline u32 *
__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
*cs++ = (MI_FLUSH_DW + 1) | flags;
*cs++ = gtt_offset;
*cs++ = 0;
*cs++ = value;
return cs;
}
static inline u32 *
gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags)
{
/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
GEM_BUG_ON(gtt_offset & (1 << 5));
/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
return __gen8_emit_flush_dw(cs,
value,
gtt_offset | MI_FLUSH_DW_USE_GTT,
flags | MI_FLUSH_DW_OP_STOREDW);
}
static inline void __intel_engine_reset(struct intel_engine_cs *engine, static inline void __intel_engine_reset(struct intel_engine_cs *engine,
bool stalled) bool stalled)
{ {
@@ -318,7 +232,12 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine,
bool intel_engines_are_idle(struct intel_gt *gt); bool intel_engines_are_idle(struct intel_gt *gt);
bool intel_engine_is_idle(struct intel_engine_cs *engine); bool intel_engine_is_idle(struct intel_engine_cs *engine);
void intel_engine_flush_submission(struct intel_engine_cs *engine);
void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync);
static inline void intel_engine_flush_submission(struct intel_engine_cs *engine)
{
__intel_engine_flush_submission(engine, true);
}
void intel_engines_reset_default_submission(struct intel_gt *gt); void intel_engines_reset_default_submission(struct intel_gt *gt);


@@ -33,12 +33,14 @@
#include "intel_engine.h" #include "intel_engine.h"
#include "intel_engine_pm.h" #include "intel_engine_pm.h"
#include "intel_engine_user.h" #include "intel_engine_user.h"
#include "intel_execlists_submission.h"
#include "intel_gt.h" #include "intel_gt.h"
#include "intel_gt_requests.h" #include "intel_gt_requests.h"
#include "intel_gt_pm.h" #include "intel_gt_pm.h"
#include "intel_lrc.h" #include "intel_lrc_reg.h"
#include "intel_reset.h" #include "intel_reset.h"
#include "intel_ring.h" #include "intel_ring.h"
#include "uc/intel_guc_submission.h"
/* Haswell does have the CXT_SIZE register however it does not appear to be /* Haswell does have the CXT_SIZE register however it does not appear to be
* valid. Now, docs explain in dwords what is in the context object. The full * valid. Now, docs explain in dwords what is in the context object. The full
@@ -647,6 +649,8 @@ static int init_status_page(struct intel_engine_cs *engine)
void *vaddr; void *vaddr;
int ret; int ret;
INIT_LIST_HEAD(&engine->status_page.timelines);
/* /*
* Though the HWS register does support 36bit addresses, historically * Though the HWS register does support 36bit addresses, historically
* we have had hangs and corruption reported due to wild writes if * we have had hangs and corruption reported due to wild writes if
@@ -723,6 +727,9 @@ static int engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_whitelist(engine); intel_engine_init_whitelist(engine);
intel_engine_init_ctx_wa(engine); intel_engine_init_ctx_wa(engine);
if (INTEL_GEN(engine->i915) >= 12)
engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
return 0; return 0;
err_status: err_status:
@@ -829,6 +836,21 @@ create_pinned_context(struct intel_engine_cs *engine,
return ce; return ce;
} }
static void destroy_pinned_context(struct intel_context *ce)
{
struct intel_engine_cs *engine = ce->engine;
struct i915_vma *hwsp = engine->status_page.vma;
GEM_BUG_ON(ce->timeline->hwsp_ggtt != hwsp);
mutex_lock(&hwsp->vm->mutex);
list_del(&ce->timeline->engine_link);
mutex_unlock(&hwsp->vm->mutex);
intel_context_unpin(ce);
intel_context_put(ce);
}
static struct intel_context * static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine) create_kernel_context(struct intel_engine_cs *engine)
{ {
@@ -889,7 +911,9 @@ int intel_engines_init(struct intel_gt *gt)
enum intel_engine_id id; enum intel_engine_id id;
int err; int err;
if (HAS_EXECLISTS(gt->i915)) if (intel_uc_uses_guc_submission(&gt->uc))
setup = intel_guc_submission_setup;
else if (HAS_EXECLISTS(gt->i915))
setup = intel_execlists_submission_setup; setup = intel_execlists_submission_setup;
else else
setup = intel_ring_submission_setup; setup = intel_ring_submission_setup;
@@ -925,7 +949,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
GEM_BUG_ON(!list_empty(&engine->active.requests)); GEM_BUG_ON(!list_empty(&engine->active.requests));
tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ tasklet_kill(&engine->execlists.tasklet); /* flush the callback */
cleanup_status_page(engine);
intel_breadcrumbs_free(engine->breadcrumbs); intel_breadcrumbs_free(engine->breadcrumbs);
intel_engine_fini_retire(engine); intel_engine_fini_retire(engine);
@@ -934,11 +957,11 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
if (engine->default_state) if (engine->default_state)
fput(engine->default_state); fput(engine->default_state);
if (engine->kernel_context) { if (engine->kernel_context)
intel_context_unpin(engine->kernel_context); destroy_pinned_context(engine->kernel_context);
intel_context_put(engine->kernel_context);
}
GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
cleanup_status_page(engine);
intel_wa_list_free(&engine->ctx_wa_list); intel_wa_list_free(&engine->ctx_wa_list);
intel_wa_list_free(&engine->wa_list); intel_wa_list_free(&engine->wa_list);
@@ -1002,32 +1025,50 @@ static unsigned long stop_timeout(const struct intel_engine_cs *engine)
return READ_ONCE(engine->props.stop_timeout_ms); return READ_ONCE(engine->props.stop_timeout_ms);
} }
int intel_engine_stop_cs(struct intel_engine_cs *engine) static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
int fast_timeout_us,
int slow_timeout_ms)
{ {
struct intel_uncore *uncore = engine->uncore; struct intel_uncore *uncore = engine->uncore;
const u32 base = engine->mmio_base; const i915_reg_t mode = RING_MI_MODE(engine->mmio_base);
const i915_reg_t mode = RING_MI_MODE(base);
int err; int err;
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
err = __intel_wait_for_register_fw(engine->uncore, mode,
MODE_IDLE, MODE_IDLE,
fast_timeout_us,
slow_timeout_ms,
NULL);
/* A final mmio read to let GPU writes be hopefully flushed to memory */
intel_uncore_posting_read_fw(uncore, mode);
return err;
}
int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
int err = 0;
if (INTEL_GEN(engine->i915) < 3) if (INTEL_GEN(engine->i915) < 3)
return -ENODEV; return -ENODEV;
ENGINE_TRACE(engine, "\n"); ENGINE_TRACE(engine, "\n");
if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
ENGINE_TRACE(engine,
"timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR,
ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR);
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); /*
* Sometimes we observe that the idle flag is not
err = 0; * set even though the ring is empty. So double
if (__intel_wait_for_register_fw(uncore, * check before giving up.
mode, MODE_IDLE, MODE_IDLE, */
1000, stop_timeout(engine), if ((ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) !=
NULL)) { (ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR))
ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n");
err = -ETIMEDOUT; err = -ETIMEDOUT;
} }
/* A final mmio read to let GPU writes be hopefully flushed to memory */
intel_uncore_posting_read_fw(uncore, mode);
return err; return err;
} }
@@ -1189,17 +1230,13 @@ static bool ring_is_idle(struct intel_engine_cs *engine)
return idle; return idle;
} }
void intel_engine_flush_submission(struct intel_engine_cs *engine) void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync)
{ {
struct tasklet_struct *t = &engine->execlists.tasklet; struct tasklet_struct *t = &engine->execlists.tasklet;
if (!t->func) if (!t->func)
return; return;
/* Synchronise and wait for the tasklet on another CPU */
tasklet_kill(t);
/* Having cancelled the tasklet, ensure that is run */
local_bh_disable(); local_bh_disable();
if (tasklet_trylock(t)) { if (tasklet_trylock(t)) {
/* Must wait for any GPU reset in progress. */ /* Must wait for any GPU reset in progress. */
@@ -1208,6 +1245,10 @@ void intel_engine_flush_submission(struct intel_engine_cs *engine)
tasklet_unlock(t); tasklet_unlock(t);
} }
local_bh_enable(); local_bh_enable();
/* Synchronise and wait for the tasklet on another CPU */
if (sync)
tasklet_unlock_wait(t);
} }
/** /**
@@ -1273,8 +1314,12 @@ void intel_engines_reset_default_submission(struct intel_gt *gt)
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
enum intel_engine_id id; enum intel_engine_id id;
for_each_engine(engine, gt, id) for_each_engine(engine, gt, id) {
if (engine->sanitize)
engine->sanitize(engine);
engine->set_default_submission(engine); engine->set_default_submission(engine);
}
} }
bool intel_engine_can_store_dword(struct intel_engine_cs *engine) bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
@@ -1294,44 +1339,6 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
} }
} }
static int print_sched_attr(const struct i915_sched_attr *attr,
char *buf, int x, int len)
{
if (attr->priority == I915_PRIORITY_INVALID)
return x;
x += snprintf(buf + x, len - x,
" prio=%d", attr->priority);
return x;
}
static void print_request(struct drm_printer *m,
struct i915_request *rq,
const char *prefix)
{
const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
char buf[80] = "";
int x = 0;
x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf));
drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
prefix,
rq->fence.context, rq->fence.seqno,
i915_request_completed(rq) ? "!" :
i915_request_started(rq) ? "*" :
"",
test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
&rq->fence.flags) ? "+" :
test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&rq->fence.flags) ? "-" :
"",
buf,
jiffies_to_msecs(jiffies - rq->emitted_jiffies),
name);
}
static struct intel_timeline *get_timeline(struct i915_request *rq) static struct intel_timeline *get_timeline(struct i915_request *rq)
{ {
struct intel_timeline *tl; struct intel_timeline *tl;
@@ -1480,7 +1487,9 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR)); drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
} }
if (HAS_EXECLISTS(dev_priv)) { if (intel_engine_in_guc_submission_mode(engine)) {
/* nothing to print yet */
} else if (HAS_EXECLISTS(dev_priv)) {
struct i915_request * const *port, *rq; struct i915_request * const *port, *rq;
const u32 *hws = const u32 *hws =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
@@ -1529,7 +1538,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
intel_context_is_banned(rq->context) ? "*" : ""); intel_context_is_banned(rq->context) ? "*" : "");
len += print_ring(hdr + len, sizeof(hdr) - len, rq); len += print_ring(hdr + len, sizeof(hdr) - len, rq);
scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr); i915_request_show(m, rq, hdr, 0);
} }
for (port = execlists->pending; (rq = *port); port++) { for (port = execlists->pending; (rq = *port); port++) {
char hdr[160]; char hdr[160];
@@ -1543,7 +1552,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
intel_context_is_banned(rq->context) ? "*" : ""); intel_context_is_banned(rq->context) ? "*" : "");
len += print_ring(hdr + len, sizeof(hdr) - len, rq); len += print_ring(hdr + len, sizeof(hdr) - len, rq);
scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); scnprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr); i915_request_show(m, rq, hdr, 0);
} }
rcu_read_unlock(); rcu_read_unlock();
execlists_active_unlock_bh(execlists); execlists_active_unlock_bh(execlists);
@@ -1687,7 +1696,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
if (rq) { if (rq) {
struct intel_timeline *tl = get_timeline(rq); struct intel_timeline *tl = get_timeline(rq);
print_request(m, rq, "\t\tactive "); i915_request_show(m, rq, "\t\tactive ", 0);
drm_printf(m, "\t\tring->start: 0x%08x\n", drm_printf(m, "\t\tring->start: 0x%08x\n",
i915_ggtt_offset(rq->ring->vma)); i915_ggtt_offset(rq->ring->vma));
@@ -1725,7 +1734,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
drm_printf(m, "\tDevice is asleep; skipping register dump\n"); drm_printf(m, "\tDevice is asleep; skipping register dump\n");
} }
intel_execlists_show_requests(engine, m, print_request, 8); intel_execlists_show_requests(engine, m, i915_request_show, 8);
drm_printf(m, "HWSP:\n"); drm_printf(m, "HWSP:\n");
hexdump(m, engine->status_page.addr, PAGE_SIZE); hexdump(m, engine->status_page.addr, PAGE_SIZE);


@@ -37,6 +37,18 @@ static bool next_heartbeat(struct intel_engine_cs *engine)
return true; return true;
} }
static struct i915_request *
heartbeat_create(struct intel_context *ce, gfp_t gfp)
{
struct i915_request *rq;
intel_context_enter(ce);
rq = __i915_request_create(ce, gfp);
intel_context_exit(ce);
return rq;
}
static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq) static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
{ {
engine->wakeref_serial = READ_ONCE(engine->serial) + 1; engine->wakeref_serial = READ_ONCE(engine->serial) + 1;
@@ -45,6 +57,15 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq)
engine->heartbeat.systole = i915_request_get(rq); engine->heartbeat.systole = i915_request_get(rq);
} }
static void heartbeat_commit(struct i915_request *rq,
const struct i915_sched_attr *attr)
{
idle_pulse(rq->engine, rq);
__i915_request_commit(rq);
__i915_request_queue(rq, attr);
}
static void show_heartbeat(const struct i915_request *rq, static void show_heartbeat(const struct i915_request *rq,
struct intel_engine_cs *engine) struct intel_engine_cs *engine)
{ {
@@ -139,16 +160,11 @@ static void heartbeat(struct work_struct *wrk)
goto out; goto out;
} }
intel_context_enter(ce); rq = heartbeat_create(ce, GFP_NOWAIT | __GFP_NOWARN);
rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
intel_context_exit(ce);
if (IS_ERR(rq)) if (IS_ERR(rq))
goto unlock; goto unlock;
idle_pulse(engine, rq); heartbeat_commit(rq, &attr);
__i915_request_commit(rq);
__i915_request_queue(rq, &attr);
unlock: unlock:
mutex_unlock(&ce->timeline->mutex); mutex_unlock(&ce->timeline->mutex);
@@ -187,17 +203,13 @@ static int __intel_engine_pulse(struct intel_engine_cs *engine)
GEM_BUG_ON(!intel_engine_has_preemption(engine)); GEM_BUG_ON(!intel_engine_has_preemption(engine));
GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
intel_context_enter(ce); rq = heartbeat_create(ce, GFP_NOWAIT | __GFP_NOWARN);
rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN);
intel_context_exit(ce);
if (IS_ERR(rq)) if (IS_ERR(rq))
return PTR_ERR(rq); return PTR_ERR(rq);
__set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags);
idle_pulse(engine, rq);
__i915_request_commit(rq); heartbeat_commit(rq, &attr);
__i915_request_queue(rq, &attr);
GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER); GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER);
return 0; return 0;
@@ -273,8 +285,12 @@ int intel_engine_pulse(struct intel_engine_cs *engine)
int intel_engine_flush_barriers(struct intel_engine_cs *engine) int intel_engine_flush_barriers(struct intel_engine_cs *engine)
{ {
struct i915_sched_attr attr = {
.priority = I915_USER_PRIORITY(I915_PRIORITY_MIN),
};
struct intel_context *ce = engine->kernel_context;
struct i915_request *rq; struct i915_request *rq;
int err = 0; int err;
if (llist_empty(&engine->barrier_tasks)) if (llist_empty(&engine->barrier_tasks))
return 0; return 0;
@@ -282,15 +298,22 @@ int intel_engine_flush_barriers(struct intel_engine_cs *engine)
if (!intel_engine_pm_get_if_awake(engine)) if (!intel_engine_pm_get_if_awake(engine))
return 0; return 0;
rq = i915_request_create(engine->kernel_context); if (mutex_lock_interruptible(&ce->timeline->mutex)) {
if (IS_ERR(rq)) { err = -EINTR;
err = PTR_ERR(rq);
goto out_rpm; goto out_rpm;
} }
idle_pulse(engine, rq); rq = heartbeat_create(ce, GFP_KERNEL);
i915_request_add(rq); if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto out_unlock;
}
heartbeat_commit(rq, &attr);
err = 0;
out_unlock:
mutex_unlock(&ce->timeline->mutex);
out_rpm: out_rpm:
intel_engine_pm_put(engine); intel_engine_pm_put(engine);
return err; return err;


@@ -60,11 +60,19 @@ static int __engine_unpark(struct intel_wakeref *wf)
 		/* Scrub the context image after our loss of control */
 		ce->ops->reset(ce);
+
+		CE_TRACE(ce, "reset { seqno:%x, *hwsp:%x, ring:%x }\n",
+			 ce->timeline->seqno,
+			 READ_ONCE(*ce->timeline->hwsp_seqno),
+			 ce->ring->emit);
+		GEM_BUG_ON(ce->timeline->seqno !=
+			   READ_ONCE(*ce->timeline->hwsp_seqno));
 	}
 
 	if (engine->unpark)
 		engine->unpark(engine);
 
+	intel_breadcrumbs_unpark(engine->breadcrumbs);
 	intel_engine_unpark_heartbeat(engine);
 	return 0;
 }
@@ -136,7 +144,7 @@ __queue_and_release_pm(struct i915_request *rq,
 	list_add_tail(&tl->link, &timelines->active_list);
 
 	/* Hand the request over to HW and so engine_retire() */
-	__i915_request_queue(rq, NULL);
+	__i915_request_queue_bh(rq);
 
 	/* Let new submissions commence (and maybe retire this timeline) */
 	__intel_wakeref_defer_park(&engine->wakeref);


@@ -68,6 +68,7 @@ typedef u8 intel_engine_mask_t;
 #define ALL_ENGINES ((intel_engine_mask_t)~0ul)
 
 struct intel_hw_status_page {
+	struct list_head timelines;
 	struct i915_vma *vma;
 	u32 *addr;
 };
@@ -184,6 +185,7 @@ struct intel_engine_execlists {
 	 */
 	u32 error_interrupt;
 #define ERROR_CSB	BIT(31)
+#define ERROR_PREEMPT	BIT(30)
 
 	/**
 	 * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset
@@ -236,16 +238,6 @@ struct intel_engine_execlists {
 	 */
 	unsigned int port_mask;
 
-	/**
-	 * @switch_priority_hint: Second context priority.
-	 *
-	 * We submit multiple contexts to the HW simultaneously and would
-	 * like to occasionally switch between them to emulate timeslicing.
-	 * To know when timeslicing is suitable, we track the priority of
-	 * the context submitted second.
-	 */
-	int switch_priority_hint;
-
 	/**
 	 * @queue_priority_hint: Highest pending priority.
 	 *
@@ -559,6 +551,8 @@ struct intel_engine_cs {
 		unsigned long stop_timeout_ms;
 		unsigned long timeslice_duration_ms;
 	} props, defaults;
+
+	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
 };
 
 static inline bool

File diff suppressed because it is too large.


@@ -0,0 +1,47 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2014 Intel Corporation
*/
#ifndef __INTEL_EXECLISTS_SUBMISSION_H__
#define __INTEL_EXECLISTS_SUBMISSION_H__
#include <linux/types.h>
struct drm_printer;
struct i915_request;
struct intel_context;
struct intel_engine_cs;
enum {
INTEL_CONTEXT_SCHEDULE_IN = 0,
INTEL_CONTEXT_SCHEDULE_OUT,
INTEL_CONTEXT_SCHEDULE_PREEMPTED,
};
int intel_execlists_submission_setup(struct intel_engine_cs *engine);
void intel_execlists_show_requests(struct intel_engine_cs *engine,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
const struct i915_request *rq,
const char *prefix,
int indent),
unsigned int max);
struct intel_context *
intel_execlists_create_virtual(struct intel_engine_cs **siblings,
unsigned int count);
struct intel_context *
intel_execlists_clone_virtual(struct intel_engine_cs *src);
int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
const struct intel_engine_cs *master,
const struct intel_engine_cs *sibling);
bool
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
#endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */


@@ -101,7 +101,16 @@ static bool needs_idle_maps(struct drm_i915_private *i915)
 	 * Query intel_iommu to see if we need the workaround. Presumably that
 	 * was loaded first.
 	 */
-	return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active();
+	if (!intel_vtd_active())
+		return false;
+
+	if (IS_GEN(i915, 5) && IS_MOBILE(i915))
+		return true;
+
+	if (IS_GEN(i915, 12))
+		return true; /* XXX DMAR fault reason 7 */
+
+	return false;
 }
 
 void i915_ggtt_suspend(struct i915_ggtt *ggtt)
@@ -1050,7 +1059,12 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
 
 	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
 
-	ggtt->do_idle_maps = needs_idle_maps(i915);
+	if (needs_idle_maps(i915)) {
+		drm_notice(&i915->drm,
+			   "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n");
+		ggtt->do_idle_maps = true;
+	}
+
 	ggtt->vm.insert_page = i915_ggtt_insert_page;
 	ggtt->vm.insert_entries = i915_ggtt_insert_entries;
 	ggtt->vm.clear_range = i915_ggtt_clear_range;


@@ -320,13 +320,31 @@ void i915_vma_revoke_fence(struct i915_vma *vma)
fence_write(fence); fence_write(fence);
} }
static bool fence_is_active(const struct i915_fence_reg *fence)
{
return fence->vma && i915_vma_is_active(fence->vma);
}
static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt)
{ {
struct i915_fence_reg *fence; struct i915_fence_reg *active = NULL;
struct i915_fence_reg *fence, *fn;
list_for_each_entry(fence, &ggtt->fence_list, link) { list_for_each_entry_safe(fence, fn, &ggtt->fence_list, link) {
GEM_BUG_ON(fence->vma && fence->vma->fence != fence); GEM_BUG_ON(fence->vma && fence->vma->fence != fence);
if (fence == active) /* now seen this fence twice */
active = ERR_PTR(-EAGAIN);
/* Prefer idle fences so we do not have to wait on the GPU */
if (active != ERR_PTR(-EAGAIN) && fence_is_active(fence)) {
if (!active)
active = fence;
list_move_tail(&fence->link, &ggtt->fence_list);
continue;
}
if (atomic_read(&fence->pin_count)) if (atomic_read(&fence->pin_count))
continue; continue;


@@ -46,6 +46,8 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt)
 
 int intel_gt_init_mmio(struct intel_gt *gt)
 {
+	intel_gt_init_clock_frequency(gt);
+
 	intel_uc_init_mmio(&gt->uc);
 	intel_sseu_info_init(gt);
 
@@ -546,8 +548,6 @@ int intel_gt_init(struct intel_gt *gt)
 	 */
 	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);
 
-	intel_gt_init_clock_frequency(gt);
-
 	err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? SZ_256K : SZ_4K);
 	if (err)
 		goto out_fw;


@@ -7,34 +7,146 @@
#include "intel_gt.h" #include "intel_gt.h"
#include "intel_gt_clock_utils.h" #include "intel_gt_clock_utils.h"
#define MHZ_12 12000000 /* 12MHz (24MHz/2), 83.333ns */ static u32 read_reference_ts_freq(struct intel_uncore *uncore)
#define MHZ_12_5 12500000 /* 12.5MHz (25MHz/2), 80ns */
#define MHZ_19_2 19200000 /* 19.2MHz, 52.083ns */
static u32 read_clock_frequency(const struct intel_gt *gt)
{ {
if (INTEL_GEN(gt->i915) >= 11) { u32 ts_override = intel_uncore_read(uncore, GEN9_TIMESTAMP_OVERRIDE);
u32 config; u32 base_freq, frac_freq;
config = intel_uncore_read(gt->uncore, RPM_CONFIG0); base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >>
config &= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK; GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1;
config >>= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; base_freq *= 1000000;
switch (config) { frac_freq = ((ts_override &
case 0: return MHZ_12; GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >>
case 1: GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT);
case 2: return MHZ_19_2; frac_freq = 1000000 / (frac_freq + 1);
return base_freq + frac_freq;
}
static u32 gen10_get_crystal_clock_freq(struct intel_uncore *uncore,
u32 rpm_config_reg)
{
u32 f19_2_mhz = 19200000;
u32 f24_mhz = 24000000;
u32 crystal_clock =
(rpm_config_reg & GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
switch (crystal_clock) {
case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
return f19_2_mhz;
case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
return f24_mhz;
default: default:
case 3: return MHZ_12_5; MISSING_CASE(crystal_clock);
return 0;
} }
} else if (INTEL_GEN(gt->i915) >= 9) { }
if (IS_GEN9_LP(gt->i915))
return MHZ_19_2; static u32 gen11_get_crystal_clock_freq(struct intel_uncore *uncore,
else u32 rpm_config_reg)
return MHZ_12; {
u32 f19_2_mhz = 19200000;
u32 f24_mhz = 24000000;
u32 f25_mhz = 25000000;
u32 f38_4_mhz = 38400000;
u32 crystal_clock =
(rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >>
GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT;
switch (crystal_clock) {
case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ:
return f24_mhz;
case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ:
return f19_2_mhz;
case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ:
return f38_4_mhz;
case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ:
return f25_mhz;
default:
MISSING_CASE(crystal_clock);
return 0;
}
}
static u32 read_clock_frequency(struct intel_uncore *uncore)
{
u32 f12_5_mhz = 12500000;
u32 f19_2_mhz = 19200000;
u32 f24_mhz = 24000000;
if (INTEL_GEN(uncore->i915) <= 4) {
/*
* PRMs say:
*
* "The value in this register increments once every 16
* hclks." (through the “Clocking Configuration”
* (CLKCFG) MCHBAR register)
*/
return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16;
} else if (INTEL_GEN(uncore->i915) <= 8) {
/*
* PRMs say:
*
* "The PCU TSC counts 10ns increments; this timestamp
* reflects bits 38:3 of the TSC (i.e. 80ns granularity,
* rolling over every 1.5 hours).
*/
return f12_5_mhz;
} else if (INTEL_GEN(uncore->i915) <= 9) {
u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE);
u32 freq = 0;
if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) {
freq = read_reference_ts_freq(uncore);
} else { } else {
return MHZ_12_5; freq = IS_GEN9_LP(uncore->i915) ? f19_2_mhz : f24_mhz;
/*
* Now figure out how the command stream's timestamp
* register increments from this frequency (it might
* increment only every few clock cycle).
*/
freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >>
CTC_SHIFT_PARAMETER_SHIFT);
} }
return freq;
} else if (INTEL_GEN(uncore->i915) <= 12) {
u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE);
u32 freq = 0;
/*
* First figure out the reference frequency. There are 2 ways
* we can compute the frequency, either through the
* TIMESTAMP_OVERRIDE register or through RPM_CONFIG. CTC_MODE
* tells us which one we should use.
*/
if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) {
freq = read_reference_ts_freq(uncore);
} else {
u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0);
if (INTEL_GEN(uncore->i915) <= 10)
freq = gen10_get_crystal_clock_freq(uncore, c0);
else
freq = gen11_get_crystal_clock_freq(uncore, c0);
/*
* Now figure out how the command stream's timestamp
* register increments from this frequency (it might
* increment only every few clock cycle).
*/
freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT);
}
return freq;
}
MISSING_CASE("Unknown gen, unable to read command streamer timestamp frequency\n");
return 0;
} }
void intel_gt_init_clock_frequency(struct intel_gt *gt) void intel_gt_init_clock_frequency(struct intel_gt *gt)
@@ -43,20 +155,27 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt)
* Note that on gen11+, the clock frequency may be reconfigured. * Note that on gen11+, the clock frequency may be reconfigured.
* We do not, and we assume nobody else does. * We do not, and we assume nobody else does.
*/ */
gt->clock_frequency = read_clock_frequency(gt); gt->clock_frequency = read_clock_frequency(gt->uncore);
if (gt->clock_frequency)
gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1);
GT_TRACE(gt, GT_TRACE(gt,
"Using clock frequency: %dkHz\n", "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n",
gt->clock_frequency / 1000); gt->clock_frequency / 1000,
gt->clock_period_ns,
div_u64(mul_u32_u32(gt->clock_period_ns, S32_MAX),
USEC_PER_SEC));
} }
#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void intel_gt_check_clock_frequency(const struct intel_gt *gt) void intel_gt_check_clock_frequency(const struct intel_gt *gt)
{ {
if (gt->clock_frequency != read_clock_frequency(gt)) { if (gt->clock_frequency != read_clock_frequency(gt->uncore)) {
dev_err(gt->i915->drm.dev, dev_err(gt->i915->drm.dev,
"GT clock frequency changed, was %uHz, now %uHz!\n", "GT clock frequency changed, was %uHz, now %uHz!\n",
gt->clock_frequency, gt->clock_frequency,
read_clock_frequency(gt)); read_clock_frequency(gt->uncore));
} }
} }
#endif #endif
@@ -66,26 +185,24 @@ static u64 div_u64_roundup(u64 nom, u32 den)
 	return div_u64(nom + den - 1, den);
 }
 
-u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count)
+u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count)
 {
-	return div_u64_roundup(mul_u32_u32(count, 1000 * 1000 * 1000),
-			       gt->clock_frequency);
+	return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency);
 }
 
-u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count)
+u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count)
 {
 	return intel_gt_clock_interval_to_ns(gt, 16 * count);
 }
 
-u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns)
+u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns)
 {
-	return div_u64_roundup(mul_u32_u32(gt->clock_frequency, ns),
-			       1000 * 1000 * 1000);
+	return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC);
 }
 
-u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns)
+u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns)
 {
-	u32 val;
+	u64 val;
 
 	/*
 	 * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS
@@ -94,9 +211,9 @@ u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns)
 	 * EI/thresholds are "bad", leading to a very sluggish or even
 	 * frozen machine.
 	 */
-	val = DIV_ROUND_UP(intel_gt_ns_to_clock_interval(gt, ns), 16);
+	val = div_u64_roundup(intel_gt_ns_to_clock_interval(gt, ns), 16);
 	if (IS_GEN(gt->i915, 6))
-		val = roundup(val, 25);
+		val = div_u64_roundup(val, 25) * 25;
 
 	return val;
 }
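
A quick sanity check of the widened conversions, assuming a 19.2 MHz GT clock (the helper name is hypothetical):

static inline void illustrate_clock_conversions(const struct intel_gt *gt)
{
	/* assumes gt->clock_frequency == 19200000 (19.2 MHz) */
	u64 ns = intel_gt_clock_interval_to_ns(gt, 19200);    /* 1,000,000ns (1ms) */
	u64 pm_ns = intel_gt_pm_interval_to_ns(gt, 1200);     /* also 1ms: PM units tick every 16 clocks */
	u64 ticks = intel_gt_ns_to_clock_interval(gt, 1000);  /* 20 ticks, rounded up */

	(void)ns; (void)pm_ns; (void)ticks;
}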


@@ -18,10 +18,10 @@ void intel_gt_check_clock_frequency(const struct intel_gt *gt);
 static inline void intel_gt_check_clock_frequency(const struct intel_gt *gt) {}
 #endif
 
-u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count);
-u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count);
+u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count);
+u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count);
 
-u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns);
-u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns);
+u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns);
+u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns);
 
 #endif /* __INTEL_GT_CLOCK_UTILS_H__ */


@@ -11,6 +11,7 @@
 #include "intel_breadcrumbs.h"
 #include "intel_gt.h"
 #include "intel_gt_irq.h"
+#include "intel_lrc_reg.h"
 #include "intel_uncore.h"
 #include "intel_rps.h"


@@ -39,6 +39,28 @@ static void user_forcewake(struct intel_gt *gt, bool suspend)
intel_gt_pm_put(gt); intel_gt_pm_put(gt);
} }
static void runtime_begin(struct intel_gt *gt)
{
local_irq_disable();
write_seqcount_begin(&gt->stats.lock);
gt->stats.start = ktime_get();
gt->stats.active = true;
write_seqcount_end(&gt->stats.lock);
local_irq_enable();
}
static void runtime_end(struct intel_gt *gt)
{
local_irq_disable();
write_seqcount_begin(&gt->stats.lock);
gt->stats.active = false;
gt->stats.total =
ktime_add(gt->stats.total,
ktime_sub(ktime_get(), gt->stats.start));
write_seqcount_end(&gt->stats.lock);
local_irq_enable();
}
static int __gt_unpark(struct intel_wakeref *wf) static int __gt_unpark(struct intel_wakeref *wf)
{ {
struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref);
@@ -67,6 +89,7 @@ static int __gt_unpark(struct intel_wakeref *wf)
i915_pmu_gt_unparked(i915); i915_pmu_gt_unparked(i915);
intel_gt_unpark_requests(gt); intel_gt_unpark_requests(gt);
runtime_begin(gt);
return 0; return 0;
} }
@@ -79,6 +102,7 @@ static int __gt_park(struct intel_wakeref *wf)
GT_TRACE(gt, "\n"); GT_TRACE(gt, "\n");
runtime_end(gt);
intel_gt_park_requests(gt); intel_gt_park_requests(gt);
i915_vma_parked(gt); i915_vma_parked(gt);
@@ -106,6 +130,7 @@ static const struct intel_wakeref_ops wf_ops = {
void intel_gt_pm_init_early(struct intel_gt *gt) void intel_gt_pm_init_early(struct intel_gt *gt)
{ {
intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops); intel_wakeref_init(&gt->wakeref, gt->uncore->rpm, &wf_ops);
seqcount_mutex_init(&gt->stats.lock, &gt->wakeref.mutex);
} }
void intel_gt_pm_init(struct intel_gt *gt) void intel_gt_pm_init(struct intel_gt *gt)
@@ -339,6 +364,30 @@ int intel_gt_runtime_resume(struct intel_gt *gt)
 	return intel_uc_runtime_resume(&gt->uc);
 }
 
+static ktime_t __intel_gt_get_awake_time(const struct intel_gt *gt)
+{
+	ktime_t total = gt->stats.total;
+
+	if (gt->stats.active)
+		total = ktime_add(total,
+				  ktime_sub(ktime_get(), gt->stats.start));
+
+	return total;
+}
+
+ktime_t intel_gt_get_awake_time(const struct intel_gt *gt)
+{
+	unsigned int seq;
+	ktime_t total;
+
+	do {
+		seq = read_seqcount_begin(&gt->stats.lock);
+		total = __intel_gt_get_awake_time(gt);
+	} while (read_seqcount_retry(&gt->stats.lock, seq));
+
+	return total;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftest_gt_pm.c"
 #endif
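
The seqcount above lets readers sample the accumulated awake time without taking the wakeref mutex. A consumer, for example a PMU or debugfs counter, might do no more than the following (hypothetical helper name):

static inline u64 illustrate_gt_awake_ns(const struct intel_gt *gt)
{
	return ktime_to_ns(intel_gt_get_awake_time(gt));
}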


@@ -58,6 +58,8 @@ int intel_gt_resume(struct intel_gt *gt);
 void intel_gt_runtime_suspend(struct intel_gt *gt);
 int intel_gt_runtime_resume(struct intel_gt *gt);
 
+ktime_t intel_gt_get_awake_time(const struct intel_gt *gt);
+
 static inline bool is_mock_gt(const struct intel_gt *gt)
 {
 	return I915_SELFTEST_ONLY(gt->awake == -ENODEV);


@@ -135,13 +135,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 	struct intel_gt_timelines *timelines = &gt->timelines;
 	struct intel_timeline *tl, *tn;
 	unsigned long active_count = 0;
-	bool interruptible;
 	LIST_HEAD(free);
 
-	interruptible = true;
-	if (unlikely(timeout < 0))
-		timeout = -timeout, interruptible = false;
-
 	flush_submission(gt, timeout); /* kick the ksoftirqd tasklets */
 	spin_lock(&timelines->lock);
 	list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
@@ -163,7 +158,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout)
 			mutex_unlock(&tl->mutex);
 
 			timeout = dma_fence_wait_timeout(fence,
-							 interruptible,
+							 true,
 							 timeout);
 			dma_fence_put(fence);


@@ -75,6 +75,7 @@ struct intel_gt {
intel_wakeref_t awake; intel_wakeref_t awake;
u32 clock_frequency; u32 clock_frequency;
u32 clock_period_ns;
struct intel_llc llc; struct intel_llc llc;
struct intel_rc6 rc6; struct intel_rc6 rc6;
@@ -87,6 +88,30 @@ struct intel_gt {
u32 pm_guc_events; u32 pm_guc_events;
struct {
bool active;
/**
* @lock: Lock protecting the below fields.
*/
seqcount_mutex_t lock;
/**
* @total: Total time the GT has been awake.
*
* Accumulated time, not counting the most recent block in cases
* where the GT is currently awake (active is set).
*/
ktime_t total;
/**
* @start: Timestamp of the last idle-to-active transition.
*
* Idle is defined as active == false, awake as active == true.
*/
ktime_t start;
} stats;
struct intel_engine_cs *engine[I915_NUM_ENGINES]; struct intel_engine_cs *engine[I915_NUM_ENGINES];
struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1]
[MAX_ENGINE_INSTANCE + 1]; [MAX_ENGINE_INSTANCE + 1];


@@ -422,6 +422,35 @@ void setup_private_pat(struct intel_uncore *uncore)
bdw_setup_private_ppat(uncore); bdw_setup_private_ppat(uncore);
} }
struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
int err;
obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size));
if (IS_ERR(obj))
return ERR_CAST(obj);
i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
i915_gem_object_put(obj);
return vma;
}
err = i915_vma_pin(vma, 0, 0,
i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
if (err) {
i915_vma_put(vma);
return ERR_PTR(err);
}
return vma;
}
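
A hedged sketch of the expected lifetime of the returned vma follows; the caller name and the command-emission step are placeholders, and only the unpin/put pairing mirrors existing i915 conventions:

/*
 * Illustrative only: a caller creates the scratch, has the GPU write into it,
 * then drops the pin and the reference taken by the helper.
 */
static int example_use_scratch(struct i915_address_space *vm)
{
	struct i915_vma *vma;

	vma = __vm_create_scratch_for_read(vm, SZ_4K);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	/* ... emit commands that store results at vma->node.start ... */

	i915_vma_unpin(vma);
	i915_vma_put(vma);
	return 0;
}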
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c" #include "selftests/mock_gtt.c"
#endif #endif


@@ -573,6 +573,9 @@ int i915_vm_pin_pt_stash(struct i915_address_space *vm,
void i915_vm_free_pt_stash(struct i915_address_space *vm, void i915_vm_free_pt_stash(struct i915_address_space *vm,
struct i915_vm_pt_stash *stash); struct i915_vm_pt_stash *stash);
struct i915_vma *
__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size);
static inline struct sgt_dma { static inline struct sgt_dma {
struct scatterlist *sg; struct scatterlist *sg;
dma_addr_t dma, max; dma_addr_t dma, max;

File diff suppressed because it is too large.


@@ -1,90 +1,20 @@
/* SPDX-License-Identifier: MIT */
/* /*
* Copyright © 2014 Intel Corporation * Copyright © 2014 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/ */
#ifndef _INTEL_LRC_H_ #ifndef __INTEL_LRC_H__
#define _INTEL_LRC_H_ #define __INTEL_LRC_H__
#include <linux/types.h> #include <linux/types.h>
struct drm_printer; #include "intel_context.h"
#include "intel_lrc_reg.h"
struct drm_i915_private; struct drm_i915_gem_object;
struct i915_gem_context;
struct i915_request;
struct intel_context;
struct intel_engine_cs; struct intel_engine_cs;
struct intel_ring;
/* Execlists regs */
#define RING_ELSP(base) _MMIO((base) + 0x230)
#define RING_EXECLIST_STATUS_LO(base) _MMIO((base) + 0x234)
#define RING_EXECLIST_STATUS_HI(base) _MMIO((base) + 0x234 + 4)
#define RING_CONTEXT_CONTROL(base) _MMIO((base) + 0x244)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3)
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0)
#define CTX_CTRL_RS_CTX_ENABLE (1 << 1)
#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2)
#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8)
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510)
#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550)
#define EL_CTRL_LOAD (1 << 0)
/* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
* wraps to 0."
*
* Therefore, one must infer that even though there are 3 bits available, 6 and
* 7 appear to be reserved.
*/
#define GEN8_CSB_ENTRIES 6
#define GEN8_CSB_PTR_MASK 0x7
#define GEN8_CSB_READ_PTR_MASK (GEN8_CSB_PTR_MASK << 8)
#define GEN8_CSB_WRITE_PTR_MASK (GEN8_CSB_PTR_MASK << 0)
#define GEN11_CSB_ENTRIES 12
#define GEN11_CSB_PTR_MASK 0xf
#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8)
#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0)
#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */
#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */
/* in Gen12 ID 0x7FF is reserved to indicate idle */
#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1)
enum {
INTEL_CONTEXT_SCHEDULE_IN = 0,
INTEL_CONTEXT_SCHEDULE_OUT,
INTEL_CONTEXT_SCHEDULE_PREEMPTED,
};
/* Logical Rings */
void intel_logical_ring_cleanup(struct intel_engine_cs *engine);
int intel_execlists_submission_setup(struct intel_engine_cs *engine);
/* Logical Ring Contexts */
/* At the start of the context image is its per-process HWS page */ /* At the start of the context image is its per-process HWS page */
#define LRC_PPHWSP_PN (0) #define LRC_PPHWSP_PN (0)
#define LRC_PPHWSP_SZ (1) #define LRC_PPHWSP_SZ (1)
@@ -96,32 +26,57 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine);
#define LRC_PPHWSP_SCRATCH 0x34 #define LRC_PPHWSP_SCRATCH 0x34
#define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) #define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32))
-void intel_execlists_set_default_submission(struct intel_engine_cs *engine);
-
-void intel_lr_context_reset(struct intel_engine_cs *engine,
-                            struct intel_context *ce,
-                            u32 head,
-                            bool scrub);
-
-void intel_execlists_show_requests(struct intel_engine_cs *engine,
-                                   struct drm_printer *m,
-                                   void (*show_request)(struct drm_printer *m,
-                                                        struct i915_request *rq,
-                                                        const char *prefix),
-                                   unsigned int max);
-
-struct intel_context *
-intel_execlists_create_virtual(struct intel_engine_cs **siblings,
-                               unsigned int count);
-
-struct intel_context *
-intel_execlists_clone_virtual(struct intel_engine_cs *src);
-
-int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine,
-                                     const struct intel_engine_cs *master,
-                                     const struct intel_engine_cs *sibling);
-
-bool
-intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
-
-#endif /* _INTEL_LRC_H_ */
+void lrc_init_wa_ctx(struct intel_engine_cs *engine);
+void lrc_fini_wa_ctx(struct intel_engine_cs *engine);
+
+int lrc_alloc(struct intel_context *ce,
+              struct intel_engine_cs *engine);
+void lrc_reset(struct intel_context *ce);
+void lrc_fini(struct intel_context *ce);
+void lrc_destroy(struct kref *kref);
+
+int
+lrc_pre_pin(struct intel_context *ce,
+            struct intel_engine_cs *engine,
+            struct i915_gem_ww_ctx *ww,
+            void **vaddr);
+int
+lrc_pin(struct intel_context *ce,
+        struct intel_engine_cs *engine,
+        void *vaddr);
+void lrc_unpin(struct intel_context *ce);
+void lrc_post_unpin(struct intel_context *ce);
+
+void lrc_init_state(struct intel_context *ce,
+                    struct intel_engine_cs *engine,
+                    void *state);
+
+void lrc_init_regs(const struct intel_context *ce,
+                   const struct intel_engine_cs *engine,
+                   bool clear);
+void lrc_reset_regs(const struct intel_context *ce,
+                    const struct intel_engine_cs *engine);
+u32 lrc_update_regs(const struct intel_context *ce,
+                    const struct intel_engine_cs *engine,
+                    u32 head);
+void lrc_update_offsets(struct intel_context *ce,
+                        struct intel_engine_cs *engine);
+
+void lrc_check_regs(const struct intel_context *ce,
+                    const struct intel_engine_cs *engine,
+                    const char *when);
+
+void lrc_update_runtime(struct intel_context *ce);
+static inline u32 lrc_get_runtime(const struct intel_context *ce)
+{
+	/*
+	 * We can use either ppHWSP[16] which is recorded before the context
+	 * switch (and so excludes the cost of context switches) or use the
+	 * value from the context image itself, which is saved/restored earlier
+	 * and so includes the cost of the save.
+	 */
+	return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]);
+}
+
+#endif /* __INTEL_LRC_H__ */


@@ -9,6 +9,8 @@
#include <linux/types.h> #include <linux/types.h>
#define CTX_DESC_FORCE_RESTORE BIT_ULL(2)
/* GEN8 to GEN12 Reg State Context */ /* GEN8 to GEN12 Reg State Context */
#define CTX_CONTEXT_CONTROL (0x02 + 1) #define CTX_CONTEXT_CONTROL (0x02 + 1)
#define CTX_RING_HEAD (0x04 + 1) #define CTX_RING_HEAD (0x04 + 1)
@@ -52,4 +54,43 @@
#define GEN8_EXECLISTS_STATUS_BUF 0x370 #define GEN8_EXECLISTS_STATUS_BUF 0x370
#define GEN11_EXECLISTS_STATUS_BUF2 0x3c0 #define GEN11_EXECLISTS_STATUS_BUF2 0x3c0
/* Execlists regs */
#define RING_ELSP(base) _MMIO((base) + 0x230)
#define RING_EXECLIST_STATUS_LO(base) _MMIO((base) + 0x234)
#define RING_EXECLIST_STATUS_HI(base) _MMIO((base) + 0x234 + 4)
#define RING_CONTEXT_CONTROL(base) _MMIO((base) + 0x244)
#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0)
#define CTX_CTRL_RS_CTX_ENABLE REG_BIT(1)
#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT REG_BIT(2)
#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3)
#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8)
#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0)
#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510)
#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550)
#define EL_CTRL_LOAD REG_BIT(0)
/*
* The docs specify that the write pointer wraps around after 5h, "After status
* is written out to the last available status QW at offset 5h, this pointer
* wraps to 0."
*
* Therefore, one must infer that even though there are 3 bits available, 6 and
* 7 appear to be reserved.
*/
#define GEN8_CSB_ENTRIES 6
#define GEN8_CSB_PTR_MASK 0x7
#define GEN8_CSB_READ_PTR_MASK (GEN8_CSB_PTR_MASK << 8)
#define GEN8_CSB_WRITE_PTR_MASK (GEN8_CSB_PTR_MASK << 0)
#define GEN11_CSB_ENTRIES 12
#define GEN11_CSB_PTR_MASK 0xf
#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8)
#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0)
#define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */
#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */
#define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */
/* in Gen12 ID 0x7FF is reserved to indicate idle */
#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1)
#endif /* _INTEL_LRC_REG_H_ */ #endif /* _INTEL_LRC_REG_H_ */
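
For readers new to these registers, a hedged sketch of how the masks above decode the context-status-buffer pointers; the field layout is inferred from the shift values and the helper itself is illustrative, not code from this series:

/*
 * Illustrative only: on gen8 the read pointer sits in bits 15:8 and the
 * write pointer in bits 7:0 of RING_CONTEXT_STATUS_PTR.
 */
static inline void example_gen8_csb_pointers(u32 status_ptr, u8 *rd, u8 *wr)
{
	*rd = (status_ptr & GEN8_CSB_READ_PTR_MASK) >> 8;
	*wr = status_ptr & GEN8_CSB_WRITE_PTR_MASK;
}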


@@ -24,8 +24,8 @@
#include "intel_engine.h" #include "intel_engine.h"
#include "intel_gt.h" #include "intel_gt.h"
#include "intel_lrc_reg.h"
#include "intel_mocs.h" #include "intel_mocs.h"
#include "intel_lrc.h"
#include "intel_ring.h" #include "intel_ring.h"
/* structures required */ /* structures required */


@@ -95,7 +95,7 @@ region_lmem_init(struct intel_memory_region *mem)
return ret; return ret;
} }
const struct intel_memory_region_ops intel_region_lmem_ops = { static const struct intel_memory_region_ops intel_region_lmem_ops = {
.init = region_lmem_init, .init = region_lmem_init,
.release = region_lmem_release, .release = region_lmem_release,
.create_object = __i915_gem_lmem_object_create, .create_object = __i915_gem_lmem_object_create,


@@ -8,8 +8,6 @@
struct drm_i915_private; struct drm_i915_private;
extern const struct intel_memory_region_ops intel_region_lmem_ops;
struct intel_memory_region * struct intel_memory_region *
intel_setup_fake_lmem(struct drm_i915_private *i915); intel_setup_fake_lmem(struct drm_i915_private *i915);


@@ -27,7 +27,8 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "intel_renderstate.h" #include "intel_renderstate.h"
#include "gt/intel_context.h" #include "intel_context.h"
#include "intel_gpu_commands.h"
#include "intel_ring.h" #include "intel_ring.h"
static const struct intel_renderstate_rodata * static const struct intel_renderstate_rodata *


@@ -40,20 +40,19 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
intel_uncore_rmw_fw(uncore, reg, clr, 0); intel_uncore_rmw_fw(uncore, reg, clr, 0);
} }
-static void engine_skip_context(struct i915_request *rq)
+static void skip_context(struct i915_request *rq)
 {
-	struct intel_engine_cs *engine = rq->engine;
 	struct intel_context *hung_ctx = rq->context;
 
-	if (!i915_request_is_active(rq))
-		return;
+	list_for_each_entry_from_rcu(rq, &hung_ctx->timeline->requests, link) {
+		if (!i915_request_is_active(rq))
+			return;
 
-	lockdep_assert_held(&engine->active.lock);
-	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
 		if (rq->context == hung_ctx) {
 			i915_request_set_error_once(rq, -EIO);
 			__i915_request_skip(rq);
 		}
+	}
 }
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
@@ -160,7 +159,7 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
i915_request_set_error_once(rq, -EIO); i915_request_set_error_once(rq, -EIO);
__i915_request_skip(rq); __i915_request_skip(rq);
if (mark_guilty(rq)) if (mark_guilty(rq))
engine_skip_context(rq); skip_context(rq);
} else { } else {
i915_request_set_error_once(rq, -EAGAIN); i915_request_set_error_once(rq, -EAGAIN);
mark_innocent(rq); mark_innocent(rq);
@@ -231,7 +230,7 @@ static int g4x_do_reset(struct intel_gt *gt,
GRDOM_MEDIA | GRDOM_RESET_ENABLE); GRDOM_MEDIA | GRDOM_RESET_ENABLE);
ret = wait_for_atomic(g4x_reset_complete(pdev), 50); ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
if (ret) { if (ret) {
drm_dbg(&gt->i915->drm, "Wait for media reset failed\n"); GT_TRACE(gt, "Wait for media reset failed\n");
goto out; goto out;
} }
@@ -239,7 +238,7 @@ static int g4x_do_reset(struct intel_gt *gt,
GRDOM_RENDER | GRDOM_RESET_ENABLE); GRDOM_RENDER | GRDOM_RESET_ENABLE);
ret = wait_for_atomic(g4x_reset_complete(pdev), 50); ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
if (ret) { if (ret) {
drm_dbg(&gt->i915->drm, "Wait for render reset failed\n"); GT_TRACE(gt, "Wait for render reset failed\n");
goto out; goto out;
} }
@@ -265,7 +264,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
5000, 0, 5000, 0,
NULL); NULL);
if (ret) { if (ret) {
drm_dbg(&gt->i915->drm, "Wait for render reset failed\n"); GT_TRACE(gt, "Wait for render reset failed\n");
goto out; goto out;
} }
@@ -276,7 +275,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
5000, 0, 5000, 0,
NULL); NULL);
if (ret) { if (ret) {
drm_dbg(&gt->i915->drm, "Wait for media reset failed\n"); GT_TRACE(gt, "Wait for media reset failed\n");
goto out; goto out;
} }
@@ -305,7 +304,7 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
500, 0, 500, 0,
NULL); NULL);
if (err) if (err)
drm_dbg(&gt->i915->drm, GT_TRACE(gt,
"Wait for 0x%08x engines reset failed\n", "Wait for 0x%08x engines reset failed\n",
hw_domain_mask); hw_domain_mask);
@@ -407,8 +406,7 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
return 0; return 0;
if (ret) { if (ret) {
drm_dbg(&engine->i915->drm, ENGINE_TRACE(engine, "Wait for SFC forced lock ack failed\n");
"Wait for SFC forced lock ack failed\n");
return ret; return ret;
} }
@@ -499,6 +497,9 @@ static int gen8_engine_reset_prepare(struct intel_engine_cs *engine)
u32 request, mask, ack; u32 request, mask, ack;
int ret; int ret;
if (I915_SELFTEST_ONLY(should_fail(&engine->reset_timeout, 1)))
return -ETIMEDOUT;
ack = intel_uncore_read_fw(uncore, reg); ack = intel_uncore_read_fw(uncore, reg);
if (ack & RESET_CTL_CAT_ERROR) { if (ack & RESET_CTL_CAT_ERROR) {
/* /*
@@ -754,8 +755,10 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
if (err) if (err)
return err; return err;
local_bh_disable();
for_each_engine(engine, gt, id) for_each_engine(engine, gt, id)
__intel_engine_reset(engine, stalled_mask & engine->mask); __intel_engine_reset(engine, stalled_mask & engine->mask);
local_bh_enable();
intel_ggtt_restore_fences(gt->ggtt); intel_ggtt_restore_fences(gt->ggtt);
@@ -833,9 +836,11 @@ static void __intel_gt_set_wedged(struct intel_gt *gt)
set_bit(I915_WEDGED, &gt->reset.flags); set_bit(I915_WEDGED, &gt->reset.flags);
/* Mark all executing requests as skipped */ /* Mark all executing requests as skipped */
local_bh_disable();
for_each_engine(engine, gt, id) for_each_engine(engine, gt, id)
if (engine->reset.cancel) if (engine->reset.cancel)
engine->reset.cancel(engine); engine->reset.cancel(engine);
local_bh_enable();
reset_finish(gt, awake); reset_finish(gt, awake);
@@ -1110,20 +1115,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
return __intel_gt_reset(engine->gt, engine->mask); return __intel_gt_reset(engine->gt, engine->mask);
} }
/** int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg)
* intel_engine_reset - reset GPU engine to recover from a hang
* @engine: engine to reset
* @msg: reason for GPU reset; or NULL for no drm_notice()
*
* Reset a specific GPU engine. Useful if a hang is detected.
* Returns zero on successful reset or otherwise an error code.
*
* Procedure is:
* - identifies the request that caused the hang and it is dropped
* - reset engine (which will force the engine to idle)
* - re-init/configure engine
*/
int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
{ {
struct intel_gt *gt = engine->gt; struct intel_gt *gt = engine->gt;
bool uses_guc = intel_engine_in_guc_submission_mode(engine); bool uses_guc = intel_engine_in_guc_submission_mode(engine);
@@ -1148,8 +1140,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
if (ret) { if (ret) {
/* If we fail here, we expect to fallback to a global reset */ /* If we fail here, we expect to fallback to a global reset */
drm_dbg(&gt->i915->drm, "%sFailed to reset %s, ret=%d\n", ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret);
uses_guc ? "GuC " : "", engine->name, ret);
goto out; goto out;
} }
@@ -1174,6 +1165,30 @@ out:
return ret; return ret;
} }
/**
* intel_engine_reset - reset GPU engine to recover from a hang
* @engine: engine to reset
* @msg: reason for GPU reset; or NULL for no drm_notice()
*
* Reset a specific GPU engine. Useful if a hang is detected.
* Returns zero on successful reset or otherwise an error code.
*
* Procedure is:
* - identifies the request that caused the hang and it is dropped
* - reset engine (which will force the engine to idle)
* - re-init/configure engine
*/
int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
{
int err;
local_bh_disable();
err = __intel_engine_reset_bh(engine, msg);
local_bh_enable();
return err;
}
static void intel_gt_reset_global(struct intel_gt *gt, static void intel_gt_reset_global(struct intel_gt *gt,
u32 engine_mask, u32 engine_mask,
const char *reason) const char *reason)
@@ -1186,7 +1201,7 @@ static void intel_gt_reset_global(struct intel_gt *gt,
kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);
drm_dbg(&gt->i915->drm, "resetting chip, engines=%x\n", engine_mask); GT_TRACE(gt, "resetting chip, engines=%x\n", engine_mask);
kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);
/* Use a watchdog to ensure that our reset completes */ /* Use a watchdog to ensure that our reset completes */
@@ -1260,18 +1275,20 @@ void intel_gt_handle_error(struct intel_gt *gt,
* single reset fails. * single reset fails.
*/ */
if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
local_bh_disable();
for_each_engine_masked(engine, gt, engine_mask, tmp) { for_each_engine_masked(engine, gt, engine_mask, tmp) {
BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
if (test_and_set_bit(I915_RESET_ENGINE + engine->id, if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags)) &gt->reset.flags))
continue; continue;
if (intel_engine_reset(engine, msg) == 0) if (__intel_engine_reset_bh(engine, msg) == 0)
engine_mask &= ~engine->mask; engine_mask &= ~engine->mask;
clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
&gt->reset.flags); &gt->reset.flags);
} }
local_bh_enable();
} }
if (!engine_mask) if (!engine_mask)
@@ -1380,6 +1397,17 @@ void intel_gt_init_reset(struct intel_gt *gt)
mutex_init(&gt->reset.mutex); mutex_init(&gt->reset.mutex);
init_srcu_struct(&gt->reset.backoff_srcu); init_srcu_struct(&gt->reset.backoff_srcu);
/*
* While undesirable to wait inside the shrinker, complain anyway.
*
* If we have to wait during shrinking, we guarantee forward progress
* by forcing the reset. Therefore during the reset we must not
* re-enter the shrinker. By declaring that we take the reset mutex
* within the shrinker, we forbid ourselves from performing any
* fs-reclaim or taking related locks during reset.
*/
i915_gem_shrinker_taints_mutex(gt->i915, &gt->reset.mutex);
/* no GPU until we are ready! */ /* no GPU until we are ready! */
__set_bit(I915_WEDGED, &gt->reset.flags); __set_bit(I915_WEDGED, &gt->reset.flags);
} }


@@ -34,6 +34,8 @@ void intel_gt_reset(struct intel_gt *gt,
const char *reason); const char *reason);
int intel_engine_reset(struct intel_engine_cs *engine, int intel_engine_reset(struct intel_engine_cs *engine,
const char *reason); const char *reason);
int __intel_engine_reset_bh(struct intel_engine_cs *engine,
const char *reason);
void __i915_request_reset(struct i915_request *rq, bool guilty); void __i915_request_reset(struct i915_request *rq, bool guilty);


@@ -5,9 +5,11 @@
*/ */
#include "gem/i915_gem_object.h" #include "gem/i915_gem_object.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_vma.h" #include "i915_vma.h"
#include "intel_engine.h" #include "intel_engine.h"
#include "intel_gpu_commands.h"
#include "intel_ring.h" #include "intel_ring.h"
#include "intel_timeline.h" #include "intel_timeline.h"


@@ -32,6 +32,7 @@
#include "gen6_ppgtt.h" #include "gen6_ppgtt.h"
#include "gen7_renderclear.h" #include "gen7_renderclear.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_mitigations.h"
#include "intel_breadcrumbs.h" #include "intel_breadcrumbs.h"
#include "intel_context.h" #include "intel_context.h"
#include "intel_gt.h" #include "intel_gt.h"
@@ -158,30 +159,7 @@ static void ring_setup_status_page(struct intel_engine_cs *engine)
static bool stop_ring(struct intel_engine_cs *engine) static bool stop_ring(struct intel_engine_cs *engine)
{ {
struct drm_i915_private *dev_priv = engine->i915; intel_engine_stop_cs(engine);
if (INTEL_GEN(dev_priv) > 2) {
ENGINE_WRITE(engine,
RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING));
if (intel_wait_for_register(engine->uncore,
RING_MI_MODE(engine->mmio_base),
MODE_IDLE,
MODE_IDLE,
1000)) {
drm_err(&dev_priv->drm,
"%s : timed out trying to stop ring\n",
engine->name);
/*
* Sometimes we observe that the idle flag is not
* set even though the ring is empty. So double
* check before giving up.
*/
if (ENGINE_READ(engine, RING_HEAD) !=
ENGINE_READ(engine, RING_TAIL))
return false;
}
}
ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL)); ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL));
@@ -321,6 +299,39 @@ out:
return ret; return ret;
} }
static void sanitize_hwsp(struct intel_engine_cs *engine)
{
struct intel_timeline *tl;
list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
intel_timeline_reset_seqno(tl);
}
static void xcs_sanitize(struct intel_engine_cs *engine)
{
/*
* Poison residual state on resume, in case the suspend didn't!
*
* We have to assume that across suspend/resume (or other loss
* of control) the contents of our pinned buffers have been
* lost, replaced by garbage. Since this doesn't always happen,
* let's poison such state so that we more quickly spot when
* we falsely assume it has been preserved.
*/
if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
/*
* The kernel_context HWSP is stored in the status_page. As above,
* that may be lost on resume/initialisation, and so we need to
* reset the value in the HWSP.
*/
sanitize_hwsp(engine);
/* And scrub the dirty cachelines for the HWSP */
clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
}
static void reset_prepare(struct intel_engine_cs *engine) static void reset_prepare(struct intel_engine_cs *engine)
{ {
struct intel_uncore *uncore = engine->uncore; struct intel_uncore *uncore = engine->uncore;
@@ -440,10 +451,8 @@ static void reset_cancel(struct intel_engine_cs *engine)
spin_lock_irqsave(&engine->active.lock, flags); spin_lock_irqsave(&engine->active.lock, flags);
/* Mark all submitted requests as skipped. */ /* Mark all submitted requests as skipped. */
list_for_each_entry(request, &engine->active.requests, sched.link) { list_for_each_entry(request, &engine->active.requests, sched.link)
i915_request_set_error_once(request, -EIO); i915_request_mark_eio(request);
i915_request_mark_complete(request);
}
intel_engine_signal_breadcrumbs(engine); intel_engine_signal_breadcrumbs(engine);
/* Remaining _unready_ requests will be nop'ed when submitted */ /* Remaining _unready_ requests will be nop'ed when submitted */
@@ -602,6 +611,7 @@ static int ring_context_pin(struct intel_context *ce, void *unused)
static void ring_context_reset(struct intel_context *ce) static void ring_context_reset(struct intel_context *ce)
{ {
intel_ring_reset(ce->ring, ce->ring->emit); intel_ring_reset(ce->ring, ce->ring->emit);
clear_bit(CONTEXT_VALID_BIT, &ce->flags);
} }
static const struct intel_context_ops ring_context_ops = { static const struct intel_context_ops ring_context_ops = {
@@ -886,7 +896,8 @@ static int switch_context(struct i915_request *rq)
GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); GEM_BUG_ON(HAS_EXECLISTS(engine->i915));
if (engine->wa_ctx.vma && ce != engine->kernel_context) { if (engine->wa_ctx.vma && ce != engine->kernel_context) {
if (engine->wa_ctx.vma->private != ce) { if (engine->wa_ctx.vma->private != ce &&
i915_mitigate_clear_residuals()) {
ret = clear_residuals(rq); ret = clear_residuals(rq);
if (ret) if (ret)
return ret; return ret;
@@ -1069,6 +1080,8 @@ static void setup_common(struct intel_engine_cs *engine)
setup_irq(engine); setup_irq(engine);
engine->resume = xcs_resume; engine->resume = xcs_resume;
engine->sanitize = xcs_sanitize;
engine->reset.prepare = reset_prepare; engine->reset.prepare = reset_prepare;
engine->reset.rewind = reset_rewind; engine->reset.rewind = reset_rewind;
engine->reset.cancel = reset_cancel; engine->reset.cancel = reset_cancel;
@@ -1290,7 +1303,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine)
GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) {
err = gen7_ctx_switch_bb_init(engine); err = gen7_ctx_switch_bb_init(engine);
if (err) if (err)
goto err_ring_unpin; goto err_ring_unpin;


@@ -400,7 +400,7 @@ static unsigned int gen5_invert_freq(struct intel_rps *rps,
return val; return val;
} }
static bool gen5_rps_set(struct intel_rps *rps, u8 val) static int __gen5_rps_set(struct intel_rps *rps, u8 val)
{ {
struct intel_uncore *uncore = rps_to_uncore(rps); struct intel_uncore *uncore = rps_to_uncore(rps);
u16 rgvswctl; u16 rgvswctl;
@@ -410,7 +410,7 @@ static bool gen5_rps_set(struct intel_rps *rps, u8 val)
rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); rgvswctl = intel_uncore_read16(uncore, MEMSWCTL);
if (rgvswctl & MEMCTL_CMD_STS) { if (rgvswctl & MEMCTL_CMD_STS) {
DRM_DEBUG("gpu busy, RCS change rejected\n"); DRM_DEBUG("gpu busy, RCS change rejected\n");
return false; /* still busy with another command */ return -EBUSY; /* still busy with another command */
} }
/* Invert the frequency bin into an ips delay */ /* Invert the frequency bin into an ips delay */
@@ -426,7 +426,18 @@ static bool gen5_rps_set(struct intel_rps *rps, u8 val)
rgvswctl |= MEMCTL_CMD_STS; rgvswctl |= MEMCTL_CMD_STS;
intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); intel_uncore_write16(uncore, MEMSWCTL, rgvswctl);
return true; return 0;
}
static int gen5_rps_set(struct intel_rps *rps, u8 val)
{
int err;
spin_lock_irq(&mchdev_lock);
err = __gen5_rps_set(rps, val);
spin_unlock_irq(&mchdev_lock);
return err;
} }
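
The split above follows the usual locked/unlocked naming: callers that already hold mchdev_lock (the IRQ handler and the enable/disable paths later in this diff) use __gen5_rps_set(), while everyone else goes through the locking wrapper. A hedged sketch with invented caller names:

/* Illustrative only: how the two variants are intended to be used. */
static int example_set_from_process_context(struct intel_rps *rps, u8 val)
{
	return gen5_rps_set(rps, val);		/* takes mchdev_lock */
}

static int example_set_from_irq_handler(struct intel_rps *rps, u8 val)
{
	lockdep_assert_held(&mchdev_lock);	/* caller already holds it */
	return __gen5_rps_set(rps, val);
}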
static unsigned long intel_pxfreq(u32 vidfreq) static unsigned long intel_pxfreq(u32 vidfreq)
@@ -557,7 +568,7 @@ static bool gen5_rps_enable(struct intel_rps *rps)
"stuck trying to change perf mode\n"); "stuck trying to change perf mode\n");
mdelay(1); mdelay(1);
gen5_rps_set(rps, rps->cur_freq); __gen5_rps_set(rps, rps->cur_freq);
rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC); rps->ips.last_count1 = intel_uncore_read(uncore, DMIEC);
rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); rps->ips.last_count1 += intel_uncore_read(uncore, DDREC);
@@ -599,7 +610,7 @@ static void gen5_rps_disable(struct intel_rps *rps)
intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG);
/* Go back to the starting frequency */ /* Go back to the starting frequency */
gen5_rps_set(rps, rps->idle_freq); __gen5_rps_set(rps, rps->idle_freq);
mdelay(1); mdelay(1);
rgvswctl |= MEMCTL_CMD_STS; rgvswctl |= MEMCTL_CMD_STS;
intel_uncore_write(uncore, MEMSWCTL, rgvswctl); intel_uncore_write(uncore, MEMSWCTL, rgvswctl);
@@ -797,20 +808,19 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update)
struct drm_i915_private *i915 = rps_to_i915(rps); struct drm_i915_private *i915 = rps_to_i915(rps);
int err; int err;
if (INTEL_GEN(i915) < 6)
return 0;
if (val == rps->last_freq) if (val == rps->last_freq)
return 0; return 0;
if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
err = vlv_rps_set(rps, val); err = vlv_rps_set(rps, val);
else else if (INTEL_GEN(i915) >= 6)
err = gen6_rps_set(rps, val); err = gen6_rps_set(rps, val);
else
err = gen5_rps_set(rps, val);
if (err) if (err)
return err; return err;
if (update) if (update && INTEL_GEN(i915) >= 6)
gen6_rps_set_thresholds(rps, val); gen6_rps_set_thresholds(rps, val);
rps->last_freq = val; rps->last_freq = val;
@@ -852,6 +862,8 @@ void intel_rps_park(struct intel_rps *rps)
{ {
int adj; int adj;
GEM_BUG_ON(atomic_read(&rps->num_waiters));
if (!intel_rps_clear_active(rps)) if (!intel_rps_clear_active(rps))
return; return;
@@ -907,28 +919,27 @@ void intel_rps_park(struct intel_rps *rps)
 void intel_rps_boost(struct i915_request *rq)
 {
-	struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
-	unsigned long flags;
-
-	if (i915_request_signaled(rq) || !intel_rps_is_active(rps))
+	if (i915_request_signaled(rq) || i915_request_has_waitboost(rq))
 		return;
 
 	/* Serializes with i915_request_retire() */
-	spin_lock_irqsave(&rq->lock, flags);
-	if (!i915_request_has_waitboost(rq) &&
-	    !dma_fence_is_signaled_locked(&rq->fence)) {
-		set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags);
+	if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) {
+		struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps;
+
+		if (atomic_fetch_inc(&rps->num_waiters))
+			return;
+
+		if (!intel_rps_is_active(rps))
+			return;
 
 		GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n",
 			 rq->fence.context, rq->fence.seqno);
 
-		if (!atomic_fetch_inc(&rps->num_waiters) &&
-		    READ_ONCE(rps->cur_freq) < rps->boost_freq)
+		if (READ_ONCE(rps->cur_freq) < rps->boost_freq)
 			schedule_work(&rps->work);
 
-		atomic_inc(&rps->boosts);
+		WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */
 	}
-	spin_unlock_irqrestore(&rq->lock, flags);
 }
int intel_rps_set(struct intel_rps *rps, u8 val) int intel_rps_set(struct intel_rps *rps, u8 val)
@@ -1798,7 +1809,7 @@ void gen5_rps_irq_handler(struct intel_rps *rps)
rps->min_freq_softlimit, rps->min_freq_softlimit,
rps->max_freq_softlimit); rps->max_freq_softlimit);
if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq)) if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq))
rps->cur_freq = new_freq; rps->cur_freq = new_freq;
spin_unlock(&mchdev_lock); spin_unlock(&mchdev_lock);
@@ -2109,7 +2120,7 @@ bool i915_gpu_turbo_disable(void)
spin_lock_irq(&mchdev_lock); spin_lock_irq(&mchdev_lock);
rps->max_freq_softlimit = rps->min_freq; rps->max_freq_softlimit = rps->min_freq;
ret = gen5_rps_set(&i915->gt.rps, rps->min_freq); ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq);
spin_unlock_irq(&mchdev_lock); spin_unlock_irq(&mchdev_lock);
drm_dev_put(&i915->drm); drm_dev_put(&i915->drm);


@@ -93,7 +93,7 @@ struct intel_rps {
} power; } power;
atomic_t num_waiters; atomic_t num_waiters;
atomic_t boosts; unsigned int boosts;
/* manual wa residency calculations */ /* manual wa residency calculations */
struct intel_rps_ei ei; struct intel_rps_ei ei;


@@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
struct intel_timeline_cacheline *cl = struct intel_timeline_cacheline *cl =
container_of(rcu, typeof(*cl), rcu); container_of(rcu, typeof(*cl), rcu);
/* Must wait until after all *rq->hwsp are complete before removing */
i915_gem_object_unpin_map(cl->hwsp->vma->obj);
__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
i915_active_fini(&cl->active); i915_active_fini(&cl->active);
kfree(cl); kfree(cl);
} }
@@ -133,11 +137,6 @@ static void __rcu_cacheline_free(struct rcu_head *rcu)
static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) static void __idle_cacheline_free(struct intel_timeline_cacheline *cl)
{ {
GEM_BUG_ON(!i915_active_is_idle(&cl->active)); GEM_BUG_ON(!i915_active_is_idle(&cl->active));
i915_gem_object_unpin_map(cl->hwsp->vma->obj);
i915_vma_put(cl->hwsp->vma);
__idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS));
call_rcu(&cl->rcu, __rcu_cacheline_free); call_rcu(&cl->rcu, __rcu_cacheline_free);
} }
@@ -179,7 +178,6 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline)
return ERR_CAST(vaddr); return ERR_CAST(vaddr);
} }
i915_vma_get(hwsp->vma);
cl->hwsp = hwsp; cl->hwsp = hwsp;
cl->vaddr = page_pack_bits(vaddr, cacheline); cl->vaddr = page_pack_bits(vaddr, cacheline);
@@ -321,6 +319,25 @@ __intel_timeline_create(struct intel_gt *gt,
return timeline; return timeline;
} }
struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine,
unsigned int offset)
{
struct i915_vma *hwsp = engine->status_page.vma;
struct intel_timeline *tl;
tl = __intel_timeline_create(engine->gt, hwsp, offset);
if (IS_ERR(tl))
return tl;
/* Borrow a nearby lock; we only create these timelines during init */
mutex_lock(&hwsp->vm->mutex);
list_add_tail(&tl->engine_link, &engine->status_page.timelines);
mutex_unlock(&hwsp->vm->mutex);
return tl;
}
void __intel_timeline_pin(struct intel_timeline *tl) void __intel_timeline_pin(struct intel_timeline *tl)
{ {
GEM_BUG_ON(!atomic_read(&tl->pin_count)); GEM_BUG_ON(!atomic_read(&tl->pin_count));
@@ -617,6 +634,86 @@ void intel_gt_fini_timelines(struct intel_gt *gt)
GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list));
} }
void intel_gt_show_timelines(struct intel_gt *gt,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
const struct i915_request *rq,
const char *prefix,
int indent))
{
struct intel_gt_timelines *timelines = &gt->timelines;
struct intel_timeline *tl, *tn;
LIST_HEAD(free);
spin_lock(&timelines->lock);
list_for_each_entry_safe(tl, tn, &timelines->active_list, link) {
unsigned long count, ready, inflight;
struct i915_request *rq, *rn;
struct dma_fence *fence;
if (!mutex_trylock(&tl->mutex)) {
drm_printf(m, "Timeline %llx: busy; skipping\n",
tl->fence_context);
continue;
}
intel_timeline_get(tl);
GEM_BUG_ON(!atomic_read(&tl->active_count));
atomic_inc(&tl->active_count); /* pin the list element */
spin_unlock(&timelines->lock);
count = 0;
ready = 0;
inflight = 0;
list_for_each_entry_safe(rq, rn, &tl->requests, link) {
if (i915_request_completed(rq))
continue;
count++;
if (i915_request_is_ready(rq))
ready++;
if (i915_request_is_active(rq))
inflight++;
}
drm_printf(m, "Timeline %llx: { ", tl->fence_context);
drm_printf(m, "count: %lu, ready: %lu, inflight: %lu",
count, ready, inflight);
drm_printf(m, ", seqno: { current: %d, last: %d }",
*tl->hwsp_seqno, tl->seqno);
fence = i915_active_fence_get(&tl->last_request);
if (fence) {
drm_printf(m, ", engine: %s",
to_request(fence)->engine->name);
dma_fence_put(fence);
}
drm_printf(m, " }\n");
if (show_request) {
list_for_each_entry_safe(rq, rn, &tl->requests, link)
show_request(m, rq, "", 2);
}
mutex_unlock(&tl->mutex);
spin_lock(&timelines->lock);
/* Resume list iteration after reacquiring spinlock */
list_safe_reset_next(tl, tn, link);
if (atomic_dec_and_test(&tl->active_count))
list_del(&tl->link);
/* Defer the final release to after the spinlock */
if (refcount_dec_and_test(&tl->kref.refcount)) {
GEM_BUG_ON(atomic_read(&tl->active_count));
list_add(&tl->link, &free);
}
}
spin_unlock(&timelines->lock);
list_for_each_entry_safe(tl, tn, &free, link)
__intel_timeline_free(&tl->kref);
}
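
A hedged usage sketch follows; the wrapper name is invented, and passing NULL for show_request relies on the `if (show_request)` guard above to print only the per-timeline summaries:

/*
 * Illustrative only: dump timeline summaries without per-request detail,
 * e.g. from a debugfs show callback that already has a drm_printer.
 */
static void example_show_gt_timelines(struct intel_gt *gt, struct drm_printer *p)
{
	intel_gt_show_timelines(gt, p, NULL);
}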
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "gt/selftests/mock_timeline.c" #include "gt/selftests/mock_timeline.c"
#include "gt/selftest_timeline.c" #include "gt/selftest_timeline.c"


@@ -31,6 +31,8 @@
#include "i915_syncmap.h" #include "i915_syncmap.h"
#include "intel_timeline_types.h" #include "intel_timeline_types.h"
struct drm_printer;
struct intel_timeline * struct intel_timeline *
__intel_timeline_create(struct intel_gt *gt, __intel_timeline_create(struct intel_gt *gt,
struct i915_vma *global_hwsp, struct i915_vma *global_hwsp,
@@ -42,14 +44,9 @@ intel_timeline_create(struct intel_gt *gt)
return __intel_timeline_create(gt, NULL, 0); return __intel_timeline_create(gt, NULL, 0);
} }
static inline struct intel_timeline * struct intel_timeline *
intel_timeline_create_from_engine(struct intel_engine_cs *engine, intel_timeline_create_from_engine(struct intel_engine_cs *engine,
unsigned int offset) unsigned int offset);
{
return __intel_timeline_create(engine->gt,
engine->status_page.vma,
offset);
}
static inline struct intel_timeline * static inline struct intel_timeline *
intel_timeline_get(struct intel_timeline *timeline) intel_timeline_get(struct intel_timeline *timeline)
@@ -106,4 +103,18 @@ int intel_timeline_read_hwsp(struct i915_request *from,
void intel_gt_init_timelines(struct intel_gt *gt); void intel_gt_init_timelines(struct intel_gt *gt);
void intel_gt_fini_timelines(struct intel_gt *gt); void intel_gt_fini_timelines(struct intel_gt *gt);
void intel_gt_show_timelines(struct intel_gt *gt,
struct drm_printer *m,
void (*show_request)(struct drm_printer *m,
const struct i915_request *rq,
const char *prefix,
int indent));
static inline bool
intel_timeline_is_last(const struct intel_timeline *tl,
const struct i915_request *rq)
{
return list_is_last_rcu(&rq->link, &tl->requests);
}
#endif #endif


@@ -84,6 +84,8 @@ struct intel_timeline {
struct list_head link; struct list_head link;
struct intel_gt *gt; struct intel_gt *gt;
struct list_head engine_link;
struct kref kref; struct kref kref;
struct rcu_head rcu; struct rcu_head rcu;
}; };


@@ -7,6 +7,7 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "intel_context.h" #include "intel_context.h"
#include "intel_engine_pm.h" #include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h" #include "intel_gt.h"
#include "intel_ring.h" #include "intel_ring.h"
#include "intel_workarounds.h" #include "intel_workarounds.h"
@@ -194,7 +195,7 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
} }
static void static void
wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{ {
wa_add(wal, reg, clear, set, clear); wa_add(wal, reg, clear, set, clear);
} }
@@ -202,21 +203,32 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
static void static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set) wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{ {
wa_write_masked_or(wal, reg, ~0, set); wa_write_clr_set(wal, reg, ~0, set);
} }
static void static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{ {
wa_write_masked_or(wal, reg, set, set); wa_write_clr_set(wal, reg, set, set);
} }
static void static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{ {
wa_write_masked_or(wal, reg, clr, 0); wa_write_clr_set(wal, reg, clr, 0);
} }
/*
* WA operations on "masked register". A masked register has the upper 16 bits
* documented as "masked" in b-spec. Its purpose is to allow writing to just a
* portion of the register without a rmw: you simply write in the upper 16 bits
* the mask of bits you are going to modify.
*
* The wa_masked_* family of functions already does the necessary operations to
* calculate the mask based on the parameters passed, so user only has to
* provide the lower 16 bits of that register.
*/
static void static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{ {
@@ -229,37 +241,35 @@ wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val); wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val);
} }
-#define WA_SET_BIT_MASKED(addr, mask) \
-	wa_masked_en(wal, (addr), (mask))
-
-#define WA_CLR_BIT_MASKED(addr, mask) \
-	wa_masked_dis(wal, (addr), (mask))
-
-#define WA_SET_FIELD_MASKED(addr, mask, value) \
-	wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value)))
+static void
+wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
+                    u32 mask, u32 val)
+{
+	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask);
+}
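
To make the masked-register convention described in the comment above concrete, here is a hedged sketch of the value such a write carries; the encoding matches _MASKED_BIT_ENABLE()/_MASKED_FIELD(), but the helper itself is illustrative only:

/*
 * Illustrative only: a masked write places the bits being changed in the
 * upper 16 bits and their new values in the lower 16, so the hardware
 * leaves every bit not named in the mask untouched.
 * wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING) therefore writes
 * (INSTPM_FORCE_ORDERING << 16) | INSTPM_FORCE_ORDERING.
 */
static inline u32 example_masked_enable(u32 bits)
{
	return (bits << 16) | bits;	/* same shape as _MASKED_BIT_ENABLE() */
}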
static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal) struct i915_wa_list *wal)
{ {
WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
} }
static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine, static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal) struct i915_wa_list *wal)
{ {
WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
} }
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal) struct i915_wa_list *wal)
{ {
WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
/* WaDisableAsyncFlipPerfMode:bdw,chv */ /* WaDisableAsyncFlipPerfMode:bdw,chv */
WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE);
/* WaDisablePartialInstShootdown:bdw,chv */ /* WaDisablePartialInstShootdown:bdw,chv */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, wa_masked_en(wal, GEN8_ROW_CHICKEN,
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
/* Use Force Non-Coherent whenever executing a 3D context. This is a /* Use Force Non-Coherent whenever executing a 3D context. This is a
@@ -268,7 +278,7 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
*/ */
/* WaForceEnableNonCoherent:bdw,chv */ /* WaForceEnableNonCoherent:bdw,chv */
/* WaHdcDisableFetchWhenMasked:bdw,chv */ /* WaHdcDisableFetchWhenMasked:bdw,chv */
WA_SET_BIT_MASKED(HDC_CHICKEN0, wa_masked_en(wal, HDC_CHICKEN0,
HDC_DONOT_FETCH_MEM_WHEN_MASKED | HDC_DONOT_FETCH_MEM_WHEN_MASKED |
HDC_FORCE_NON_COHERENT); HDC_FORCE_NON_COHERENT);
@@ -280,10 +290,10 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
* *
* This optimization is off by default for BDW and CHV; turn it on. * This optimization is off by default for BDW and CHV; turn it on.
*/ */
WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
/* Wa4x4STCOptimizationDisable:bdw,chv */ /* Wa4x4STCOptimizationDisable:bdw,chv */
WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
/* /*
* BSpec recommends 8x4 when MSAA is used, * BSpec recommends 8x4 when MSAA is used,
@@ -293,7 +303,7 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
* disable bit, which we don't touch here, but it's good * disable bit, which we don't touch here, but it's good
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
*/ */
WA_SET_FIELD_MASKED(GEN7_GT_MODE, wa_masked_field_set(wal, GEN7_GT_MODE,
GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_MASK,
GEN6_WIZ_HASHING_16x4); GEN6_WIZ_HASHING_16x4);
} }
@@ -306,20 +316,20 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
gen8_ctx_workarounds_init(engine, wal); gen8_ctx_workarounds_init(engine, wal);
/* WaDisableThreadStallDopClockGating:bdw (pre-production) */ /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
/* WaDisableDopClockGating:bdw /* WaDisableDopClockGating:bdw
* *
* Also see the related UCGTCL1 write in bdw_init_clock_gating() * Also see the related UCGTCL1 write in bdw_init_clock_gating()
* to disable EUTC clock gating. * to disable EUTC clock gating.
*/ */
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, wa_masked_en(wal, GEN7_ROW_CHICKEN2,
DOP_CLOCK_GATING_DISABLE); DOP_CLOCK_GATING_DISABLE);
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, wa_masked_en(wal, HALF_SLICE_CHICKEN3,
GEN8_SAMPLER_POWER_BYPASS_DIS); GEN8_SAMPLER_POWER_BYPASS_DIS);
WA_SET_BIT_MASKED(HDC_CHICKEN0, wa_masked_en(wal, HDC_CHICKEN0,
/* WaForceContextSaveRestoreNonCoherent:bdw */ /* WaForceContextSaveRestoreNonCoherent:bdw */
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
/* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
@@ -332,10 +342,10 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
gen8_ctx_workarounds_init(engine, wal); gen8_ctx_workarounds_init(engine, wal);
/* WaDisableThreadStallDopClockGating:chv */ /* WaDisableThreadStallDopClockGating:chv */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
/* Improve HiZ throughput on CHV. */ /* Improve HiZ throughput on CHV. */
WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
} }
static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -349,36 +359,36 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
* Must match Display Engine. See * Must match Display Engine. See
* WaCompressedResourceDisplayNewHashMode. * WaCompressedResourceDisplayNewHashMode.
*/ */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
GEN9_PBE_COMPRESSED_HASH_SELECTION); GEN9_PBE_COMPRESSED_HASH_SELECTION);
WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
} }
/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, wa_masked_en(wal, GEN8_ROW_CHICKEN,
FLOW_CONTROL_ENABLE | FLOW_CONTROL_ENABLE |
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
GEN9_ENABLE_YV12_BUGFIX | GEN9_ENABLE_YV12_BUGFIX |
GEN9_ENABLE_GPGPU_PREEMPTION); GEN9_ENABLE_GPGPU_PREEMPTION);
/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
WA_SET_BIT_MASKED(CACHE_MODE_1, wa_masked_en(wal, CACHE_MODE_1,
GEN8_4x4_STC_OPTIMIZATION_DISABLE | GEN8_4x4_STC_OPTIMIZATION_DISABLE |
GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);
/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
GEN9_CCS_TLB_PREFETCH_ENABLE); GEN9_CCS_TLB_PREFETCH_ENABLE);
/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
WA_SET_BIT_MASKED(HDC_CHICKEN0, wa_masked_en(wal, HDC_CHICKEN0,
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);
@@ -396,7 +406,7 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
*/ */
/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
WA_SET_BIT_MASKED(HDC_CHICKEN0, wa_masked_en(wal, HDC_CHICKEN0,
HDC_FORCE_NON_COHERENT); HDC_FORCE_NON_COHERENT);
/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
@@ -404,11 +414,11 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
IS_KABYLAKE(i915) || IS_KABYLAKE(i915) ||
IS_COFFEELAKE(i915) || IS_COFFEELAKE(i915) ||
IS_COMETLAKE(i915)) IS_COMETLAKE(i915))
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, wa_masked_en(wal, HALF_SLICE_CHICKEN3,
GEN8_SAMPLER_POWER_BYPASS_DIS); GEN8_SAMPLER_POWER_BYPASS_DIS);
/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
/* /*
* Supporting preemption with fine-granularity requires changes in the * Supporting preemption with fine-granularity requires changes in the
@@ -422,16 +432,16 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
*/ */
/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
/* WaClearHIZ_WM_CHICKEN3:bxt,glk */ /* WaClearHIZ_WM_CHICKEN3:bxt,glk */
if (IS_GEN9_LP(i915)) if (IS_GEN9_LP(i915))
WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
} }
static void skl_tune_iz_hashing(struct intel_engine_cs *engine, static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
@@ -465,7 +475,7 @@ static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
return; return;
/* Tune IZ hashing. See intel_device_info_runtime_init() */ /* Tune IZ hashing. See intel_device_info_runtime_init() */
WA_SET_FIELD_MASKED(GEN7_GT_MODE, wa_masked_field_set(wal, GEN7_GT_MODE,
GEN9_IZ_HASHING_MASK(2) | GEN9_IZ_HASHING_MASK(2) |
GEN9_IZ_HASHING_MASK(1) | GEN9_IZ_HASHING_MASK(1) |
GEN9_IZ_HASHING_MASK(0), GEN9_IZ_HASHING_MASK(0),
@@ -487,11 +497,11 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
gen9_ctx_workarounds_init(engine, wal); gen9_ctx_workarounds_init(engine, wal);
/* WaDisableThreadStallDopClockGating:bxt */ /* WaDisableThreadStallDopClockGating:bxt */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, wa_masked_en(wal, GEN8_ROW_CHICKEN,
STALL_DOP_GATING_DISABLE); STALL_DOP_GATING_DISABLE);
/* WaToEnableHwFixForPushConstHWBug:bxt */ /* WaToEnableHwFixForPushConstHWBug:bxt */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
} }
@@ -504,11 +514,11 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
/* WaToEnableHwFixForPushConstHWBug:kbl */ /* WaToEnableHwFixForPushConstHWBug:kbl */
if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER))
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
/* WaDisableSbeCacheDispatchPortSharing:kbl */ /* WaDisableSbeCacheDispatchPortSharing:kbl */
WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
} }
@@ -518,7 +528,7 @@ static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
gen9_ctx_workarounds_init(engine, wal); gen9_ctx_workarounds_init(engine, wal);
/* WaToEnableHwFixForPushConstHWBug:glk */ /* WaToEnableHwFixForPushConstHWBug:glk */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
} }
@@ -528,11 +538,11 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
gen9_ctx_workarounds_init(engine, wal); gen9_ctx_workarounds_init(engine, wal);
/* WaToEnableHwFixForPushConstHWBug:cfl */ /* WaToEnableHwFixForPushConstHWBug:cfl */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
/* WaDisableSbeCacheDispatchPortSharing:cfl */ /* WaDisableSbeCacheDispatchPortSharing:cfl */
WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1,
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
} }
@@ -540,29 +550,29 @@ static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal) struct i915_wa_list *wal)
{ {
/* WaForceContextSaveRestoreNonCoherent:cnl */ /* WaForceContextSaveRestoreNonCoherent:cnl */
WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, wa_masked_en(wal, CNL_HDC_CHICKEN0,
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT);
/* WaDisableReplayBufferBankArbitrationOptimization:cnl */ /* WaDisableReplayBufferBankArbitrationOptimization:cnl */
WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
/* WaPushConstantDereferenceHoldDisable:cnl */ /* WaPushConstantDereferenceHoldDisable:cnl */
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE);
/* FtrEnableFastAnisoL1BankingFix:cnl */ /* FtrEnableFastAnisoL1BankingFix:cnl */
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX);
/* WaDisable3DMidCmdPreemption:cnl */ /* WaDisable3DMidCmdPreemption:cnl */
WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);
/* WaDisableGPGPUMidCmdPreemption:cnl */ /* WaDisableGPGPUMidCmdPreemption:cnl */
WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);
/* WaDisableEarlyEOT:cnl */ /* WaDisableEarlyEOT:cnl */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT);
} }
static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
@@ -580,7 +590,7 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
* Formerly known as WaPushConstantDereferenceHoldDisable * Formerly known as WaPushConstantDereferenceHoldDisable
*/ */
if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0))
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, wa_masked_en(wal, GEN7_ROW_CHICKEN2,
PUSH_CONSTANT_DEREF_DISABLE); PUSH_CONSTANT_DEREF_DISABLE);
/* WaForceEnableNonCoherent:icl /* WaForceEnableNonCoherent:icl
@@ -590,38 +600,38 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
* (the register is whitelisted in hardware now, so UMDs can opt in * (the register is whitelisted in hardware now, so UMDs can opt in
* for coherency if they have a good reason). * for coherency if they have a good reason).
*/ */
WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);
/* Wa_2006611047:icl (pre-prod) /* Wa_2006611047:icl (pre-prod)
* Formerly known as WaDisableImprovedTdlClkGating * Formerly known as WaDisableImprovedTdlClkGating
*/ */
if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, wa_masked_en(wal, GEN7_ROW_CHICKEN2,
GEN11_TDL_CLOCK_GATING_FIX_DISABLE); GEN11_TDL_CLOCK_GATING_FIX_DISABLE);
/* Wa_2006665173:icl (pre-prod) */ /* Wa_2006665173:icl (pre-prod) */
if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0))
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC);
/* WaEnableFloatBlendOptimization:icl */ /* WaEnableFloatBlendOptimization:icl */
wa_write_masked_or(wal, wa_write_clr_set(wal,
GEN10_CACHE_MODE_SS, GEN10_CACHE_MODE_SS,
0, /* write-only, so skip validation */ 0, /* write-only, so skip validation */
_MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE));
/* WaDisableGPGPUMidThreadPreemption:icl */ /* WaDisableGPGPUMidThreadPreemption:icl */
WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
/* allow headerless messages for preemptible GPGPU context */ /* allow headerless messages for preemptible GPGPU context */
WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE, wa_masked_en(wal, GEN10_SAMPLER_MODE,
GEN11_SAMPLER_ENABLE_HEADLESS_MSG); GEN11_SAMPLER_ENABLE_HEADLESS_MSG);
/* Wa_1604278689:icl,ehl */ /* Wa_1604278689:icl,ehl */
wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
wa_write_masked_or(wal, IVB_FBC_RT_BASE_UPPER, wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
0, /* write-only register; skip validation */ 0, /* write-only register; skip validation */
0xFFFFFFFF); 0xFFFFFFFF);
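
The wa_write_masked_or() to wa_write_clr_set() rename used throughout this hunk makes the read-modify-write semantics explicit: the clr argument names the bits to clear before OR-ing in set, and, as the "write-only, so skip validation" comments above note, passing 0 there also skips read-back verification of write-only registers. A minimal userspace sketch of that application step, with a hypothetical struct standing in for the driver's i915_wa entry, could look like:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the driver's workaround entry. */
struct wa_entry {
	uint32_t clr;  /* bits cleared before applying the entry   */
	uint32_t set;  /* bits OR'ed in after the clear            */
	uint32_t read; /* bits the verifier may compare (0 = skip) */
};

/* The value transformation implied by a clr/set entry. */
static uint32_t wa_apply(uint32_t old, const struct wa_entry *wa)
{
	return (old & ~wa->clr) | wa->set;
}

int main(void)
{
	/* Clear a two-bit field (bits 1-2) and set bit 0 in one entry. */
	const struct wa_entry wa = { .clr = 0x6, .set = 0x1, .read = 0x7 };
	uint32_t reg = 0xff;

	reg = wa_apply(reg, &wa);
	printf("reg = 0x%02x\n", reg); /* prints 0xf9 */
	return 0;
}

The real helper records the entry in a wa_list and applies it via MMIO; the sketch only models the value transformation.
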
@@ -643,11 +653,11 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
* Wa_14010443199:rkl * Wa_14010443199:rkl
* Wa_14010698770:rkl * Wa_14010698770:rkl
*/ */
WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);
/* WaDisableGPGPUMidThreadPreemption:gen12 */ /* WaDisableGPGPUMidThreadPreemption:gen12 */
WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_LEVEL_MASK,
GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);
} }
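
The wa_masked_en()/wa_masked_dis()/wa_masked_field_set() helpers above all target "masked" registers, where the upper 16 bits of each write act as a per-bit enable mask for the lower 16 data bits. The sketch below models that convention with simplified macros (mirroring, but not identical to, the driver's _MASKED_FIELD()/_MASKED_BIT_ENABLE() helpers) and shows why such writes do not disturb neighbouring bits:

#include <stdint.h>
#include <stdio.h>

/* Upper 16 bits: which data bits this write may touch; lower 16: their values. */
#define MASKED_FIELD(mask, value) (((uint32_t)(mask) << 16) | (value))
#define MASKED_BIT_ENABLE(bit)    MASKED_FIELD((bit), (bit))
#define MASKED_BIT_DISABLE(bit)   MASKED_FIELD((bit), 0)

/* Simplified model of how hardware applies a write to a masked register. */
static uint16_t masked_write(uint16_t old, uint32_t wr)
{
	uint16_t mask = wr >> 16;
	uint16_t val = wr & 0xffff;

	return (old & ~mask) | (val & mask);
}

int main(void)
{
	uint16_t reg = 0x0f0f;

	/* Only bit 4 is touched; every other bit keeps its previous value. */
	reg = masked_write(reg, MASKED_BIT_ENABLE(1u << 4));
	printf("0x%04x\n", reg); /* 0x0f1f */

	/* A field update: clear bits 8-9, then set the new 2-bit value 0b10. */
	reg = masked_write(reg, MASKED_FIELD(0x3 << 8, 0x2 << 8));
	printf("0x%04x\n", reg); /* 0x0e1f */
	return 0;
}
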
@@ -680,12 +690,22 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
gen12_ctx_workarounds_init(engine, wal); gen12_ctx_workarounds_init(engine, wal);
/* Wa_1409044764 */ /* Wa_1409044764 */
WA_CLR_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN); DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);
/* Wa_22010493298 */ /* Wa_22010493298 */
WA_SET_BIT_MASKED(HIZ_CHICKEN, wa_masked_en(wal, HIZ_CHICKEN,
DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
/*
* Wa_16011163337
*
* Like in tgl_ctx_workarounds_init(), read verification is ignored due
* to Wa_1608008084.
*/
wa_add(wal,
FF_MODE2,
FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0);
} }
static void static void
@@ -804,57 +824,11 @@ ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
static void static void
snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{ {
/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
wa_masked_en(wal,
_3D_CHICKEN,
_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
/* WaDisable_RenderCache_OperationalFlush:snb */
wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.
*
* Note that PS/WM thread counts depend on the WIZ hashing
* disable bit, which we don't touch here, but it's good
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
*/
wa_add(wal,
GEN6_GT_MODE, 0,
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
GEN6_WIZ_HASHING_16x4);
wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB);
wa_masked_en(wal,
_3D_CHICKEN3,
/* WaStripsFansDisableFastClipPerformanceFix:snb */
_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
/*
* Bspec says:
* "This bit must be set if 3DSTATE_CLIP clip mode is set
* to normal and 3DSTATE_SF number of SF output attributes
* is more than 16."
*/
_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
} }
static void static void
ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{ {
/* WaDisableEarlyCull:ivb */
wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
/* WaDisablePSDDualDispatchEnable:ivb */
if (IS_IVB_GT1(i915))
wa_masked_en(wal,
GEN7_HALF_SLICE_CHICKEN1,
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
/* WaDisable_RenderCache_OperationalFlush:ivb */
wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
wa_masked_dis(wal, wa_masked_dis(wal,
GEN7_COMMON_SLICE_CHICKEN1, GEN7_COMMON_SLICE_CHICKEN1,
@@ -866,90 +840,14 @@ ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
/* WaForceL3Serialization:ivb */ /* WaForceL3Serialization:ivb */
wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
/*
* WaVSThreadDispatchOverride:ivb,vlv
*
* This actually overrides the dispatch
* mode for all thread types.
*/
wa_write_masked_or(wal, GEN7_FF_THREAD_MODE,
GEN7_FF_SCHED_MASK,
GEN7_FF_TS_SCHED_HW |
GEN7_FF_VS_SCHED_HW |
GEN7_FF_DS_SCHED_HW);
if (0) { /* causes HiZ corruption on ivb:gt1 */
/* enable HiZ Raw Stall Optimization */
wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
}
/* WaDisable4x2SubspanOptimization:ivb */
wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.
*
* Note that PS/WM thread counts depend on the WIZ hashing
* disable bit, which we don't touch here, but it's good
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
*/
wa_add(wal, GEN7_GT_MODE, 0,
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
GEN6_WIZ_HASHING_16x4);
} }
static void static void
vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{ {
/* WaDisableEarlyCull:vlv */
wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
/* WaPsdDispatchEnable:vlv */
/* WaDisablePSDDualDispatchEnable:vlv */
wa_masked_en(wal,
GEN7_HALF_SLICE_CHICKEN1,
GEN7_MAX_PS_THREAD_DEP |
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
/* WaDisable_RenderCache_OperationalFlush:vlv */
wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
/* WaForceL3Serialization:vlv */ /* WaForceL3Serialization:vlv */
wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
/*
* WaVSThreadDispatchOverride:ivb,vlv
*
* This actually overrides the dispatch
* mode for all thread types.
*/
wa_write_masked_or(wal,
GEN7_FF_THREAD_MODE,
GEN7_FF_SCHED_MASK,
GEN7_FF_TS_SCHED_HW |
GEN7_FF_VS_SCHED_HW |
GEN7_FF_DS_SCHED_HW);
/*
* BSpec says this must be set, even though
* WaDisable4x2SubspanOptimization isn't listed for VLV.
*/
wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.
*
* Note that PS/WM thread counts depend on the WIZ hashing
* disable bit, which we don't touch here, but it's good
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
*/
wa_add(wal, GEN7_GT_MODE, 0,
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
GEN6_WIZ_HASHING_16x4);
/* /*
* WaIncreaseL3CreditsForVLVB0:vlv * WaIncreaseL3CreditsForVLVB0:vlv
* This is the hardware default actually. * This is the hardware default actually.
@@ -970,31 +868,6 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
/* WaVSRefCountFullforceMissDisable:hsw */ /* WaVSRefCountFullforceMissDisable:hsw */
wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
wa_masked_dis(wal,
CACHE_MODE_0_GEN7,
/* WaDisable_RenderCache_OperationalFlush:hsw */
RC_OP_FLUSH_ENABLE |
/* enable HiZ Raw Stall Optimization */
HIZ_RAW_STALL_OPT_DISABLE);
/* WaDisable4x2SubspanOptimization:hsw */
wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.
*
* Note that PS/WM thread counts depend on the WIZ hashing
* disable bit, which we don't touch here, but it's good
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
*/
wa_add(wal, GEN7_GT_MODE, 0,
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
GEN6_WIZ_HASHING_16x4);
/* WaSampleCChickenBitEnable:hsw */
wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
} }
static void static void
@@ -1164,7 +1037,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr);
wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
} }
static void static void
@@ -1189,7 +1062,7 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
/* WaModifyGamTlbPartitioning:icl */ /* WaModifyGamTlbPartitioning:icl */
wa_write_masked_or(wal, wa_write_clr_set(wal,
GEN11_GACB_PERF_CTRL, GEN11_GACB_PERF_CTRL,
GEN11_HASH_CTRL_MASK, GEN11_HASH_CTRL_MASK,
GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);
@@ -1260,6 +1133,11 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
wa_write_or(wal, wa_write_or(wal,
SLICE_UNIT_LEVEL_CLKGATE, SLICE_UNIT_LEVEL_CLKGATE,
L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);
/* Wa_1408615072:tgl[a0] */
if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0))
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
VSUNIT_CLKGATE_DIS_TGL);
} }
static void static void
@@ -1358,9 +1236,9 @@ static bool
wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from)
{ {
if ((cur ^ wa->set) & wa->read) { if ((cur ^ wa->set) & wa->read) {
DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x)\n", DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
name, from, i915_mmio_reg_offset(wa->reg), name, from, i915_mmio_reg_offset(wa->reg),
cur, cur & wa->read, wa->set); cur, cur & wa->read, wa->set & wa->read);
return false; return false;
} }
@@ -1425,6 +1303,7 @@ bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from); return wa_list_verify(gt->uncore, &gt->i915->gt_wa_list, from);
} }
__maybe_unused
static inline bool is_nonpriv_flags_valid(u32 flags) static inline bool is_nonpriv_flags_valid(u32 flags)
{ {
/* Check only valid flag bits are set */ /* Check only valid flag bits are set */
@@ -1752,10 +1631,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_write_or(wal, wa_write_or(wal,
GEN7_SARCHKMD, GEN7_SARCHKMD,
GEN7_DISABLE_SAMPLER_PREFETCH); GEN7_DISABLE_SAMPLER_PREFETCH);
/* Wa_1408615072:tgl */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
VSUNIT_CLKGATE_DIS_TGL);
} }
if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
@@ -1770,6 +1645,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
*/ */
wa_write_or(wal, GEN7_FF_THREAD_MODE, wa_write_or(wal, GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE); GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
/*
* Wa_1606700617:tgl,dg1
* Wa_22010271021:tgl,rkl,dg1
*/
wa_masked_en(wal,
GEN9_CS_DEBUG_MODE1,
FF_DOP_CLOCK_GATE_DISABLE);
/* Wa_1406941453:tgl,rkl,dg1 */
wa_masked_en(wal,
GEN10_SAMPLER_MODE,
ENABLE_SMALLPL);
} }
if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) ||
@@ -1798,21 +1686,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN6_RC_SLEEP_PSMI_CONTROL, GEN6_RC_SLEEP_PSMI_CONTROL,
GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
GEN8_RC_SEMA_IDLE_MSG_DISABLE); GEN8_RC_SEMA_IDLE_MSG_DISABLE);
/*
* Wa_1606700617:tgl
* Wa_22010271021:tgl,rkl
*/
wa_masked_en(wal,
GEN9_CS_DEBUG_MODE1,
FF_DOP_CLOCK_GATE_DISABLE);
}
if (IS_GEN(i915, 12)) {
/* Wa_1406941453:gen12 */
wa_masked_en(wal,
GEN10_SAMPLER_MODE,
ENABLE_SMALLPL);
} }
if (IS_GEN(i915, 11)) { if (IS_GEN(i915, 11)) {
@@ -1838,11 +1711,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* Wa_1604223664:icl * Wa_1604223664:icl
* Formerly known as WaL3BankAddressHashing * Formerly known as WaL3BankAddressHashing
*/ */
wa_write_masked_or(wal, wa_write_clr_set(wal,
GEN8_GARBCNTL, GEN8_GARBCNTL,
GEN11_HASH_CTRL_EXCL_MASK, GEN11_HASH_CTRL_EXCL_MASK,
GEN11_HASH_CTRL_EXCL_BIT0); GEN11_HASH_CTRL_EXCL_BIT0);
wa_write_masked_or(wal, wa_write_clr_set(wal,
GEN11_GLBLINVL, GEN11_GLBLINVL,
GEN11_BANK_HASH_ADDR_EXCL_MASK, GEN11_BANK_HASH_ADDR_EXCL_MASK,
GEN11_BANK_HASH_ADDR_EXCL_BIT0); GEN11_BANK_HASH_ADDR_EXCL_BIT0);
@@ -1874,7 +1747,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_DISABLE_SAMPLER_PREFETCH); GEN7_DISABLE_SAMPLER_PREFETCH);
/* Wa_1409178092:icl */ /* Wa_1409178092:icl */
wa_write_masked_or(wal, wa_write_clr_set(wal,
GEN11_SCRATCH2, GEN11_SCRATCH2,
GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
0); 0);
@@ -1951,7 +1824,7 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
if (IS_GEN9_LP(i915)) if (IS_GEN9_LP(i915))
wa_write_masked_or(wal, wa_write_clr_set(wal,
GEN8_L3SQCREG1, GEN8_L3SQCREG1,
L3_PRIO_CREDITS_MASK, L3_PRIO_CREDITS_MASK,
L3_GENERAL_PRIO_CREDITS(62) | L3_GENERAL_PRIO_CREDITS(62) |
@@ -1963,12 +1836,112 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN8_LQSC_FLUSH_COHERENT_LINES); GEN8_LQSC_FLUSH_COHERENT_LINES);
} }
if (IS_GEN(i915, 7)) if (IS_HASWELL(i915)) {
/* WaSampleCChickenBitEnable:hsw */
wa_masked_en(wal,
HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
wa_masked_dis(wal,
CACHE_MODE_0_GEN7,
/* enable HiZ Raw Stall Optimization */
HIZ_RAW_STALL_OPT_DISABLE);
/* WaDisable4x2SubspanOptimization:hsw */
wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
}
if (IS_VALLEYVIEW(i915)) {
/* WaDisableEarlyCull:vlv */
wa_masked_en(wal,
_3D_CHICKEN3,
_3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
/*
* WaVSThreadDispatchOverride:ivb,vlv
*
* This actually overrides the dispatch
* mode for all thread types.
*/
wa_write_clr_set(wal,
GEN7_FF_THREAD_MODE,
GEN7_FF_SCHED_MASK,
GEN7_FF_TS_SCHED_HW |
GEN7_FF_VS_SCHED_HW |
GEN7_FF_DS_SCHED_HW);
/* WaPsdDispatchEnable:vlv */
/* WaDisablePSDDualDispatchEnable:vlv */
wa_masked_en(wal,
GEN7_HALF_SLICE_CHICKEN1,
GEN7_MAX_PS_THREAD_DEP |
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
}
if (IS_IVYBRIDGE(i915)) {
/* WaDisableEarlyCull:ivb */
wa_masked_en(wal,
_3D_CHICKEN3,
_3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
if (0) { /* causes HiZ corruption on ivb:gt1 */
/* enable HiZ Raw Stall Optimization */
wa_masked_dis(wal,
CACHE_MODE_0_GEN7,
HIZ_RAW_STALL_OPT_DISABLE);
}
/*
* WaVSThreadDispatchOverride:ivb,vlv
*
* This actually overrides the dispatch
* mode for all thread types.
*/
wa_write_clr_set(wal,
GEN7_FF_THREAD_MODE,
GEN7_FF_SCHED_MASK,
GEN7_FF_TS_SCHED_HW |
GEN7_FF_VS_SCHED_HW |
GEN7_FF_DS_SCHED_HW);
/* WaDisablePSDDualDispatchEnable:ivb */
if (IS_IVB_GT1(i915))
wa_masked_en(wal,
GEN7_HALF_SLICE_CHICKEN1,
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
}
if (IS_GEN(i915, 7)) {
/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
wa_masked_en(wal, wa_masked_en(wal,
GFX_MODE_GEN7, GFX_MODE_GEN7,
GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE); GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
/*
* BSpec says this must be set, even though
* WaDisable4x2SubspanOptimization:ivb,hsw
* WaDisable4x2SubspanOptimization isn't listed for VLV.
*/
wa_masked_en(wal,
CACHE_MODE_1,
PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.
*
* Note that PS/WM thread counts depend on the WIZ hashing
* disable bit, which we don't touch here, but it's good
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
*/
wa_add(wal, GEN7_GT_MODE, 0,
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK,
GEN6_WIZ_HASHING_16x4),
GEN6_WIZ_HASHING_16x4);
}
if (IS_GEN_RANGE(i915, 6, 7)) if (IS_GEN_RANGE(i915, 6, 7))
/* /*
* We need to disable the AsyncFlip performance optimisations in * We need to disable the AsyncFlip performance optimisations in
@@ -1991,6 +1964,39 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GFX_MODE, GFX_MODE,
GFX_TLB_INVALIDATE_EXPLICIT); GFX_TLB_INVALIDATE_EXPLICIT);
/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
wa_masked_en(wal,
_3D_CHICKEN,
_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
wa_masked_en(wal,
_3D_CHICKEN3,
/* WaStripsFansDisableFastClipPerformanceFix:snb */
_3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
/*
* Bspec says:
* "This bit must be set if 3DSTATE_CLIP clip mode is set
* to normal and 3DSTATE_SF number of SF output attributes
* is more than 16."
*/
_3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.
*
* Note that PS/WM thread counts depend on the WIZ hashing
* disable bit, which we don't touch here, but it's good
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
*/
wa_add(wal,
GEN6_GT_MODE, 0,
_MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
GEN6_WIZ_HASHING_16x4);
/* WaDisable_RenderCache_OperationalFlush:snb */
wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
/* /*
* From the Sandybridge PRM, volume 1 part 3, page 24: * From the Sandybridge PRM, volume 1 part 3, page 24:
* "If this bit is set, STCunit will have LRA as replacement * "If this bit is set, STCunit will have LRA as replacement
@@ -2067,39 +2073,6 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
wa_list_apply(engine->uncore, &engine->wa_list); wa_list_apply(engine->uncore, &engine->wa_list);
} }
static struct i915_vma *
create_scratch(struct i915_address_space *vm, int count)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
unsigned int size;
int err;
size = round_up(count * sizeof(u32), PAGE_SIZE);
obj = i915_gem_object_create_internal(vm->i915, size);
if (IS_ERR(obj))
return ERR_CAST(obj);
i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
vma = i915_vma_instance(obj, vm, NULL);
if (IS_ERR(vma)) {
err = PTR_ERR(vma);
goto err_obj;
}
err = i915_vma_pin(vma, 0, 0,
i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
if (err)
goto err_obj;
return vma;
err_obj:
i915_gem_object_put(obj);
return ERR_PTR(err);
}
struct mcr_range { struct mcr_range {
u32 start; u32 start;
u32 end; u32 end;
@@ -2202,7 +2175,8 @@ static int engine_wa_list_verify(struct intel_context *ce,
if (!wal->count) if (!wal->count)
return 0; return 0;
vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count); vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
wal->count * sizeof(u32));
if (IS_ERR(vma)) if (IS_ERR(vma))
return PTR_ERR(vma); return PTR_ERR(vma);



@@ -245,17 +245,6 @@ static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled)
GEM_BUG_ON(stalled); GEM_BUG_ON(stalled);
} }
static void mark_eio(struct i915_request *rq)
{
if (i915_request_completed(rq))
return;
GEM_BUG_ON(i915_request_signaled(rq));
i915_request_set_error_once(rq, -EIO);
i915_request_mark_complete(rq);
}
static void mock_reset_cancel(struct intel_engine_cs *engine) static void mock_reset_cancel(struct intel_engine_cs *engine)
{ {
struct mock_engine *mock = struct mock_engine *mock =
@@ -269,12 +258,12 @@ static void mock_reset_cancel(struct intel_engine_cs *engine)
/* Mark all submitted requests as skipped. */ /* Mark all submitted requests as skipped. */
list_for_each_entry(rq, &engine->active.requests, sched.link) list_for_each_entry(rq, &engine->active.requests, sched.link)
mark_eio(rq); i915_request_mark_eio(rq);
intel_engine_signal_breadcrumbs(engine); intel_engine_signal_breadcrumbs(engine);
/* Cancel and submit all pending requests. */ /* Cancel and submit all pending requests. */
list_for_each_entry(rq, &mock->hw_queue, mock.link) { list_for_each_entry(rq, &mock->hw_queue, mock.link) {
mark_eio(rq); i915_request_mark_eio(rq);
__i915_request_submit(rq); __i915_request_submit(rq);
} }
INIT_LIST_HEAD(&mock->hw_queue); INIT_LIST_HEAD(&mock->hw_queue);


@@ -25,7 +25,7 @@ static int request_sync(struct i915_request *rq)
/* Opencode i915_request_add() so we can keep the timeline locked. */ /* Opencode i915_request_add() so we can keep the timeline locked. */
__i915_request_commit(rq); __i915_request_commit(rq);
rq->sched.attr.priority = I915_PRIORITY_BARRIER; rq->sched.attr.priority = I915_PRIORITY_BARRIER;
__i915_request_queue(rq, NULL); __i915_request_queue_bh(rq);
timeout = i915_request_wait(rq, 0, HZ / 10); timeout = i915_request_wait(rq, 0, HZ / 10);
if (timeout < 0) if (timeout < 0)


@@ -6,6 +6,7 @@
#include <linux/sort.h> #include <linux/sort.h>
#include "intel_gpu_commands.h"
#include "intel_gt_pm.h" #include "intel_gt_pm.h"
#include "intel_rps.h" #include "intel_rps.h"


@@ -197,6 +197,7 @@ static int cmp_u32(const void *_a, const void *_b)
static int __live_heartbeat_fast(struct intel_engine_cs *engine) static int __live_heartbeat_fast(struct intel_engine_cs *engine)
{ {
const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6));
struct intel_context *ce; struct intel_context *ce;
struct i915_request *rq; struct i915_request *rq;
ktime_t t0, t1; ktime_t t0, t1;
@@ -254,12 +255,18 @@ static int __live_heartbeat_fast(struct intel_engine_cs *engine)
times[0], times[0],
times[ARRAY_SIZE(times) - 1]); times[ARRAY_SIZE(times) - 1]);
/* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */ /*
if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) { * Ideally, the upper bound on min work delay would be something like
* 2 * 2 (worst), +1 for scheduling, +1 for slack. In practice, we
* are, even with system_wq_highpri, at the mercy of the CPU scheduler
* and may be stuck behind some slow work for many milliseconds, such
* as our very own display workers.
*/
if (times[ARRAY_SIZE(times) / 2] > error_threshold) {
pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n",
engine->name, engine->name,
times[ARRAY_SIZE(times) / 2], times[ARRAY_SIZE(times) / 2],
jiffies_to_usecs(6)); error_threshold);
err = -EINVAL; err = -EINVAL;
} }
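
The new error_threshold keeps the jiffies-based bound but floors it at 20ms, so the pass criterion no longer depends as strongly on CONFIG_HZ. A rough illustration of what max(20000u, jiffies_to_usecs(6)) works out to for a few common HZ choices, using a simplified jiffies_to_usecs() that is exact when 1000000 is divisible by HZ:

#include <stdio.h>

/* Simplified jiffies_to_usecs(): exact when 1000000 is divisible by HZ. */
static unsigned int jiffies_to_usecs(unsigned int j, unsigned int hz)
{
	return j * (1000000u / hz);
}

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

int main(void)
{
	const unsigned int hz_values[] = { 100, 250, 1000 };

	for (unsigned int i = 0; i < sizeof(hz_values) / sizeof(hz_values[0]); i++) {
		unsigned int hz = hz_values[i];
		unsigned int threshold = max_u(20000u, jiffies_to_usecs(6, hz));

		/* HZ=100 -> 60000us, HZ=250 -> 24000us, HZ=1000 -> 20000us floor */
		printf("HZ=%u: error_threshold = %u us\n", hz, threshold);
	}
	return 0;
}
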


@@ -4,13 +4,215 @@
* Copyright © 2018 Intel Corporation * Copyright © 2018 Intel Corporation
*/ */
#include <linux/sort.h>
#include "i915_selftest.h" #include "i915_selftest.h"
#include "intel_gpu_commands.h"
#include "intel_gt_clock_utils.h"
#include "selftest_engine.h" #include "selftest_engine.h"
#include "selftest_engine_heartbeat.h" #include "selftest_engine_heartbeat.h"
#include "selftests/igt_atomic.h" #include "selftests/igt_atomic.h"
#include "selftests/igt_flush_test.h" #include "selftests/igt_flush_test.h"
#include "selftests/igt_spinner.h" #include "selftests/igt_spinner.h"
#define COUNT 5
static int cmp_u64(const void *A, const void *B)
{
const u64 *a = A, *b = B;
return *a - *b;
}
static u64 trifilter(u64 *a)
{
sort(a, COUNT, sizeof(*a), cmp_u64, NULL);
return (a[1] + 2 * a[2] + a[3]) >> 2;
}
static u32 *emit_wait(u32 *cs, u32 offset, int op, u32 value)
{
*cs++ = MI_SEMAPHORE_WAIT |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_POLL |
op;
*cs++ = value;
*cs++ = offset;
*cs++ = 0;
return cs;
}
static u32 *emit_store(u32 *cs, u32 offset, u32 value)
{
*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
*cs++ = offset;
*cs++ = 0;
*cs++ = value;
return cs;
}
static u32 *emit_srm(u32 *cs, i915_reg_t reg, u32 offset)
{
*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
*cs++ = i915_mmio_reg_offset(reg);
*cs++ = offset;
*cs++ = 0;
return cs;
}
static void write_semaphore(u32 *x, u32 value)
{
WRITE_ONCE(*x, value);
wmb();
}
static int __measure_timestamps(struct intel_context *ce,
u64 *dt, u64 *d_ring, u64 *d_ctx)
{
struct intel_engine_cs *engine = ce->engine;
u32 *sema = memset32(engine->status_page.addr + 1000, 0, 5);
u32 offset = i915_ggtt_offset(engine->status_page.vma);
struct i915_request *rq;
u32 *cs;
rq = intel_context_create_request(ce);
if (IS_ERR(rq))
return PTR_ERR(rq);
cs = intel_ring_begin(rq, 28);
if (IS_ERR(cs)) {
i915_request_add(rq);
return PTR_ERR(cs);
}
/* Signal & wait for start */
cs = emit_store(cs, offset + 4008, 1);
cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_NEQ_SDD, 1);
cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4000);
cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4004);
/* Busy wait */
cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_EQ_SDD, 1);
cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4016);
cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4012);
intel_ring_advance(rq, cs);
i915_request_get(rq);
i915_request_add(rq);
intel_engine_flush_submission(engine);
/* Wait for the request to start executing, that then waits for us */
while (READ_ONCE(sema[2]) == 0)
cpu_relax();
/* Run the request for a 100us, sampling timestamps before/after */
preempt_disable();
*dt = local_clock();
write_semaphore(&sema[2], 0);
udelay(100);
*dt = local_clock() - *dt;
write_semaphore(&sema[2], 1);
preempt_enable();
if (i915_request_wait(rq, 0, HZ / 2) < 0) {
i915_request_put(rq);
return -ETIME;
}
i915_request_put(rq);
pr_debug("%s CTX_TIMESTAMP: [%x, %x], RING_TIMESTAMP: [%x, %x]\n",
engine->name, sema[1], sema[3], sema[0], sema[4]);
*d_ctx = sema[3] - sema[1];
*d_ring = sema[4] - sema[0];
return 0;
}
static int __live_engine_timestamps(struct intel_engine_cs *engine)
{
u64 s_ring[COUNT], s_ctx[COUNT], st[COUNT], d_ring, d_ctx, dt;
struct intel_context *ce;
int i, err = 0;
ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
for (i = 0; i < COUNT; i++) {
err = __measure_timestamps(ce, &st[i], &s_ring[i], &s_ctx[i]);
if (err)
break;
}
intel_context_put(ce);
if (err)
return err;
dt = trifilter(st);
d_ring = trifilter(s_ring);
d_ctx = trifilter(s_ctx);
pr_info("%s elapsed:%lldns, CTX_TIMESTAMP:%lldns, RING_TIMESTAMP:%lldns\n",
engine->name, dt,
intel_gt_clock_interval_to_ns(engine->gt, d_ctx),
intel_gt_clock_interval_to_ns(engine->gt, d_ring));
d_ring = intel_gt_clock_interval_to_ns(engine->gt, d_ring);
if (3 * dt > 4 * d_ring || 4 * dt < 3 * d_ring) {
pr_err("%s Mismatch between ring timestamp and walltime!\n",
engine->name);
return -EINVAL;
}
d_ring = trifilter(s_ring);
d_ctx = trifilter(s_ctx);
d_ctx *= engine->gt->clock_frequency;
if (IS_ICELAKE(engine->i915))
d_ring *= 12500000; /* Fixed 80ns for icl ctx timestamp? */
else
d_ring *= engine->gt->clock_frequency;
if (3 * d_ctx > 4 * d_ring || 4 * d_ctx < 3 * d_ring) {
pr_err("%s Mismatch between ring and context timestamps!\n",
engine->name);
return -EINVAL;
}
return 0;
}
static int live_engine_timestamps(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
/*
* Check that CS_TIMESTAMP / CTX_TIMESTAMP are in sync, i.e. share
* the same CS clock.
*/
if (INTEL_GEN(gt->i915) < 8)
return 0;
for_each_engine(engine, gt, id) {
int err;
st_engine_heartbeat_disable(engine);
err = __live_engine_timestamps(engine);
st_engine_heartbeat_enable(engine);
if (err)
return err;
}
return 0;
}
static int live_engine_busy_stats(void *arg) static int live_engine_busy_stats(void *arg)
{ {
struct intel_gt *gt = arg; struct intel_gt *gt = arg;
@@ -177,6 +379,7 @@ static int live_engine_pm(void *arg)
int live_engine_pm_selftests(struct intel_gt *gt) int live_engine_pm_selftests(struct intel_gt *gt)
{ {
static const struct i915_subtest tests[] = { static const struct i915_subtest tests[] = {
SUBTEST(live_engine_timestamps),
SUBTEST(live_engine_busy_stats), SUBTEST(live_engine_busy_stats),
SUBTEST(live_engine_pm), SUBTEST(live_engine_pm),
}; };
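
The live_engine_timestamps test added above smooths each measurement with trifilter() — sort five samples and take a (1,2,1)-weighted average of the middle three — and then accepts the RING/CTX timestamp cross-check only when the two values agree within roughly a third (the 3*x > 4*y || 4*x < 3*y test). A standalone sketch of the same filtering and tolerance logic; the comparator here uses explicit comparisons rather than subtraction so large 64-bit deltas are not truncated:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define COUNT 5

static int cmp_u64(const void *A, const void *B)
{
	const uint64_t *a = A, *b = B;

	if (*a < *b)
		return -1;
	return *a > *b;
}

/* Sort the five samples, then take a (1,2,1)-weighted mean of the middle three. */
static uint64_t trifilter(uint64_t *a)
{
	qsort(a, COUNT, sizeof(*a), cmp_u64);
	return (a[1] + 2 * a[2] + a[3]) >> 2;
}

/* The selftest's agreement check: reject if either value is ~1/3 larger than the other. */
static bool within_tolerance(uint64_t x, uint64_t y)
{
	return !(3 * x > 4 * y || 4 * x < 3 * y);
}

int main(void)
{
	uint64_t samples[COUNT] = { 105, 98, 400, 101, 99 }; /* one outlier */

	printf("filtered: %llu\n", (unsigned long long)trifilter(samples)); /* 101 */
	printf("match: %d\n", within_tolerance(100, 120)); /* 1: within ~33%   */
	printf("match: %d\n", within_tolerance(100, 140)); /* 0: too far apart */
	return 0;
}
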

File diff suppressed because it is too large


@@ -71,7 +71,7 @@ static int live_gt_clocks(void *arg)
enum intel_engine_id id; enum intel_engine_id id;
int err = 0; int err = 0;
if (!RUNTIME_INFO(gt->i915)->cs_timestamp_frequency_hz) { /* unknown */ if (!gt->clock_frequency) { /* unknown */
pr_info("CS_TIMESTAMP frequency unknown\n"); pr_info("CS_TIMESTAMP frequency unknown\n");
return 0; return 0;
} }
@@ -112,12 +112,12 @@ static int live_gt_clocks(void *arg)
measure_clocks(engine, &cycles, &dt); measure_clocks(engine, &cycles, &dt);
time = i915_cs_timestamp_ticks_to_ns(engine->i915, cycles); time = intel_gt_clock_interval_to_ns(engine->gt, cycles);
expected = i915_cs_timestamp_ns_to_ticks(engine->i915, dt); expected = intel_gt_ns_to_clock_interval(engine->gt, dt);
pr_info("%s: TIMESTAMP %d cycles [%lldns] in %lldns [%d cycles], using CS clock frequency of %uKHz\n", pr_info("%s: TIMESTAMP %d cycles [%lldns] in %lldns [%d cycles], using CS clock frequency of %uKHz\n",
engine->name, cycles, time, dt, expected, engine->name, cycles, time, dt, expected,
RUNTIME_INFO(engine->i915)->cs_timestamp_frequency_hz / 1000); engine->gt->clock_frequency / 1000);
if (9 * time < 8 * dt || 8 * time > 9 * dt) { if (9 * time < 8 * dt || 8 * time > 9 * dt) {
pr_err("%s: CS ticks did not match walltime!\n", pr_err("%s: CS ticks did not match walltime!\n",


@@ -506,7 +506,8 @@ static int igt_reset_nop_engine(void *arg)
} }
err = intel_engine_reset(engine, NULL); err = intel_engine_reset(engine, NULL);
if (err) { if (err) {
pr_err("i915_reset_engine failed\n"); pr_err("intel_engine_reset(%s) failed, err:%d\n",
engine->name, err);
break; break;
} }
@@ -539,6 +540,149 @@ static int igt_reset_nop_engine(void *arg)
return 0; return 0;
} }
static void force_reset_timeout(struct intel_engine_cs *engine)
{
engine->reset_timeout.probability = 999;
atomic_set(&engine->reset_timeout.times, -1);
}
static void cancel_reset_timeout(struct intel_engine_cs *engine)
{
memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout));
}
static int igt_reset_fail_engine(void *arg)
{
struct intel_gt *gt = arg;
struct intel_engine_cs *engine;
enum intel_engine_id id;
/* Check that we can recover from engine-reset failures */
if (!intel_has_reset_engine(gt))
return 0;
for_each_engine(engine, gt, id) {
unsigned int count;
struct intel_context *ce;
IGT_TIMEOUT(end_time);
int err;
ce = intel_context_create(engine);
if (IS_ERR(ce))
return PTR_ERR(ce);
st_engine_heartbeat_disable(engine);
set_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
force_reset_timeout(engine);
err = intel_engine_reset(engine, NULL);
cancel_reset_timeout(engine);
if (err == 0) /* timeouts only generated on gen8+ */
goto skip;
count = 0;
do {
struct i915_request *last = NULL;
int i;
if (!wait_for_idle(engine)) {
pr_err("%s failed to idle before reset\n",
engine->name);
err = -EIO;
break;
}
for (i = 0; i < count % 15; i++) {
struct i915_request *rq;
rq = intel_context_create_request(ce);
if (IS_ERR(rq)) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
intel_engine_dump(engine, &p,
"%s(%s): failed to submit request\n",
__func__,
engine->name);
GEM_TRACE("%s(%s): failed to submit request\n",
__func__,
engine->name);
GEM_TRACE_DUMP();
intel_gt_set_wedged(gt);
if (last)
i915_request_put(last);
err = PTR_ERR(rq);
goto out;
}
if (last)
i915_request_put(last);
last = i915_request_get(rq);
i915_request_add(rq);
}
if (count & 1) {
err = intel_engine_reset(engine, NULL);
if (err) {
GEM_TRACE_ERR("intel_engine_reset(%s) failed, err:%d\n",
engine->name, err);
GEM_TRACE_DUMP();
i915_request_put(last);
break;
}
} else {
force_reset_timeout(engine);
err = intel_engine_reset(engine, NULL);
cancel_reset_timeout(engine);
if (err != -ETIMEDOUT) {
pr_err("intel_engine_reset(%s) did not fail, err:%d\n",
engine->name, err);
i915_request_put(last);
break;
}
}
err = 0;
if (last) {
if (i915_request_wait(last, 0, HZ / 2) < 0) {
struct drm_printer p =
drm_info_printer(gt->i915->drm.dev);
intel_engine_dump(engine, &p,
"%s(%s): failed to complete request\n",
__func__,
engine->name);
GEM_TRACE("%s(%s): failed to complete request\n",
__func__,
engine->name);
GEM_TRACE_DUMP();
err = -EIO;
}
i915_request_put(last);
}
count++;
} while (err == 0 && time_before(jiffies, end_time));
out:
pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
skip:
clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags);
st_engine_heartbeat_enable(engine);
intel_context_put(ce);
if (igt_flush_test(gt->i915))
err = -EIO;
if (err)
return err;
}
return 0;
}
static int __igt_reset_engine(struct intel_gt *gt, bool active) static int __igt_reset_engine(struct intel_gt *gt, bool active)
{ {
struct i915_gpu_error *global = &gt->i915->gpu_error; struct i915_gpu_error *global = &gt->i915->gpu_error;
@@ -608,7 +752,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active)
err = intel_engine_reset(engine, NULL); err = intel_engine_reset(engine, NULL);
if (err) { if (err) {
pr_err("i915_reset_engine failed\n"); pr_err("intel_engine_reset(%s) failed, err:%d\n",
engine->name, err);
break; break;
} }
@@ -1576,12 +1721,17 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine,
engine->name, mode, p->name); engine->name, mode, p->name);
tasklet_disable(t); tasklet_disable(t);
if (strcmp(p->name, "softirq"))
local_bh_disable();
p->critical_section_begin(); p->critical_section_begin();
err = intel_engine_reset(engine, NULL); err = __intel_engine_reset_bh(engine, NULL);
p->critical_section_end(); p->critical_section_end();
if (strcmp(p->name, "softirq"))
local_bh_enable();
tasklet_enable(t); tasklet_enable(t);
tasklet_hi_schedule(t);
if (err) if (err)
pr_err("i915_reset_engine(%s:%s) failed under %s\n", pr_err("i915_reset_engine(%s:%s) failed under %s\n",
@@ -1687,6 +1837,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_reset_nop_engine), SUBTEST(igt_reset_nop_engine),
SUBTEST(igt_reset_idle_engine), SUBTEST(igt_reset_idle_engine),
SUBTEST(igt_reset_active_engine), SUBTEST(igt_reset_active_engine),
SUBTEST(igt_reset_fail_engine),
SUBTEST(igt_reset_engines), SUBTEST(igt_reset_engines),
SUBTEST(igt_reset_engines_atomic), SUBTEST(igt_reset_engines_atomic),
SUBTEST(igt_reset_queue), SUBTEST(igt_reset_queue),

File diff suppressed because it is too large


@@ -5,6 +5,7 @@
*/ */
#include "gt/intel_engine_pm.h" #include "gt/intel_engine_pm.h"
#include "gt/intel_gpu_commands.h"
#include "i915_selftest.h" #include "i915_selftest.h"
#include "gem/selftests/mock_context.h" #include "gem/selftests/mock_context.h"
@@ -56,33 +57,6 @@ static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin)
return err; return err;
} }
static struct i915_vma *create_scratch(struct intel_gt *gt)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
int err;
obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE);
if (IS_ERR(obj))
return ERR_CAST(obj);
i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL);
if (IS_ERR(vma)) {
i915_gem_object_put(obj);
return vma;
}
err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
if (err) {
i915_gem_object_put(obj);
return ERR_PTR(err);
}
return vma;
}
static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt) static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt)
{ {
struct drm_i915_mocs_table table; struct drm_i915_mocs_table table;
@@ -101,7 +75,7 @@ static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt)
if (flags & (HAS_GLOBAL_MOCS | HAS_ENGINE_MOCS)) if (flags & (HAS_GLOBAL_MOCS | HAS_ENGINE_MOCS))
arg->mocs = table; arg->mocs = table;
arg->scratch = create_scratch(gt); arg->scratch = __vm_create_scratch_for_read(&gt->ggtt->vm, PAGE_SIZE);
if (IS_ERR(arg->scratch)) if (IS_ERR(arg->scratch))
return PTR_ERR(arg->scratch); return PTR_ERR(arg->scratch);
@@ -125,7 +99,7 @@ static void live_mocs_fini(struct live_mocs *arg)
static int read_regs(struct i915_request *rq, static int read_regs(struct i915_request *rq,
u32 addr, unsigned int count, u32 addr, unsigned int count,
uint32_t *offset) u32 *offset)
{ {
unsigned int i; unsigned int i;
u32 *cs; u32 *cs;
@@ -153,7 +127,7 @@ static int read_regs(struct i915_request *rq,
static int read_mocs_table(struct i915_request *rq, static int read_mocs_table(struct i915_request *rq,
const struct drm_i915_mocs_table *table, const struct drm_i915_mocs_table *table,
uint32_t *offset) u32 *offset)
{ {
u32 addr; u32 addr;
@@ -167,7 +141,7 @@ static int read_mocs_table(struct i915_request *rq,
static int read_l3cc_table(struct i915_request *rq, static int read_l3cc_table(struct i915_request *rq,
const struct drm_i915_mocs_table *table, const struct drm_i915_mocs_table *table,
uint32_t *offset) u32 *offset)
{ {
u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0));
@@ -176,7 +150,7 @@ static int read_l3cc_table(struct i915_request *rq,
static int check_mocs_table(struct intel_engine_cs *engine, static int check_mocs_table(struct intel_engine_cs *engine,
const struct drm_i915_mocs_table *table, const struct drm_i915_mocs_table *table,
uint32_t **vaddr) u32 **vaddr)
{ {
unsigned int i; unsigned int i;
u32 expect; u32 expect;
@@ -205,7 +179,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset)
static int check_l3cc_table(struct intel_engine_cs *engine, static int check_l3cc_table(struct intel_engine_cs *engine,
const struct drm_i915_mocs_table *table, const struct drm_i915_mocs_table *table,
uint32_t **vaddr) u32 **vaddr)
{ {
/* Can we read the MCR range 0xb00 directly? See intel_workarounds! */ /* Can we read the MCR range 0xb00 directly? See intel_workarounds! */
u32 reg = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); u32 reg = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0));
@@ -361,8 +335,10 @@ static int active_engine_reset(struct intel_context *ce,
static int __live_mocs_reset(struct live_mocs *mocs, static int __live_mocs_reset(struct live_mocs *mocs,
struct intel_context *ce) struct intel_context *ce)
{ {
struct intel_gt *gt = ce->engine->gt;
int err; int err;
if (intel_has_reset_engine(gt)) {
err = intel_engine_reset(ce->engine, "mocs"); err = intel_engine_reset(ce->engine, "mocs");
if (err) if (err)
return err; return err;
@@ -378,12 +354,15 @@ static int __live_mocs_reset(struct live_mocs *mocs,
err = check_mocs_engine(mocs, ce); err = check_mocs_engine(mocs, ce);
if (err) if (err)
return err; return err;
}
intel_gt_reset(ce->engine->gt, ce->engine->mask, "mocs"); if (intel_has_gpu_reset(gt)) {
intel_gt_reset(gt, ce->engine->mask, "mocs");
err = check_mocs_engine(mocs, ce); err = check_mocs_engine(mocs, ce);
if (err) if (err)
return err; return err;
}
return 0; return 0;
} }
@@ -398,9 +377,6 @@ static int live_mocs_reset(void *arg)
/* Check the mocs setup is retained over per-engine and global resets */ /* Check the mocs setup is retained over per-engine and global resets */
if (!intel_has_reset_engine(gt))
return 0;
err = live_mocs_init(&mocs, gt); err = live_mocs_init(&mocs, gt);
if (err) if (err)
return err; return err;


@@ -6,6 +6,7 @@
#include "intel_context.h" #include "intel_context.h"
#include "intel_engine_pm.h" #include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt_requests.h" #include "intel_gt_requests.h"
#include "intel_ring.h" #include "intel_ring.h"
#include "selftest_rc6.h" #include "selftest_rc6.h"


@@ -9,6 +9,7 @@
#include "i915_memcpy.h" #include "i915_memcpy.h"
#include "i915_selftest.h" #include "i915_selftest.h"
#include "intel_gpu_commands.h"
#include "selftests/igt_reset.h" #include "selftests/igt_reset.h"
#include "selftests/igt_atomic.h" #include "selftests/igt_atomic.h"
#include "selftests/igt_spinner.h" #include "selftests/igt_spinner.h"
@@ -95,10 +96,10 @@ __igt_reset_stolen(struct intel_gt *gt,
if (!__drm_mm_interval_first(&gt->i915->mm.stolen, if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
page << PAGE_SHIFT, page << PAGE_SHIFT,
((page + 1) << PAGE_SHIFT) - 1)) ((page + 1) << PAGE_SHIFT) - 1))
memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); memset_io(s, STACK_MAGIC, PAGE_SIZE);
in = s; in = (void __force *)s;
if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE)) if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
in = tmp; in = tmp;
crc[page] = crc32_le(0, in, PAGE_SIZE); crc[page] = crc32_le(0, in, PAGE_SIZE);
@@ -133,8 +134,8 @@ __igt_reset_stolen(struct intel_gt *gt,
ggtt->error_capture.start, ggtt->error_capture.start,
PAGE_SIZE); PAGE_SIZE);
in = s; in = (void __force *)s;
if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE)) if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
in = tmp; in = tmp;
x = crc32_le(0, in, PAGE_SIZE); x = crc32_le(0, in, PAGE_SIZE);
@@ -326,11 +327,16 @@ static int igt_atomic_engine_reset(void *arg)
for (p = igt_atomic_phases; p->name; p++) { for (p = igt_atomic_phases; p->name; p++) {
GEM_TRACE("intel_engine_reset(%s) under %s\n", GEM_TRACE("intel_engine_reset(%s) under %s\n",
engine->name, p->name); engine->name, p->name);
if (strcmp(p->name, "softirq"))
local_bh_disable();
p->critical_section_begin(); p->critical_section_begin();
err = intel_engine_reset(engine, NULL); err = __intel_engine_reset_bh(engine, NULL);
p->critical_section_end(); p->critical_section_end();
if (strcmp(p->name, "softirq"))
local_bh_enable();
if (err) { if (err) {
pr_err("intel_engine_reset(%s) failed under %s\n", pr_err("intel_engine_reset(%s) failed under %s\n",
engine->name, p->name); engine->name, p->name);
@@ -340,6 +346,7 @@ static int igt_atomic_engine_reset(void *arg)
intel_engine_pm_put(engine); intel_engine_pm_put(engine);
tasklet_enable(&engine->execlists.tasklet); tasklet_enable(&engine->execlists.tasklet);
tasklet_hi_schedule(&engine->execlists.tasklet);
if (err) if (err)
break; break;
} }


@@ -185,7 +185,10 @@ static u8 rps_set_check(struct intel_rps *rps, u8 freq)
{ {
mutex_lock(&rps->lock); mutex_lock(&rps->lock);
GEM_BUG_ON(!intel_rps_is_active(rps)); GEM_BUG_ON(!intel_rps_is_active(rps));
intel_rps_set(rps, freq); if (wait_for(!intel_rps_set(rps, freq), 50)) {
mutex_unlock(&rps->lock);
return 0;
}
GEM_BUG_ON(rps->last_freq != freq); GEM_BUG_ON(rps->last_freq != freq);
mutex_unlock(&rps->lock); mutex_unlock(&rps->lock);


@@ -9,6 +9,7 @@
#include "intel_context.h" #include "intel_context.h"
#include "intel_engine_heartbeat.h" #include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h" #include "intel_engine_pm.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h" #include "intel_gt.h"
#include "intel_gt_requests.h" #include "intel_gt_requests.h"
#include "intel_ring.h" #include "intel_ring.h"
@@ -1090,12 +1091,6 @@ static int live_hwsp_read(void *arg)
} }
count++; count++;
if (8 * watcher[1].rq->ring->emit >
3 * watcher[1].rq->ring->size) {
i915_request_put(rq);
break;
}
/* Flush the timeline before manually wrapping again */ /* Flush the timeline before manually wrapping again */
if (i915_request_wait(rq, if (i915_request_wait(rq,
I915_WAIT_INTERRUPTIBLE, I915_WAIT_INTERRUPTIBLE,
@@ -1104,9 +1099,14 @@ static int live_hwsp_read(void *arg)
i915_request_put(rq); i915_request_put(rq);
goto out; goto out;
} }
retire_requests(tl); retire_requests(tl);
i915_request_put(rq); i915_request_put(rq);
/* Single requests are limited to half a ring at most */
if (8 * watcher[1].rq->ring->emit >
3 * watcher[1].rq->ring->size)
break;
} while (!__igt_timeout(end_time, NULL)); } while (!__igt_timeout(end_time, NULL));
WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef); WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef);


@@ -95,8 +95,9 @@ reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists)
} }
static struct drm_i915_gem_object * static struct drm_i915_gem_object *
read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) read_nonprivs(struct intel_context *ce)
{ {
struct intel_engine_cs *engine = ce->engine;
const u32 base = engine->mmio_base; const u32 base = engine->mmio_base;
struct drm_i915_gem_object *result; struct drm_i915_gem_object *result;
struct i915_request *rq; struct i915_request *rq;
@@ -130,7 +131,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
if (err) if (err)
goto err_obj; goto err_obj;
rq = igt_request_alloc(ctx, engine); rq = intel_context_create_request(ce);
if (IS_ERR(rq)) { if (IS_ERR(rq)) {
err = PTR_ERR(rq); err = PTR_ERR(rq);
goto err_pin; goto err_pin;
@@ -145,7 +146,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
goto err_req; goto err_req;
srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
if (INTEL_GEN(ctx->i915) >= 8) if (INTEL_GEN(engine->i915) >= 8)
srm++; srm++;
cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS);
@@ -200,16 +201,16 @@ print_results(const struct intel_engine_cs *engine, const u32 *results)
} }
} }
static int check_whitelist(struct i915_gem_context *ctx, static int check_whitelist(struct intel_context *ce)
struct intel_engine_cs *engine)
{ {
struct intel_engine_cs *engine = ce->engine;
struct drm_i915_gem_object *results; struct drm_i915_gem_object *results;
struct intel_wedge_me wedge; struct intel_wedge_me wedge;
u32 *vaddr; u32 *vaddr;
int err; int err;
int i; int i;
results = read_nonprivs(ctx, engine); results = read_nonprivs(ce);
if (IS_ERR(results)) if (IS_ERR(results))
return PTR_ERR(results); return PTR_ERR(results);
@@ -293,8 +294,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
int (*reset)(struct intel_engine_cs *), int (*reset)(struct intel_engine_cs *),
const char *name) const char *name)
{ {
struct drm_i915_private *i915 = engine->i915; struct intel_context *ce, *tmp;
struct i915_gem_context *ctx, *tmp;
struct igt_spinner spin; struct igt_spinner spin;
intel_wakeref_t wakeref; intel_wakeref_t wakeref;
int err; int err;
@@ -302,15 +302,15 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
pr_info("Checking %d whitelisted registers on %s (RING_NONPRIV) [%s]\n", pr_info("Checking %d whitelisted registers on %s (RING_NONPRIV) [%s]\n",
engine->whitelist.count, engine->name, name); engine->whitelist.count, engine->name, name);
ctx = kernel_context(i915); ce = intel_context_create(engine);
if (IS_ERR(ctx)) if (IS_ERR(ce))
return PTR_ERR(ctx); return PTR_ERR(ce);
err = igt_spinner_init(&spin, engine->gt); err = igt_spinner_init(&spin, engine->gt);
if (err) if (err)
goto out_ctx; goto out_ctx;
err = check_whitelist(ctx, engine); err = check_whitelist(ce);
if (err) { if (err) {
pr_err("Invalid whitelist *before* %s reset!\n", name); pr_err("Invalid whitelist *before* %s reset!\n", name);
goto out_spin; goto out_spin;
@@ -330,22 +330,22 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
goto out_spin; goto out_spin;
} }
err = check_whitelist(ctx, engine); err = check_whitelist(ce);
if (err) { if (err) {
pr_err("Whitelist not preserved in context across %s reset!\n", pr_err("Whitelist not preserved in context across %s reset!\n",
name); name);
goto out_spin; goto out_spin;
} }
tmp = kernel_context(i915); tmp = intel_context_create(engine);
if (IS_ERR(tmp)) { if (IS_ERR(tmp)) {
err = PTR_ERR(tmp); err = PTR_ERR(tmp);
goto out_spin; goto out_spin;
} }
kernel_context_close(ctx); intel_context_put(ce);
ctx = tmp; ce = tmp;
err = check_whitelist(ctx, engine); err = check_whitelist(ce);
if (err) { if (err) {
pr_err("Invalid whitelist *after* %s reset in fresh context!\n", pr_err("Invalid whitelist *after* %s reset in fresh context!\n",
name); name);
@@ -355,7 +355,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine,
out_spin: out_spin:
igt_spinner_fini(&spin); igt_spinner_fini(&spin);
out_ctx: out_ctx:
kernel_context_close(ctx); intel_context_put(ce);
return err; return err;
} }
@@ -486,10 +486,11 @@ static int check_dirty_whitelist(struct intel_context *ce)
struct intel_engine_cs *engine = ce->engine; struct intel_engine_cs *engine = ce->engine;
struct i915_vma *scratch; struct i915_vma *scratch;
struct i915_vma *batch; struct i915_vma *batch;
int err = 0, i, v; int err = 0, i, v, sz;
u32 *cs, *results; u32 *cs, *results;
scratch = create_scratch(ce->vm, 2 * ARRAY_SIZE(values) + 1); sz = (2 * ARRAY_SIZE(values) + 1) * sizeof(u32);
scratch = __vm_create_scratch_for_read(ce->vm, sz);
if (IS_ERR(scratch)) if (IS_ERR(scratch))
return PTR_ERR(scratch); return PTR_ERR(scratch);
@@ -786,15 +787,15 @@ out:
return err; return err;
} }
static int read_whitelisted_registers(struct i915_gem_context *ctx, static int read_whitelisted_registers(struct intel_context *ce,
struct intel_engine_cs *engine,
struct i915_vma *results) struct i915_vma *results)
{ {
struct intel_engine_cs *engine = ce->engine;
struct i915_request *rq; struct i915_request *rq;
int i, err = 0; int i, err = 0;
u32 srm, *cs; u32 srm, *cs;
rq = igt_request_alloc(ctx, engine); rq = intel_context_create_request(ce);
if (IS_ERR(rq)) if (IS_ERR(rq))
return PTR_ERR(rq); return PTR_ERR(rq);
@@ -807,7 +808,7 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx,
goto err_req; goto err_req;
srm = MI_STORE_REGISTER_MEM; srm = MI_STORE_REGISTER_MEM;
if (INTEL_GEN(ctx->i915) >= 8) if (INTEL_GEN(engine->i915) >= 8)
srm++; srm++;
cs = intel_ring_begin(rq, 4 * engine->whitelist.count); cs = intel_ring_begin(rq, 4 * engine->whitelist.count);
@@ -834,18 +835,15 @@ err_req:
return request_add_sync(rq, err); return request_add_sync(rq, err);
} }
static int scrub_whitelisted_registers(struct i915_gem_context *ctx, static int scrub_whitelisted_registers(struct intel_context *ce)
struct intel_engine_cs *engine)
{ {
struct i915_address_space *vm; struct intel_engine_cs *engine = ce->engine;
struct i915_request *rq; struct i915_request *rq;
struct i915_vma *batch; struct i915_vma *batch;
int i, err = 0; int i, err = 0;
u32 *cs; u32 *cs;
vm = i915_gem_context_get_vm_rcu(ctx); batch = create_batch(ce->vm);
batch = create_batch(vm);
i915_vm_put(vm);
if (IS_ERR(batch)) if (IS_ERR(batch))
return PTR_ERR(batch); return PTR_ERR(batch);
@@ -873,7 +871,7 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx,
i915_gem_object_flush_map(batch->obj); i915_gem_object_flush_map(batch->obj);
intel_gt_chipset_flush(engine->gt); intel_gt_chipset_flush(engine->gt);
rq = igt_request_alloc(ctx, engine); rq = intel_context_create_request(ce);
if (IS_ERR(rq)) { if (IS_ERR(rq)) {
err = PTR_ERR(rq); err = PTR_ERR(rq);
goto err_unpin; goto err_unpin;
@@ -1016,7 +1014,6 @@ static int live_isolated_whitelist(void *arg)
{ {
struct intel_gt *gt = arg; struct intel_gt *gt = arg;
struct { struct {
struct i915_gem_context *ctx;
struct i915_vma *scratch[2]; struct i915_vma *scratch[2];
} client[2] = {}; } client[2] = {};
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
@@ -1032,61 +1029,57 @@ static int live_isolated_whitelist(void *arg)
return 0; return 0;
for (i = 0; i < ARRAY_SIZE(client); i++) { for (i = 0; i < ARRAY_SIZE(client); i++) {
struct i915_address_space *vm; client[i].scratch[0] =
struct i915_gem_context *c; __vm_create_scratch_for_read(gt->vm, 4096);
c = kernel_context(gt->i915);
if (IS_ERR(c)) {
err = PTR_ERR(c);
goto err;
}
vm = i915_gem_context_get_vm_rcu(c);
client[i].scratch[0] = create_scratch(vm, 1024);
if (IS_ERR(client[i].scratch[0])) { if (IS_ERR(client[i].scratch[0])) {
err = PTR_ERR(client[i].scratch[0]); err = PTR_ERR(client[i].scratch[0]);
i915_vm_put(vm);
kernel_context_close(c);
goto err; goto err;
} }
client[i].scratch[1] = create_scratch(vm, 1024); client[i].scratch[1] =
__vm_create_scratch_for_read(gt->vm, 4096);
if (IS_ERR(client[i].scratch[1])) { if (IS_ERR(client[i].scratch[1])) {
err = PTR_ERR(client[i].scratch[1]); err = PTR_ERR(client[i].scratch[1]);
i915_vma_unpin_and_release(&client[i].scratch[0], 0); i915_vma_unpin_and_release(&client[i].scratch[0], 0);
i915_vm_put(vm);
kernel_context_close(c);
goto err; goto err;
} }
client[i].ctx = c;
i915_vm_put(vm);
} }
for_each_engine(engine, gt, id) { for_each_engine(engine, gt, id) {
struct intel_context *ce[2];
if (!engine->kernel_context->vm) if (!engine->kernel_context->vm)
continue; continue;
if (!whitelist_writable_count(engine)) if (!whitelist_writable_count(engine))
continue; continue;
ce[0] = intel_context_create(engine);
if (IS_ERR(ce[0])) {
err = PTR_ERR(ce[0]);
break;
}
ce[1] = intel_context_create(engine);
if (IS_ERR(ce[1])) {
err = PTR_ERR(ce[1]);
intel_context_put(ce[0]);
break;
}
/* Read default values */ /* Read default values */
err = read_whitelisted_registers(client[0].ctx, engine, err = read_whitelisted_registers(ce[0], client[0].scratch[0]);
client[0].scratch[0]);
if (err) if (err)
goto err; goto err_ce;
/* Try to overwrite registers (should only affect ctx0) */ /* Try to overwrite registers (should only affect ctx0) */
err = scrub_whitelisted_registers(client[0].ctx, engine); err = scrub_whitelisted_registers(ce[0]);
if (err) if (err)
goto err; goto err_ce;
/* Read values from ctx1, we expect these to be defaults */ /* Read values from ctx1, we expect these to be defaults */
err = read_whitelisted_registers(client[1].ctx, engine, err = read_whitelisted_registers(ce[1], client[1].scratch[0]);
client[1].scratch[0]);
if (err) if (err)
goto err; goto err_ce;
/* Verify that both reads return the same default values */ /* Verify that both reads return the same default values */
err = check_whitelisted_registers(engine, err = check_whitelisted_registers(engine,
@@ -1094,31 +1087,29 @@ static int live_isolated_whitelist(void *arg)
client[1].scratch[0], client[1].scratch[0],
result_eq); result_eq);
if (err) if (err)
goto err; goto err_ce;
/* Read back the updated values in ctx0 */ /* Read back the updated values in ctx0 */
err = read_whitelisted_registers(client[0].ctx, engine, err = read_whitelisted_registers(ce[0], client[0].scratch[1]);
client[0].scratch[1]);
if (err) if (err)
goto err; goto err_ce;
/* User should be granted privilege to overwrite regs */ /* User should be granted privilege to overwrite regs */
err = check_whitelisted_registers(engine, err = check_whitelisted_registers(engine,
client[0].scratch[0], client[0].scratch[0],
client[0].scratch[1], client[0].scratch[1],
result_neq); result_neq);
err_ce:
intel_context_put(ce[1]);
intel_context_put(ce[0]);
if (err) if (err)
goto err; break;
} }
err: err:
for (i = 0; i < ARRAY_SIZE(client); i++) { for (i = 0; i < ARRAY_SIZE(client); i++) {
if (!client[i].ctx)
break;
i915_vma_unpin_and_release(&client[i].scratch[1], 0); i915_vma_unpin_and_release(&client[i].scratch[1], 0);
i915_vma_unpin_and_release(&client[i].scratch[0], 0); i915_vma_unpin_and_release(&client[i].scratch[0], 0);
kernel_context_close(client[i].ctx);
} }
if (igt_flush_test(gt->i915)) if (igt_flush_test(gt->i915))
@@ -1128,18 +1119,21 @@ err:
} }
static bool static bool
verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, verify_wa_lists(struct intel_gt *gt, struct wa_lists *lists,
const char *str) const char *str)
{ {
struct drm_i915_private *i915 = ctx->i915; struct intel_engine_cs *engine;
struct i915_gem_engines_iter it; enum intel_engine_id id;
struct intel_context *ce;
bool ok = true; bool ok = true;
ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str); ok &= wa_list_verify(gt->uncore, &lists->gt_wa_list, str);
for_each_gem_engine(ce, i915_gem_context_engines(ctx), it) { for_each_engine(engine, gt, id) {
enum intel_engine_id id = ce->engine->id; struct intel_context *ce;
ce = intel_context_create(engine);
if (IS_ERR(ce))
return false;
ok &= engine_wa_list_verify(ce, ok &= engine_wa_list_verify(ce,
&lists->engine[id].wa_list, &lists->engine[id].wa_list,
@@ -1148,6 +1142,8 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists,
ok &= engine_wa_list_verify(ce, ok &= engine_wa_list_verify(ce,
&lists->engine[id].ctx_wa_list, &lists->engine[id].ctx_wa_list,
str) == 0; str) == 0;
intel_context_put(ce);
} }
return ok; return ok;
@@ -1157,7 +1153,6 @@ static int
live_gpu_reset_workarounds(void *arg) live_gpu_reset_workarounds(void *arg)
{ {
struct intel_gt *gt = arg; struct intel_gt *gt = arg;
struct i915_gem_context *ctx;
intel_wakeref_t wakeref; intel_wakeref_t wakeref;
struct wa_lists lists; struct wa_lists lists;
bool ok; bool ok;
@@ -1165,12 +1160,6 @@ live_gpu_reset_workarounds(void *arg)
if (!intel_has_gpu_reset(gt)) if (!intel_has_gpu_reset(gt))
return 0; return 0;
ctx = kernel_context(gt->i915);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
i915_gem_context_lock_engines(ctx);
pr_info("Verifying after GPU reset...\n"); pr_info("Verifying after GPU reset...\n");
igt_global_reset_lock(gt); igt_global_reset_lock(gt);
@@ -1178,17 +1167,15 @@ live_gpu_reset_workarounds(void *arg)
reference_lists_init(gt, &lists); reference_lists_init(gt, &lists);
ok = verify_wa_lists(ctx, &lists, "before reset"); ok = verify_wa_lists(gt, &lists, "before reset");
if (!ok) if (!ok)
goto out; goto out;
intel_gt_reset(gt, ALL_ENGINES, "live_workarounds"); intel_gt_reset(gt, ALL_ENGINES, "live_workarounds");
ok = verify_wa_lists(ctx, &lists, "after reset"); ok = verify_wa_lists(gt, &lists, "after reset");
out: out:
i915_gem_context_unlock_engines(ctx);
kernel_context_close(ctx);
reference_lists_fini(gt, &lists); reference_lists_fini(gt, &lists);
intel_runtime_pm_put(gt->uncore->rpm, wakeref); intel_runtime_pm_put(gt->uncore->rpm, wakeref);
igt_global_reset_unlock(gt); igt_global_reset_unlock(gt);
@@ -1200,8 +1187,8 @@ static int
live_engine_reset_workarounds(void *arg) live_engine_reset_workarounds(void *arg)
{ {
struct intel_gt *gt = arg; struct intel_gt *gt = arg;
struct i915_gem_engines_iter it; struct intel_engine_cs *engine;
struct i915_gem_context *ctx; enum intel_engine_id id;
struct intel_context *ce; struct intel_context *ce;
struct igt_spinner spin; struct igt_spinner spin;
struct i915_request *rq; struct i915_request *rq;
@@ -1212,30 +1199,30 @@ live_engine_reset_workarounds(void *arg)
if (!intel_has_reset_engine(gt)) if (!intel_has_reset_engine(gt))
return 0; return 0;
ctx = kernel_context(gt->i915);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
igt_global_reset_lock(gt); igt_global_reset_lock(gt);
wakeref = intel_runtime_pm_get(gt->uncore->rpm); wakeref = intel_runtime_pm_get(gt->uncore->rpm);
reference_lists_init(gt, &lists); reference_lists_init(gt, &lists);
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { for_each_engine(engine, gt, id) {
struct intel_engine_cs *engine = ce->engine;
bool ok; bool ok;
pr_info("Verifying after %s reset...\n", engine->name); pr_info("Verifying after %s reset...\n", engine->name);
ce = intel_context_create(engine);
if (IS_ERR(ce)) {
ret = PTR_ERR(ce);
break;
}
ok = verify_wa_lists(ctx, &lists, "before reset"); ok = verify_wa_lists(gt, &lists, "before reset");
if (!ok) { if (!ok) {
ret = -ESRCH; ret = -ESRCH;
goto err; goto err;
} }
intel_engine_reset(engine, "live_workarounds"); intel_engine_reset(engine, "live_workarounds:idle");
ok = verify_wa_lists(ctx, &lists, "after idle reset"); ok = verify_wa_lists(gt, &lists, "after idle reset");
if (!ok) { if (!ok) {
ret = -ESRCH; ret = -ESRCH;
goto err; goto err;
@@ -1259,23 +1246,26 @@ live_engine_reset_workarounds(void *arg)
goto err; goto err;
} }
intel_engine_reset(engine, "live_workarounds"); intel_engine_reset(engine, "live_workarounds:active");
igt_spinner_end(&spin); igt_spinner_end(&spin);
igt_spinner_fini(&spin); igt_spinner_fini(&spin);
ok = verify_wa_lists(ctx, &lists, "after busy reset"); ok = verify_wa_lists(gt, &lists, "after busy reset");
if (!ok) { if (!ok) {
ret = -ESRCH; ret = -ESRCH;
goto err; goto err;
} }
}
err: err:
i915_gem_context_unlock_engines(ctx); intel_context_put(ce);
if (ret)
break;
}
reference_lists_fini(gt, &lists); reference_lists_fini(gt, &lists);
intel_runtime_pm_put(gt->uncore->rpm, wakeref); intel_runtime_pm_put(gt->uncore->rpm, wakeref);
igt_global_reset_unlock(gt); igt_global_reset_unlock(gt);
kernel_context_close(ctx);
igt_flush_test(gt->i915); igt_flush_test(gt->i915);


@@ -579,20 +579,8 @@ int intel_guc_reset_engine(struct intel_guc *guc,
*/ */
int intel_guc_resume(struct intel_guc *guc) int intel_guc_resume(struct intel_guc *guc)
{ {
u32 action[] = { /* XXX: to be implemented with submission interface rework */
INTEL_GUC_ACTION_EXIT_S_STATE,
GUC_POWER_D0,
};
/*
* If GuC communication is enabled but submission is not supported,
* we do not need to resume the GuC but we do need to enable the
* GuC communication on resume (above).
*/
if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc))
return 0; return 0;
return intel_guc_send(guc, action, ARRAY_SIZE(action));
} }
/** /**


@@ -47,13 +47,6 @@ struct intel_guc {
struct i915_vma *stage_desc_pool; struct i915_vma *stage_desc_pool;
void *stage_desc_pool_vaddr; void *stage_desc_pool_vaddr;
struct i915_vma *workqueue;
void *workqueue_vaddr;
spinlock_t wq_lock;
struct i915_vma *proc_desc;
void *proc_desc_vaddr;
/* Control params for fw initialization */ /* Control params for fw initialization */
u32 params[GUC_CTL_MAX_DWORDS]; u32 params[GUC_CTL_MAX_DWORDS];


@@ -4,6 +4,7 @@
*/ */
#include "gt/intel_gt.h" #include "gt/intel_gt.h"
#include "gt/intel_lrc.h"
#include "intel_guc_ads.h" #include "intel_guc_ads.h"
#include "intel_uc.h" #include "intel_uc.h"
#include "i915_drv.h" #include "i915_drv.h"


@@ -76,7 +76,6 @@ static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
static int guc_wait_ucode(struct intel_uncore *uncore) static int guc_wait_ucode(struct intel_uncore *uncore)
{ {
struct drm_device *drm = &uncore->i915->drm;
u32 status; u32 status;
int ret; int ret;
@@ -89,11 +88,11 @@ static int guc_wait_ucode(struct intel_uncore *uncore)
* attempt the ucode load again if this happens.) * attempt the ucode load again if this happens.)
*/ */
ret = wait_for(guc_ready(uncore, &status), 100); ret = wait_for(guc_ready(uncore, &status), 100);
DRM_DEBUG_DRIVER("GuC status %#x\n", status);
if (ret) { if (ret) {
drm_err(drm, "GuC load failed: status = 0x%08X\n", status); struct drm_device *drm = &uncore->i915->drm;
drm_err(drm, "GuC load failed: status: Reset = %d, "
drm_dbg(drm, "GuC load failed: status = 0x%08X\n", status);
drm_dbg(drm, "GuC load failed: status: Reset = %d, "
"BootROM = 0x%02X, UKernel = 0x%02X, " "BootROM = 0x%02X, UKernel = 0x%02X, "
"MIA = 0x%02X, Auth = 0x%02X\n", "MIA = 0x%02X, Auth = 0x%02X\n",
REG_FIELD_GET(GS_MIA_IN_RESET, status), REG_FIELD_GET(GS_MIA_IN_RESET, status),
@@ -103,12 +102,12 @@ static int guc_wait_ucode(struct intel_uncore *uncore)
REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
drm_err(drm, "GuC firmware signature verification failed\n"); drm_dbg(drm, "GuC firmware signature verification failed\n");
ret = -ENOEXEC; ret = -ENOEXEC;
} }
if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) { if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) {
drm_err(drm, "GuC firmware exception. EIP: %#x\n", drm_dbg(drm, "GuC firmware exception. EIP: %#x\n",
intel_uncore_read(uncore, SOFT_SCRATCH(13))); intel_uncore_read(uncore, SOFT_SCRATCH(13)));
ret = -ENXIO; ret = -ENXIO;
} }


@@ -6,11 +6,14 @@
#include <linux/circ_buf.h> #include <linux/circ_buf.h>
#include "gem/i915_gem_context.h" #include "gem/i915_gem_context.h"
#include "gt/gen8_engine_cs.h"
#include "gt/intel_breadcrumbs.h"
#include "gt/intel_context.h" #include "gt/intel_context.h"
#include "gt/intel_engine_pm.h" #include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h" #include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h" #include "gt/intel_gt_pm.h"
#include "gt/intel_lrc_reg.h" #include "gt/intel_lrc.h"
#include "gt/intel_mocs.h"
#include "gt/intel_ring.h" #include "gt/intel_ring.h"
#include "intel_guc_submission.h" #include "intel_guc_submission.h"
@@ -54,6 +57,8 @@
* *
*/ */
#define GUC_REQUEST_SIZE 64 /* bytes */
static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{ {
return rb_entry(rb, struct i915_priolist, node); return rb_entry(rb, struct i915_priolist, node);
@@ -66,58 +71,6 @@ static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id)
return &base[id]; return &base[id];
} }
static int guc_workqueue_create(struct intel_guc *guc)
{
return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue,
&guc->workqueue_vaddr);
}
static void guc_workqueue_destroy(struct intel_guc *guc)
{
i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP);
}
/*
* Initialise the process descriptor shared with the GuC firmware.
*/
static int guc_proc_desc_create(struct intel_guc *guc)
{
const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc));
return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc,
&guc->proc_desc_vaddr);
}
static void guc_proc_desc_destroy(struct intel_guc *guc)
{
i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP);
}
static void guc_proc_desc_init(struct intel_guc *guc)
{
struct guc_process_desc *desc;
desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc));
/*
* XXX: pDoorbell and WQVBaseAddress are pointers in process address
* space for ring3 clients (set them as in mmap_ioctl) or kernel
* space for kernel clients (map on demand instead? May make debug
* easier to have it mapped).
*/
desc->wq_base_addr = 0;
desc->db_base_addr = 0;
desc->wq_size_bytes = GUC_WQ_SIZE;
desc->wq_status = WQ_STATUS_ACTIVE;
desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
}
static void guc_proc_desc_fini(struct intel_guc *guc)
{
memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc));
}
static int guc_stage_desc_pool_create(struct intel_guc *guc) static int guc_stage_desc_pool_create(struct intel_guc *guc)
{ {
u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) *
@@ -153,8 +106,6 @@ static void guc_stage_desc_init(struct intel_guc *guc)
desc->stage_id = 0; desc->stage_id = 0;
desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL;
desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc);
desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue);
desc->wq_size = GUC_WQ_SIZE; desc->wq_size = GUC_WQ_SIZE;
} }
@@ -166,62 +117,9 @@ static void guc_stage_desc_fini(struct intel_guc *guc)
memset(desc, 0, sizeof(*desc)); memset(desc, 0, sizeof(*desc));
} }
/* Construct a Work Item and append it to the GuC's Work Queue */
static void guc_wq_item_append(struct intel_guc *guc,
u32 target_engine, u32 context_desc,
u32 ring_tail, u32 fence_id)
{
/* wqi_len is in DWords, and does not include the one-word header */
const size_t wqi_size = sizeof(struct guc_wq_item);
const u32 wqi_len = wqi_size / sizeof(u32) - 1;
struct guc_process_desc *desc = guc->proc_desc_vaddr;
struct guc_wq_item *wqi;
u32 wq_off;
lockdep_assert_held(&guc->wq_lock);
/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
* should not have the case where structure wqi is across page, neither
* wrapped to the beginning. This simplifies the implementation below.
*
* XXX: if not the case, we need save data to a temp wqi and copy it to
* workqueue buffer dw by dw.
*/
BUILD_BUG_ON(wqi_size != 16);
/* We expect the WQ to be active if we're appending items to it */
GEM_BUG_ON(desc->wq_status != WQ_STATUS_ACTIVE);
/* Free space is guaranteed. */
wq_off = READ_ONCE(desc->tail);
GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head),
GUC_WQ_SIZE) < wqi_size);
GEM_BUG_ON(wq_off & (wqi_size - 1));
wqi = guc->workqueue_vaddr + wq_off;
/* Now fill in the 4-word work queue item */
wqi->header = WQ_TYPE_INORDER |
(wqi_len << WQ_LEN_SHIFT) |
(target_engine << WQ_TARGET_SHIFT) |
WQ_NO_WCFLUSH_WAIT;
wqi->context_desc = context_desc;
wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT;
GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX);
wqi->fence_id = fence_id;
/* Make the update visible to GuC */
WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1));
}
static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
{ {
struct intel_engine_cs *engine = rq->engine; /* Leaving stub as this function will be used in future patches */
u32 ctx_desc = rq->context->lrc.ccid;
u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64);
guc_wq_item_append(guc, engine->guc_id, ctx_desc,
ring_tail, rq->fence.seqno);
} }
/* /*
@@ -244,16 +142,12 @@ static void guc_submit(struct intel_engine_cs *engine,
{ {
struct intel_guc *guc = &engine->gt->uc.guc; struct intel_guc *guc = &engine->gt->uc.guc;
spin_lock(&guc->wq_lock);
do { do {
struct i915_request *rq = *out++; struct i915_request *rq = *out++;
flush_ggtt_writes(rq->ring->vma); flush_ggtt_writes(rq->ring->vma);
guc_add_request(guc, rq); guc_add_request(guc, rq);
} while (out != end); } while (out != end);
spin_unlock(&guc->wq_lock);
} }
static inline int rq_prio(const struct i915_request *rq) static inline int rq_prio(const struct i915_request *rq)
@@ -388,17 +282,26 @@ static void guc_reset_prepare(struct intel_engine_cs *engine)
__tasklet_disable_sync_once(&execlists->tasklet); __tasklet_disable_sync_once(&execlists->tasklet);
} }
static void static void guc_reset_state(struct intel_context *ce,
cancel_port_requests(struct intel_engine_execlists * const execlists) struct intel_engine_cs *engine,
u32 head,
bool scrub)
{ {
struct i915_request * const *port, *rq; GEM_BUG_ON(!intel_context_is_pinned(ce));
/* Note we are only using the inflight and not the pending queue */ /*
* We want a simple context + ring to execute the breadcrumb update.
* We cannot rely on the context being intact across the GPU hang,
* so clear it and rebuild just what we need for the breadcrumb.
* All pending requests for this context will be zapped, and any
* future request will be after userspace has had the opportunity
* to recreate its own state.
*/
if (scrub)
lrc_init_regs(ce, engine, true);
for (port = execlists->active; (rq = *port); port++) /* Rerun the request; its payload has been neutered (if guilty). */
schedule_out(rq); lrc_update_regs(ce, engine, head);
execlists->active =
memset(execlists->inflight, 0, sizeof(execlists->inflight));
} }
static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
@@ -409,8 +312,6 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
spin_lock_irqsave(&engine->active.lock, flags); spin_lock_irqsave(&engine->active.lock, flags);
cancel_port_requests(execlists);
/* Push back any incomplete requests for replay after the reset. */ /* Push back any incomplete requests for replay after the reset. */
rq = execlists_unwind_incomplete_requests(execlists); rq = execlists_unwind_incomplete_requests(execlists);
if (!rq) if (!rq)
@@ -420,7 +321,7 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled)
stalled = false; stalled = false;
__i915_request_reset(rq, stalled); __i915_request_reset(rq, stalled);
intel_lr_context_reset(engine, rq->context, rq->head, stalled); guc_reset_state(rq->context, engine, rq->head, stalled);
out_unlock: out_unlock:
spin_unlock_irqrestore(&engine->active.lock, flags); spin_unlock_irqrestore(&engine->active.lock, flags);
@@ -451,9 +352,6 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
*/ */
spin_lock_irqsave(&engine->active.lock, flags); spin_lock_irqsave(&engine->active.lock, flags);
/* Cancel the requests on the HW and clear the ELSP tracker. */
cancel_port_requests(execlists);
/* Mark all executing requests as skipped. */ /* Mark all executing requests as skipped. */
list_for_each_entry(rq, &engine->active.requests, sched.link) { list_for_each_entry(rq, &engine->active.requests, sched.link) {
i915_request_set_error_once(rq, -EIO); i915_request_set_error_once(rq, -EIO);
@@ -496,12 +394,6 @@ static void guc_reset_finish(struct intel_engine_cs *engine)
atomic_read(&execlists->tasklet.count)); atomic_read(&execlists->tasklet.count));
} }
/*
* Everything below here is concerned with setup & teardown, and is
* therefore not part of the somewhat time-critical batch-submission
* path of guc_submit() above.
*/
/* /*
* Set up the memory resources to be shared with the GuC (via the GGTT) * Set up the memory resources to be shared with the GuC (via the GGTT)
* at firmware loading time. * at firmware loading time.
@@ -522,30 +414,12 @@ int intel_guc_submission_init(struct intel_guc *guc)
*/ */
GEM_BUG_ON(!guc->stage_desc_pool); GEM_BUG_ON(!guc->stage_desc_pool);
ret = guc_workqueue_create(guc);
if (ret)
goto err_pool;
ret = guc_proc_desc_create(guc);
if (ret)
goto err_workqueue;
spin_lock_init(&guc->wq_lock);
return 0; return 0;
err_workqueue:
guc_workqueue_destroy(guc);
err_pool:
guc_stage_desc_pool_destroy(guc);
return ret;
} }
void intel_guc_submission_fini(struct intel_guc *guc) void intel_guc_submission_fini(struct intel_guc *guc)
{ {
if (guc->stage_desc_pool) { if (guc->stage_desc_pool) {
guc_proc_desc_destroy(guc);
guc_workqueue_destroy(guc);
guc_stage_desc_pool_destroy(guc); guc_stage_desc_pool_destroy(guc);
} }
} }
@@ -576,33 +450,186 @@ static void guc_interrupts_release(struct intel_gt *gt)
intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask); intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask);
} }
static void guc_set_default_submission(struct intel_engine_cs *engine) static int guc_context_alloc(struct intel_context *ce)
{
return lrc_alloc(ce, ce->engine);
}
static int guc_context_pre_pin(struct intel_context *ce,
struct i915_gem_ww_ctx *ww,
void **vaddr)
{
return lrc_pre_pin(ce, ce->engine, ww, vaddr);
}
static int guc_context_pin(struct intel_context *ce, void *vaddr)
{
return lrc_pin(ce, ce->engine, vaddr);
}
static const struct intel_context_ops guc_context_ops = {
.alloc = guc_context_alloc,
.pre_pin = guc_context_pre_pin,
.pin = guc_context_pin,
.unpin = lrc_unpin,
.post_unpin = lrc_post_unpin,
.enter = intel_context_enter_engine,
.exit = intel_context_exit_engine,
.reset = lrc_reset,
.destroy = lrc_destroy,
};
static int guc_request_alloc(struct i915_request *request)
{
int ret;
GEM_BUG_ON(!intel_context_is_pinned(request->context));
/*
* Flush enough space to reduce the likelihood of waiting after
* we start building the request - in which case we will just
* have to repeat work.
*/
request->reserved_space += GUC_REQUEST_SIZE;
/*
* Note that after this point, we have committed to using
* this request as it is being used to both track the
* state of engine initialisation and liveness of the
* golden renderstate above. Think twice before you try
* to cancel/unwind this request now.
*/
/* Unconditionally invalidate GPU caches and TLBs. */
ret = request->engine->emit_flush(request, EMIT_INVALIDATE);
if (ret)
return ret;
request->reserved_space -= GUC_REQUEST_SIZE;
return 0;
}
static inline void queue_request(struct intel_engine_cs *engine,
struct i915_request *rq,
int prio)
{
GEM_BUG_ON(!list_empty(&rq->sched.link));
list_add_tail(&rq->sched.link,
i915_sched_lookup_priolist(engine, prio));
set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
}
static void guc_submit_request(struct i915_request *rq)
{
struct intel_engine_cs *engine = rq->engine;
unsigned long flags;
/* Will be called from irq-context when using foreign fences. */
spin_lock_irqsave(&engine->active.lock, flags);
queue_request(engine, rq, rq_prio(rq));
GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
GEM_BUG_ON(list_empty(&rq->sched.link));
tasklet_hi_schedule(&engine->execlists.tasklet);
spin_unlock_irqrestore(&engine->active.lock, flags);
}
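
The request path added above queues the request into the engine's priority list under engine->active.lock and then schedules the submission tasklet. As a loose user-space analogue (not driver API; the sorted list, the worker thread standing in for the tasklet, and all names below are illustrative), the same shape looks like:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

struct request {
        int prio;
        int seqno;
        struct request *next;
};

static struct request *queue;            /* sorted, highest prio first */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t kick = PTHREAD_COND_INITIALIZER;

/* Analogue of guc_submit_request(): may be called from any context. */
static void submit_request(struct request *rq)
{
        struct request **p;

        pthread_mutex_lock(&lock);
        for (p = &queue; *p && (*p)->prio >= rq->prio; p = &(*p)->next)
                ;
        rq->next = *p;
        *p = rq;
        pthread_cond_signal(&kick);      /* tasklet_hi_schedule() stand-in */
        pthread_mutex_unlock(&lock);
}

/* Analogue of the submission tasklet: drain in priority order. */
static void *submission_worker(void *arg)
{
        (void)arg;
        pthread_mutex_lock(&lock);
        for (;;) {
                while (!queue)
                        pthread_cond_wait(&kick, &lock);
                struct request *rq = queue;
                queue = rq->next;
                pthread_mutex_unlock(&lock);

                printf("executing seqno %d (prio %d)\n", rq->seqno, rq->prio);
                free(rq);

                pthread_mutex_lock(&lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t worker;

        pthread_create(&worker, NULL, submission_worker, NULL);
        for (int i = 0; i < 3; i++) {
                struct request *rq = malloc(sizeof(*rq));
                rq->prio = i % 2;
                rq->seqno = i;
                submit_request(rq);
        }
        sleep(1);               /* let the worker drain before exiting */
        return 0;
}
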
static void sanitize_hwsp(struct intel_engine_cs *engine)
{
struct intel_timeline *tl;
list_for_each_entry(tl, &engine->status_page.timelines, engine_link)
intel_timeline_reset_seqno(tl);
}
static void guc_sanitize(struct intel_engine_cs *engine)
{ {
/* /*
* We inherit a bunch of functions from execlists that we'd like * Poison residual state on resume, in case the suspend didn't!
* to keep using:
* *
* engine->submit_request = execlists_submit_request; * We have to assume that across suspend/resume (or other loss
* engine->cancel_requests = execlists_cancel_requests; * of control) that the contents of our pinned buffers has been
* engine->schedule = execlists_schedule; * lost, replaced by garbage. Since this doesn't always happen,
* * let's poison such state so that we more quickly spot when
* But we need to override the actual submission backend in order * we falsely assume it has been preserved.
* to talk to the GuC.
*/ */
intel_execlists_set_default_submission(engine); if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE);
/*
* The kernel_context HWSP is stored in the status_page. As above,
* that may be lost on resume/initialisation, and so we need to
* reset the value in the HWSP.
*/
sanitize_hwsp(engine);
/* And scrub the dirty cachelines for the HWSP */
clflush_cache_range(engine->status_page.addr, PAGE_SIZE);
}
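
guc_sanitize() above poisons the status page in debug builds precisely because suspend/resume may or may not preserve pinned state. A tiny stand-alone sketch of that poison-on-resume habit, with DEBUG_POISON and POISON_BYTE standing in for the driver's CONFIG_DRM_I915_DEBUG_GEM and POISON_INUSE:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define DEBUG_POISON 1           /* stand-in for a debug Kconfig option */
#define POISON_BYTE  0x5a        /* stand-in for POISON_INUSE */

/* Poison a buffer whose contents may not have survived suspend. */
static void sanitize_buffer(void *buf, size_t len)
{
#if DEBUG_POISON
        /* Make any "it survived resume" assumption fail loudly. */
        memset(buf, POISON_BYTE, len);
#endif
        /* ...followed by re-initialising only the fields we rely on. */
}

int main(void)
{
        uint8_t status_page[4096];

        sanitize_buffer(status_page, sizeof(status_page));
        printf("first byte after sanitize: 0x%02x\n",
               (unsigned int)status_page[0]);
        return 0;
}
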
static void setup_hwsp(struct intel_engine_cs *engine)
{
intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */
ENGINE_WRITE_FW(engine,
RING_HWS_PGA,
i915_ggtt_offset(engine->status_page.vma));
}
static void start_engine(struct intel_engine_cs *engine)
{
ENGINE_WRITE_FW(engine,
RING_MODE_GEN7,
_MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE));
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
ENGINE_POSTING_READ(engine, RING_MI_MODE);
}
static int guc_resume(struct intel_engine_cs *engine)
{
assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL);
intel_mocs_init_engine(engine);
intel_breadcrumbs_reset(engine->breadcrumbs);
setup_hwsp(engine);
start_engine(engine);
return 0;
}
static void guc_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = guc_submit_request;
engine->schedule = i915_schedule;
engine->execlists.tasklet.func = guc_submission_tasklet; engine->execlists.tasklet.func = guc_submission_tasklet;
/* do not use execlists park/unpark */
engine->park = engine->unpark = NULL;
engine->reset.prepare = guc_reset_prepare; engine->reset.prepare = guc_reset_prepare;
engine->reset.rewind = guc_reset_rewind; engine->reset.rewind = guc_reset_rewind;
engine->reset.cancel = guc_reset_cancel; engine->reset.cancel = guc_reset_cancel;
engine->reset.finish = guc_reset_finish; engine->reset.finish = guc_reset_finish;
engine->flags &= ~I915_ENGINE_SUPPORTS_STATS;
engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
/*
* TODO: GuC supports timeslicing and semaphores as well, but they're
* handled by the firmware so some minor tweaks are required before
* enabling.
*
* engine->flags |= I915_ENGINE_HAS_TIMESLICES;
* engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
*/
engine->emit_bb_start = gen8_emit_bb_start;
/* /*
* For the breadcrumb irq to work we need the interrupts to stay * For the breadcrumb irq to work we need the interrupts to stay
@@ -613,35 +640,92 @@ static void guc_set_default_submission(struct intel_engine_cs *engine)
GEM_BUG_ON(engine->irq_enable || engine->irq_disable); GEM_BUG_ON(engine->irq_enable || engine->irq_disable);
} }
void intel_guc_submission_enable(struct intel_guc *guc) static void guc_release(struct intel_engine_cs *engine)
{ {
struct intel_gt *gt = guc_to_gt(guc); engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
struct intel_engine_cs *engine;
enum intel_engine_id id; tasklet_kill(&engine->execlists.tasklet);
intel_engine_cleanup_common(engine);
lrc_fini_wa_ctx(engine);
}
static void guc_default_vfuncs(struct intel_engine_cs *engine)
{
/* Default vfuncs which can be overridden by each engine. */
engine->resume = guc_resume;
engine->cops = &guc_context_ops;
engine->request_alloc = guc_request_alloc;
engine->emit_flush = gen8_emit_flush_xcs;
engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs;
if (INTEL_GEN(engine->i915) >= 12) {
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs;
engine->emit_flush = gen12_emit_flush_xcs;
}
engine->set_default_submission = guc_set_default_submission;
}
static void rcs_submission_override(struct intel_engine_cs *engine)
{
switch (INTEL_GEN(engine->i915)) {
case 12:
engine->emit_flush = gen12_emit_flush_rcs;
engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
break;
case 11:
engine->emit_flush = gen11_emit_flush_rcs;
engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
break;
default:
engine->emit_flush = gen8_emit_flush_rcs;
engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs;
break;
}
}
static inline void guc_default_irqs(struct intel_engine_cs *engine)
{
engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT;
}
int intel_guc_submission_setup(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
/* /*
* We're using GuC work items for submitting work through GuC. Since * The setup relies on several assumptions (e.g. irqs always enabled)
* we're coalescing multiple requests from a single context into a * that are only valid on gen11+
* single work item prior to assigning it to execlist_port, we can
* never have more work items than the total number of ports (for all
* engines). The GuC firmware is controlling the HEAD of work queue,
* and it is guaranteed that it will remove the work item from the
* queue before our request is completed.
*/ */
BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) * GEM_BUG_ON(INTEL_GEN(i915) < 11);
sizeof(struct guc_wq_item) *
I915_NUM_ENGINES > GUC_WQ_SIZE);
guc_proc_desc_init(guc); tasklet_init(&engine->execlists.tasklet,
guc_submission_tasklet, (unsigned long)engine);
guc_default_vfuncs(engine);
guc_default_irqs(engine);
if (engine->class == RENDER_CLASS)
rcs_submission_override(engine);
lrc_init_wa_ctx(engine);
/* Finally, take ownership and responsibility for cleanup! */
engine->sanitize = guc_sanitize;
engine->release = guc_release;
return 0;
}
void intel_guc_submission_enable(struct intel_guc *guc)
{
guc_stage_desc_init(guc); guc_stage_desc_init(guc);
/* Take over from manual control of ELSP (execlists) */ /* Take over from manual control of ELSP (execlists) */
guc_interrupts_capture(gt); guc_interrupts_capture(guc_to_gt(guc));
for_each_engine(engine, gt, id) {
engine->set_default_submission = guc_set_default_submission;
engine->set_default_submission(engine);
}
} }
void intel_guc_submission_disable(struct intel_guc *guc) void intel_guc_submission_disable(struct intel_guc *guc)
@@ -655,7 +739,6 @@ void intel_guc_submission_disable(struct intel_guc *guc)
guc_interrupts_release(gt); guc_interrupts_release(gt);
guc_stage_desc_fini(guc); guc_stage_desc_fini(guc);
guc_proc_desc_fini(guc);
} }
static bool __guc_submission_selected(struct intel_guc *guc) static bool __guc_submission_selected(struct intel_guc *guc)


@@ -19,6 +19,7 @@ void intel_guc_submission_disable(struct intel_guc *guc);
void intel_guc_submission_fini(struct intel_guc *guc); void intel_guc_submission_fini(struct intel_guc *guc);
int intel_guc_preempt_work_create(struct intel_guc *guc); int intel_guc_preempt_work_create(struct intel_guc *guc);
void intel_guc_preempt_work_destroy(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc);
int intel_guc_submission_setup(struct intel_engine_cs *engine);
bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine);
static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) static inline bool intel_guc_submission_is_supported(struct intel_guc *guc)


@@ -175,19 +175,15 @@ static void guc_get_mmio_msg(struct intel_guc *guc)
static void guc_handle_mmio_msg(struct intel_guc *guc) static void guc_handle_mmio_msg(struct intel_guc *guc)
{ {
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
/* we need communication to be enabled to reply to GuC */ /* we need communication to be enabled to reply to GuC */
GEM_BUG_ON(!guc_communication_enabled(guc)); GEM_BUG_ON(!guc_communication_enabled(guc));
if (!guc->mmio_msg) spin_lock_irq(&guc->irq_lock);
return; if (guc->mmio_msg) {
spin_lock_irq(&i915->irq_lock);
intel_guc_to_host_process_recv_msg(guc, &guc->mmio_msg, 1); intel_guc_to_host_process_recv_msg(guc, &guc->mmio_msg, 1);
spin_unlock_irq(&i915->irq_lock);
guc->mmio_msg = 0; guc->mmio_msg = 0;
}
spin_unlock_irq(&guc->irq_lock);
} }
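
The hunk above switches guc_handle_mmio_msg() to the GuC's own irq_lock and folds the guc->mmio_msg test under it, so the pending message is tested and consumed atomically. A stand-alone sketch of that check-under-the-lock pattern, with a pthread mutex standing in for the spinlock (all names illustrative):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t irq_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int mmio_msg;    /* written by the interrupt path */

static void process_msg(unsigned int msg)
{
        printf("handling message 0x%x\n", msg);
}

/* Racy: mmio_msg may change between the test and taking the lock. */
static void handle_msg_racy(void)
{
        if (!mmio_msg)
                return;
        pthread_mutex_lock(&irq_lock);
        process_msg(mmio_msg);
        mmio_msg = 0;
        pthread_mutex_unlock(&irq_lock);
}

/* Safe: the pending message is tested and consumed under the lock. */
static void handle_msg(void)
{
        pthread_mutex_lock(&irq_lock);
        if (mmio_msg) {
                process_msg(mmio_msg);
                mmio_msg = 0;
        }
        pthread_mutex_unlock(&irq_lock);
}

int main(void)
{
        mmio_msg = 0x10;
        handle_msg_racy();      /* shown only for contrast */
        mmio_msg = 0x20;
        handle_msg();
        return 0;
}
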
static void guc_reset_interrupts(struct intel_guc *guc) static void guc_reset_interrupts(struct intel_guc *guc)
@@ -207,7 +203,8 @@ static void guc_disable_interrupts(struct intel_guc *guc)
static int guc_enable_communication(struct intel_guc *guc) static int guc_enable_communication(struct intel_guc *guc)
{ {
struct drm_i915_private *i915 = guc_to_gt(guc)->i915; struct intel_gt *gt = guc_to_gt(guc);
struct drm_i915_private *i915 = gt->i915;
int ret; int ret;
GEM_BUG_ON(guc_communication_enabled(guc)); GEM_BUG_ON(guc_communication_enabled(guc));
@@ -227,9 +224,9 @@ static int guc_enable_communication(struct intel_guc *guc)
guc_enable_interrupts(guc); guc_enable_interrupts(guc);
/* check for CT messages received before we enabled interrupts */ /* check for CT messages received before we enabled interrupts */
spin_lock_irq(&i915->irq_lock); spin_lock_irq(&gt->irq_lock);
intel_guc_ct_event_handler(&guc->ct); intel_guc_ct_event_handler(&guc->ct);
spin_unlock_irq(&i915->irq_lock); spin_unlock_irq(&gt->irq_lock);
drm_dbg(&i915->drm, "GuC communication enabled\n"); drm_dbg(&i915->drm, "GuC communication enabled\n");


@@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \
fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), huc_def(icl, 9, 0, 0)) \
fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \ fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \
fw_def(COMETLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \
fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \
fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \ fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \
fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \


@@ -37,6 +37,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include "i915_drv.h" #include "i915_drv.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_ring.h" #include "gt/intel_ring.h"
#include "gvt.h" #include "gvt.h"
#include "i915_pvinfo.h" #include "i915_pvinfo.h"


@@ -1651,7 +1651,7 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
return 0; return 0;
} }
/** /*
* FixMe: * FixMe:
* If guest fills non-priv batch buffer on ApolloLake/Broxton as Mesa i965 did: * If guest fills non-priv batch buffer on ApolloLake/Broxton as Mesa i965 did:
* 717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.) * 717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.)


@@ -35,6 +35,7 @@
#include "i915_drv.h" #include "i915_drv.h"
#include "gt/intel_context.h" #include "gt/intel_context.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_ring.h" #include "gt/intel_ring.h"
#include "gvt.h" #include "gvt.h"
#include "trace.h" #include "trace.h"


@@ -39,6 +39,7 @@
#include <linux/types.h> #include <linux/types.h>
#include "gt/intel_engine_types.h" #include "gt/intel_engine_types.h"
#include "gt/intel_lrc_reg.h"
#include "i915_reg.h" #include "i915_reg.h"
struct i915_request; struct i915_request;


@@ -37,6 +37,8 @@
#include "gem/i915_gem_pm.h" #include "gem/i915_gem_pm.h"
#include "gt/intel_context.h" #include "gt/intel_context.h"
#include "gt/intel_execlists_submission.h"
#include "gt/intel_lrc.h"
#include "gt/intel_ring.h" #include "gt/intel_ring.h"
#include "i915_drv.h" #include "i915_drv.h"


@@ -159,7 +159,6 @@ __active_retire(struct i915_active *ref)
GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node); GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node);
/* Make the cached node available for reuse with any timeline */ /* Make the cached node available for reuse with any timeline */
if (IS_ENABLED(CONFIG_64BIT))
ref->cache->timeline = 0; /* needs cmpxchg(u64) */ ref->cache->timeline = 0; /* needs cmpxchg(u64) */
} }
@@ -256,7 +255,6 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
if (cached == idx) if (cached == idx)
return it; return it;
#ifdef CONFIG_64BIT /* for cmpxchg(u64) */
/* /*
* An unclaimed cache [.timeline=0] can only be claimed once. * An unclaimed cache [.timeline=0] can only be claimed once.
* *
@@ -267,9 +265,8 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx)
* only the winner of that race will cmpxchg return the old * only the winner of that race will cmpxchg return the old
* value of 0). * value of 0).
*/ */
if (!cached && !cmpxchg(&it->timeline, 0, idx)) if (!cached && !cmpxchg64(&it->timeline, 0, idx))
return it; return it;
#endif
} }
BUILD_BUG_ON(offsetof(typeof(*it), node)); BUILD_BUG_ON(offsetof(typeof(*it), node));
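
The i915_active change above swaps the CONFIG_64BIT-only cmpxchg() for cmpxchg64(), so 32-bit kernels can also claim the idle cache slot exactly once. A stand-alone sketch of that claim-once idea using C11 atomics on a 64-bit field (names illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct cache_node {
        _Atomic uint64_t timeline;       /* 0 means "unclaimed" */
};

/* Claim an unclaimed cache node for @idx; only one caller can win. */
static bool claim_cache(struct cache_node *node, uint64_t idx)
{
        uint64_t expected = 0;

        /* Equivalent in spirit to !cmpxchg64(&node->timeline, 0, idx). */
        return atomic_compare_exchange_strong(&node->timeline, &expected, idx);
}

int main(void)
{
        struct cache_node node = { .timeline = 0 };

        printf("first claim for 42: %d\n", claim_cache(&node, 42));  /* 1 */
        printf("second claim for 43: %d\n", claim_cache(&node, 43)); /* 0 */
        return 0;
}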


@@ -26,6 +26,7 @@
*/ */
#include "gt/intel_engine.h" #include "gt/intel_engine.h"
#include "gt/intel_gpu_commands.h"
#include "i915_drv.h" #include "i915_drv.h"
#include "i915_memcpy.h" #include "i915_memcpy.h"
@@ -1166,7 +1167,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
} }
} }
if (IS_ERR(src)) { if (IS_ERR(src)) {
unsigned long x, n; unsigned long x, n, remain;
void *ptr; void *ptr;
/* /*
@@ -1177,14 +1178,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
* We don't care about copying too much here as we only * We don't care about copying too much here as we only
* validate up to the end of the batch. * validate up to the end of the batch.
*/ */
remain = length;
if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
length = round_up(length, remain = round_up(remain,
boot_cpu_data.x86_clflush_size); boot_cpu_data.x86_clflush_size);
ptr = dst; ptr = dst;
x = offset_in_page(offset); x = offset_in_page(offset);
for (n = offset >> PAGE_SHIFT; length; n++) { for (n = offset >> PAGE_SHIFT; remain; n++) {
int len = min(length, PAGE_SIZE - x); int len = min(remain, PAGE_SIZE - x);
src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
if (needs_clflush) if (needs_clflush)
@@ -1193,13 +1195,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
kunmap_atomic(src); kunmap_atomic(src);
ptr += len; ptr += len;
length -= len; remain -= len;
x = 0; x = 0;
} }
} }
i915_gem_object_unpin_pages(src_obj); i915_gem_object_unpin_pages(src_obj);
memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32));
/* dst_obj is returned with vmap pinned */ /* dst_obj is returned with vmap pinned */
return dst; return dst;
} }
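
The copy_batch() change above keeps the possibly clflush-rounded copy length in a separate 'remain' while 'length' continues to describe the real batch, and the memset32() then clears the shadow beyond that length. A rough stand-alone sketch of the copy-then-clear-the-tail pattern (the page size, buffers and helper below are assumptions for illustration):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define FAKE_PAGE_SIZE 64       /* tiny "page" so the example stays small */

/*
 * Copy @length bytes of @src into @dst (capacity @dst_size) in
 * page-sized chunks, then zero the remainder of @dst so no stale
 * data survives past the copied batch.
 */
static void copy_and_clear_tail(uint8_t *dst, size_t dst_size,
                                const uint8_t *src, size_t length)
{
        size_t remain = length; /* may be rounded up for flushing upstream */
        size_t off = 0;

        while (remain) {
                size_t len = remain < FAKE_PAGE_SIZE ? remain : FAKE_PAGE_SIZE;

                memcpy(dst + off, src + off, len);
                off += len;
                remain -= len;
        }

        /* Clear everything after the batch, like the memset32() above. */
        memset(dst + length, 0, dst_size - length);
}

int main(void)
{
        uint8_t src[100], dst[256];

        memset(src, 0xab, sizeof(src));
        memset(dst, 0xff, sizeof(dst)); /* pretend the shadow held garbage */

        copy_and_clear_tail(dst, sizeof(dst), src, sizeof(src));
        printf("dst[99]=0x%02x dst[100]=0x%02x\n",
               (unsigned int)dst[99], (unsigned int)dst[100]);
        return 0;
}
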
@@ -1392,11 +1396,6 @@ static unsigned long *alloc_whitelist(u32 batch_length)
#define LENGTH_BIAS 2 #define LENGTH_BIAS 2
static bool shadow_needs_clflush(struct drm_i915_gem_object *obj)
{
return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
}
/** /**
* intel_engine_cmd_parser() - parse a batch buffer for privilege violations * intel_engine_cmd_parser() - parse a batch buffer for privilege violations
* @engine: the engine on which the batch is to execute * @engine: the engine on which the batch is to execute
@@ -1538,16 +1537,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
ret = 0; /* allow execution */ ret = 0; /* allow execution */
} }
} }
if (shadow_needs_clflush(shadow->obj))
drm_clflush_virt_range(batch_end, 8);
} }
if (shadow_needs_clflush(shadow->obj)) { i915_gem_object_flush_map(shadow->obj);
void *ptr = page_mask_bits(shadow->obj->mm.mapping);
drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
}
if (!IS_ERR_OR_NULL(jump_whitelist)) if (!IS_ERR_OR_NULL(jump_whitelist))
kfree(jump_whitelist); kfree(jump_whitelist);


@@ -45,6 +45,7 @@
#include "i915_debugfs.h" #include "i915_debugfs.h"
#include "i915_debugfs_params.h" #include "i915_debugfs_params.h"
#include "i915_irq.h" #include "i915_irq.h"
#include "i915_scheduler.h"
#include "i915_trace.h" #include "i915_trace.h"
#include "intel_pm.h" #include "intel_pm.h"
#include "intel_sideband.h" #include "intel_sideband.h"
@@ -634,27 +635,27 @@ static int i915_frequency_info(struct seq_file *m, void *unused)
seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit);
seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); seq_printf(m, "RPNSWREQ: %dMHz\n", reqf);
seq_printf(m, "CAGF: %dMHz\n", cagf); seq_printf(m, "CAGF: %dMHz\n", cagf);
seq_printf(m, "RP CUR UP EI: %d (%dns)\n", seq_printf(m, "RP CUR UP EI: %d (%lldns)\n",
rpupei, rpupei,
intel_gt_pm_interval_to_ns(&dev_priv->gt, rpupei)); intel_gt_pm_interval_to_ns(&dev_priv->gt, rpupei));
seq_printf(m, "RP CUR UP: %d (%dun)\n", seq_printf(m, "RP CUR UP: %d (%lldun)\n",
rpcurup, rpcurup,
intel_gt_pm_interval_to_ns(&dev_priv->gt, rpcurup)); intel_gt_pm_interval_to_ns(&dev_priv->gt, rpcurup));
seq_printf(m, "RP PREV UP: %d (%dns)\n", seq_printf(m, "RP PREV UP: %d (%lldns)\n",
rpprevup, rpprevup,
intel_gt_pm_interval_to_ns(&dev_priv->gt, rpprevup)); intel_gt_pm_interval_to_ns(&dev_priv->gt, rpprevup));
seq_printf(m, "Up threshold: %d%%\n", seq_printf(m, "Up threshold: %d%%\n",
rps->power.up_threshold); rps->power.up_threshold);
seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n", seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n",
rpdownei, rpdownei,
intel_gt_pm_interval_to_ns(&dev_priv->gt, intel_gt_pm_interval_to_ns(&dev_priv->gt,
rpdownei)); rpdownei));
seq_printf(m, "RP CUR DOWN: %d (%dns)\n", seq_printf(m, "RP CUR DOWN: %d (%lldns)\n",
rpcurdown, rpcurdown,
intel_gt_pm_interval_to_ns(&dev_priv->gt, intel_gt_pm_interval_to_ns(&dev_priv->gt,
rpcurdown)); rpcurdown));
seq_printf(m, "RP PREV DOWN: %d (%dns)\n", seq_printf(m, "RP PREV DOWN: %d (%lldns)\n",
rpprevdown, rpprevdown,
intel_gt_pm_interval_to_ns(&dev_priv->gt, intel_gt_pm_interval_to_ns(&dev_priv->gt,
rpprevdown)); rpprevdown));
@@ -810,7 +811,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
intel_gpu_freq(rps, rps->efficient_freq), intel_gpu_freq(rps, rps->efficient_freq),
intel_gpu_freq(rps, rps->boost_freq)); intel_gpu_freq(rps, rps->boost_freq));
seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts));
return 0; return 0;
} }
@@ -850,24 +851,28 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused)
static int i915_engine_info(struct seq_file *m, void *unused) static int i915_engine_info(struct seq_file *m, void *unused)
{ {
struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_i915_private *i915 = node_to_i915(m->private);
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
intel_wakeref_t wakeref; intel_wakeref_t wakeref;
struct drm_printer p; struct drm_printer p;
wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); wakeref = intel_runtime_pm_get(&i915->runtime_pm);
seq_printf(m, "GT awake? %s [%d]\n", seq_printf(m, "GT awake? %s [%d], %llums\n",
yesno(dev_priv->gt.awake), yesno(i915->gt.awake),
atomic_read(&dev_priv->gt.wakeref.count)); atomic_read(&i915->gt.wakeref.count),
seq_printf(m, "CS timestamp frequency: %u Hz\n", ktime_to_ms(intel_gt_get_awake_time(&i915->gt)));
RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_hz); seq_printf(m, "CS timestamp frequency: %u Hz, %d ns\n",
i915->gt.clock_frequency,
i915->gt.clock_period_ns);
p = drm_seq_file_printer(m); p = drm_seq_file_printer(m);
for_each_uabi_engine(engine, dev_priv) for_each_uabi_engine(engine, i915)
intel_engine_dump(engine, &p, "%s\n", engine->name); intel_engine_dump(engine, &p, "%s\n", engine->name);
intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); intel_gt_show_timelines(&i915->gt, &p, i915_request_show_with_schedule);
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
return 0; return 0;
} }
@@ -945,7 +950,7 @@ i915_perf_noa_delay_set(void *data, u64 val)
* This would lead to infinite waits as we're doing timestamp * This would lead to infinite waits as we're doing timestamp
* difference on the CS with only 32bits. * difference on the CS with only 32bits.
*/ */
if (i915_cs_timestamp_ns_to_ticks(i915, val) > U32_MAX) if (intel_gt_ns_to_clock_interval(&i915->gt, val) > U32_MAX)
return -EINVAL; return -EINVAL;
atomic64_set(&i915->perf.noa_programming_delay, val); atomic64_set(&i915->perf.noa_programming_delay, val);
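
The check above now goes through intel_gt_ns_to_clock_interval() rather than the removed i915_cs_timestamp_ns_to_ticks() helper, but the arithmetic is the same shape as the old DIV_ROUND_UP_ULL(ns * freq, NSEC_PER_SEC) seen later in this diff: convert a nanosecond delay into CS clock ticks and reject values that cannot be expressed in 32 bits. A stand-alone sketch, assuming an example 19.2 MHz command-streamer clock:

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* Round-up ns -> ticks conversion for a clock running at @freq_hz. */
static uint64_t ns_to_ticks(uint64_t ns, uint64_t freq_hz)
{
        return (ns * freq_hz + NSEC_PER_SEC - 1) / NSEC_PER_SEC;
}

int main(void)
{
        const uint64_t freq_hz = 19200000;      /* example 19.2 MHz CS clock */
        const uint64_t delay_ns = 500000000;    /* 500 ms */
        uint64_t ticks = ns_to_ticks(delay_ns, freq_hz);

        if (ticks > UINT32_MAX)
                printf("delay too large for a 32-bit timestamp delta\n");
        else
                printf("%llu ns -> %llu ticks\n",
                       (unsigned long long)delay_ns,
                       (unsigned long long)ticks);
        return 0;
}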


@@ -410,6 +410,7 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv)
/* Try to make sure MCHBAR is enabled before poking at it */ /* Try to make sure MCHBAR is enabled before poking at it */
intel_setup_mchbar(dev_priv); intel_setup_mchbar(dev_priv);
intel_device_info_runtime_init(dev_priv);
ret = intel_gt_init_mmio(&dev_priv->gt); ret = intel_gt_init_mmio(&dev_priv->gt);
if (ret) if (ret)
@@ -516,8 +517,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
if (i915_inject_probe_failure(dev_priv)) if (i915_inject_probe_failure(dev_priv))
return -ENODEV; return -ENODEV;
intel_device_info_runtime_init(dev_priv);
if (HAS_PPGTT(dev_priv)) { if (HAS_PPGTT(dev_priv)) {
if (intel_vgpu_active(dev_priv) && if (intel_vgpu_active(dev_priv) &&
!intel_vgpu_has_full_ppgtt(dev_priv)) { !intel_vgpu_has_full_ppgtt(dev_priv)) {


@@ -79,9 +79,9 @@
#include "gem/i915_gem_shrinker.h" #include "gem/i915_gem_shrinker.h"
#include "gem/i915_gem_stolen.h" #include "gem/i915_gem_stolen.h"
#include "gt/intel_lrc.h"
#include "gt/intel_engine.h" #include "gt/intel_engine.h"
#include "gt/intel_gt_types.h" #include "gt/intel_gt_types.h"
#include "gt/intel_region_lmem.h"
#include "gt/intel_workarounds.h" #include "gt/intel_workarounds.h"
#include "gt/uc/intel_uc.h" #include "gt/uc/intel_uc.h"
@@ -103,7 +103,6 @@
#include "i915_vma.h" #include "i915_vma.h"
#include "i915_irq.h" #include "i915_irq.h"
#include "intel_region_lmem.h"
/* General customization: /* General customization:
*/ */
@@ -1170,9 +1169,6 @@ struct drm_i915_private {
struct i915_gem_contexts { struct i915_gem_contexts {
spinlock_t lock; /* locks list */ spinlock_t lock; /* locks list */
struct list_head list; struct list_head list;
struct llist_head free_list;
struct work_struct free_work;
} contexts; } contexts;
/* /*
@@ -1570,16 +1566,30 @@ enum {
TGL_REVID_D0, TGL_REVID_D0,
}; };
extern const struct i915_rev_steppings tgl_uy_revids[]; #define TGL_UY_REVIDS_SIZE 4
extern const struct i915_rev_steppings tgl_revids[]; #define TGL_REVIDS_SIZE 2
extern const struct i915_rev_steppings tgl_uy_revids[TGL_UY_REVIDS_SIZE];
extern const struct i915_rev_steppings tgl_revids[TGL_REVIDS_SIZE];
static inline const struct i915_rev_steppings * static inline const struct i915_rev_steppings *
tgl_revids_get(struct drm_i915_private *dev_priv) tgl_revids_get(struct drm_i915_private *dev_priv)
{ {
if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) u8 revid = INTEL_REVID(dev_priv);
return &tgl_uy_revids[INTEL_REVID(dev_priv)]; u8 size;
else const struct i915_rev_steppings *tgl_revid_tbl;
return &tgl_revids[INTEL_REVID(dev_priv)];
if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) {
tgl_revid_tbl = tgl_uy_revids;
size = ARRAY_SIZE(tgl_uy_revids);
} else {
tgl_revid_tbl = tgl_revids;
size = ARRAY_SIZE(tgl_revids);
}
revid = min_t(u8, revid, size - 1);
return &tgl_revid_tbl[revid];
} }
#define IS_TGL_DISP_REVID(p, since, until) \ #define IS_TGL_DISP_REVID(p, since, until) \
@@ -1589,14 +1599,14 @@ tgl_revids_get(struct drm_i915_private *dev_priv)
#define IS_TGL_UY_GT_REVID(p, since, until) \ #define IS_TGL_UY_GT_REVID(p, since, until) \
((IS_TGL_U(p) || IS_TGL_Y(p)) && \ ((IS_TGL_U(p) || IS_TGL_Y(p)) && \
tgl_uy_revids[INTEL_REVID(p)].gt_stepping >= (since) && \ tgl_revids_get(p)->gt_stepping >= (since) && \
tgl_uy_revids[INTEL_REVID(p)].gt_stepping <= (until)) tgl_revids_get(p)->gt_stepping <= (until))
#define IS_TGL_GT_REVID(p, since, until) \ #define IS_TGL_GT_REVID(p, since, until) \
(IS_TIGERLAKE(p) && \ (IS_TIGERLAKE(p) && \
!(IS_TGL_U(p) || IS_TGL_Y(p)) && \ !(IS_TGL_U(p) || IS_TGL_Y(p)) && \
tgl_revids[INTEL_REVID(p)].gt_stepping >= (since) && \ tgl_revids_get(p)->gt_stepping >= (since) && \
tgl_revids[INTEL_REVID(p)].gt_stepping <= (until)) tgl_revids_get(p)->gt_stepping <= (until))
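
tgl_revids_get() above now clamps the PCI revision ID to the last entry of the stepping table instead of indexing the table directly, so an unexpected newer revid cannot read past the array. A small stand-alone sketch of that lookup-with-clamp pattern (the table contents are made up):

#include <stdio.h>

struct rev_steppings {
        unsigned char gt_stepping;
        unsigned char disp_stepping;
};

/* Illustrative table: revid 0 -> A0, revid 1 -> B0, revid 2 -> C0. */
static const struct rev_steppings revids[] = {
        { 0x0, 0x0 }, { 0x1, 0x1 }, { 0x2, 0x2 },
};

static const struct rev_steppings *revids_get(unsigned char revid)
{
        unsigned char size = sizeof(revids) / sizeof(revids[0]);

        /* Unknown/newer revisions fall back to the last known stepping. */
        if (revid >= size)
                revid = size - 1;

        return &revids[revid];
}

int main(void)
{
        printf("revid 1 -> gt stepping 0x%x\n", revids_get(1)->gt_stepping);
        printf("revid 7 -> gt stepping 0x%x (clamped)\n",
               revids_get(7)->gt_stepping);
        return 0;
}
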
#define RKL_REVID_A0 0x0 #define RKL_REVID_A0 0x0
#define RKL_REVID_B0 0x1 #define RKL_REVID_B0 0x1
@@ -1647,8 +1657,6 @@ tgl_revids_get(struct drm_i915_private *dev_priv)
(INTEL_INFO(dev_priv)->has_logical_ring_contexts) (INTEL_INFO(dev_priv)->has_logical_ring_contexts)
#define HAS_LOGICAL_RING_ELSQ(dev_priv) \ #define HAS_LOGICAL_RING_ELSQ(dev_priv) \
(INTEL_INFO(dev_priv)->has_logical_ring_elsq) (INTEL_INFO(dev_priv)->has_logical_ring_elsq)
#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \
(INTEL_INFO(dev_priv)->has_logical_ring_preemption)
#define HAS_MASTER_UNIT_IRQ(dev_priv) (INTEL_INFO(dev_priv)->has_master_unit_irq) #define HAS_MASTER_UNIT_IRQ(dev_priv) (INTEL_INFO(dev_priv)->has_master_unit_irq)
@@ -1990,16 +1998,4 @@ i915_coherent_map_type(struct drm_i915_private *i915)
return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC;
} }
static inline u64 i915_cs_timestamp_ns_to_ticks(struct drm_i915_private *i915, u64 val)
{
return DIV_ROUND_UP_ULL(val * RUNTIME_INFO(i915)->cs_timestamp_frequency_hz,
1000000000);
}
static inline u64 i915_cs_timestamp_ticks_to_ns(struct drm_i915_private *i915, u64 val)
{
return div_u64(val * 1000000000,
RUNTIME_INFO(i915)->cs_timestamp_frequency_hz);
}
#endif #endif
