drm/i915: Update rules for writing through the LLC with the cpu

As mentioned in the previous commit, reads and writes from both the CPU
and GPU go through the LLC. This gives us coherency between the CPU and
GPU irrespective of the attribute settings either device sets. We can
use to avoid having to clflush even uncached memory.

Except for the scanout.

The scanout resides within another functional block that does not use
the LLC but reads directly from main memory. So in order to maintain
coherency with the scanout, writes to uncached memory must be flushed.
In order to optimize writes elsewhere, we start tracking whether an
framebuffer is attached to an object.

v2: Use pin_display tracking rather than fb_count (to ensure we flush
cursors as well etc) and only force the clflush along explicit writes to
the scanout paths (i.e. pin_to_display_plane and pwrite into scanout).

v3: Force the flush after hitting the slowpath in pwrite, as after
dropping the lock the object's cache domain may be invalidated. (Ville)

Based on a patch by Ville Syrjälä.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
Chris Wilson 2013-08-09 12:26:45 +01:00 committed by Daniel Vetter
parent cc98b413c1
commit 2c22569bba
4 changed files with 35 additions and 29 deletions

View File

@ -1841,7 +1841,7 @@ static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
}
void i915_gem_reset(struct drm_device *dev);
void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
void i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
int __must_check i915_gem_init(struct drm_device *dev);
int __must_check i915_gem_init_hw(struct drm_device *dev);

View File

@ -37,7 +37,8 @@
#include <linux/dma-buf.h>
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
bool force);
static __must_check int
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
@ -67,6 +68,14 @@ static bool cpu_cache_is_coherent(struct drm_device *dev,
return HAS_LLC(dev) || level != I915_CACHE_NONE;
}
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
{
if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
return true;
return obj->pin_display;
}
static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
{
if (obj->tiling_mode)
@ -742,8 +751,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
* write domain and manually flush cachelines (if required). This
* optimizes for the case when the gpu will use the data
* right away and we therefore have to clflush anyway. */
if (obj->cache_level == I915_CACHE_NONE)
needs_clflush_after = 1;
needs_clflush_after = cpu_write_needs_clflush(obj);
if (i915_gem_obj_bound_any(obj)) {
ret = i915_gem_object_set_to_gtt_domain(obj, true);
if (ret)
@ -833,7 +841,7 @@ out:
*/
if (!needs_clflush_after &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
i915_gem_clflush_object(obj);
i915_gem_clflush_object(obj, obj->pin_display);
i915_gem_chipset_flush(dev);
}
}
@ -911,9 +919,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto out;
}
if (obj->cache_level == I915_CACHE_NONE &&
obj->tiling_mode == I915_TILING_NONE &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
if (obj->tiling_mode == I915_TILING_NONE &&
obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
cpu_write_needs_clflush(obj)) {
ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
@ -1262,8 +1270,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
}
/* Pinned buffers may be scanout, so flush the cache */
if (obj->pin_count)
i915_gem_object_flush_cpu_write_domain(obj);
if (obj->pin_display)
i915_gem_object_flush_cpu_write_domain(obj, true);
drm_gem_object_unreference(&obj->base);
unlock:
@ -1640,7 +1648,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
* hope for the best.
*/
WARN_ON(ret != -EIO);
i915_gem_clflush_object(obj);
i915_gem_clflush_object(obj, true);
obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
}
@ -3217,7 +3225,8 @@ err_unpin:
}
void
i915_gem_clflush_object(struct drm_i915_gem_object *obj)
i915_gem_clflush_object(struct drm_i915_gem_object *obj,
bool force)
{
/* If we don't have a page list set up, then we're not pinned
* to GPU, and we can ignore the cache flush because it'll happen
@ -3241,7 +3250,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
* snooping behaviour occurs naturally as the result of our domain
* tracking.
*/
if (obj->cache_level != I915_CACHE_NONE)
if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
return;
trace_i915_gem_object_clflush(obj);
@ -3278,14 +3287,15 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj,
bool force)
{
uint32_t old_write_domain;
if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
return;
i915_gem_clflush_object(obj);
i915_gem_clflush_object(obj, force);
i915_gem_chipset_flush(obj->base.dev);
old_write_domain = obj->base.write_domain;
obj->base.write_domain = 0;
@ -3319,7 +3329,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
if (ret)
return ret;
i915_gem_object_flush_cpu_write_domain(obj);
i915_gem_object_flush_cpu_write_domain(obj, false);
/* Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the
@ -3409,7 +3419,11 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
obj, cache_level);
}
if (cache_level == I915_CACHE_NONE) {
list_for_each_entry(vma, &obj->vma_list, vma_link)
vma->node.color = cache_level;
obj->cache_level = cache_level;
if (cpu_write_needs_clflush(obj)) {
u32 old_read_domains, old_write_domain;
/* If we're coming from LLC cached, then we haven't
@ -3432,9 +3446,6 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
old_write_domain);
}
list_for_each_entry(vma, &obj->vma_list, vma_link)
vma->node.color = cache_level;
obj->cache_level = cache_level;
i915_gem_verify_gtt(dev);
return 0;
}
@ -3562,7 +3573,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
if (ret)
goto err_unpin_display;
i915_gem_object_flush_cpu_write_domain(obj);
i915_gem_object_flush_cpu_write_domain(obj, true);
old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains;
@ -3634,8 +3645,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
/* Flush the CPU cache if it's still invalid. */
if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
i915_gem_clflush_object(obj);
i915_gem_clflush_object(obj, false);
obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
}
@ -3817,10 +3827,6 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
obj->user_pin_count++;
obj->pin_filp = file;
/* XXX - flush the CPU caches for pinned objects
* as the X server doesn't manage domains yet
*/
i915_gem_object_flush_cpu_write_domain(obj);
args->offset = i915_gem_obj_ggtt_offset(obj);
out:
drm_gem_object_unreference(&obj->base);

View File

@ -716,7 +716,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
return ret;
if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
i915_gem_clflush_object(obj);
i915_gem_clflush_object(obj, false);
flush_domains |= obj->base.write_domain;
}

View File

@ -487,7 +487,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
dev_priv->gtt.base.total / PAGE_SIZE);
list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
i915_gem_clflush_object(obj);
i915_gem_clflush_object(obj, obj->pin_display);
i915_gem_gtt_bind_object(obj, obj->cache_level);
}