From c43d0188d7be6e45eb52a27ed9df714ca2e04e0a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Dec 2012 12:01:42 +0000 Subject: [PATCH 01/14] drm/i915: Fixup cursor latency used for IVB lp3 watermarks It operates at twice the declared latency, so adjust the computation to avoid potential flicker at low power. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50248 Signed-off-by: Chris Wilson CC: Jesse Barnes Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 117 ++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 42839fc73499..9f1ee07ec4ae 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1811,8 +1811,110 @@ static void sandybridge_update_wm(struct drm_device *dev) enabled |= 2; } - if ((dev_priv->num_pipe == 3) && - g4x_compute_wm0(dev, 2, + /* + * Calculate and update the self-refresh watermark only when one + * display plane is used. + * + * SNB support 3 levels of watermark. + * + * WM1/WM2/WM2 watermarks have to be enabled in the ascending order, + * and disabled in the descending order + * + */ + I915_WRITE(WM3_LP_ILK, 0); + I915_WRITE(WM2_LP_ILK, 0); + I915_WRITE(WM1_LP_ILK, 0); + + if (!single_plane_enabled(enabled) || + dev_priv->sprite_scaling_enabled) + return; + enabled = ffs(enabled) - 1; + + /* WM1 */ + if (!ironlake_compute_srwm(dev, 1, enabled, + SNB_READ_WM1_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM1_LP_ILK, + WM1_LP_SR_EN | + (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); + + /* WM2 */ + if (!ironlake_compute_srwm(dev, 2, enabled, + SNB_READ_WM2_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM2_LP_ILK, + WM2_LP_EN | + (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); + + /* WM3 */ + if (!ironlake_compute_srwm(dev, 3, enabled, + SNB_READ_WM3_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM3_LP_ILK, + WM3_LP_EN | + (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); +} + +static void ivybridge_update_wm(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int latency = SNB_READ_WM0_LATENCY() * 100; /* In unit 0.1us */ + u32 val; + int fbc_wm, plane_wm, cursor_wm; + int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm; + unsigned int enabled; + + enabled = 0; + if (g4x_compute_wm0(dev, 0, + &sandybridge_display_wm_info, latency, + &sandybridge_cursor_wm_info, latency, + &plane_wm, &cursor_wm)) { + val = I915_READ(WM0_PIPEA_ILK); + val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK); + I915_WRITE(WM0_PIPEA_ILK, val | + ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm)); + DRM_DEBUG_KMS("FIFO watermarks For pipe A -" + " plane %d, " "cursor: %d\n", + plane_wm, cursor_wm); + enabled |= 1; + } + + if (g4x_compute_wm0(dev, 1, + &sandybridge_display_wm_info, latency, + &sandybridge_cursor_wm_info, latency, + &plane_wm, &cursor_wm)) { + val = I915_READ(WM0_PIPEB_ILK); + val &= ~(WM0_PIPE_PLANE_MASK | 
WM0_PIPE_CURSOR_MASK); + I915_WRITE(WM0_PIPEB_ILK, val | + ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm)); + DRM_DEBUG_KMS("FIFO watermarks For pipe B -" + " plane %d, cursor: %d\n", + plane_wm, cursor_wm); + enabled |= 2; + } + + if (g4x_compute_wm0(dev, 2, &sandybridge_display_wm_info, latency, &sandybridge_cursor_wm_info, latency, &plane_wm, &cursor_wm)) { @@ -1875,12 +1977,17 @@ static void sandybridge_update_wm(struct drm_device *dev) (plane_wm << WM1_LP_SR_SHIFT) | cursor_wm); - /* WM3 */ + /* WM3, note we have to correct the cursor latency */ if (!ironlake_compute_srwm(dev, 3, enabled, SNB_READ_WM3_LATENCY() * 500, &sandybridge_display_srwm_info, &sandybridge_cursor_srwm_info, - &fbc_wm, &plane_wm, &cursor_wm)) + &fbc_wm, &plane_wm, &ignore_cursor_wm) || + !ironlake_compute_srwm(dev, 3, enabled, + 2 * SNB_READ_WM3_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm)) return; I915_WRITE(WM3_LP_ILK, @@ -4005,7 +4112,7 @@ void intel_init_pm(struct drm_device *dev) } else if (IS_IVYBRIDGE(dev)) { /* FIXME: detect B0+ stepping and use auto training */ if (SNB_READ_WM0_LATENCY()) { - dev_priv->display.update_wm = sandybridge_update_wm; + dev_priv->display.update_wm = ivybridge_update_wm; dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm; } else { DRM_DEBUG_KMS("Failed to read display plane latency. " From af6c4575afada1a9c411e42439b5d40908865b11 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 11 Dec 2012 12:01:43 +0000 Subject: [PATCH 02/14] drm/i915: Double the cursor self-refresh latency on Valleyview It operates at twice the declared latency, so double the latency value used for the cursor watermark calculation. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=50248 Signed-off-by: Chris Wilson CC: Jesse Barnes Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9f1ee07ec4ae..cdd70e654af5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1286,6 +1286,7 @@ static void valleyview_update_wm(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int planea_wm, planeb_wm, cursora_wm, cursorb_wm; int plane_sr, cursor_sr; + int ignore_plane_sr, ignore_cursor_sr; unsigned int enabled = 0; vlv_update_drain_latency(dev); @@ -1308,7 +1309,12 @@ static void valleyview_update_wm(struct drm_device *dev) sr_latency_ns, &valleyview_wm_info, &valleyview_cursor_wm_info, - &plane_sr, &cursor_sr)) + &plane_sr, &ignore_cursor_sr) && + g4x_compute_srwm(dev, ffs(enabled) - 1, + 2*sr_latency_ns, + &valleyview_wm_info, + &valleyview_cursor_wm_info, + &ignore_plane_sr, &cursor_sr)) I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN); else I915_WRITE(FW_BLC_SELF_VLV, From 52bd02d8e3fd83201a1a81bdb4ec6fc0b54d24a0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Dec 2012 10:43:24 +0000 Subject: [PATCH 03/14] drm/i915: Clear self-refresh watermarks when disabled If we elect to disable self-refresh as they require too many FIFO entries, clear the values prior to writing them into the registers. If they are too large they may occupy more bits than available and so corrupt neighbouring WM values. 
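A minimal illustration of the failure mode described above, assuming a hypothetical two-field watermark layout rather than the real DSPFW register map: when a stale self-refresh value wider than its destination field is packed unmasked, its excess bits spill into the neighbouring field.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical packed watermark register: bits 7:0 cursor SR, bits 15:8 plane SR. */
#define CURSOR_SR_SHIFT 0
#define PLANE_SR_SHIFT  8

static uint32_t pack_wm(uint32_t plane_sr, uint32_t cursor_sr)
{
	/* No masking, mirroring an unmasked shift-and-or register write. */
	return (plane_sr << PLANE_SR_SHIFT) | (cursor_sr << CURSOR_SR_SHIFT);
}

int main(void)
{
	/* Self-refresh was rejected, but an oversized cursor_sr of 0x1ff was kept. */
	uint32_t reg = pack_wm(0x20, 0x1ff);

	/* Bit 8 of cursor_sr lands in the plane field: it reads back 0x21, not 0x20. */
	printf("plane SR field = 0x%02x\n", (reg >> PLANE_SR_SHIFT) & 0xff);
	return 0;
}

Zeroing plane_sr and cursor_sr in the disabled path, as the hunks below do, is what prevents that spill.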
Signed-off-by: Chris Wilson Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cdd70e654af5..bfc46529f6df 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1303,7 +1303,6 @@ static void valleyview_update_wm(struct drm_device *dev) &planeb_wm, &cursorb_wm)) enabled |= 2; - plane_sr = cursor_sr = 0; if (single_plane_enabled(enabled) && g4x_compute_srwm(dev, ffs(enabled) - 1, sr_latency_ns, @@ -1314,11 +1313,13 @@ static void valleyview_update_wm(struct drm_device *dev) 2*sr_latency_ns, &valleyview_wm_info, &valleyview_cursor_wm_info, - &ignore_plane_sr, &cursor_sr)) + &ignore_plane_sr, &cursor_sr)) { I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN); - else + } else { I915_WRITE(FW_BLC_SELF_VLV, I915_READ(FW_BLC_SELF_VLV) & ~FW_CSPWRDWNEN); + plane_sr = cursor_sr = 0; + } DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, @@ -1358,17 +1359,18 @@ static void g4x_update_wm(struct drm_device *dev) &planeb_wm, &cursorb_wm)) enabled |= 2; - plane_sr = cursor_sr = 0; if (single_plane_enabled(enabled) && g4x_compute_srwm(dev, ffs(enabled) - 1, sr_latency_ns, &g4x_wm_info, &g4x_cursor_wm_info, - &plane_sr, &cursor_sr)) + &plane_sr, &cursor_sr)) { I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN); - else + } else { I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF) & ~FW_BLC_SELF_EN); + plane_sr = cursor_sr = 0; + } DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, From f20e0b08b8b2a8432e6abf3683960099f0ab2958 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Dec 2012 10:43:25 +0000 Subject: [PATCH 04/14] drm/i915: Prefer CRTC 'active' rather than 'enabled' during WM computations Only the intel_crtc->active is accurate at the point where we wish to perform WM computations, so use it instead of crtc->enabled. Signed-off-by: Chris Wilson Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index bfc46529f6df..5835277d9632 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -405,7 +405,7 @@ void intel_update_fbc(struct drm_device *dev) * - going to an unsupported config (interlace, pixel multiply, etc.) 
*/ list_for_each_entry(tmp_crtc, &dev->mode_config.crtc_list, head) { - if (tmp_crtc->enabled && + if (to_intel_crtc(tmp_crtc)->active && !to_intel_crtc(tmp_crtc)->primary_disabled && tmp_crtc->fb) { if (crtc) { @@ -992,7 +992,7 @@ static struct drm_crtc *single_enabled_crtc(struct drm_device *dev) struct drm_crtc *crtc, *enabled = NULL; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if (crtc->enabled && crtc->fb) { + if (to_intel_crtc(crtc)->active && crtc->fb) { if (enabled) return NULL; enabled = crtc; @@ -1086,7 +1086,7 @@ static bool g4x_compute_wm0(struct drm_device *dev, int entries, tlb_miss; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) { + if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) { *cursor_wm = cursor->guard_size; *plane_wm = display->guard_size; return false; @@ -1215,7 +1215,7 @@ static bool vlv_compute_drain_latency(struct drm_device *dev, int entries; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) + if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) return false; clock = crtc->mode.clock; /* VESA DOT Clock */ @@ -1476,7 +1476,7 @@ static void i9xx_update_wm(struct drm_device *dev) fifo_size = dev_priv->display.get_fifo_size(dev, 0); crtc = intel_get_crtc_for_plane(dev, 0); - if (crtc->enabled && crtc->fb) { + if (to_intel_crtc(crtc)->active && crtc->fb) { int cpp = crtc->fb->bits_per_pixel / 8; if (IS_GEN2(dev)) cpp = 4; @@ -1490,7 +1490,7 @@ static void i9xx_update_wm(struct drm_device *dev) fifo_size = dev_priv->display.get_fifo_size(dev, 1); crtc = intel_get_crtc_for_plane(dev, 1); - if (crtc->enabled && crtc->fb) { + if (to_intel_crtc(crtc)->active && crtc->fb) { int cpp = crtc->fb->bits_per_pixel / 8; if (IS_GEN2(dev)) cpp = 4; @@ -2044,7 +2044,7 @@ sandybridge_compute_sprite_wm(struct drm_device *dev, int plane, int entries, tlb_miss; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) { + if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) { *sprite_wm = display->guard_size; return false; } From 4283908ef7f11a72c3b80dd4cf026f1a86429f82 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Dec 2012 23:38:28 +0100 Subject: [PATCH 05/14] drm/i915: Implement WaDisableHiZPlanesWhenMSAAEnabled Quoting from Bspec, 3D_CHICKEN1, bit 10 This bit needs to be set always to "1", Project: DevSNB " Reviewed-by: Rodrigo Vivi Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3f75cfaf1c3f..e0019378f8b1 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -517,6 +517,7 @@ * the enables for writing to the corresponding low bit. */ #define _3D_CHICKEN 0x02084 +#define _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB (1 << 10) #define _3D_CHICKEN2 0x0208c /* Disables pipelining of read flushes past the SF-WIZ interface. 
* Required on all Ironlake steppings according to the B-Spec, but the diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 5835277d9632..3b85660ce4e4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3592,6 +3592,10 @@ static void gen6_init_clock_gating(struct drm_device *dev) I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); + /* WaDisableHiZPlanesWhenMSAAEnabled */ + I915_WRITE(_3D_CHICKEN, + _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); + I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); I915_WRITE(WM1_LP_ILK, 0); From 6547fbdbfff62c99e4f7b4f985ff8b3454f33b0f Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 14 Dec 2012 23:38:29 +0100 Subject: [PATCH 06/14] drm/i915: Implement WaSetupGtModeTdRowDispatch I'm not really sure, since the w/a entry is as thin on details as ever, and Bspec doesn't say anything about it. But I've figured only dispatching to rows 0&1 instead of all four should be the right thing for GT1. Reviewed-by: Rodrigo Vivi [danvet: Add the missing snb server GT1 to the check, spotted by Chris Wilson.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_reg.h | 3 ++- drivers/gpu/drm/i915/intel_pm.c | 5 +++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 557843dd4b2e..062a60b381b7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1166,6 +1166,9 @@ struct drm_i915_file_private { #define IS_IVB_GT1(dev) ((dev)->pci_device == 0x0156 || \ (dev)->pci_device == 0x0152 || \ (dev)->pci_device == 0x015a) +#define IS_SNB_GT1(dev) ((dev)->pci_device == 0x0102 || \ + (dev)->pci_device == 0x0106 || \ + (dev)->pci_device == 0x010A) #define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview) #define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e0019378f8b1..186ee5c85b51 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -533,7 +533,8 @@ # define MI_FLUSH_ENABLE (1 << 12) #define GEN6_GT_MODE 0x20d0 -#define GEN6_GT_MODE_HI (1 << 9) +#define GEN6_GT_MODE_HI (1 << 9) +#define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5) #define GFX_MODE 0x02520 #define GFX_MODE_GEN7 0x0229c diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3b85660ce4e4..55f7a896a121 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3596,6 +3596,11 @@ static void gen6_init_clock_gating(struct drm_device *dev) I915_WRITE(_3D_CHICKEN, _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); + /* WaSetupGtModeTdRowDispatch */ + if (IS_SNB_GT1(dev)) + I915_WRITE(GEN6_GT_MODE, + _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE)); + I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); I915_WRITE(WM1_LP_ILK, 0); From b45305fce5bb1abec263fcff9d81ebecd6306ede Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Mon, 17 Dec 2012 16:21:27 +0100 Subject: [PATCH 07/14] drm/i915: Implement workaround for broken CS tlb on i830/845 Now that Chris Wilson demonstrated that the key for stability on early gen 2 is to simple _never_ exchange the physical backing storage of batch buffers I've tried a stab at a kernel solution. Doesn't look too nefarious imho, now that I don't try to be too clever for my own good any more. 
v2: After discussing the various techniques, we've decided to always blit batches on the suspect devices, but allow userspace to opt out of the kernel workaround assume full responsibility for providing coherent batches. The principal reason is that avoiding the blit does improve performance in a few key microbenchmarks and also in cairo-trace replays. Signed-Off-by: Daniel Vetter Signed-off-by: Chris Wilson [danvet: - Drop the hunk which uses HAS_BROKEN_CS_TLB to implement the ring wrap w/a. Suggested by Chris Wilson. - Also add the ACTHD check from Chris Wilson for the error state dumping, so that we still catch batches when userspace opts out of the w/a.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_dma.c | 3 + drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 + drivers/gpu/drm/i915/i915_irq.c | 12 ++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 76 +++++++++++++++++++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + include/uapi/drm/i915_drm.h | 10 +++ 7 files changed, 100 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8f63cd5de4b4..99daa896105d 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -989,6 +989,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_SECURE_BATCHES: value = capable(CAP_SYS_ADMIN); break; + case I915_PARAM_HAS_PINNED_BATCHES: + value = 1; + break; default: DRM_DEBUG_DRIVER("Unknown parameter %d\n", param->param); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 062a60b381b7..1a4c3a1c111f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1100,6 +1100,7 @@ struct drm_i915_gem_object { */ atomic_t pending_flip; }; +#define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base) #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) @@ -1199,6 +1200,9 @@ struct drm_i915_file_private { #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) +/* Early gen2 have a totally busted CS tlb and require pinned batches. */ +#define HAS_BROKEN_CS_TLB(dev) (IS_I830(dev) || IS_845G(dev)) + /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. 
*/ diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index ee8f97f0539e..d6a994a07393 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -808,6 +808,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, flags |= I915_DISPATCH_SECURE; } + if (args->flags & I915_EXEC_IS_PINNED) + flags |= I915_DISPATCH_PINNED; switch (args->flags & I915_EXEC_RING_MASK) { case I915_EXEC_DEFAULT: diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a4dc97f8b9f0..2220dec3e5d9 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1087,6 +1087,18 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, if (!ring->get_seqno) return NULL; + if (HAS_BROKEN_CS_TLB(dev_priv->dev)) { + u32 acthd = I915_READ(ACTHD); + + if (WARN_ON(ring->id != RCS)) + return NULL; + + obj = ring->private; + if (acthd >= obj->gtt_offset && + acthd < obj->gtt_offset + obj->base.size) + return i915_error_object_create(dev_priv, obj); + } + seqno = ring->get_seqno(ring, false); list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { if (obj->ring != ring) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2346b920bd86..ae253e04c391 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -547,9 +547,14 @@ static int init_render_ring(struct intel_ring_buffer *ring) static void render_ring_cleanup(struct intel_ring_buffer *ring) { + struct drm_device *dev = ring->dev; + if (!ring->private) return; + if (HAS_BROKEN_CS_TLB(dev)) + drm_gem_object_unreference(to_gem_object(ring->private)); + cleanup_pipe_control(ring); } @@ -969,6 +974,8 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring, return 0; } +/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ +#define I830_BATCH_LIMIT (256*1024) static int i830_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 len, @@ -976,15 +983,47 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring, { int ret; - ret = intel_ring_begin(ring, 4); - if (ret) - return ret; + if (flags & I915_DISPATCH_PINNED) { + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; - intel_ring_emit(ring, MI_BATCH_BUFFER); - intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); - intel_ring_emit(ring, offset + len - 8); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + intel_ring_emit(ring, MI_BATCH_BUFFER); + intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); + intel_ring_emit(ring, offset + len - 8); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + } else { + struct drm_i915_gem_object *obj = ring->private; + u32 cs_offset = obj->gtt_offset; + + if (len > I830_BATCH_LIMIT) + return -ENOSPC; + + ret = intel_ring_begin(ring, 9+3); + if (ret) + return ret; + /* Blit the batch (which has now all relocs applied) to the stable batch + * scratch bo area (so that the CS never stumbles over its tlb + * invalidation bug) ... 
*/ + intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD | + XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024); + intel_ring_emit(ring, cs_offset); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 4096); + intel_ring_emit(ring, offset); + intel_ring_emit(ring, MI_FLUSH); + + /* ... and execute it. */ + intel_ring_emit(ring, MI_BATCH_BUFFER); + intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); + intel_ring_emit(ring, cs_offset + len - 8); + intel_ring_advance(ring); + } return 0; } @@ -1596,6 +1635,27 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->init = init_render_ring; ring->cleanup = render_ring_cleanup; + /* Workaround batchbuffer to combat CS tlb bug. */ + if (HAS_BROKEN_CS_TLB(dev)) { + struct drm_i915_gem_object *obj; + int ret; + + obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT); + if (obj == NULL) { + DRM_ERROR("Failed to allocate batch bo\n"); + return -ENOMEM; + } + + ret = i915_gem_object_pin(obj, 0, true, false); + if (ret != 0) { + drm_gem_object_unreference(&obj->base); + DRM_ERROR("Failed to ping batch bo\n"); + return ret; + } + + ring->private = obj; + } + return intel_init_ring_buffer(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 526182ed0c6d..6af87cd05725 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -94,6 +94,7 @@ struct intel_ring_buffer { u32 offset, u32 length, unsigned flags); #define I915_DISPATCH_SECURE 0x1 +#define I915_DISPATCH_PINNED 0x2 void (*cleanup)(struct intel_ring_buffer *ring); int (*sync_to)(struct intel_ring_buffer *ring, struct intel_ring_buffer *to, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b746a3cf5fa9..c4d2e9c74002 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -307,6 +307,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 #define I915_PARAM_RSVD_FOR_FUTURE_USE 22 #define I915_PARAM_HAS_SECURE_BATCHES 23 +#define I915_PARAM_HAS_PINNED_BATCHES 24 typedef struct drm_i915_getparam { int param; @@ -677,6 +678,15 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_SECURE (1<<9) +/** Inform the kernel that the batch is and will always be pinned. This + * negates the requirement for a workaround to be performed to avoid + * an incoherent CS (such as can be found on 830/845). If this flag is + * not passed, the kernel will endeavour to make sure the batch is + * coherent with the CS before execution. If this flag is passed, + * userspace assumes the responsibility for ensuring the same. + */ +#define I915_EXEC_IS_PINNED (1<<10) + #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK From b0a2658acb5bf9ca86b4aab011b7106de3af0add Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 18 Dec 2012 09:37:54 +0100 Subject: [PATCH 08/14] drm/i915: don't disable disconnected outputs This piece of neat lore has been ported painstakingly and bug-for-bug compatible from the old crtc helper code. Imo it's utter nonsense. If you disconnected a cable and before you reconnect it, userspace (or the kernel) does an set_crtc call, this will result in that connector getting disabled. 
Which will result in a nice black screen when plugging in the cable again. There's absolutely no reason the kernel does such policy enforcements - if userspace tries to set up a mode on something disconnected we might fail loudly (since the dp link training fails), but silently adjusting the output configuration behind userspace's back is a recipe for disaster. Specifically I think that this could explain some of our MI_WAIT hangs around suspend, where userspace issues a scanline wait on a disable pipe. This mechanisims here could explain how that pipe got disabled without userspace noticing. Note that this fixes a NULL deref at BIOS takeover when the firmware sets up a disconnected output in a clone configuration with a connected output on the 2nd pipe: When doing the full modeset we don't have a mode for the 2nd pipe and OOPS. On the first pipe this doesn't matter, since at boot-up the fbdev helpers will set up the choosen configuration on that on first. Since this is now the umptenth bug around handling this imo brain-dead semantics correctly, I think it's time to kill it and see whether there's any userspace out there which relies on this. It also nicely demonstrates that we have a tiny window where DP hotplug can still kill the driver. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=58396 Cc: stable@vger.kernel.org Tested-by: Peter Ujfalusi Reviewed-by: Rodrigo Vivi Reviewed-by: Jesse Barnes Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 82267b27b8f6..bc0b7aa0ab96 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8133,10 +8133,6 @@ intel_modeset_stage_output_state(struct drm_device *dev, DRM_DEBUG_KMS("encoder changed, full mode switch\n"); config->mode_changed = true; } - - /* Disable all disconnected encoders. */ - if (connector->base.status == connector_status_disconnected) - connector->new_encoder = NULL; } /* connector->new_encoder is now updated for all connectors. */ From b81034506fc9b879cb726feb01342be0cdbe6e25 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Dec 2012 20:37:06 +0000 Subject: [PATCH 09/14] drm: Export routines for inserting preallocated nodes into the mm manager Required by i915 in order to avoid the allocation in the middle of manipulating the drm_mm lists. Use a pair of stubs to preserve the existing EXPORT_SYMBOLs for backporting; to be removed later. Cc: Dave Airlie Cc: dri-devel@lists.freedesktop.org Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula [danvet: bikeshedded-away the atomic parameter, it's not yet used anywhere.] Acked-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/drm_mm.c | 49 +++++++++++++++++++++++++++------------- include/drm/drm_mm.h | 25 ++++++++++++++++---- 2 files changed, 54 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 0761a03cdbb2..2bf9670ba29b 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -184,19 +184,27 @@ EXPORT_SYMBOL(drm_mm_get_block_generic); * -ENOSPC if no suitable free area is available. The preallocated memory node * must be cleared. 
*/ -int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment) +int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node, + unsigned long size, unsigned alignment, + unsigned long color) { struct drm_mm_node *hole_node; - hole_node = drm_mm_search_free(mm, size, alignment, false); + hole_node = drm_mm_search_free_generic(mm, size, alignment, + color, 0); if (!hole_node) return -ENOSPC; - drm_mm_insert_helper(hole_node, node, size, alignment, 0); - + drm_mm_insert_helper(hole_node, node, size, alignment, color); return 0; } +EXPORT_SYMBOL(drm_mm_insert_node_generic); + +int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node, + unsigned long size, unsigned alignment) +{ + return drm_mm_insert_node_generic(mm, node, size, alignment, 0); +} EXPORT_SYMBOL(drm_mm_insert_node); static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node, @@ -275,21 +283,30 @@ EXPORT_SYMBOL(drm_mm_get_block_range_generic); * -ENOSPC if no suitable free area is available. This is for range * restricted allocations. The preallocated memory node must be cleared. */ +int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node, + unsigned long size, unsigned alignment, unsigned long color, + unsigned long start, unsigned long end) +{ + struct drm_mm_node *hole_node; + + hole_node = drm_mm_search_free_in_range_generic(mm, + size, alignment, color, + start, end, 0); + if (!hole_node) + return -ENOSPC; + + drm_mm_insert_helper_range(hole_node, node, + size, alignment, color, + start, end); + return 0; +} +EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic); + int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node, unsigned long size, unsigned alignment, unsigned long start, unsigned long end) { - struct drm_mm_node *hole_node; - - hole_node = drm_mm_search_free_in_range(mm, size, alignment, - start, end, false); - if (!hole_node) - return -ENOSPC; - - drm_mm_insert_helper_range(hole_node, node, size, alignment, 0, - start, end); - - return 0; + return drm_mm_insert_node_in_range_generic(mm, node, size, alignment, 0, start, end); } EXPORT_SYMBOL(drm_mm_insert_node_in_range); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 06d7f798a08c..0f4a366f6fa6 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -158,12 +158,29 @@ static inline struct drm_mm_node *drm_mm_get_block_atomic_range( return drm_mm_get_block_range_generic(parent, size, alignment, 0, start, end, 1); } -extern int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment); + +extern int drm_mm_insert_node(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment); extern int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment, - unsigned long start, unsigned long end); + unsigned long size, + unsigned alignment, + unsigned long start, + unsigned long end); +extern int drm_mm_insert_node_generic(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment, + unsigned long color); +extern int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment, + unsigned long color, + unsigned long start, + unsigned long end); extern void drm_mm_put_block(struct drm_mm_node *cur); extern void drm_mm_remove_node(struct drm_mm_node *node); extern void drm_mm_replace_node(struct 
drm_mm_node *old, struct drm_mm_node *new); From dc9dd7a20fde95aa81a8307cde79c2dff9f83f3d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 7 Dec 2012 20:37:07 +0000 Subject: [PATCH 10/14] drm/i915: Preallocate the drm_mm_node prior to manipulating the GTT drm_mm manager As we may reap neighbouring objects in order to free up pages for allocations, we need to be careful not to allocate in the middle of the drm_mm manager. To accomplish this, we can simply allocate the drm_mm_node up front and then use the combined search & insert drm_mm routines, reducing our code footprint in the process. Fixes (partially) i-g-t/gem_tiled_swapping Reported-by: Mika Kuoppala Signed-off-by: Chris Wilson Reviewed-by: Jani Nikula [danvet: Again fixup atomic bikeshed.] Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 64 ++++++++++++++------------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c1f691958f89..5feda1f67042 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2890,7 +2890,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, { struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_mm_node *free_space; + struct drm_mm_node *node; u32 size, fence_size, fence_alignment, unfenced_alignment; bool mappable, fenceable; int ret; @@ -2936,66 +2936,54 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, i915_gem_object_pin_pages(obj); + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (node == NULL) { + i915_gem_object_unpin_pages(obj); + return -ENOMEM; + } + search_free: if (map_and_fenceable) - free_space = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space, - size, alignment, obj->cache_level, - 0, dev_priv->mm.gtt_mappable_end, - false); + ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, + size, alignment, obj->cache_level, + 0, dev_priv->mm.gtt_mappable_end); else - free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space, - size, alignment, obj->cache_level, - false); - - if (free_space != NULL) { - if (map_and_fenceable) - free_space = - drm_mm_get_block_range_generic(free_space, - size, alignment, obj->cache_level, - 0, dev_priv->mm.gtt_mappable_end, - false); - else - free_space = - drm_mm_get_block_generic(free_space, - size, alignment, obj->cache_level, - false); - } - if (free_space == NULL) { + ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node, + size, alignment, obj->cache_level); + if (ret) { ret = i915_gem_evict_something(dev, size, alignment, obj->cache_level, map_and_fenceable, nonblocking); - if (ret) { - i915_gem_object_unpin_pages(obj); - return ret; - } + if (ret == 0) + goto search_free; - goto search_free; - } - if (WARN_ON(!i915_gem_valid_gtt_space(dev, - free_space, - obj->cache_level))) { i915_gem_object_unpin_pages(obj); - drm_mm_put_block(free_space); + kfree(node); + return ret; + } + if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) { + i915_gem_object_unpin_pages(obj); + drm_mm_put_block(node); return -EINVAL; } ret = i915_gem_gtt_prepare_object(obj); if (ret) { i915_gem_object_unpin_pages(obj); - drm_mm_put_block(free_space); + drm_mm_put_block(node); return ret; } list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); - obj->gtt_space = free_space; - obj->gtt_offset = free_space->start; + obj->gtt_space = node; + obj->gtt_offset 
= node->start; fenceable = - free_space->size == fence_size && - (free_space->start & (fence_alignment - 1)) == 0; + node->size == fence_size && + (node->start & (fence_alignment - 1)) == 0; mappable = obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; From 0fde901f1ddd2ce0e380a6444f1fb7ca555859e9 Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Wed, 19 Dec 2012 11:03:41 +0100 Subject: [PATCH 11/14] i915: ensure that VGA plane is disabled Some broken systems (like HP nc6120) in some cases, usually after LID close/open, enable VGA plane, making display unusable (black screen on LVDS, some strange mode on VGA output). We used to disable VGA plane only once at startup. Now we also check, if VGA plane is still disabled while changing mode, and fix that if something changed it. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=57434 Signed-off-by: Krzysztof Mazur Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bc0b7aa0ab96..7395f6a389a5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9152,6 +9152,23 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) * the crtc fixup. */ } +static void i915_redisable_vga(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 vga_reg; + + if (HAS_PCH_SPLIT(dev)) + vga_reg = CPU_VGACNTRL; + else + vga_reg = VGACNTRL; + + if (I915_READ(vga_reg) != VGA_DISP_DISABLE) { + DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); + I915_WRITE(vga_reg, VGA_DISP_DISABLE); + POSTING_READ(vga_reg); + } +} + /* Scan out the current hw modeset state, sanitizes it and maps it into the drm * and i915 state tracking structures. */ void intel_modeset_setup_hw_state(struct drm_device *dev, @@ -9260,6 +9277,8 @@ void intel_modeset_setup_hw_state(struct drm_device *dev, intel_set_mode(&crtc->base, &crtc->base.mode, crtc->base.x, crtc->base.y, crtc->base.fb); } + + i915_redisable_vga(dev); } else { intel_modeset_update_staged_output_state(dev); } From 5b42427fc38ecb9056c4e64deaff36d6d6ba1b67 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 20 Dec 2012 10:51:09 +1000 Subject: [PATCH 12/14] drm/i915: fix flags in dma buf exporting As pointed out by Seung-Woo Kim this should have been passing flags like nouveau/radeon have. 
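The flags in question are the ones userspace hands to the PRIME export ioctl; a brief sketch of the calling side (standard DRM PRIME interface, error handling elided) shows where they originate and why dropping them matters:

#include <stdint.h>
#include <string.h>
#include <fcntl.h>
#include <xf86drm.h>

/* Export a GEM handle as a dma-buf fd, asking for O_CLOEXEC on the new fd. */
static int export_prime_fd(int drm_fd, uint32_t handle)
{
	struct drm_prime_handle args;

	memset(&args, 0, sizeof(args));
	args.handle = handle;
	args.flags = DRM_CLOEXEC;	/* forwarded down to dma_buf_export() */

	if (drmIoctl(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args))
		return -1;

	return args.fd;
}

With the hardcoded 0600 the requested O_CLOEXEC never reached the exported fd; passing the caller's flags through restores it.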
Cc: stable@vger.kernel.org Signed-off-by: Dave Airlie Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 773ef77b6c22..7be4241e8242 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -226,7 +226,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, 0600); + return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, flags); } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) From 677feac291c1ea7f0b84c6e499e858d440b96c7b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 19 Dec 2012 14:33:45 +0100 Subject: [PATCH 13/14] drm/i915: optionally disable shrinker lock stealing commit 5774506f157a91400c587b85d1ce4de56f0d32f6 Author: Chris Wilson Date: Wed Nov 21 13:04:04 2012 +0000 drm/i915: Borrow our struct_mutex for the direct reclaim added a nice trick to steal the struct_mutex lock in the shrinker if it's the current task holding it. But this also caused the requirement that every place which allocates memory needs to be careful about the gem state of objects, since the shrinker could have pulled the rug out from under it. We've usually solved this by carefully preallocating things or ensure that buffers are pinned already. But the shrinker also reaps mmap offset, so allocating those needs to be careful, too. Now that code has been factored out into some common helpers, so either we have fragile code depending upon the common helper not doing something we don't want it to do. Or we need to reimplement the mmap offset creation and so also leak implementation details into our code. Since this all results in leaky abstraction, cop out by disabling the lock borrowing trick while calling down into the helpers. That way our craziness is nicely confined to files in drm/i915. v2: Split out the change to create_mmap_offset as request by Chris Wilson. Cc: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1a4c3a1c111f..ed3059575576 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -780,6 +780,7 @@ typedef struct drm_i915_private { struct i915_hw_ppgtt *aliasing_ppgtt; struct shrinker inactive_shrinker; + bool shrinker_no_lock_stealing; /** * List of objects currently involved in rendering. diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5feda1f67042..ad6f321083a6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4380,6 +4380,9 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) if (!mutex_is_locked_by(&dev->struct_mutex, current)) return 0; + if (dev_priv->mm.shrinker_no_lock_stealing) + return 0; + unlock = false; } From da494d7ca5e0a1afca3480826b5060e15c951e80 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Thu, 20 Dec 2012 15:11:16 +0100 Subject: [PATCH 14/14] drm/i915: disable shrinker lock stealing for create_mmap_offset The mmap offset structure is not part of the drm/i915 code, but provided by gem helpers. 
To avoid leaky abstractions (by either depending upon implementation details of said helper wrt to preallocations, or reimplementing it in our code and so fuzzing around in internal details of that helpr) simply disable the shrinker lock stealing accross calls into the helper functions. This should fix igt/gem_tiled_swapping. v2: Fix cleanup path confusion bemoaned by Chris Wilson. Reported-by: Mika Kuoppala Cc: Chris Wilson Cc: Mika Kuoppala Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ad6f321083a6..84146ad565da 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1517,9 +1517,11 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) if (obj->base.map_list.map) return 0; + dev_priv->mm.shrinker_no_lock_stealing = true; + ret = drm_gem_create_mmap_offset(&obj->base); if (ret != -ENOSPC) - return ret; + goto out; /* Badly fragmented mmap space? The only way we can recover * space is by destroying unwanted objects. We can't randomly release @@ -1531,10 +1533,14 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT); ret = drm_gem_create_mmap_offset(&obj->base); if (ret != -ENOSPC) - return ret; + goto out; i915_gem_shrink_all(dev_priv); - return drm_gem_create_mmap_offset(&obj->base); + ret = drm_gem_create_mmap_offset(&obj->base); +out: + dev_priv->mm.shrinker_no_lock_stealing = false; + + return ret; } static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
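A closing note on the execbuffer ABI added in patch 07: a minimal sketch, assuming libdrm's drmIoctl and the updated i915_drm.h, of how userspace might detect the pinned-batches guarantee and opt out of the kernel blit workaround. The helper names are illustrative, not taken from any real driver.

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

/* Query whether the kernel honours I915_EXEC_IS_PINNED. */
static int has_pinned_batches(int fd)
{
	struct drm_i915_getparam gp;
	int value = 0;

	memset(&gp, 0, sizeof(gp));
	gp.param = I915_PARAM_HAS_PINNED_BATCHES;
	gp.value = &value;

	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return 0;	/* older kernel: parameter unknown */

	return value;
}

/* The caller guarantees its batch bo stays pinned at a stable GTT offset,
 * so the 830/845 blit-to-scratch workaround can be skipped. */
static void mark_batch_pinned(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
{
	if (has_pinned_batches(fd))
		execbuf->flags |= I915_EXEC_IS_PINNED;
}

On kernels without patch 07 the getparam simply fails, the flag is left clear, and the kernel keeps applying its own workaround, so the caller is safe either way.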