From 701e1e789142042144c8cc10b8f6d1554e960144 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 15 Aug 2014 11:52:53 +0200 Subject: [PATCH 1/9] drm/radeon: properly document reloc priority mask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of hard coding the value properly document that this is an userspace interface. No intended functional change. Signed-off-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_cs.c | 3 ++- include/uapi/drm/radeon_drm.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index ee712c199b25..cb12df784d83 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -132,7 +132,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) * the buffers used for read only, which doubles the range * to 0 to 31. 32 is reserved for the kernel driver. */ - priority = (r->flags & 0xf) * 2 + !!r->write_domain; + priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2 + + !!r->write_domain; /* the first reloc of an UVD job is the msg and that must be in VRAM, also but everything into VRAM on AGP cards to avoid diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h index 509b2d7a41b7..fea6099608ef 100644 --- a/include/uapi/drm/radeon_drm.h +++ b/include/uapi/drm/radeon_drm.h @@ -944,6 +944,7 @@ struct drm_radeon_cs_chunk { }; /* drm_radeon_cs_reloc.flags */ +#define RADEON_RELOC_PRIO_MASK (0xf << 0) struct drm_radeon_cs_reloc { uint32_t handle; From 1538a9e0e04f6a5b323cd3d65e9320512978fcec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Mon, 18 Aug 2014 17:34:55 +0900 Subject: [PATCH 2/9] drm/radeon: Only flush HDP cache for indirect buffers from userspace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It isn't necessary for command streams generated by the kernel (at least not while we aren't storing ring or indirect buffers in VRAM). Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 8 ++++---- drivers/gpu/drm/radeon/cik_sdma.c | 6 +++--- drivers/gpu/drm/radeon/evergreen.c | 4 ++-- drivers/gpu/drm/radeon/evergreen_dma.c | 2 +- drivers/gpu/drm/radeon/ni.c | 4 ++-- drivers/gpu/drm/radeon/r100.c | 8 ++++---- drivers/gpu/drm/radeon/r200.c | 2 +- drivers/gpu/drm/radeon/r300.c | 2 +- drivers/gpu/drm/radeon/r420.c | 4 ++-- drivers/gpu/drm/radeon/r600.c | 8 ++++---- drivers/gpu/drm/radeon/r600_dma.c | 6 +++--- drivers/gpu/drm/radeon/radeon.h | 8 +++++--- drivers/gpu/drm/radeon/radeon_cs.c | 6 +++--- drivers/gpu/drm/radeon/radeon_ib.c | 5 +++-- drivers/gpu/drm/radeon/radeon_ring.c | 16 ++++++++++------ drivers/gpu/drm/radeon/radeon_semaphore.c | 2 +- drivers/gpu/drm/radeon/radeon_test.c | 18 +++++++++--------- drivers/gpu/drm/radeon/radeon_uvd.c | 2 +- drivers/gpu/drm/radeon/radeon_vce.c | 6 +++--- drivers/gpu/drm/radeon/radeon_vm.c | 6 +++--- drivers/gpu/drm/radeon/rv515.c | 2 +- drivers/gpu/drm/radeon/rv770_dma.c | 2 +- drivers/gpu/drm/radeon/si.c | 6 +++--- drivers/gpu/drm/radeon/si_dma.c | 2 +- drivers/gpu/drm/radeon/uvd_v1_0.c | 4 ++-- 25 files changed, 73 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index e7d99e1547cc..d61bc1841d5e 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3801,7 +3801,7 @@ int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2)); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); @@ -4004,7 +4004,7 @@ int cik_copy_cpdma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; @@ -4103,7 +4103,7 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2); ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_scratch_free(rdev, scratch); radeon_ib_free(rdev, &ib); @@ -4324,7 +4324,7 @@ static int cik_cp_gfx_start(struct radeon_device *rdev) radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); return 0; } diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index bcf480510ac2..192278bc993c 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -596,7 +596,7 @@ int cik_copy_dma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; @@ -638,7 +638,7 @@ int cik_sdma_ring_test(struct radeon_device *rdev, radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr)); radeon_ring_write(ring, 1); /* number of DWs to follow */ radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = readl(ptr); @@ -695,7 +695,7 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib); DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 4fedd14e670a..dbca60c7d097 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2869,7 +2869,7 @@ static int evergreen_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); radeon_ring_write(ring, 0); radeon_ring_write(ring, 0); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); cp_me = 0xff; WREG32(CP_ME_CNTL, cp_me); @@ -2912,7 +2912,7 @@ static int evergreen_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ radeon_ring_write(ring, 0x00000010); /* */ - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); return 0; } diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c index 478caefe0fef..afaba388c36d 100644 --- a/drivers/gpu/drm/radeon/evergreen_dma.c +++ b/drivers/gpu/drm/radeon/evergreen_dma.c @@ -155,7 +155,7 @@ int evergreen_copy_dma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 327b85f7fd0d..ba89375f197f 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1505,7 +1505,7 @@ static int cayman_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); radeon_ring_write(ring, 0); radeon_ring_write(ring, 0); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); cayman_cp_enable(rdev, true); @@ -1547,7 +1547,7 @@ static int cayman_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ radeon_ring_write(ring, 0x00000010); /* */ - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); /* XXX init other rings */ diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 04b5940b8923..4c5ec44ff328 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -925,7 +925,7 @@ int r100_copy_blit(struct radeon_device *rdev, if (fence) { r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); return r; } @@ -958,7 +958,7 @@ void r100_ring_start(struct radeon_device *rdev, struct radeon_ring *ring) RADEON_ISYNC_ANY3D_IDLE2D | RADEON_ISYNC_WAIT_IDLEGUI | RADEON_ISYNC_CPSCRATCH_IDLEGUI); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } @@ -3638,7 +3638,7 @@ int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) } radeon_ring_write(ring, PACKET0(scratch, 0)); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) { @@ -3700,7 +3700,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[6] = PACKET2(0); ib.ptr[7] = PACKET2(0); ib.length_dw = 8; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); goto free_ib; diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index 58f0473aa73f..67780374a652 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -121,7 +121,7 @@ int r200_copy_dma(struct radeon_device *rdev, if (fence) { r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); return r; } diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 75b30338c226..1bc4704034ce 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -295,7 +295,7 @@ void r300_ring_start(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, R300_GEOMETRY_ROUND_NEAREST | R300_COLOR_ROUND_NEAREST); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } static void r300_errata(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 802b19220a21..2828605aef3f 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -219,7 +219,7 @@ static void r420_cp_errata_init(struct radeon_device *rdev) radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1)); radeon_ring_write(ring, rdev->config.r300.resync_scratch); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } static void r420_cp_errata_fini(struct radeon_device *rdev) @@ -232,7 +232,7 @@ static void r420_cp_errata_fini(struct radeon_device *rdev) radeon_ring_lock(rdev, ring, 8); radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); radeon_ring_write(ring, R300_RB3D_DC_FINISH); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_scratch_free(rdev, rdev->config.r300.resync_scratch); } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index c70a504d96af..1e4770138fd4 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2547,7 +2547,7 @@ int r600_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1)); radeon_ring_write(ring, 0); radeon_ring_write(ring, 0); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); cp_me = 0xff; WREG32(R_0086D8_CP_ME_CNTL, cp_me); @@ -2683,7 +2683,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); radeon_ring_write(ring, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) @@ -2845,7 +2845,7 @@ int r600_copy_cpdma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; @@ -3165,7 +3165,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); goto free_ib; diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index 4969cef44a19..51fd98553eaf 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -261,7 +261,7 @@ int r600_dma_ring_test(struct radeon_device *rdev, radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc); radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = readl(ptr); @@ -368,7 +368,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[3] = 0xDEADBEEF; ib.length_dw = 4; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib); DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); @@ -493,7 +493,7 @@ int r600_copy_dma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index e715e0c1f5d8..b281886f6f51 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -968,7 +968,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, - struct radeon_ib *const_ib); + struct radeon_ib *const_ib, bool hdp_flush); int radeon_ib_pool_init(struct radeon_device *rdev); void radeon_ib_pool_fini(struct radeon_device *rdev); int radeon_ib_ring_tests(struct radeon_device *rdev); @@ -978,8 +978,10 @@ bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev, void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp); int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw); int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw); -void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp); -void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp); +void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp, + bool hdp_flush); +void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp, + bool hdp_flush); void radeon_ring_undo(struct radeon_ring *ring); void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp); int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index cb12df784d83..83f382e8e40e 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -451,7 +451,7 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, radeon_vce_note_usage(rdev); radeon_cs_sync_rings(parser); - r = radeon_ib_schedule(rdev, &parser->ib, NULL); + r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); if (r) { DRM_ERROR("Failed to schedule IB !\n"); } @@ -542,9 +542,9 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { - r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib); + r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true); } else { - r = radeon_ib_schedule(rdev, &parser->ib, NULL); + r = radeon_ib_schedule(rdev, &parser->ib, NULL, true); } out: diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c index 65b0c213488d..5bf2c0a05827 100644 --- a/drivers/gpu/drm/radeon/radeon_ib.c +++ b/drivers/gpu/drm/radeon/radeon_ib.c @@ -107,6 +107,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) * @rdev: radeon_device pointer * @ib: IB object to schedule * @const_ib: Const IB to schedule (SI only) + * @hdp_flush: Whether or not to perform an HDP cache flush * * Schedule an IB on the associated ring (all asics). * Returns 0 on success, error on failure. @@ -122,7 +123,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) * to SI there was just a DE IB. */ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, - struct radeon_ib *const_ib) + struct radeon_ib *const_ib, bool hdp_flush) { struct radeon_ring *ring = &rdev->ring[ib->ring]; int r = 0; @@ -176,7 +177,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, if (ib->vm) radeon_vm_fence(rdev, ib->vm, ib->fence); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, hdp_flush); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 5b4e0cf231a0..0678998dbcb8 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -177,16 +177,18 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information + * @hdp_flush: Whether or not to perform an HDP cache flush * * Update the wptr (write pointer) to tell the GPU to * execute new commands on the ring buffer (all asics). */ -void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) +void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring, + bool hdp_flush) { /* If we are emitting the HDP flush via the ring buffer, we need to * do it before padding. */ - if (rdev->asic->ring[ring->idx]->hdp_flush) + if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush) rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring); /* We pad to match fetch size */ while (ring->wptr & ring->align_mask) { @@ -196,7 +198,7 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) /* If we are emitting the HDP flush via MMIO, we need to do it after * all CPU writes to VRAM finished. */ - if (rdev->asic->mmio_hdp_flush) + if (hdp_flush && rdev->asic->mmio_hdp_flush) rdev->asic->mmio_hdp_flush(rdev); radeon_ring_set_wptr(rdev, ring); } @@ -207,12 +209,14 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) * * @rdev: radeon_device pointer * @ring: radeon_ring structure holding ring information + * @hdp_flush: Whether or not to perform an HDP cache flush * * Call radeon_ring_commit() then unlock the ring (all asics). */ -void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring) +void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring, + bool hdp_flush) { - radeon_ring_commit(rdev, ring); + radeon_ring_commit(rdev, ring, hdp_flush); mutex_unlock(&rdev->ring_lock); } @@ -372,7 +376,7 @@ int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, data[i]); } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); kfree(data); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index dbd6bcde92de..56d9fd66d8ae 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -179,7 +179,7 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, continue; } - radeon_ring_commit(rdev, &rdev->ring[i]); + radeon_ring_commit(rdev, &rdev->ring[i], false); radeon_fence_note_sync(fence, ring); semaphore->gpu_addr += 8; diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 5adf4207453d..17bc3dced9f1 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -288,7 +288,7 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev, return r; } radeon_fence_emit(rdev, fence, ring->idx); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } return 0; } @@ -313,7 +313,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringA); + radeon_ring_unlock_commit(rdev, ringA, false); r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1); if (r) @@ -325,7 +325,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringA); + radeon_ring_unlock_commit(rdev, ringA, false); r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2); if (r) @@ -344,7 +344,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringB); + radeon_ring_unlock_commit(rdev, ringB, false); r = radeon_fence_wait(fence1, false); if (r) { @@ -365,7 +365,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringB); + radeon_ring_unlock_commit(rdev, ringB, false); r = radeon_fence_wait(fence2, false); if (r) { @@ -408,7 +408,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringA); + radeon_ring_unlock_commit(rdev, ringA, false); r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA); if (r) @@ -420,7 +420,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringB); + radeon_ring_unlock_commit(rdev, ringB, false); r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB); if (r) goto out_cleanup; @@ -442,7 +442,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringC); + radeon_ring_unlock_commit(rdev, ringC, false); for (i = 0; i < 30; ++i) { mdelay(100); @@ -468,7 +468,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev, goto out_cleanup; } radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); - radeon_ring_unlock_commit(rdev, ringC); + radeon_ring_unlock_commit(rdev, ringC, false); mdelay(1000); diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 6bf55ec85b62..341848a14376 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -646,7 +646,7 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev, ib.ptr[i] = PACKET2(0); ib.length_dw = 16; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) goto err; ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence); diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index f9b70a43aa52..c7190aadbd89 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -368,7 +368,7 @@ int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring, for (i = ib.length_dw; i < ib_size_dw; ++i) ib.ptr[i] = 0x0; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); } @@ -425,7 +425,7 @@ int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring, for (i = ib.length_dw; i < ib_size_dw; ++i) ib.ptr[i] = 0x0; - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); } @@ -715,7 +715,7 @@ int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } radeon_ring_write(ring, VCE_CMD_END); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { if (vce_v1_0_get_rptr(rdev, ring) != rptr) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 058f20085369..832ef320c26d 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -422,7 +422,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev, radeon_asic_vm_pad_ib(rdev, &ib); WARN_ON(ib.length_dw > 64); - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) goto error; @@ -699,7 +699,7 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev, radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj); radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use); WARN_ON(ib.length_dw > ndw); - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib); return r; @@ -963,7 +963,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, WARN_ON(ib.length_dw > ndw); radeon_semaphore_sync_to(ib.semaphore, vm->fence); - r = radeon_ib_schedule(rdev, &ib, NULL); + r = radeon_ib_schedule(rdev, &ib, NULL, false); if (r) { radeon_ib_free(rdev, &ib); return r; diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 3e21e869015f..8a477bf1fdb3 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -124,7 +124,7 @@ void rv515_ring_start(struct radeon_device *rdev, struct radeon_ring *ring) radeon_ring_write(ring, GEOMETRY_ROUND_NEAREST | COLOR_ROUND_NEAREST); radeon_ring_write(ring, PACKET0(0x20C8, 0)); radeon_ring_write(ring, 0); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } int rv515_mc_wait_for_idle(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c index bbf2e076ee45..74426ac2bb5c 100644 --- a/drivers/gpu/drm/radeon/rv770_dma.c +++ b/drivers/gpu/drm/radeon/rv770_dma.c @@ -90,7 +90,7 @@ int rv770_copy_dma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index dbd9d8101f05..7e58423c0bf6 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3541,7 +3541,7 @@ static int si_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE)); radeon_ring_write(ring, 0xc000); radeon_ring_write(ring, 0xe000); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); si_cp_enable(rdev, true); @@ -3570,7 +3570,7 @@ static int si_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) { ring = &rdev->ring[i]; @@ -3580,7 +3580,7 @@ static int si_cp_start(struct radeon_device *rdev) radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0)); radeon_ring_write(ring, 0); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); } return 0; diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index 716505129450..7c22baaf94db 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -275,7 +275,7 @@ int si_copy_dma(struct radeon_device *rdev, return r; } - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); radeon_semaphore_free(rdev, &sem, *fence); return r; diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index be42c8125203..cda391347286 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -124,7 +124,7 @@ int uvd_v1_0_init(struct radeon_device *rdev) radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0)); radeon_ring_write(ring, 3); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); done: /* lower clocks again */ @@ -331,7 +331,7 @@ int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) } radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0)); radeon_ring_write(ring, 0xDEADBEEF); - radeon_ring_unlock_commit(rdev, ring); + radeon_ring_unlock_commit(rdev, ring, false); for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(UVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) From c940b4476f4fb649f6493b6a0ae837474ded8915 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 18 Aug 2014 11:57:28 -0400 Subject: [PATCH 3/9] drm/radeon: fix pm handling in radeon_gpu_reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pm_suspend is handled in the radeon_suspend callbacks. pm_resume has special handling depending on whether dpm or legacy pm is enabled. Change radeon_gpu_reset to mirror the behavior in the suspend and resume pathes. Signed-off-by: Alex Deucher Reviewed-by: Christian König Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_device.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index c8ea050c8fa4..8e6187055061 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1680,7 +1680,6 @@ int radeon_gpu_reset(struct radeon_device *rdev) radeon_save_bios_scratch_regs(rdev); /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); - radeon_pm_suspend(rdev); radeon_suspend(rdev); for (i = 0; i < RADEON_NUM_RINGS; ++i) { @@ -1726,9 +1725,24 @@ retry: } } - radeon_pm_resume(rdev); + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) { + /* do dpm late init */ + r = radeon_pm_late_init(rdev); + if (r) { + rdev->pm.dpm_enabled = false; + DRM_ERROR("radeon_pm_late_init failed, disabling dpm\n"); + } + } else { + /* resume old pm late */ + radeon_pm_resume(rdev); + } + drm_helper_resume_force_mode(rdev->ddev); + /* set the power state here in case we are a PX system or headless */ + if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) + radeon_pm_compute_clocks(rdev); + ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); if (r) { /* bad news, how to tell it to userspace ? */ From 73ef0e0d62de4a8d40d34a6f645faee2f6e1ac33 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 18 Aug 2014 16:51:46 -0400 Subject: [PATCH 4/9] drm/radeon: fix display handling in radeon_gpu_reset If the display hw was reset or a hard reset was used, we need to re-init some of the common display hardware as well. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_device.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 8e6187055061..6a219bcee66d 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1681,6 +1681,7 @@ int radeon_gpu_reset(struct radeon_device *rdev) /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); radeon_suspend(rdev); + radeon_hpd_fini(rdev); for (i = 0; i < RADEON_NUM_RINGS; ++i) { ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i], @@ -1737,6 +1738,21 @@ retry: radeon_pm_resume(rdev); } + /* init dig PHYs, disp eng pll */ + if (rdev->is_atom_bios) { + radeon_atom_encoder_init(rdev); + radeon_atom_disp_eng_pll_init(rdev); + /* turn on the BL */ + if (rdev->mode_info.bl_encoder) { + u8 bl_level = radeon_get_backlight_level(rdev, + rdev->mode_info.bl_encoder); + radeon_set_backlight_level(rdev, rdev->mode_info.bl_encoder, + bl_level); + } + } + /* reset hpd state */ + radeon_hpd_init(rdev); + drm_helper_resume_force_mode(rdev->ddev); /* set the power state here in case we are a PX system or headless */ From 86302eeadebfab94530b00f5e53a23f911ff41e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Mon, 18 Aug 2014 16:30:12 +0200 Subject: [PATCH 5/9] drm/radeon: Sync ME and PFP after CP semaphore waits v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes lockups due to CP read GPUVM faults when running piglit on Cape Verde. v2 (chk): apply the fix to R600+ as well, on CIK only the GFX CP has a PFP, add more comments to R600 code, enable flushing again v3: (agd5f): only apply to 7xx+. r6xx does not have the packet. v4: (agd5f): split flush change into a separate patch, fix formatting Signed-off-by: Michel Dänzer Signed-off-by: Christian König Signed-off-by: Alex Deucher Tested-by: Michel Dänzer --- drivers/gpu/drm/radeon/cik.c | 17 +++++++++++++++++ drivers/gpu/drm/radeon/r600.c | 18 ++++++++++++++++++ drivers/gpu/drm/radeon/r600d.h | 1 + 3 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index d61bc1841d5e..af2cbc647c73 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3920,6 +3920,17 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, 0); } +/** + * cik_semaphore_ring_emit - emit a semaphore on the CP ring + * + * @rdev: radeon_device pointer + * @ring: radeon ring buffer object + * @semaphore: radeon semaphore object + * @emit_wait: Is this a sempahore wait? + * + * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP + * from running ahead of semaphore waits. + */ bool cik_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, @@ -3932,6 +3943,12 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, lower_32_bits(addr)); radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); + if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) { + /* Prevent the PFP from running ahead of the semaphore wait */ + radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + radeon_ring_write(ring, 0x0); + } + return true; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 1e4770138fd4..e8bf0ea2dade 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2753,6 +2753,17 @@ void r600_fence_ring_emit(struct radeon_device *rdev, } } +/** + * r600_semaphore_ring_emit - emit a semaphore on the CP ring + * + * @rdev: radeon_device pointer + * @ring: radeon ring buffer object + * @semaphore: radeon semaphore object + * @emit_wait: Is this a sempahore wait? + * + * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP + * from running ahead of semaphore waits. + */ bool r600_semaphore_ring_emit(struct radeon_device *rdev, struct radeon_ring *ring, struct radeon_semaphore *semaphore, @@ -2768,6 +2779,13 @@ bool r600_semaphore_ring_emit(struct radeon_device *rdev, radeon_ring_write(ring, lower_32_bits(addr)); radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | sel); + /* PFP_SYNC_ME packet only exists on 7xx+ */ + if (emit_wait && (rdev->family >= CHIP_RV770)) { + /* Prevent the PFP from running ahead of the semaphore wait */ + radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); + radeon_ring_write(ring, 0x0); + } + return true; } diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index f94e7a9afe75..0c4a7d8d93e0 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -1597,6 +1597,7 @@ */ # define PACKET3_CP_DMA_CMD_SAIC (1 << 28) # define PACKET3_CP_DMA_CMD_DAIC (1 << 29) +#define PACKET3_PFP_SYNC_ME 0x42 /* r7xx+ only */ #define PACKET3_SURFACE_SYNC 0x43 # define PACKET3_CB0_DEST_BASE_ENA (1 << 6) # define PACKET3_FULL_CACHE_ENA (1 << 20) /* r7xx+ only */ From cd1c9c1a4b06d3bc264e774ad84c410ce02e124e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 19 Aug 2014 11:48:30 -0400 Subject: [PATCH 6/9] drm/radeon: re-enable selective GPUVM flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the PFP and ME synchronization is fixed, we can enable this again reliably. Signed-off-by: Alex Deucher Tested-by: Michel Dänzer --- drivers/gpu/drm/radeon/radeon_vm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 832ef320c26d..088ffdc2f577 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -238,9 +238,7 @@ void radeon_vm_flush(struct radeon_device *rdev, uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); /* if we can't remember our last VM flush then flush now! */ - /* XXX figure out why we have to flush all the time before CIK */ - if (rdev->family < CHIP_BONAIRE || - !vm->last_flush || pd_addr != vm->pd_gpu_addr) { + if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) { trace_radeon_vm_flush(pd_addr, ring, vm->id); vm->pd_gpu_addr = pd_addr; radeon_ring_vm_flush(rdev, ring, vm); From 6101b3ae94b4f266456308824e9ca4eab1235d1a Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 19 Aug 2014 11:54:15 -0400 Subject: [PATCH 7/9] drm/radeon: fix active cu count for SI and CIK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the CU count reported to userspace for OpenCL. bug: https://bugzilla.kernel.org/show_bug.cgi?id=82581 Signed-off-by: Alex Deucher Reviewed-by: Michel Dänzer Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 8 +++----- drivers/gpu/drm/radeon/si.c | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index af2cbc647c73..23d56c689e12 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3483,7 +3483,7 @@ static void cik_gpu_init(struct radeon_device *rdev) u32 mc_shared_chmap, mc_arb_ramcfg; u32 hdp_host_path_cntl; u32 tmp; - int i, j, k; + int i, j; switch (rdev->family) { case CHIP_BONAIRE: @@ -3674,10 +3674,8 @@ static void cik_gpu_init(struct radeon_device *rdev) for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { - for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) { - rdev->config.cik.active_cus += - hweight32(cik_get_cu_active_bitmap(rdev, i, j)); - } + rdev->config.cik.active_cus += + hweight32(cik_get_cu_active_bitmap(rdev, i, j)); } } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 7e58423c0bf6..c8f359ca64d1 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3057,7 +3057,7 @@ static void si_gpu_init(struct radeon_device *rdev) u32 sx_debug_1; u32 hdp_host_path_cntl; u32 tmp; - int i, j, k; + int i, j; switch (rdev->family) { case CHIP_TAHITI: @@ -3257,10 +3257,8 @@ static void si_gpu_init(struct radeon_device *rdev) for (i = 0; i < rdev->config.si.max_shader_engines; i++) { for (j = 0; j < rdev->config.si.max_sh_per_se; j++) { - for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) { - rdev->config.si.active_cus += - hweight32(si_get_cu_active_bitmap(rdev, i, j)); - } + rdev->config.si.active_cus += + hweight32(si_get_cu_active_bitmap(rdev, i, j)); } } From 52da51f0f9ea9d213adfc99223630707b26d1d38 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 19 Aug 2014 11:56:38 -0400 Subject: [PATCH 8/9] drm/radeon: fix active_cu mask on SI and CIK after re-init (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Need to initialize the mask to 0 on init, otherwise it keeps increasing. bug: https://bugzilla.kernel.org/show_bug.cgi?id=82581 v2: also fix cu count v3: split count fix into separate patch Signed-off-by: Alex Deucher Reviewed-by: Michel Dänzer Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 1 + drivers/gpu/drm/radeon/si.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 23d56c689e12..f4e14702639d 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3672,6 +3672,7 @@ static void cik_gpu_init(struct radeon_device *rdev) rdev->config.cik.max_sh_per_se, rdev->config.cik.max_backends_per_se); + rdev->config.cik.active_cus = 0; for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { rdev->config.cik.active_cus += diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index c8f359ca64d1..a1274a31405c 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -3255,6 +3255,7 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.max_sh_per_se, rdev->config.si.max_cu_per_sh); + rdev->config.si.active_cus = 0; for (i = 0; i < rdev->config.si.max_shader_engines; i++) { for (j = 0; j < rdev->config.si.max_sh_per_se; j++) { rdev->config.si.active_cus += From b738ca5d68e4051c86e32f46f67a69f3bb9cee5e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 18 Aug 2014 12:32:05 -0400 Subject: [PATCH 9/9] Revert "drm/radeon: Use write-combined CPU mappings of ring buffers with PCIe" This reverts commit 1490434f0da63afc6006411c8829c6a7935a4e7e. Several people have reported regressions with this patch on kabini. --- drivers/gpu/drm/radeon/radeon_ring.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 0678998dbcb8..d65607902537 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -404,9 +404,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig /* Allocate ring buffer */ if (ring->ring_obj == NULL) { r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true, - RADEON_GEM_DOMAIN_GTT, - (rdev->flags & RADEON_IS_PCIE) ? - RADEON_GEM_GTT_WC : 0, + RADEON_GEM_DOMAIN_GTT, 0, NULL, &ring->ring_obj); if (r) { dev_err(rdev->dev, "(%d) ring create failed\n", r);