From df67bed92fa86ef926da8b62a6da68722388ff72 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 30 Oct 2009 13:31:26 +1000 Subject: [PATCH 1/6] drm/radeon/kms: fix coherency issues on AGP cards. When we are evicting from VRAM->RAM we allocate the ttm object, but we don't set the caching policy on it before blitting into it. This means on AGP we end up blitting into cached pages, and the CPU later flushes out on top of them. This was mostly seen as font corruption. The other question is why we don't evict VRAM->GTT in a lot of cases, this would save us some cache transitions since a lot of objects that are evicted from VRAM will probably end up being pulled back in a few operations later, and evicting them to system memory involves 2 unnecessary cache transitions. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_ttm.c | 6 ++++++ drivers/gpu/drm/ttm/ttm_tt.c | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index c729cd1a7506..f489c0de6f13 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -295,6 +295,12 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo, if (unlikely(r)) { return r; } + + r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement); + if (unlikely(r)) { + goto out_cleanup; + } + r = ttm_tt_bind(bo->ttm, &tmp_mem); if (unlikely(r)) { goto out_cleanup; diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index a55ee1a56c16..7bcb89f39ce8 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -279,6 +279,7 @@ int ttm_tt_set_placement_caching(struct ttm_tt *ttm, uint32_t placement) return ttm_tt_set_caching(ttm, state); } +EXPORT_SYMBOL(ttm_tt_set_placement_caching); static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm) { From d6f28938d9426d12eea1578949f1d73d24ad37ec Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 2 Nov 2009 16:01:27 -0500 Subject: [PATCH 2/6] drm/radeon/kms: Don't RMW CP_RB_CNTL Immediate readback seems faulty on some chips. I suspect it takes a while to get through the fifo to the actual register backbone. There's no need to read it back, so, just write the driver's copy of the register's value directly. Should fix bug 24535 and possibly 24218 Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r100.c | 12 ++++++------ drivers/gpu/drm/radeon/r600.c | 8 +++----- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index b438b520ee7f..5e821a313a8c 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -578,19 +578,19 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) indirect1_start = 16; /* cp setup */ WREG32(0x718, pre_write_timer | (pre_write_limit << 28)); - WREG32(RADEON_CP_RB_CNTL, -#ifdef __BIG_ENDIAN - RADEON_BUF_SWAP_32BIT | -#endif - REG_SET(RADEON_RB_BUFSZ, rb_bufsz) | + tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) | REG_SET(RADEON_RB_BLKSZ, rb_blksz) | REG_SET(RADEON_MAX_FETCH, max_fetch) | RADEON_RB_NO_UPDATE); +#ifdef __BIG_ENDIAN + tmp |= RADEON_BUF_SWAP_32BIT; +#endif + WREG32(RADEON_CP_RB_CNTL, tmp); + /* Set ring address */ DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr); WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr); /* Force read & write ptr to 0 */ - tmp = RREG32(RADEON_CP_RB_CNTL); WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA); WREG32(RADEON_CP_RB_RPTR_WR, 0); WREG32(RADEON_CP_RB_WPTR, 0); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e87475c87d52..60fbb236edfd 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1272,19 +1272,17 @@ int r600_cp_resume(struct radeon_device *rdev) /* Set ring buffer size */ rb_bufsz = drm_order(rdev->cp.ring_size / 8); + tmp = RB_NO_UPDATE | (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; #ifdef __BIG_ENDIAN - WREG32(CP_RB_CNTL, BUF_SWAP_32BIT | RB_NO_UPDATE | - (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz); -#else - WREG32(CP_RB_CNTL, RB_NO_UPDATE | (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz); + tmp |= BUF_SWAP_32BIT; #endif + WREG32(CP_RB_CNTL, tmp); WREG32(CP_SEM_WAIT_TIMER, 0x4); /* Set the write pointer delay */ WREG32(CP_RB_WPTR_DELAY, 0); /* Initialize the ring buffer's read and write pointers */ - tmp = RREG32(CP_RB_CNTL); WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA); WREG32(CP_RB_RPTR_WR, 0); WREG32(CP_RB_WPTR, 0); From 8a9832e89ff1bc7a039c8f966f07101570be3d5e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 3 Nov 2009 13:23:15 +1000 Subject: [PATCH 3/6] drm/radeon/kms: disable D1VGA and D2VGA if enabled Once kms is enabled we don't need these, and it causes a problem with the Lenovo W500 ACPI brightness implementation, it hangs in a loop inside an SMI. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/rv515.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 41a34c23e6d8..03c052d892c0 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -137,6 +137,8 @@ int rv515_mc_wait_for_idle(struct radeon_device *rdev) void rv515_vga_render_disable(struct radeon_device *rdev) { + WREG32(R_000330_D1VGA_CONTROL, 0); + WREG32(R_000338_D2VGA_CONTROL, 0); WREG32(R_000300_VGA_RENDER_CONTROL, RREG32(R_000300_VGA_RENDER_CONTROL) & C_000300_VGA_VSTATUS_CNTL); } From 4d357abb895ec51f1cbdebb1fbbf4d4576900a2e Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 3 Nov 2009 14:54:36 +1000 Subject: [PATCH 4/6] drm/radeon/kms: stop putting VRAM at 0 in MC space on r600s. The Lenovo W500 laptop hangs inside an SMI on brightness changes, I thought it just needed the VGA disable but it turned out to require slightly more work, setting the MC locations up just like the IGP chip requirements seems to make it all happy again and I can boot and play with brightness. We should probably just do this for all chips and give up the VRAM at 0x0 idea, it never seems to buy us anything but pain. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600.c | 48 +++++++++++++++-------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 60fbb236edfd..3e5703f324bd 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -409,35 +409,29 @@ int r600_mc_init(struct radeon_device *rdev) rdev->mc.gtt_location = rdev->mc.mc_vram_size; } } else { - if (rdev->family == CHIP_RS780 || rdev->family == CHIP_RS880) { - rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) & - 0xFFFF) << 24; - rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; - tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size; - if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) { - /* Enough place after vram */ - rdev->mc.gtt_location = tmp; - } else if (rdev->mc.vram_location >= rdev->mc.gtt_size) { - /* Enough place before vram */ - rdev->mc.gtt_location = 0; - } else { - /* Not enough place after or before shrink - * gart size - */ - if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) { - rdev->mc.gtt_location = 0; - rdev->mc.gtt_size = rdev->mc.vram_location; - } else { - rdev->mc.gtt_location = tmp; - rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp; - } - } - rdev->mc.gtt_location = rdev->mc.mc_vram_size; + rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + rdev->mc.vram_location = (RREG32(MC_VM_FB_LOCATION) & + 0xFFFF) << 24; + tmp = rdev->mc.vram_location + rdev->mc.mc_vram_size; + if ((0xFFFFFFFFUL - tmp) >= rdev->mc.gtt_size) { + /* Enough place after vram */ + rdev->mc.gtt_location = tmp; + } else if (rdev->mc.vram_location >= rdev->mc.gtt_size) { + /* Enough place before vram */ + rdev->mc.gtt_location = 0; } else { - rdev->mc.vram_location = 0x00000000UL; - rdev->mc.gtt_location = rdev->mc.mc_vram_size; - rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; + /* Not enough place after or before shrink + * gart size + */ + if (rdev->mc.vram_location > (0xFFFFFFFFUL - tmp)) { + rdev->mc.gtt_location = 0; + rdev->mc.gtt_size = rdev->mc.vram_location; + } else { + rdev->mc.gtt_location = tmp; + rdev->mc.gtt_size = 0xFFFFFFFFUL - tmp; + } } + rdev->mc.gtt_location = rdev->mc.mc_vram_size; } rdev->mc.vram_start = rdev->mc.vram_location; rdev->mc.vram_end = rdev->mc.vram_location + rdev->mc.mc_vram_size - 1; From 6fa8d66af83710b3610bd3b2581f051074f2b416 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 3 Nov 2009 16:10:36 -0500 Subject: [PATCH 5/6] drm/radeon/kms: remove some misleading debugging output Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_bios.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 34a9b9119518..906921740c60 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -50,19 +50,16 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev) vram_base = drm_get_resource_start(rdev->ddev, 0); bios = ioremap(vram_base, size); if (!bios) { - DRM_ERROR("Unable to mmap vram\n"); return false; } if (size == 0 || bios[0] != 0x55 || bios[1] != 0xaa) { iounmap(bios); - DRM_ERROR("bad rom signature\n"); return false; } rdev->bios = kmalloc(size, GFP_KERNEL); if (rdev->bios == NULL) { iounmap(bios); - DRM_ERROR("kmalloc failed\n"); return false; } memcpy(rdev->bios, bios, size); From e29649db3bd5620499bf9bdcd63c5cf12edbd26e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 3 Nov 2009 10:04:01 -0500 Subject: [PATCH 6/6] drm/radeon/kms/r700: fix some typos in chip init Noticed by Andre on IRC. Also fix up some minor whitespace issues. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/rv770.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 40553913b928..ae074fdf804d 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -529,11 +529,11 @@ static void rv770_gpu_init(struct radeon_device *rdev) if (rdev->family == CHIP_RV770) gb_tiling_config |= BANK_TILING(1); else - gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_SHIFT) >> NOOFBANK_MASK); + gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT); gb_tiling_config |= GROUP_SIZE(0); - if (((mc_arb_ramcfg & NOOFROWS_MASK) & NOOFROWS_SHIFT) > 3) { + if (((mc_arb_ramcfg & NOOFROWS_MASK) >> NOOFROWS_SHIFT) > 3) { gb_tiling_config |= ROW_TILING(3); gb_tiling_config |= SAMPLE_SPLIT(3); } else { @@ -579,14 +579,14 @@ static void rv770_gpu_init(struct radeon_device *rdev) /* set HW defaults for 3D engine */ WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | - ROQ_IB2_START(0x2b))); + ROQ_IB2_START(0x2b))); WREG32(CP_MEQ_THRESHOLDS, STQ_SPLIT(0x30)); WREG32(TA_CNTL_AUX, (DISABLE_CUBE_ANISO | - SYNC_GRADIENT | - SYNC_WALKER | - SYNC_ALIGNER)); + SYNC_GRADIENT | + SYNC_WALKER | + SYNC_ALIGNER)); sx_debug_1 = RREG32(SX_DEBUG_1); sx_debug_1 |= ENABLE_NEW_SMX_ADDRESS; @@ -598,9 +598,9 @@ static void rv770_gpu_init(struct radeon_device *rdev) WREG32(SMX_DC_CTL0, smx_dc_ctl0); WREG32(SMX_EVENT_CTL, (ES_FLUSH_CTL(4) | - GS_FLUSH_CTL(4) | - ACK_FLUSH_CTL(3) | - SYNC_FLUSH_CTL)); + GS_FLUSH_CTL(4) | + ACK_FLUSH_CTL(3) | + SYNC_FLUSH_CTL)); if (rdev->family == CHIP_RV770) WREG32(DB_DEBUG3, DB_CLK_OFF_DELAY(0x1f)); @@ -611,12 +611,12 @@ static void rv770_gpu_init(struct radeon_device *rdev) } WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.rv770.sx_max_export_size / 4) - 1) | - POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) | - SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1))); + POSITION_BUFFER_SIZE((rdev->config.rv770.sx_max_export_pos_size / 4) - 1) | + SMX_BUFFER_SIZE((rdev->config.rv770.sx_max_export_smx_size / 4) - 1))); WREG32(PA_SC_FIFO_SIZE, (SC_PRIM_FIFO_SIZE(rdev->config.rv770.sc_prim_fifo_size) | - SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) | - SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize))); + SC_HIZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_hiz_tile_fifo_size) | + SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.rv770.sc_earlyz_tile_fifo_fize))); WREG32(PA_SC_MULTI_CHIP_CNTL, 0);