drm/amdgpu: Fix per-IB secure flag GFX hang
Since commit "Move to a per-IB secure flag (TMZ)", we've been seeing hangs in
GFX. We need to send FRAME CONTROL stop/start back-to-back every time we flip
the TMZ flag. That is, when we transition from TMZ to non-TMZ we have to send
a stop with TMZ followed by a start with non-TMZ, and similarly for
transitioning from non-TMZ into TMZ. This patch implements this, thus fixing
the GFX hang.

v1 -> v2: As suggested by Luben, accepting part of the implementation from
that patch:
- Put "secure" closer to the loop and use the optimized toggle.
- Change "secure" back to a bool, and move the "secure == -1" handling out of
  the loop.

v3: Small fixes/optimizations.

Reported-and-Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
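To make the required ordering concrete, here is a minimal, driver-independent
sketch of the sequencing this patch enforces. schedule_ibs(),
emit_frame_control() and emit_ib() are hypothetical stand-ins used only for
illustration, not amdgpu functions:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the ring callbacks; only the ordering matters. */
static void emit_frame_control(bool start, bool secure)
{
	printf("FRAME_CONTROL %s %s\n", start ? "start" : "stop",
	       secure ? "TMZ" : "non-TMZ");
}

static void emit_ib(int idx)
{
	printf("IB %d\n", idx);
}

/* Every flip of the per-IB secure flag is bracketed by a stop in the old
 * mode followed immediately by a start in the new mode; the whole job is
 * opened before the first IB and closed after the last one. */
static void schedule_ibs(const bool *ib_secure, int num_ibs)
{
	bool secure = ib_secure[0];
	int i;

	emit_frame_control(true, secure);                  /* initial start */
	for (i = 0; i < num_ibs; ++i) {
		if (secure != ib_secure[i]) {
			emit_frame_control(false, secure); /* stop old mode */
			secure = !secure;
			emit_frame_control(true, secure);  /* start new mode */
		}
		emit_ib(i);
	}
	emit_frame_control(false, secure);                 /* final stop */
}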
@@ -218,7 +218,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		amdgpu_ring_emit_cntxcntl(ring, status);
 	}
 
+	/* Setup initial TMZiness and send it off.
+	 */
 	secure = false;
+	if (job && ring->funcs->emit_frame_cntl) {
+		secure = ib->flags & AMDGPU_IB_FLAGS_SECURE;
+		amdgpu_ring_emit_frame_cntl(ring, true, secure);
+	}
+
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
 
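(A note for context, not part of the diff: at this point in
amdgpu_ib_schedule() the local ib pointer still refers to the first IB of the
array — assuming it is initialized as ib = &ibs[0] earlier in the function, as
in the surrounding code — so the new pre-loop block seeds secure from the
first IB's AMDGPU_IB_FLAGS_SECURE flag and opens the frame in that mode before
any IB is emitted.)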
@@ -230,27 +237,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		    !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
 			continue;
 
-		/* If this IB is TMZ, add frame TMZ start packet,
-		 * else, turn off TMZ.
-		 */
-		if (ib->flags & AMDGPU_IB_FLAGS_SECURE && ring->funcs->emit_tmz) {
-			if (!secure) {
-				secure = true;
-				amdgpu_ring_emit_tmz(ring, true);
+		if (job && ring->funcs->emit_frame_cntl) {
+			if (secure != !!(ib->flags & AMDGPU_IB_FLAGS_SECURE)) {
+				amdgpu_ring_emit_frame_cntl(ring, false, secure);
+				secure = !secure;
+				amdgpu_ring_emit_frame_cntl(ring, true, secure);
 			}
-		} else if (secure) {
-			secure = false;
-			amdgpu_ring_emit_tmz(ring, false);
 		}
 
 		amdgpu_ring_emit_ib(ring, job, ib, status);
 		status &= ~AMDGPU_HAVE_CTX_SWITCH;
 	}
 
-	if (secure) {
-		secure = false;
-		amdgpu_ring_emit_tmz(ring, false);
-	}
+	if (job && ring->funcs->emit_frame_cntl)
+		amdgpu_ring_emit_frame_cntl(ring, false, secure);
 
 #ifdef CONFIG_X86_64
 	if (!(adev->flags & AMD_IS_APU))
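To illustrate the rewritten loop, consider a hypothetical job of four IBs
whose AMDGPU_IB_FLAGS_SECURE flags are {0, 1, 1, 0}, on a ring that implements
emit_frame_cntl. The code above would emit:

FRAME_CONTROL start, non-TMZ    (pre-loop, seeded from the first IB)
IB 0
FRAME_CONTROL stop,  non-TMZ    (flag flips before IB 1)
FRAME_CONTROL start, TMZ
IB 1
IB 2                            (no flip, no extra packets)
FRAME_CONTROL stop,  TMZ        (flag flips before IB 3)
FRAME_CONTROL start, non-TMZ
IB 3
FRAME_CONTROL stop,  non-TMZ    (unconditional stop after the loop)

Every mode change is thus a back-to-back stop/start pair, and the closing stop
is now emitted unconditionally instead of only when the last IB happened to be
secure.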
@@ -177,7 +177,8 @@ struct amdgpu_ring_funcs {
 	void (*emit_reg_write_reg_wait)(struct amdgpu_ring *ring,
 					uint32_t reg0, uint32_t reg1,
 					uint32_t ref, uint32_t mask);
-	void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
+	void (*emit_frame_cntl)(struct amdgpu_ring *ring, bool start,
+				bool secure);
 	/* Try to soft recover the ring to make the fence signal */
 	void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
 	int (*preempt_ib)(struct amdgpu_ring *ring);
@@ -256,7 +257,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
 #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
-#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
+#define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
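For call sites, the wrapper macro changes accordingly. A sketch of the usage
pattern (the guard mirrors the one in amdgpu_ib_schedule() above; nothing here
is new API beyond what the diff introduces):

/* Old call site: toggle TMZ on or off around secure IBs. */
if (ring->funcs->emit_tmz)
	amdgpu_ring_emit_tmz(ring, true);

/* New call site: open or close a frame, stating whether it is TMZ.
 * The macro expands to ring->funcs->emit_frame_cntl(ring, start, secure),
 * so callers still check that the ring provides the hook first. */
if (job && ring->funcs->emit_frame_cntl)
	amdgpu_ring_emit_frame_cntl(ring, true, secure);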
@@ -3037,7 +3037,7 @@ static int gfx_v10_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev);
 static int gfx_v10_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
 static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume);
 static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start);
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, bool secure);
 
 static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
 {
@@ -7599,12 +7599,13 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 				   sizeof(de_payload) >> 2);
 }
 
-static void gfx_v10_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+					   bool secure)
 {
-	if (amdgpu_is_tmz(ring->adev)) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-	}
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
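The gfx_v10_0 implementation now emits the FRAME_CONTROL packet unconditionally
(the old amdgpu_is_tmz() guard is gone) and folds the TMZ-ness of the frame
into the payload. Spelled out, the second dword written above covers four
cases:

/* Payload of the second FRAME_CONTROL dword, as computed above:
 *   start=true,  secure=true   ->  FRAME_TMZ | FRAME_CMD(0)   TMZ frame start
 *   start=true,  secure=false  ->              FRAME_CMD(0)   frame start
 *   start=false, secure=true   ->  FRAME_TMZ | FRAME_CMD(1)   TMZ frame end
 *   start=false, secure=false  ->              FRAME_CMD(1)   frame end
 */

So the FRAME_CONTROL pair is now emitted even when TMZ is disabled, not only
on TMZ-enabled parts.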
@@ -8058,7 +8059,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
 	.preempt_ib = gfx_v10_0_ring_preempt_ib,
-	.emit_tmz = gfx_v10_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v10_0_ring_emit_reg_write_reg_wait,
@@ -5442,12 +5442,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
 }
 
-static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
+static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
+					  bool secure)
 {
-	if (amdgpu_is_tmz(ring->adev)) {
-		amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
-		amdgpu_ring_write(ring, FRAME_TMZ | FRAME_CMD(start ? 0 : 1));
-	}
+	uint32_t v = secure ? FRAME_TMZ : 0;
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
+	amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
 }
 
 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
@@ -6699,7 +6700,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
-	.emit_tmz = gfx_v9_0_ring_emit_tmz,
+	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,