drm/amdgpu:fix world switch hang
for SR-IOV, we must keep the pipeline-sync in the protection of COND_EXEC, otherwise the command consumed by CPG is not consistent when a world switch is triggered, e.g.: a world switch hits and the IB frame is skipped so the fence won't signal, thus CP will jump to the next DMA frame's pipeline-sync command, and it will make CP hang forever. After the pipeline-sync is moved into COND_EXEC, the consistency can be guaranteed. Signed-off-by: Monk Liu <Monk.Liu@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
@@ -130,6 +130,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||||||
|
|
||||||
unsigned i;
|
unsigned i;
|
||||||
int r = 0;
|
int r = 0;
|
||||||
|
bool need_pipe_sync = false;
|
||||||
|
|
||||||
if (num_ibs == 0)
|
if (num_ibs == 0)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
@@ -165,7 +166,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||||||
if (ring->funcs->emit_pipeline_sync && job &&
|
if (ring->funcs->emit_pipeline_sync && job &&
|
||||||
((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
|
((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
|
||||||
amdgpu_vm_need_pipeline_sync(ring, job))) {
|
amdgpu_vm_need_pipeline_sync(ring, job))) {
|
||||||
amdgpu_ring_emit_pipeline_sync(ring);
|
need_pipe_sync = true;
|
||||||
dma_fence_put(tmp);
|
dma_fence_put(tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -173,7 +174,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
|
|||||||
ring->funcs->insert_start(ring);
|
ring->funcs->insert_start(ring);
|
||||||
|
|
||||||
if (job) {
|
if (job) {
|
||||||
r = amdgpu_vm_flush(ring, job);
|
r = amdgpu_vm_flush(ring, job, need_pipe_sync);
|
||||||
if (r) {
|
if (r) {
|
||||||
amdgpu_ring_undo(ring);
|
amdgpu_ring_undo(ring);
|
||||||
return r;
|
return r;
|
||||||
|
|||||||
@@ -743,7 +743,7 @@ static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
|
|||||||
*
|
*
|
||||||
* Emit a VM flush when it is necessary.
|
* Emit a VM flush when it is necessary.
|
||||||
*/
|
*/
|
||||||
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
|
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
|
||||||
{
|
{
|
||||||
struct amdgpu_device *adev = ring->adev;
|
struct amdgpu_device *adev = ring->adev;
|
||||||
unsigned vmhub = ring->funcs->vmhub;
|
unsigned vmhub = ring->funcs->vmhub;
|
||||||
@@ -765,12 +765,15 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
|
|||||||
vm_flush_needed = true;
|
vm_flush_needed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!vm_flush_needed && !gds_switch_needed)
|
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (ring->funcs->init_cond_exec)
|
if (ring->funcs->init_cond_exec)
|
||||||
patch_offset = amdgpu_ring_init_cond_exec(ring);
|
patch_offset = amdgpu_ring_init_cond_exec(ring);
|
||||||
|
|
||||||
|
if (need_pipe_sync)
|
||||||
|
amdgpu_ring_emit_pipeline_sync(ring);
|
||||||
|
|
||||||
if (ring->funcs->emit_vm_flush && vm_flush_needed) {
|
if (ring->funcs->emit_vm_flush && vm_flush_needed) {
|
||||||
struct dma_fence *fence;
|
struct dma_fence *fence;
|
||||||
|
|
||||||
|
|||||||
@@ -222,7 +222,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
|
|||||||
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
||||||
struct amdgpu_sync *sync, struct dma_fence *fence,
|
struct amdgpu_sync *sync, struct dma_fence *fence,
|
||||||
struct amdgpu_job *job);
|
struct amdgpu_job *job);
|
||||||
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
|
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync);
|
||||||
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
|
void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vmhub,
|
||||||
unsigned vmid);
|
unsigned vmid);
|
||||||
void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
|
void amdgpu_vm_reset_all_ids(struct amdgpu_device *adev);
|
||||||
|
|||||||
Reference in New Issue
Block a user