Merge branch 'drm-next-5.1' of git://people.freedesktop.org/~agd5f/linux into drm-next

Fixes for 5.1:
amdgpu:
- Fix missing fw declaration after dropping old CI DPM code
- Fix debugfs access to registers beyond the MMIO bar size
- Fix context priority handling
- Add missing license on some new files
- Various cleanups and bug fixes

radeon:
- Fix missing break in CS parser for evergreen
- Various cleanups and bug fixes

sched:
- Fix entities with 0 run queues

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190221214134.3308-1-alexander.deucher@amd.com
Dave Airlie committed 2019-02-22 15:56:35 +10:00
commit fbac3c48fa
60 changed files with 704 additions and 615 deletions

View File

@ -411,6 +411,8 @@ struct amdgpu_fpriv {
struct amdgpu_ctx_mgr ctx_mgr;
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,

View File

@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i, n;
int i;
int last_valid_bit;
if (adev->kfd.dev) {
@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.gpuvm_size = min(adev->vm_manager.max_pfn
<< AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GMC_HOLE_START),
.drm_render_minor = adev->ddev->render->index
.drm_render_minor = adev->ddev->render->index,
.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
};
/* this is going to have a few of the MSBs set that we need to
@ -172,35 +174,20 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
if (adev->asic_type < CHIP_VEGA10) {
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
return;
}
n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
for (i = 0; i < n; i += 2) {
/* On SOC15 the BIF is involved in routing
* doorbells using the low 12 bits of the
* address. Communicate the assignments to
* KFD. KFD uses two doorbell pages per
* process in case of 64-bit doorbells so we
* can use each doorbell assignment twice.
*/
gpu_resources.sdma_doorbell[0][i] =
adev->doorbell_index.sdma_engine[0] + (i >> 1);
gpu_resources.sdma_doorbell[0][i+1] =
adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1);
gpu_resources.sdma_doorbell[1][i] =
adev->doorbell_index.sdma_engine[1] + (i >> 1);
gpu_resources.sdma_doorbell[1][i+1] =
adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);
}
/* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
* SDMA, IH and VCN. So don't use them for the CP.
/* Since SOC15, BIF starts to statically use the
* lower 12 bits of doorbell addresses for routing
* based on settings in registers like
* SDMA0_DOORBELL_RANGE etc..
* In order to route a doorbell to CP engine, the lower
* 12 bits of its address has to be outside the range
* set for SDMA, VCN, and IH blocks.
*/
gpu_resources.reserved_doorbell_mask = 0x1e0;
gpu_resources.reserved_doorbell_val = 0x0e0;
if (adev->asic_type >= CHIP_VEGA10) {
gpu_resources.non_cp_doorbells_start =
adev->doorbell_index.first_non_cp;
gpu_resources.non_cp_doorbells_end =
adev->doorbell_index.last_non_cp;
}
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
}
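
The old reserved_doorbell_mask/val pair is replaced by an explicit index range. As a rough sketch only (not part of the patch; the struct name follows the dev->shared_resources usage later in this series), a consumer could test a doorbell index against the new fields like this:

static bool doorbell_is_non_cp(const struct kgd2kfd_shared_resources *res,
			       uint32_t doorbell_index)
{
	/* BIF routes this range to SDMA/IH/VCN, so it must never be
	 * handed out to a CP queue.
	 */
	return doorbell_index >= res->non_cp_doorbells_start &&
	       doorbell_index <= res->non_cp_doorbells_end;
}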

View File

@ -204,38 +204,25 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
}
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
* reservation object.
*
* @bo: [IN] Remove eviction fence(s) from this BO
* @ef: [IN] If ef is specified, then this eviction fence is removed if it
* @ef: [IN] This eviction fence is removed if it
* is present in the shared list.
* @ef_list: [OUT] Returns list of eviction fences. These fences are removed
* from BO's reservation object shared list.
* @ef_count: [OUT] Number of fences in ef_list.
*
* NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
* called to restore the eviction fences and to avoid memory leak. This is
* useful for shared BOs.
* NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
*/
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence *ef,
struct amdgpu_amdkfd_fence ***ef_list,
unsigned int *ef_count)
struct amdgpu_amdkfd_fence *ef)
{
struct reservation_object *resv = bo->tbo.resv;
struct reservation_object_list *old, *new;
unsigned int i, j, k;
if (!ef && !ef_list)
if (!ef)
return -EINVAL;
if (ef_list) {
*ef_list = NULL;
*ef_count = 0;
}
old = reservation_object_get_list(resv);
if (!old)
return 0;
@ -254,8 +241,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
f = rcu_dereference_protected(old->shared[i],
reservation_object_held(resv));
if ((ef && f->context == ef->base.context) ||
(!ef && to_amdgpu_amdkfd_fence(f)))
if (f->context == ef->base.context)
RCU_INIT_POINTER(new->shared[--j], f);
else
RCU_INIT_POINTER(new->shared[k++], f);
@ -263,21 +249,6 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
new->shared_max = old->shared_max;
new->shared_count = k;
if (!ef) {
unsigned int count = old->shared_count - j;
/* Alloc memory for count number of eviction fence pointers.
* Fill the ef_list array and ef_count
*/
*ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
*ef_count = count;
if (!*ef_list) {
kfree(new);
return -ENOMEM;
}
}
/* Install the new fence list, seqcount provides the barriers */
preempt_disable();
write_seqcount_begin(&resv->seq);
@ -291,46 +262,13 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
f = rcu_dereference_protected(new->shared[i],
reservation_object_held(resv));
if (!ef)
(*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
else
dma_fence_put(f);
dma_fence_put(f);
}
kfree_rcu(old, rcu);
return 0;
}
/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
* reservation object.
*
* @bo: [IN] Add eviction fences to this BO
* @ef_list: [IN] List of eviction fences to be added
* @ef_count: [IN] Number of fences in ef_list.
*
* NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
* function.
*/
static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence **ef_list,
unsigned int ef_count)
{
int i;
if (!ef_list || !ef_count)
return;
for (i = 0; i < ef_count; i++) {
amdgpu_bo_fence(bo, &ef_list[i]->base, true);
/* Re-adding the fence takes an additional reference. Drop that
* reference.
*/
dma_fence_put(&ef_list[i]->base);
}
kfree(ef_list);
}
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
bool wait)
{
@ -346,18 +284,8 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
goto validate_fail;
if (wait) {
struct amdgpu_amdkfd_fence **ef_list;
unsigned int ef_count;
ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
&ef_count);
if (ret)
goto validate_fail;
ttm_bo_wait(&bo->tbo, false, false);
amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
}
if (wait)
amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
validate_fail:
return ret;
@ -444,7 +372,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
{
int ret;
struct kfd_bo_va_list *bo_va_entry;
struct amdgpu_bo *pd = vm->root.base.bo;
struct amdgpu_bo *bo = mem->bo;
uint64_t va = mem->va;
struct list_head *list_bo_va = &mem->bo_va_list;
@ -484,14 +411,8 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
*p_bo_va_entry = bo_va_entry;
/* Allocate new page tables if needed and validate
* them. Clearing of new page tables and validate need to wait
* on move fences. We don't want that to trigger the eviction
* fence, so remove it temporarily.
* them.
*/
amdgpu_amdkfd_remove_eviction_fence(pd,
vm->process_info->eviction_fence,
NULL, NULL);
ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
if (ret) {
pr_err("Failed to allocate pts, err=%d\n", ret);
@ -504,13 +425,9 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
goto err_alloc_pts;
}
/* Add the eviction fence back */
amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
return 0;
err_alloc_pts:
amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
list_del(&bo_va_entry->bo_list);
err_vmadd:
@ -809,24 +726,11 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
{
struct amdgpu_bo_va *bo_va = entry->bo_va;
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo *pd = vm->root.base.bo;
/* Remove eviction fence from PD (and thereby from PTs too as
* they share the resv. object). Otherwise during PT update
* job (see amdgpu_vm_bo_update_mapping), eviction fence would
* get added to job->sync object and job execution would
* trigger the eviction fence.
*/
amdgpu_amdkfd_remove_eviction_fence(pd,
vm->process_info->eviction_fence,
NULL, NULL);
amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
/* Add the eviction fence back */
amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
return 0;
@ -1002,7 +906,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
pr_err("validate_pt_pd_bos() failed\n");
goto validate_pd_fail;
}
ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
ret = amdgpu_bo_sync_wait(vm->root.base.bo, AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
amdgpu_bo_fence(vm->root.base.bo,
@ -1389,8 +1293,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
* attached
*/
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence,
NULL, NULL);
process_info->eviction_fence);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
mem->va + bo_size * (1 + mem->aql_queue));
@ -1617,8 +1520,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
if (mem->mapped_to_gpu_memory == 0 &&
!amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence,
NULL, NULL);
process_info->eviction_fence);
unreserve_out:
unreserve_bo_and_vms(&ctx, false, false);
@ -1679,7 +1581,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
}
amdgpu_amdkfd_remove_eviction_fence(
bo, mem->process_info->eviction_fence, NULL, NULL);
bo, mem->process_info->eviction_fence);
list_del_init(&mem->validate_list.head);
if (size)
@ -1945,16 +1847,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
amdgpu_sync_create(&sync);
/* Avoid triggering eviction fences when unmapping invalid
* userptr BOs (waits for all fences, doesn't use
* FENCE_OWNER_VM)
*/
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node)
amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
process_info->eviction_fence,
NULL, NULL);
ret = process_validate_vms(process_info);
if (ret)
goto unreserve_out;
@ -2015,10 +1907,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ret = process_update_pds(process_info, &sync);
unreserve_out:
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node)
amdgpu_bo_fence(peer_vm->root.base.bo,
&process_info->eviction_fence->base, true);
ttm_eu_backoff_reservation(&ticket, &resv_list);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);

View File

@ -124,6 +124,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
unsigned num_rings;
unsigned num_rqs = 0;
switch (i) {
case AMDGPU_HW_IP_GFX:
@ -166,12 +167,16 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
break;
}
for (j = 0; j < num_rings; ++j)
rqs[j] = &rings[j]->sched.sched_rq[priority];
for (j = 0; j < num_rings; ++j) {
if (!rings[j]->adev)
continue;
rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
}
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
r = drm_sched_entity_init(&ctx->entities[i][j].entity,
rqs, num_rings, &ctx->guilty);
rqs, num_rqs, &ctx->guilty);
if (r)
goto error_cleanup_entities;
}

View File

@ -158,9 +158,6 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
while (size) {
uint32_t value;
if (*pos > adev->rmmio_size)
goto end;
if (read) {
value = RREG32(*pos >> 2);
r = put_user(value, (uint32_t *)buf);

View File

@ -71,6 +71,8 @@ struct amdgpu_doorbell_index {
uint32_t vce_ring6_7;
} uvd_vce;
};
uint32_t first_non_cp;
uint32_t last_non_cp;
uint32_t max_assignment;
/* Per engine SDMA doorbell size in dword */
uint32_t sdma_doorbell_range;
@ -143,6 +145,10 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D,
AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E,
AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F,
AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F,
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
@ -222,6 +228,9 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE,
AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF,
AMDGPU_DOORBELL64_FIRST_NON_CP = AMDGPU_DOORBELL64_sDMA_ENGINE0,
AMDGPU_DOORBELL64_LAST_NON_CP = AMDGPU_DOORBELL64_VCE_RING6_7,
AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
AMDGPU_DOORBELL64_INVALID = 0xFFFF
} AMDGPU_DOORBELL64_ASSIGNMENT;

View File

@ -184,61 +184,6 @@ u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev)
return vrefresh;
}
void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
u32 *p, u32 *u)
{
u32 b_c = 0;
u32 i_c;
u32 tmp;
i_c = (i * r_c) / 100;
tmp = i_c >> p_b;
while (tmp) {
b_c++;
tmp >>= 1;
}
*u = (b_c + 1) / 2;
*p = i_c / (1 << (2 * (*u)));
}
int amdgpu_calculate_at(u32 t, u32 h, u32 fh, u32 fl, u32 *tl, u32 *th)
{
u32 k, a, ah, al;
u32 t1;
if ((fl == 0) || (fh == 0) || (fl > fh))
return -EINVAL;
k = (100 * fh) / fl;
t1 = (t * (k - 100));
a = (1000 * (100 * h + t1)) / (10000 + (t1 / 100));
a = (a + 5) / 10;
ah = ((a * t) + 5000) / 10000;
al = a - ah;
*th = t - ah;
*tl = t + al;
return 0;
}
bool amdgpu_is_uvd_state(u32 class, u32 class2)
{
if (class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE)
return true;
if (class & ATOM_PPLIB_CLASSIFICATION_HD2STATE)
return true;
if (class & ATOM_PPLIB_CLASSIFICATION_HDSTATE)
return true;
if (class & ATOM_PPLIB_CLASSIFICATION_SDSTATE)
return true;
if (class2 & ATOM_PPLIB_CLASSIFICATION2_MVC)
return true;
return false;
}
bool amdgpu_is_internal_thermal_sensor(enum amdgpu_int_thermal_type sensor)
{
switch (sensor) {
@ -949,39 +894,6 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev,
return AMDGPU_PCIE_GEN1;
}
u16 amdgpu_get_pcie_lane_support(struct amdgpu_device *adev,
u16 asic_lanes,
u16 default_lanes)
{
switch (asic_lanes) {
case 0:
default:
return default_lanes;
case 1:
return 1;
case 2:
return 2;
case 4:
return 4;
case 8:
return 8;
case 12:
return 12;
case 16:
return 16;
}
}
u8 amdgpu_encode_pci_lane_width(u32 lanes)
{
u8 encoded_lanes[] = { 0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6 };
if (lanes > 16)
return 0;
return encoded_lanes[lanes];
}
struct amd_vce_state*
amdgpu_get_vce_clock_state(void *handle, u32 idx)
{

View File

@ -486,10 +486,6 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev);
void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev);
bool amdgpu_is_uvd_state(u32 class, u32 class2);
void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
u32 *p, u32 *u);
int amdgpu_calculate_at(u32 t, u32 h, u32 fh, u32 fl, u32 *tl, u32 *th);
bool amdgpu_is_internal_thermal_sensor(enum amdgpu_int_thermal_type sensor);
@ -505,11 +501,6 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev,
enum amdgpu_pcie_gen asic_gen,
enum amdgpu_pcie_gen default_gen);
u16 amdgpu_get_pcie_lane_support(struct amdgpu_device *adev,
u16 asic_lanes,
u16 default_lanes);
u8 amdgpu_encode_pci_lane_width(u32 lanes);
struct amd_vce_state*
amdgpu_get_vce_clock_state(void *handle, u32 idx);

View File

@ -73,9 +73,10 @@
* - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
* - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
* - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
* - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 29
#define KMS_DRIVER_MINOR 30
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@ -1178,6 +1179,22 @@ static const struct file_operations amdgpu_driver_kms_fops = {
#endif
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
{
struct drm_file *file;
if (!filp)
return -EINVAL;
if (filp->f_op != &amdgpu_driver_kms_fops) {
return -EINVAL;
}
file = filp->private_data;
*fpriv = file->driver_priv;
return 0;
}
static bool
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
bool in_vblank_irq, int *vpos, int *hpos,

View File

@ -140,9 +140,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
* Interrupt handler (VI), walk the IH ring.
* Returns irq process return code.
*/
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
void (*callback)(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih))
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
{
u32 wptr;
@ -162,7 +160,7 @@ restart_ih:
rmb();
while (ih->rptr != wptr) {
callback(adev, ih);
amdgpu_irq_dispatch(adev, ih);
ih->rptr &= ih->ptr_mask;
}

View File

@ -69,8 +69,6 @@ struct amdgpu_ih_funcs {
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
unsigned ring_size, bool use_bus_addr);
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
void (*callback)(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih));
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
#endif

View File

@ -130,29 +130,6 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
spin_unlock_irqrestore(&adev->irq.lock, irqflags);
}
/**
* amdgpu_irq_callback - callback from the IH ring
*
* @adev: amdgpu device pointer
* @ih: amdgpu ih ring
*
* Callback from IH ring processing to handle the entry at the current position
* and advance the read pointer.
*/
static void amdgpu_irq_callback(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
u32 ring_index = ih->rptr >> 2;
struct amdgpu_iv_entry entry;
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
amdgpu_ih_decode_iv(adev, &entry);
trace_amdgpu_iv(ih - &adev->irq.ih, &entry);
amdgpu_irq_dispatch(adev, &entry);
}
/**
* amdgpu_irq_handler - IRQ handler
*
@ -170,7 +147,7 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
struct amdgpu_device *adev = dev->dev_private;
irqreturn_t ret;
ret = amdgpu_ih_process(adev, &adev->irq.ih, amdgpu_irq_callback);
ret = amdgpu_ih_process(adev, &adev->irq.ih);
if (ret == IRQ_HANDLED)
pm_runtime_mark_last_busy(dev->dev);
return ret;
@ -188,7 +165,7 @@ static void amdgpu_irq_handle_ih1(struct work_struct *work)
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
irq.ih1_work);
amdgpu_ih_process(adev, &adev->irq.ih1, amdgpu_irq_callback);
amdgpu_ih_process(adev, &adev->irq.ih1);
}
/**
@ -203,7 +180,7 @@ static void amdgpu_irq_handle_ih2(struct work_struct *work)
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
irq.ih2_work);
amdgpu_ih_process(adev, &adev->irq.ih2, amdgpu_irq_callback);
amdgpu_ih_process(adev, &adev->irq.ih2);
}
/**
@ -394,14 +371,23 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
* Dispatches IRQ to IP blocks.
*/
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
struct amdgpu_ih_ring *ih)
{
unsigned client_id = entry->client_id;
unsigned src_id = entry->src_id;
u32 ring_index = ih->rptr >> 2;
struct amdgpu_iv_entry entry;
unsigned client_id, src_id;
struct amdgpu_irq_src *src;
bool handled = false;
int r;
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
amdgpu_ih_decode_iv(adev, &entry);
trace_amdgpu_iv(ih - &adev->irq.ih, &entry);
client_id = entry.client_id;
src_id = entry.src_id;
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
@ -416,7 +402,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
client_id, src_id);
} else if ((src = adev->irq.client[client_id].sources[src_id])) {
r = src->funcs->process(adev, src, entry);
r = src->funcs->process(adev, src, &entry);
if (r < 0)
DRM_ERROR("error processing interrupt (%d)\n", r);
else if (r)
@ -428,7 +414,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
/* Send it to amdkfd as well if it isn't already handled */
if (!handled)
amdgpu_amdkfd_interrupt(adev, entry->iv_entry);
amdgpu_amdkfd_interrupt(adev, entry.iv_entry);
}
/**

View File

@ -108,7 +108,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
unsigned client_id, unsigned src_id,
struct amdgpu_irq_src *source);
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
struct amdgpu_ih_ring *ih);
int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type);
int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,

View File

@ -207,7 +207,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
if (!r) {
acpi_status = amdgpu_acpi_init(adev);
if (acpi_status)
dev_dbg(&dev->pdev->dev,
dev_dbg(&dev->pdev->dev,
"Error during ACPI methods call\n");
}

View File

@ -1284,6 +1284,30 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
reservation_object_add_excl_fence(resv, fence);
}
/**
* amdgpu_bo_sync_wait - Wait for BO reservation fences
*
* @bo: buffer object
* @owner: fence owner
* @intr: Whether the wait is interruptible
*
* Returns:
* 0 on success, errno otherwise.
*/
int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_sync sync;
int r;
amdgpu_sync_create(&sync);
amdgpu_sync_resv(adev, &sync, bo->tbo.resv, owner, false);
r = amdgpu_sync_wait(&sync, intr);
amdgpu_sync_free(&sync);
return r;
}
/**
* amdgpu_bo_gpu_offset - return GPU offset of bo
* @bo: amdgpu object for which we query the offset
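
The new helper folds the old create/resv-sync/wait/free sequence into a single call. A minimal caller sketch (the wrapper name is hypothetical), assuming the BO is already reserved as amdgpu_bo_sync_wait requires, mirroring the KFD validate path above:

static int example_wait_for_bo_idle(struct amdgpu_bo *bo)
{
	/* Wait for all fences on bo->tbo.resv except those owned by KFD
	 * (the eviction fences), non-interruptibly.
	 */
	return amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
}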

View File

@ -266,6 +266,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
bool shared);
int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
int amdgpu_bo_validate(struct amdgpu_bo *bo);
int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,

View File

@ -54,16 +54,20 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
enum drm_sched_priority priority)
{
struct file *filp = fget(fd);
struct drm_file *file;
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
uint32_t id;
int r;
if (!filp)
return -EINVAL;
file = filp->private_data;
fpriv = file->driver_priv;
r = amdgpu_file_to_fpriv(filp, &fpriv);
if (r) {
fput(filp);
return r;
}
idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
amdgpu_ctx_priority_override(ctx, priority);
@ -72,6 +76,39 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
return 0;
}
static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
int fd,
unsigned ctx_id,
enum drm_sched_priority priority)
{
struct file *filp = fget(fd);
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
int r;
if (!filp)
return -EINVAL;
r = amdgpu_file_to_fpriv(filp, &fpriv);
if (r) {
fput(filp);
return r;
}
ctx = amdgpu_ctx_get(fpriv, ctx_id);
if (!ctx) {
fput(filp);
return -EINVAL;
}
amdgpu_ctx_priority_override(ctx, priority);
amdgpu_ctx_put(ctx);
fput(filp);
return 0;
}
int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
@ -81,7 +118,7 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
int r;
priority = amdgpu_to_sched_priority(args->in.priority);
if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID)
if (priority == DRM_SCHED_PRIORITY_INVALID)
return -EINVAL;
switch (args->in.op) {
@ -90,6 +127,12 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
args->in.fd,
priority);
break;
case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
r = amdgpu_sched_context_priority_override(adev,
args->in.fd,
args->in.ctx_id,
priority);
break;
default:
DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
r = -EINVAL;

View File

@ -698,8 +698,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_vm_bo_base *bo_base, *tmp;
int r = 0;
vm->bulk_moveable &= list_empty(&vm->evicted);
list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
@ -828,7 +826,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
WARN_ON(job->ibs[0].length_dw > 64);
r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
AMDGPU_FENCE_OWNER_UNDEFINED, false);
AMDGPU_FENCE_OWNER_KFD, false);
if (r)
goto error_free;
@ -1332,31 +1330,6 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
}
}
/**
* amdgpu_vm_wait_pd - Wait for PT BOs to be free.
*
* @adev: amdgpu_device pointer
* @vm: related vm
* @owner: fence owner
*
* Returns:
* 0 on success, errno otherwise.
*/
static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *owner)
{
struct amdgpu_sync sync;
int r;
amdgpu_sync_create(&sync);
amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
r = amdgpu_sync_wait(&sync, true);
amdgpu_sync_free(&sync);
return r;
}
/**
* amdgpu_vm_update_func - helper to call update function
*
@ -1451,7 +1424,8 @@ restart:
params.adev = adev;
if (vm->use_cpu_for_update) {
r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
r = amdgpu_bo_sync_wait(vm->root.base.bo,
AMDGPU_FENCE_OWNER_VM, true);
if (unlikely(r))
return r;
@ -1772,9 +1746,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
params.adev = adev;
params.vm = vm;
/* sync to everything on unmapping */
/* sync to everything except eviction fences on unmapping */
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_UNDEFINED;
owner = AMDGPU_FENCE_OWNER_KFD;
if (vm->use_cpu_for_update) {
/* params.src is used as flag to indicate system Memory */
@ -1784,7 +1758,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
/* Wait for PT BOs to be idle. PTs share the same resv. object
* as the root PD BO
*/
r = amdgpu_vm_wait_pd(adev, vm, owner);
r = amdgpu_bo_sync_wait(vm->root.base.bo, owner, true);
if (unlikely(r))
return r;

View File

@ -2980,7 +2980,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
unsigned long flags;
unsigned long flags;
unsigned crtc_id;
struct amdgpu_crtc *amdgpu_crtc;
struct amdgpu_flip_work *works;

View File

@ -266,7 +266,8 @@ flr_done:
}
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev))
if (amdgpu_device_should_recover_gpu(adev)
&& amdgpu_lockup_timeout == MAX_SCHEDULE_TIMEOUT)
amdgpu_device_gpu_recover(adev, NULL);
}

View File

@ -32,7 +32,7 @@
static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK;
tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT;

View File

@ -1436,7 +1436,7 @@ static int si_common_early_init(void *handle)
AMD_CG_SUPPORT_UVD_MGCG |
AMD_CG_SUPPORT_HDP_LS |
AMD_CG_SUPPORT_HDP_MGCG;
adev->pg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = (adev->rev_id == 0) ? 1 :
(adev->rev_id == 1) ? 5 : 6;
break;

View File

@ -6216,10 +6216,12 @@ static void si_request_link_speed_change_before_state_change(struct amdgpu_devic
si_pi->force_pcie_gen = AMDGPU_PCIE_GEN2;
if (current_link_speed == AMDGPU_PCIE_GEN2)
break;
/* fall through */
case AMDGPU_PCIE_GEN2:
if (amdgpu_acpi_pcie_performance_request(adev, PCIE_PERF_REQ_PECI_GEN2, false) == 0)
break;
#endif
/* fall through */
default:
si_pi->force_pcie_gen = si_get_current_pcie_speed(adev);
break;

View File

@ -81,6 +81,10 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3;
adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5;
adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7;
adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL64_FIRST_NON_CP;
adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP;
/* In unit of dword doorbell */
adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1;
adev->doorbell_index.sdma_doorbell_range = 4;

View File

@ -85,6 +85,10 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)
adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3;
adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5;
adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7;
adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP;
adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP;
adev->doorbell_index.max_assignment = AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1;
adev->doorbell_index.sdma_doorbell_range = 20;
}

View File

@ -134,12 +134,18 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
*/
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
/* For SDMA queues on SOC15, use static doorbell
* assignments based on the engine and queue.
/* For SDMA queues on SOC15 with 8-byte doorbell, use static
* doorbell assignments based on the engine and queue id.
* The doorbell index distance between RLC (2*i) and (2*i+1)
* for a SDMA engine is 512.
*/
q->doorbell_id = dev->shared_resources.sdma_doorbell
[q->properties.sdma_engine_id]
[q->properties.sdma_queue_id];
uint32_t *idx_offset =
dev->shared_resources.sdma_doorbell_idx;
q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
+ (q->properties.sdma_queue_id & 1)
* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
+ (q->properties.sdma_queue_id >> 1);
} else {
/* For CP queues on SOC15 reserve a free doorbell ID */
unsigned int found;
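
A worked example of the new mapping (the idx_offset base value is assumed for illustration, not taken from the patch):

	/* Sketch: sdma_queue_id = 3 on engine 0, assuming idx_offset[0] == 0xF0 */
	doorbell_id = 0xF0 + (3 & 1) * KFD_QUEUE_DOORBELL_MIRROR_OFFSET + (3 >> 1);
	/*          = 0xF0 + 512 + 1 = 0x2F1, i.e. exactly 512 doorbells above
	 *            its even sibling (queue 2) at 0xF1, as the comment above
	 *            describes.
	 */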

View File

@ -97,17 +97,29 @@
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
#define KFD_CWSR_TMA_OFFSET PAGE_SIZE
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
(KFD_MAX_NUM_OF_PROCESSES * \
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
#define KFD_KERNEL_QUEUE_SIZE 2048
/*
* 512 = 0x200
* The doorbell index distance between SDMA RLC (2*i) and (2*i+1) in the
* same SDMA engine on SOC15, which has 8-byte doorbells for SDMA.
* 512 8-byte doorbell distance (i.e. one page away) ensures that SDMA RLC
* (2*i+1) doorbells (in terms of the lower 12 bit address) lie exactly in
* the OFFSET and SIZE set in registers like BIF_SDMA0_DOORBELL_RANGE.
*/
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
/*
* Kernel module parameter to specify maximum number of supported queues per
* device
*/
extern int max_num_of_queues_per_device;
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
(KFD_MAX_NUM_OF_PROCESSES * \
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
#define KFD_KERNEL_QUEUE_SIZE 2048
/* Kernel module parameter to specify the scheduling policy */
extern int sched_policy;

View File

@ -607,13 +607,17 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
if (!qpd->doorbell_bitmap)
return -ENOMEM;
/* Mask out any reserved doorbells */
for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
if ((dev->shared_resources.reserved_doorbell_mask & i) ==
dev->shared_resources.reserved_doorbell_val) {
/* Mask out doorbells reserved for SDMA, IH, and VCN on SOC15. */
for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
if (i >= dev->shared_resources.non_cp_doorbells_start
&& i <= dev->shared_resources.non_cp_doorbells_end) {
set_bit(i, qpd->doorbell_bitmap);
pr_debug("reserved doorbell 0x%03x\n", i);
set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
qpd->doorbell_bitmap);
pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i,
i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
}
}
return 0;
}
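
To make the effect concrete (first_non_cp is assumed to be 0xF0 here for illustration; last_non_cp = 0xFF follows from AMDGPU_DOORBELL64_LAST_NON_CP above):

	/* With non_cp_doorbells_start = 0xF0 and non_cp_doorbells_end = 0xFF,
	 * the loop reserves doorbells 0x0F0-0x0FF and, through the +512
	 * mirror, 0x2F0-0x2FF, so CP queues can never be assigned a doorbell
	 * that BIF routes to SDMA, IH or VCN.
	 */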

View File

@ -3790,7 +3790,6 @@ static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
* check will succeed, and let DC implement proper check
*/
static const uint32_t rgb_formats[] = {
DRM_FORMAT_RGB888,
DRM_FORMAT_XRGB8888,
DRM_FORMAT_ARGB8888,
DRM_FORMAT_RGBA8888,
@ -4678,10 +4677,9 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
struct dc_plane_state *dc_plane;
struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state);
if (plane->type == DRM_PLANE_TYPE_CURSOR) {
handle_cursor_update(plane, old_plane_state);
/* Cursor plane is handled after stream updates */
if (plane->type == DRM_PLANE_TYPE_CURSOR)
continue;
}
if (!fb || !crtc || pcrtc != crtc)
continue;
@ -4712,14 +4710,21 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
*/
abo = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(abo, true);
if (unlikely(r != 0)) {
if (unlikely(r != 0))
DRM_ERROR("failed to reserve buffer before flip\n");
WARN_ON(1);
}
/* Wait for all fences on this FB */
WARN_ON(reservation_object_wait_timeout_rcu(abo->tbo.resv, true, false,
MAX_SCHEDULE_TIMEOUT) < 0);
/*
* Wait for all fences on this FB. Do limited wait to avoid
* deadlock during GPU reset when this fence will not signal
* but we hold reservation lock for the BO.
*/
r = reservation_object_wait_timeout_rcu(abo->tbo.resv,
true, false,
msecs_to_jiffies(5000));
if (unlikely(r == 0))
DRM_ERROR("Waiting for fences timed out.");
amdgpu_bo_get_tiling_flags(abo, &tiling_flags);
@ -4874,6 +4879,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
mutex_unlock(&dm->dc_lock);
}
for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i)
if (plane->type == DRM_PLANE_TYPE_CURSOR)
handle_cursor_update(plane, old_plane_state);
cleanup:
kfree(flip);
kfree(full);
@ -5799,15 +5808,14 @@ dm_determine_update_type_for_commit(struct dc *dc,
old_dm_crtc_state = to_dm_crtc_state(old_crtc_state);
num_plane = 0;
if (!new_dm_crtc_state->stream) {
if (!new_dm_crtc_state->stream && old_dm_crtc_state->stream) {
update_type = UPDATE_TYPE_FULL;
goto cleanup;
}
continue;
if (new_dm_crtc_state->stream != old_dm_crtc_state->stream) {
update_type = UPDATE_TYPE_FULL;
goto cleanup;
}
if (!new_dm_crtc_state->stream)
continue;
for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, j) {
new_plane_crtc = new_plane_state->crtc;
old_plane_crtc = old_plane_state->crtc;
@ -5817,6 +5825,11 @@ dm_determine_update_type_for_commit(struct dc *dc,
if (plane->type == DRM_PLANE_TYPE_CURSOR)
continue;
if (new_dm_plane_state->dc_state != old_dm_plane_state->dc_state) {
update_type = UPDATE_TYPE_FULL;
goto cleanup;
}
if (!state->allow_modeset)
continue;
@ -5955,6 +5968,42 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
goto fail;
}
/*
* Add all primary and overlay planes on the CRTC to the state
* whenever a plane is enabled to maintain correct z-ordering
* and to enable fast surface updates.
*/
drm_for_each_crtc(crtc, dev) {
bool modified = false;
for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
if (plane->type == DRM_PLANE_TYPE_CURSOR)
continue;
if (new_plane_state->crtc == crtc ||
old_plane_state->crtc == crtc) {
modified = true;
break;
}
}
if (!modified)
continue;
drm_for_each_plane_mask(plane, state->dev, crtc->state->plane_mask) {
if (plane->type == DRM_PLANE_TYPE_CURSOR)
continue;
new_plane_state =
drm_atomic_get_plane_state(state, plane);
if (IS_ERR(new_plane_state)) {
ret = PTR_ERR(new_plane_state);
goto fail;
}
}
}
/* Remove existing planes if they are modified */
for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
ret = dm_update_plane_state(dc, state, plane,

View File

@ -265,6 +265,7 @@ static struct atom_display_object_path_v2 *get_bios_object(
&& id.enum_id == obj_id.enum_id)
return &bp->object_info_tbl.v1_4->display_path[i];
}
/* fall through */
case OBJECT_TYPE_CONNECTOR:
case OBJECT_TYPE_GENERIC:
/* Both Generic and Connector Object ID
@ -277,6 +278,7 @@ static struct atom_display_object_path_v2 *get_bios_object(
&& id.enum_id == obj_id.enum_id)
return &bp->object_info_tbl.v1_4->display_path[i];
}
/* fall through */
default:
return NULL;
}

View File

@ -1138,6 +1138,9 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
/* pplib is notified if disp_num changed */
dc->hwss.optimize_bandwidth(dc, context);
for (i = 0; i < context->stream_count; i++)
context->streams[i]->mode_changed = false;
dc_release_state(dc->current_state);
dc->current_state = context;
@ -1623,13 +1626,13 @@ static void commit_planes_do_stream_update(struct dc *dc,
stream_update->adjust->v_total_min,
stream_update->adjust->v_total_max);
if (stream_update->periodic_vsync_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt)
pipe_ctx->stream_res.tg->funcs->program_vline_interrupt(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, VLINE0, &stream->periodic_vsync_config);
if (stream_update->periodic_interrupt0 &&
dc->hwss.setup_periodic_interrupt)
dc->hwss.setup_periodic_interrupt(pipe_ctx, VLINE0);
if (stream_update->enhanced_sync_config && pipe_ctx->stream_res.tg->funcs->program_vline_interrupt)
pipe_ctx->stream_res.tg->funcs->program_vline_interrupt(
pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing, VLINE1, &stream->enhanced_sync_config);
if (stream_update->periodic_interrupt1 &&
dc->hwss.setup_periodic_interrupt)
dc->hwss.setup_periodic_interrupt(pipe_ctx, VLINE1);
if ((stream_update->hdr_static_metadata && !stream->use_dynamic_meta) ||
stream_update->vrr_infopacket ||

View File

@ -51,9 +51,19 @@ struct freesync_context {
bool dummy;
};
union vline_config {
unsigned int line_number;
unsigned long long delta_in_ns;
enum vertical_interrupt_ref_point {
START_V_UPDATE = 0,
START_V_SYNC,
INVALID_POINT
//For now, only v_update interrupt is used.
//START_V_BLANK,
//START_V_ACTIVE
};
struct periodic_interrupt_config {
enum vertical_interrupt_ref_point ref_point;
int lines_offset;
};
@ -106,8 +116,8 @@ struct dc_stream_state {
/* DMCU info */
unsigned int abm_level;
union vline_config periodic_vsync_config;
union vline_config enhanced_sync_config;
struct periodic_interrupt_config periodic_interrupt0;
struct periodic_interrupt_config periodic_interrupt1;
/* from core_stream struct */
struct dc_context *ctx;
@ -158,8 +168,8 @@ struct dc_stream_update {
struct dc_info_packet *hdr_static_metadata;
unsigned int *abm_level;
union vline_config *periodic_vsync_config;
union vline_config *enhanced_sync_config;
struct periodic_interrupt_config *periodic_interrupt0;
struct periodic_interrupt_config *periodic_interrupt1;
struct dc_crtc_timing_adjust *adjust;
struct dc_info_packet *vrr_infopacket;

View File

@ -53,6 +53,27 @@
#define MCP_DISABLE_ABM_IMMEDIATELY 255
static bool dce_abm_set_pipe(struct abm *abm, uint32_t controller_id)
{
struct dce_abm *abm_dce = TO_DCE_ABM(abm);
uint32_t rampingBoundary = 0xFFFF;
REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0,
1, 80000);
/* set ramping boundary */
REG_WRITE(MASTER_COMM_DATA_REG1, rampingBoundary);
/* setDMCUParam_Pipe */
REG_UPDATE_2(MASTER_COMM_CMD_REG,
MASTER_COMM_CMD_REG_BYTE0, MCP_ABM_PIPE_SET,
MASTER_COMM_CMD_REG_BYTE1, controller_id);
/* notifyDMCUMsg */
REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1);
return true;
}
static unsigned int calculate_16_bit_backlight_from_pwm(struct dce_abm *abm_dce)
{
@ -175,7 +196,6 @@ static void dmcu_set_backlight_level(
uint32_t controller_id)
{
unsigned int backlight_8_bit = 0;
uint32_t rampingBoundary = 0xFFFF;
uint32_t s2;
if (backlight_pwm_u16_16 & 0x10000)
@ -185,16 +205,7 @@ static void dmcu_set_backlight_level(
// Take MSB of fractional part since backlight is not max
backlight_8_bit = (backlight_pwm_u16_16 >> 8) & 0xFF;
/* set ramping boundary */
REG_WRITE(MASTER_COMM_DATA_REG1, rampingBoundary);
/* setDMCUParam_Pipe */
REG_UPDATE_2(MASTER_COMM_CMD_REG,
MASTER_COMM_CMD_REG_BYTE0, MCP_ABM_PIPE_SET,
MASTER_COMM_CMD_REG_BYTE1, controller_id);
/* notifyDMCUMsg */
REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1);
dce_abm_set_pipe(&abm_dce->base, controller_id);
/* waitDMCUReadyForCmd */
REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT,
@ -309,16 +320,7 @@ static bool dce_abm_immediate_disable(struct abm *abm)
{
struct dce_abm *abm_dce = TO_DCE_ABM(abm);
REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 0,
1, 80000);
/* setDMCUParam_ABMLevel */
REG_UPDATE_2(MASTER_COMM_CMD_REG,
MASTER_COMM_CMD_REG_BYTE0, MCP_ABM_PIPE_SET,
MASTER_COMM_CMD_REG_BYTE1, MCP_DISABLE_ABM_IMMEDIATELY);
/* notifyDMCUMsg */
REG_UPDATE(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT, 1);
dce_abm_set_pipe(abm, MCP_DISABLE_ABM_IMMEDIATELY);
abm->stored_backlight_registers.BL_PWM_CNTL =
REG_READ(BL_PWM_CNTL);
@ -419,6 +421,7 @@ static const struct abm_funcs dce_funcs = {
.abm_init = dce_abm_init,
.set_abm_level = dce_abm_set_level,
.init_backlight = dce_abm_init_backlight,
.set_pipe = dce_abm_set_pipe,
.set_backlight_level_pwm = dce_abm_set_backlight_level_pwm,
.get_current_backlight = dce_abm_get_current_backlight,
.get_target_backlight = dce_abm_get_target_backlight,

View File

@ -479,7 +479,7 @@ static void program_grph_pixel_format(
case SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F:
sign = 1;
floating = 1;
/* no break */
/* fall through */
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616F: /* shouldn't this get float too? */
case SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616:
grph_depth = 3;

View File

@ -1300,6 +1300,10 @@ static enum dc_status apply_single_controller_ctx_to_hw(
struct drr_params params = {0};
unsigned int event_triggers = 0;
if (dc->hwss.disable_stream_gating) {
dc->hwss.disable_stream_gating(dc, pipe_ctx);
}
if (pipe_ctx->stream_res.audio != NULL) {
struct audio_output audio_output;
@ -1329,10 +1333,8 @@ static enum dc_status apply_single_controller_ctx_to_hw(
if (!pipe_ctx->stream->apply_seamless_boot_optimization)
dc->hwss.enable_stream_timing(pipe_ctx, context, dc);
if (pipe_ctx->stream_res.tg->funcs->program_vupdate_interrupt)
pipe_ctx->stream_res.tg->funcs->program_vupdate_interrupt(
pipe_ctx->stream_res.tg,
&stream->timing);
if (dc->hwss.setup_vupdate_interrupt)
dc->hwss.setup_vupdate_interrupt(pipe_ctx);
params.vertical_total_min = stream->adjust.v_total_min;
params.vertical_total_max = stream->adjust.v_total_max;
@ -1521,6 +1523,14 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
struct dc_link *edp_link = get_link_for_edp(dc);
bool can_edp_fast_boot_optimize = false;
bool apply_edp_fast_boot_optimization = false;
bool can_apply_seamless_boot = false;
for (i = 0; i < context->stream_count; i++) {
if (context->streams[i]->apply_seamless_boot_optimization) {
can_apply_seamless_boot = true;
break;
}
}
if (edp_link) {
/* this seems to cause blank screens on DCE8 */
@ -1549,7 +1559,7 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
}
}
if (!apply_edp_fast_boot_optimization) {
if (!apply_edp_fast_boot_optimization && !can_apply_seamless_boot) {
if (edp_link_to_turnoff) {
/*turn off backlight before DP_blank and encoder powered down*/
dc->hwss.edp_backlight_control(edp_link_to_turnoff, false);
@ -2676,6 +2686,8 @@ static const struct hw_sequencer_funcs dce110_funcs = {
.set_static_screen_control = set_static_screen_control,
.reset_hw_ctx_wrap = dce110_reset_hw_ctx_wrap,
.enable_stream_timing = dce110_enable_stream_timing,
.disable_stream_gating = NULL,
.enable_stream_gating = NULL,
.setup_stereo = NULL,
.set_avmute = dce110_set_avmute,
.wait_for_mpcc_disconnect = dce110_wait_for_mpcc_disconnect,

View File

@ -959,9 +959,25 @@ static void dcn10_disable_plane(struct dc *dc, struct pipe_ctx *pipe_ctx)
static void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
{
int i;
bool can_apply_seamless_boot = false;
for (i = 0; i < context->stream_count; i++) {
if (context->streams[i]->apply_seamless_boot_optimization) {
can_apply_seamless_boot = true;
break;
}
}
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct timing_generator *tg = dc->res_pool->timing_generators[i];
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
/* There is assumption that pipe_ctx is not mapping irregularly
* to non-preferred front end. If pipe_ctx->stream is not NULL,
* we will use the pipe, so don't disable
*/
if (pipe_ctx->stream != NULL)
continue;
if (tg->funcs->is_tg_enabled(tg))
tg->funcs->lock(tg);
@ -975,7 +991,9 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
}
}
dc->res_pool->mpc->funcs->mpc_init(dc->res_pool->mpc);
/* Cannot reset the MPC mux if seamless boot */
if (!can_apply_seamless_boot)
dc->res_pool->mpc->funcs->mpc_init(dc->res_pool->mpc);
for (i = 0; i < dc->res_pool->pipe_count; i++) {
struct timing_generator *tg = dc->res_pool->timing_generators[i];
@ -983,6 +1001,16 @@ static void dcn10_init_pipes(struct dc *dc, struct dc_state *context)
struct dpp *dpp = dc->res_pool->dpps[i];
struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
// W/A for issue with dc_post_update_surfaces_to_stream
hubp->power_gated = true;
/* There is assumption that pipe_ctx is not mapping irregularly
* to non-preferred front end. If pipe_ctx->stream is not NULL,
* we will use the pipe, so don't disable
*/
if (pipe_ctx->stream != NULL)
continue;
dpp->funcs->dpp_reset(dpp);
pipe_ctx->stream_res.tg = tg;
@ -1137,11 +1165,13 @@ static void reset_hw_ctx_wrap(
struct clock_source *old_clk = pipe_ctx_old->clock_source;
reset_back_end_for_pipe(dc, pipe_ctx_old, dc->current_state);
if (dc->hwss.enable_stream_gating) {
dc->hwss.enable_stream_gating(dc, pipe_ctx);
}
if (old_clk)
old_clk->funcs->cs_power_down(old_clk);
}
}
}
static bool patch_address_for_sbs_tb_stereo(
@ -2162,8 +2192,10 @@ static void dcn10_blank_pixel_data(
if (!blank) {
if (stream_res->tg->funcs->set_blank)
stream_res->tg->funcs->set_blank(stream_res->tg, blank);
if (stream_res->abm)
if (stream_res->abm) {
stream_res->abm->funcs->set_pipe(stream_res->abm, stream_res->tg->inst + 1);
stream_res->abm->funcs->set_abm_level(stream_res->abm, stream->abm_level);
}
} else if (blank) {
if (stream_res->abm)
stream_res->abm->funcs->set_abm_immediate_disable(stream_res->abm);
@ -2709,6 +2741,147 @@ static void dcn10_set_cursor_sdr_white_level(struct pipe_ctx *pipe_ctx)
pipe_ctx->plane_res.dpp, &opt_attr);
}
/**
* apply_front_porch_workaround - TODO: is this still needed on FPGA?
*
* This is a workaround for a bug that has existed since R5xx and has not been
* fixed. Keep the front porch at a minimum of 2 for interlaced mode or 1 for progressive.
*/
static void apply_front_porch_workaround(
struct dc_crtc_timing *timing)
{
if (timing->flags.INTERLACE == 1) {
if (timing->v_front_porch < 2)
timing->v_front_porch = 2;
} else {
if (timing->v_front_porch < 1)
timing->v_front_porch = 1;
}
}
int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx)
{
struct timing_generator *optc = pipe_ctx->stream_res.tg;
const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;
struct dc_crtc_timing patched_crtc_timing;
int vesa_sync_start;
int asic_blank_end;
int interlace_factor;
int vertical_line_start;
patched_crtc_timing = *dc_crtc_timing;
apply_front_porch_workaround(&patched_crtc_timing);
interlace_factor = patched_crtc_timing.flags.INTERLACE ? 2 : 1;
vesa_sync_start = patched_crtc_timing.v_addressable +
patched_crtc_timing.v_border_bottom +
patched_crtc_timing.v_front_porch;
asic_blank_end = (patched_crtc_timing.v_total -
vesa_sync_start -
patched_crtc_timing.v_border_top)
* interlace_factor;
vertical_line_start = asic_blank_end -
optc->dlg_otg_param.vstartup_start + 1;
return vertical_line_start;
}
static void calc_vupdate_position(
struct pipe_ctx *pipe_ctx,
uint32_t *start_line,
uint32_t *end_line)
{
const struct dc_crtc_timing *dc_crtc_timing = &pipe_ctx->stream->timing;
int vline_int_offset_from_vupdate =
pipe_ctx->stream->periodic_interrupt0.lines_offset;
int vupdate_offset_from_vsync = get_vupdate_offset_from_vsync(pipe_ctx);
int start_position;
if (vline_int_offset_from_vupdate > 0)
vline_int_offset_from_vupdate--;
else if (vline_int_offset_from_vupdate < 0)
vline_int_offset_from_vupdate++;
start_position = vline_int_offset_from_vupdate + vupdate_offset_from_vsync;
if (start_position >= 0)
*start_line = start_position;
else
*start_line = dc_crtc_timing->v_total + start_position - 1;
*end_line = *start_line + 2;
if (*end_line >= dc_crtc_timing->v_total)
*end_line = 2;
}
static void cal_vline_position(
struct pipe_ctx *pipe_ctx,
enum vline_select vline,
uint32_t *start_line,
uint32_t *end_line)
{
enum vertical_interrupt_ref_point ref_point = INVALID_POINT;
if (vline == VLINE0)
ref_point = pipe_ctx->stream->periodic_interrupt0.ref_point;
else if (vline == VLINE1)
ref_point = pipe_ctx->stream->periodic_interrupt1.ref_point;
switch (ref_point) {
case START_V_UPDATE:
calc_vupdate_position(
pipe_ctx,
start_line,
end_line);
break;
case START_V_SYNC:
// Supposed to do nothing because vsync is 0;
break;
default:
ASSERT(0);
break;
}
}
static void dcn10_setup_periodic_interrupt(
struct pipe_ctx *pipe_ctx,
enum vline_select vline)
{
struct timing_generator *tg = pipe_ctx->stream_res.tg;
if (vline == VLINE0) {
uint32_t start_line = 0;
uint32_t end_line = 0;
cal_vline_position(pipe_ctx, vline, &start_line, &end_line);
tg->funcs->setup_vertical_interrupt0(tg, start_line, end_line);
} else if (vline == VLINE1) {
pipe_ctx->stream_res.tg->funcs->setup_vertical_interrupt1(
tg,
pipe_ctx->stream->periodic_interrupt1.lines_offset);
}
}
static void dcn10_setup_vupdate_interrupt(struct pipe_ctx *pipe_ctx)
{
struct timing_generator *tg = pipe_ctx->stream_res.tg;
int start_line = get_vupdate_offset_from_vsync(pipe_ctx);
if (start_line < 0) {
ASSERT(0);
start_line = 0;
}
if (tg->funcs->setup_vertical_interrupt2)
tg->funcs->setup_vertical_interrupt2(tg, start_line);
}
static const struct hw_sequencer_funcs dcn10_funcs = {
.program_gamut_remap = program_gamut_remap,
.init_hw = dcn10_init_hw,
@ -2756,7 +2929,11 @@ static const struct hw_sequencer_funcs dcn10_funcs = {
.edp_wait_for_hpd_ready = hwss_edp_wait_for_hpd_ready,
.set_cursor_position = dcn10_set_cursor_position,
.set_cursor_attribute = dcn10_set_cursor_attribute,
.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level
.set_cursor_sdr_white_level = dcn10_set_cursor_sdr_white_level,
.disable_stream_gating = NULL,
.enable_stream_gating = NULL,
.setup_periodic_interrupt = dcn10_setup_periodic_interrupt,
.setup_vupdate_interrupt = dcn10_setup_vupdate_interrupt
};

View File

@ -81,4 +81,6 @@ struct pipe_ctx *find_top_pipe_for_stream(
struct dc_state *context,
const struct dc_stream_state *stream);
int get_vupdate_offset_from_vsync(struct pipe_ctx *pipe_ctx);
#endif /* __DC_HWSS_DCN10_H__ */

View File

@ -92,134 +92,36 @@ static void optc1_disable_stereo(struct timing_generator *optc)
OTG_3D_STRUCTURE_STEREO_SEL_OVR, 0);
}
static uint32_t get_start_vline(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing)
{
struct dc_crtc_timing patched_crtc_timing;
int vesa_sync_start;
int asic_blank_end;
int interlace_factor;
int vertical_line_start;
patched_crtc_timing = *dc_crtc_timing;
optc1_apply_front_porch_workaround(optc, &patched_crtc_timing);
vesa_sync_start = patched_crtc_timing.h_addressable +
patched_crtc_timing.h_border_right +
patched_crtc_timing.h_front_porch;
asic_blank_end = patched_crtc_timing.h_total -
vesa_sync_start -
patched_crtc_timing.h_border_left;
interlace_factor = patched_crtc_timing.flags.INTERLACE ? 2 : 1;
vesa_sync_start = patched_crtc_timing.v_addressable +
patched_crtc_timing.v_border_bottom +
patched_crtc_timing.v_front_porch;
asic_blank_end = (patched_crtc_timing.v_total -
vesa_sync_start -
patched_crtc_timing.v_border_top)
* interlace_factor;
vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1;
if (vertical_line_start < 0) {
ASSERT(0);
vertical_line_start = 0;
}
return vertical_line_start;
}
static void calc_vline_position(
void optc1_setup_vertical_interrupt0(
struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing,
unsigned long long vsync_delta,
uint32_t *start_line,
uint32_t *end_line)
{
unsigned long long req_delta_tens_of_usec = div64_u64((vsync_delta + 9999), 10000);
unsigned long long pix_clk_hundreds_khz = div64_u64((dc_crtc_timing->pix_clk_100hz + 999), 1000);
uint32_t req_delta_lines = (uint32_t) div64_u64(
(req_delta_tens_of_usec * pix_clk_hundreds_khz + dc_crtc_timing->h_total - 1),
dc_crtc_timing->h_total);
uint32_t vsync_line = get_start_vline(optc, dc_crtc_timing);
if (req_delta_lines != 0)
req_delta_lines--;
if (req_delta_lines > vsync_line)
*start_line = dc_crtc_timing->v_total - (req_delta_lines - vsync_line) - 1;
else
*start_line = vsync_line - req_delta_lines;
*end_line = *start_line + 2;
if (*end_line >= dc_crtc_timing->v_total)
*end_line = 2;
}
void optc1_program_vline_interrupt(
struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing,
enum vline_select vline,
const union vline_config *vline_config)
uint32_t start_line,
uint32_t end_line)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
uint32_t start_line = 0;
uint32_t end_line = 0;
switch (vline) {
case VLINE0:
calc_vline_position(optc, dc_crtc_timing, vline_config->delta_in_ns, &start_line, &end_line);
REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0,
OTG_VERTICAL_INTERRUPT0_LINE_START, start_line,
OTG_VERTICAL_INTERRUPT0_LINE_END, end_line);
break;
case VLINE1:
REG_SET(OTG_VERTICAL_INTERRUPT1_POSITION, 0,
OTG_VERTICAL_INTERRUPT1_LINE_START, vline_config->line_number);
break;
default:
break;
}
REG_SET_2(OTG_VERTICAL_INTERRUPT0_POSITION, 0,
OTG_VERTICAL_INTERRUPT0_LINE_START, start_line,
OTG_VERTICAL_INTERRUPT0_LINE_END, end_line);
}
void optc1_program_vupdate_interrupt(
void optc1_setup_vertical_interrupt1(
struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing)
uint32_t start_line)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
int32_t vertical_line_start;
uint32_t asic_blank_end;
uint32_t vesa_sync_start;
struct dc_crtc_timing patched_crtc_timing;
patched_crtc_timing = *dc_crtc_timing;
optc1_apply_front_porch_workaround(optc, &patched_crtc_timing);
REG_SET(OTG_VERTICAL_INTERRUPT1_POSITION, 0,
OTG_VERTICAL_INTERRUPT1_LINE_START, start_line);
}
/* asic_h_blank_end = HsyncWidth + HbackPorch =
* vesa. usHorizontalTotal - vesa. usHorizontalSyncStart -
* vesa.h_left_border
*/
vesa_sync_start = patched_crtc_timing.h_addressable +
patched_crtc_timing.h_border_right +
patched_crtc_timing.h_front_porch;
asic_blank_end = patched_crtc_timing.h_total -
vesa_sync_start -
patched_crtc_timing.h_border_left;
/* Use OTG_VERTICAL_INTERRUPT2 replace VUPDATE interrupt,
* program the reg for interrupt postition.
*/
vertical_line_start = asic_blank_end - optc->dlg_otg_param.vstartup_start + 1;
if (vertical_line_start < 0)
vertical_line_start = 0;
void optc1_setup_vertical_interrupt2(
struct timing_generator *optc,
uint32_t start_line)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
REG_SET(OTG_VERTICAL_INTERRUPT2_POSITION, 0,
OTG_VERTICAL_INTERRUPT2_LINE_START, vertical_line_start);
OTG_VERTICAL_INTERRUPT2_LINE_START, start_line);
}
/**
@ -1480,8 +1382,9 @@ bool optc1_get_crc(struct timing_generator *optc,
static const struct timing_generator_funcs dcn10_tg_funcs = {
.validate_timing = optc1_validate_timing,
.program_timing = optc1_program_timing,
.program_vline_interrupt = optc1_program_vline_interrupt,
.program_vupdate_interrupt = optc1_program_vupdate_interrupt,
.setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
.setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1,
.setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2,
.program_global_sync = optc1_program_global_sync,
.enable_crtc = optc1_enable_crtc,
.disable_crtc = optc1_disable_crtc,

View File

@ -483,11 +483,16 @@ void optc1_program_timing(
const struct dc_crtc_timing *dc_crtc_timing,
bool use_vbios);
void optc1_program_vline_interrupt(
void optc1_setup_vertical_interrupt0(
struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing,
enum vline_select vline,
const union vline_config *vline_config);
uint32_t start_line,
uint32_t end_line);
void optc1_setup_vertical_interrupt1(
struct timing_generator *optc,
uint32_t start_line);
void optc1_setup_vertical_interrupt2(
struct timing_generator *optc,
uint32_t start_line);
void optc1_program_global_sync(
struct timing_generator *optc);


@ -46,6 +46,7 @@ struct abm_funcs {
void (*abm_init)(struct abm *abm);
bool (*set_abm_level)(struct abm *abm, unsigned int abm_level);
bool (*set_abm_immediate_disable)(struct abm *abm);
bool (*set_pipe)(struct abm *abm, unsigned int controller_id);
bool (*init_backlight)(struct abm *abm);
/* backlight_pwm_u16_16 is unsigned 32 bit,


@ -134,14 +134,6 @@ struct dc_crtc_timing;
struct drr_params;
union vline_config;
enum vline_select {
VLINE0,
VLINE1,
VLINE2
};
struct timing_generator_funcs {
bool (*validate_timing)(struct timing_generator *tg,
@ -149,14 +141,17 @@ struct timing_generator_funcs {
void (*program_timing)(struct timing_generator *tg,
const struct dc_crtc_timing *timing,
bool use_vbios);
void (*program_vline_interrupt)(
void (*setup_vertical_interrupt0)(
struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing,
enum vline_select vline,
const union vline_config *vline_config);
uint32_t start_line,
uint32_t end_line);
void (*setup_vertical_interrupt1)(
struct timing_generator *optc,
uint32_t start_line);
void (*setup_vertical_interrupt2)(
struct timing_generator *optc,
uint32_t start_line);
void (*program_vupdate_interrupt)(struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing);
bool (*enable_crtc)(struct timing_generator *tg);
bool (*disable_crtc)(struct timing_generator *tg);
bool (*is_counter_moving)(struct timing_generator *tg);


@ -38,6 +38,11 @@ enum pipe_gating_control {
PIPE_GATING_CONTROL_INIT
};
enum vline_select {
VLINE0,
VLINE1
};
struct dce_hwseq_wa {
bool blnd_crtc_trigger;
bool DEGVIDCN10_253;
@ -68,6 +73,10 @@ struct stream_resource;
struct hw_sequencer_funcs {
void (*disable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx);
void (*enable_stream_gating)(struct dc *dc, struct pipe_ctx *pipe_ctx);
void (*init_hw)(struct dc *dc);
void (*init_pipes)(struct dc *dc, struct dc_state *context);
@ -220,6 +229,9 @@ struct hw_sequencer_funcs {
void (*set_cursor_attribute)(struct pipe_ctx *pipe);
void (*set_cursor_sdr_white_level)(struct pipe_ctx *pipe);
void (*setup_periodic_interrupt)(struct pipe_ctx *pipe_ctx, enum vline_select vline);
void (*setup_vupdate_interrupt)(struct pipe_ctx *pipe_ctx);
};
void color_space_to_black_color(


@ -131,6 +131,7 @@
#define INTERNAL_REV_RAVEN_A0 0x00 /* First spin of Raven */
#define RAVEN_A0 0x01
#define RAVEN_B0 0x21
#define PICASSO_A0 0x41
#if defined(CONFIG_DRM_AMD_DC_DCN1_01)
/* DCN1_01 */
#define RAVEN2_A0 0x81
@ -165,4 +166,6 @@
#define FAMILY_UNKNOWN 0xFF
#endif /* __DAL_ASIC_ID_H__ */


@ -165,18 +165,11 @@ struct iram_table_v_2_2 {
};
#pragma pack(pop)
static uint16_t backlight_8_to_16(unsigned int backlight_8bit)
{
return (uint16_t)(backlight_8bit * 0x101);
}
static void fill_backlight_transform_table(struct dmcu_iram_parameters params,
struct iram_table_v_2 *table)
{
unsigned int i;
unsigned int num_entries = NUM_BL_CURVE_SEGS;
unsigned int query_input_8bit;
unsigned int query_output_8bit;
unsigned int lut_index;
table->backlight_thresholds[0] = 0;
@ -194,16 +187,13 @@ static void fill_backlight_transform_table(struct dmcu_iram_parameters params,
* format U4.10.
*/
for (i = 1; i+1 < num_entries; i++) {
query_input_8bit = DIV_ROUNDUP((i * 256), num_entries);
lut_index = (params.backlight_lut_array_size - 1) * i / (num_entries - 1);
ASSERT(lut_index < params.backlight_lut_array_size);
query_output_8bit = params.backlight_lut_array[lut_index] >> 8;
table->backlight_thresholds[i] =
backlight_8_to_16(query_input_8bit);
cpu_to_be16(DIV_ROUNDUP((i * 65536), num_entries));
table->backlight_offsets[i] =
backlight_8_to_16(query_output_8bit);
cpu_to_be16(params.backlight_lut_array[lut_index]);
}
}
@ -212,8 +202,6 @@ static void fill_backlight_transform_table_v_2_2(struct dmcu_iram_parameters par
{
unsigned int i;
unsigned int num_entries = NUM_BL_CURVE_SEGS;
unsigned int query_input_8bit;
unsigned int query_output_8bit;
unsigned int lut_index;
table->backlight_thresholds[0] = 0;
@ -231,16 +219,13 @@ static void fill_backlight_transform_table_v_2_2(struct dmcu_iram_parameters par
* format U4.10.
*/
for (i = 1; i+1 < num_entries; i++) {
query_input_8bit = DIV_ROUNDUP((i * 256), num_entries);
lut_index = (params.backlight_lut_array_size - 1) * i / (num_entries - 1);
ASSERT(lut_index < params.backlight_lut_array_size);
query_output_8bit = params.backlight_lut_array[lut_index] >> 8;
table->backlight_thresholds[i] =
backlight_8_to_16(query_input_8bit);
cpu_to_be16(DIV_ROUNDUP((i * 65536), num_entries));
table->backlight_offsets[i] =
backlight_8_to_16(query_output_8bit);
cpu_to_be16(params.backlight_lut_array[lut_index]);
}
}
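A quick numeric check of the new threshold math, assuming NUM_BL_CURVE_SEGS is 16 as in the rest of this file: entry i = 5 used to be quantized to 8 bits first, DIV_ROUNDUP(5 * 256, 16) = 80, then widened by backlight_8_to_16() to 80 * 0x101 = 0x5050; it is now computed directly in 16 bits as DIV_ROUNDUP(5 * 65536, 16) = 0x5000, and the matching offset keeps the full 16-bit LUT sample (stored big-endian via cpu_to_be16()) instead of the truncated high byte widened back up.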


@ -137,20 +137,17 @@ struct kgd2kfd_shared_resources {
/* Bit n == 1 means Queue n is available for KFD */
DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
/* Doorbell assignments (SOC15 and later chips only). Only
/* SDMA doorbell assignments (SOC15 and later chips only). Only
* specific doorbells are routed to each SDMA engine. Others
* are routed to IH and VCN. They are not usable by the CP.
*
* Any doorbell number D that satisfies the following condition
* is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val
*
* KFD currently uses 1024 (= 0x3ff) doorbells per process. If
* doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved, that means
* mask would be set to 0x1e0 and val set to 0x0e0.
*/
unsigned int sdma_doorbell[2][8];
unsigned int reserved_doorbell_mask;
unsigned int reserved_doorbell_val;
uint32_t *sdma_doorbell_idx;
/* From SOC15 onward, the doorbell index range not usable for CP
* queues.
*/
uint32_t non_cp_doorbells_start;
uint32_t non_cp_doorbells_end;
/* Base address of doorbell aperture. */
phys_addr_t doorbell_physical_address;

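With the reserved range exported explicitly, a consumer can test a doorbell index directly instead of applying the old mask/value pair. A minimal sketch of such a check using the two new fields; the helper itself is hypothetical, not taken from this patch:

	static bool doorbell_usable_by_cp(const struct kgd2kfd_shared_resources *res,
					  uint32_t db_idx)
	{
		/* indices in [non_cp_doorbells_start, non_cp_doorbells_end] are
		 * routed by the BIF to SDMA/IH/VCN and must not be assigned to
		 * CP queues
		 */
		return db_idx < res->non_cp_doorbells_start ||
		       db_idx > res->non_cp_doorbells_end;
	}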

@ -139,12 +139,10 @@ static int smu10_construct_max_power_limits_table(struct pp_hwmgr *hwmgr,
static int smu10_init_dynamic_state_adjustment_rule_settings(
struct pp_hwmgr *hwmgr)
{
uint32_t table_size =
sizeof(struct phm_clock_voltage_dependency_table) +
(7 * sizeof(struct phm_clock_voltage_dependency_record));
struct phm_clock_voltage_dependency_table *table_clk_vlt;
struct phm_clock_voltage_dependency_table *table_clk_vlt =
kzalloc(table_size, GFP_KERNEL);
table_clk_vlt = kzalloc(struct_size(table_clk_vlt, entries, 7),
GFP_KERNEL);
if (NULL == table_clk_vlt) {
pr_err("Can not allocate memory!\n");

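This hunk (and the matching smu8 one further down) replaces the hand-rolled size arithmetic with struct_size() from <linux/overflow.h>, which sizes a structure ending in a variable-length array and saturates on overflow instead of wrapping. A standalone sketch of the idiom; the struct and field names below are illustrative, not the driver's:

	#include <linux/overflow.h>
	#include <linux/slab.h>

	struct clk_volt_table {
		u32 count;
		struct clk_volt_entry {
			u32 clk;
			u32 volt;
		} entries[];			/* flexible array member */
	};

	static struct clk_volt_table *alloc_clk_volt_table(unsigned int n)
	{
		struct clk_volt_table *t;

		/* sizeof(*t) + n * sizeof(t->entries[0]), with overflow checking */
		t = kzalloc(struct_size(t, entries, n), GFP_KERNEL);
		return t;
	}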

@ -3681,10 +3681,12 @@ static int smu7_request_link_speed_change_before_state_change(
data->force_pcie_gen = PP_PCIEGen2;
if (current_link_speed == PP_PCIEGen2)
break;
/* fall through */
case PP_PCIEGen2:
if (0 == amdgpu_acpi_pcie_performance_request(hwmgr->adev, PCIE_PERF_REQ_GEN2, false))
break;
#endif
/* fall through */
default:
data->force_pcie_gen = smu7_get_current_pcie_speed(hwmgr);
break;

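Here, and in the ci_dpm/si_dpm hunks further down, the cascades are intentional and the comments exist to satisfy GCC's -Wimplicit-fallthrough checking, which recognizes a /* fall through */ comment placed immediately before the next case label. A generic illustration of the pattern; the try_gen*() helpers are made up, not driver code:

	switch (current_gen) {
	case 3:
		if (try_gen3())
			break;
		/* fall through */
	case 2:
		if (try_gen2())
			break;
		/* fall through */
	default:
		use_current_speed();
		break;
	}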

@ -1211,7 +1211,7 @@ int smu7_power_control_set_level(struct pp_hwmgr *hwmgr)
hwmgr->platform_descriptor.TDPAdjustment :
(-1 * hwmgr->platform_descriptor.TDPAdjustment);
if (hwmgr->chip_id > CHIP_TONGA)
if (hwmgr->chip_id > CHIP_TONGA)
target_tdp = ((100 + adjust_percent) * (int)(cac_table->usTDP * 256)) / 100;
else
target_tdp = ((100 + adjust_percent) * (int)(cac_table->usConfigurableTDP * 256)) / 100;


@ -272,12 +272,10 @@ static int smu8_init_dynamic_state_adjustment_rule_settings(
struct pp_hwmgr *hwmgr,
ATOM_CLK_VOLT_CAPABILITY *disp_voltage_table)
{
uint32_t table_size =
sizeof(struct phm_clock_voltage_dependency_table) +
(7 * sizeof(struct phm_clock_voltage_dependency_record));
struct phm_clock_voltage_dependency_table *table_clk_vlt;
struct phm_clock_voltage_dependency_table *table_clk_vlt =
kzalloc(table_size, GFP_KERNEL);
table_clk_vlt = kzalloc(struct_size(table_clk_vlt, entries, 7),
GFP_KERNEL);
if (NULL == table_clk_vlt) {
pr_err("Can not allocate memory!\n");


@ -1,3 +1,25 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "soc15.h"
#include "soc15_hw_ip.h"
@ -114,7 +136,7 @@ int vega10_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
if (soc15_baco_program_registers(hwmgr, pre_baco_tbl,
ARRAY_SIZE(pre_baco_tbl))) {
if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnterBaco))
return -1;
return -EINVAL;
if (soc15_baco_program_registers(hwmgr, enter_baco_tbl,
ARRAY_SIZE(enter_baco_tbl)))
@ -132,5 +154,5 @@ int vega10_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
}
}
return -1;
return -EINVAL;
}


@ -20,8 +20,8 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __VEGA10_BOCO_H__
#define __VEGA10_BOCO_H__
#ifndef __VEGA10_BACO_H__
#define __VEGA10_BACO_H__
#include "hwmgr.h"
#include "common_baco.h"


@ -1,3 +1,25 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "soc15.h"
#include "soc15_hw_ip.h"
@ -67,14 +89,14 @@ int vega20_baco_set_state(struct pp_hwmgr *hwmgr, enum BACO_STATE state)
if(smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_EnterBaco, 0))
return -1;
return -EINVAL;
} else if (state == BACO_STATE_OUT) {
if (smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ExitBaco))
return -1;
return -EINVAL;
if (!soc15_baco_program_registers(hwmgr, clean_baco_tbl,
ARRAY_SIZE(clean_baco_tbl)))
return -1;
return -EINVAL;
}
return 0;

View File

@ -20,8 +20,8 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __VEGA20_BOCO_H__
#define __VEGA20_BOCO_H__
#ifndef __VEGA20_BACO_H__
#define __VEGA20_BACO_H__
#include "hwmgr.h"
#include "common_baco.h"


@ -3456,7 +3456,7 @@ static int vega20_apply_clocks_adjust_rules(struct pp_hwmgr *hwmgr)
disable_mclk_switching = ((1 < hwmgr->display_config->num_display) &&
!hwmgr->display_config->multi_monitor_in_sync) ||
vblank_too_short;
latency = hwmgr->display_config->dce_tolerable_mclk_in_active_latency;
latency = hwmgr->display_config->dce_tolerable_mclk_in_active_latency;
/* gfxclk */
dpm_table = &(data->dpm_table.gfx_table);


@ -29,6 +29,10 @@
#include <drm/amdgpu_drm.h>
#include "smumgr.h"
MODULE_FIRMWARE("amdgpu/bonaire_smc.bin");
MODULE_FIRMWARE("amdgpu/bonaire_k_smc.bin");
MODULE_FIRMWARE("amdgpu/hawaii_smc.bin");
MODULE_FIRMWARE("amdgpu/hawaii_k_smc.bin");
MODULE_FIRMWARE("amdgpu/topaz_smc.bin");
MODULE_FIRMWARE("amdgpu/topaz_k_smc.bin");
MODULE_FIRMWARE("amdgpu/tonga_smc.bin");


@ -4869,10 +4869,12 @@ static void ci_request_link_speed_change_before_state_change(struct radeon_devic
pi->force_pcie_gen = RADEON_PCIE_GEN2;
if (current_link_speed == RADEON_PCIE_GEN2)
break;
/* fall through */
case RADEON_PCIE_GEN2:
if (radeon_acpi_pcie_performance_request(rdev, PCIE_PERF_REQ_PECI_GEN2, false) == 0)
break;
#endif
/* fall through */
default:
pi->force_pcie_gen = ci_get_current_pcie_speed(rdev);
break;


@ -1299,6 +1299,7 @@ static int evergreen_cs_handle_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
return -EINVAL;
}
ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
break;
case CB_TARGET_MASK:
track->cb_target_mask = radeon_get_ib_value(p, idx);
track->cb_dirty = true;


@ -5762,10 +5762,12 @@ static void si_request_link_speed_change_before_state_change(struct radeon_devic
si_pi->force_pcie_gen = RADEON_PCIE_GEN2;
if (current_link_speed == RADEON_PCIE_GEN2)
break;
/* fall through */
case RADEON_PCIE_GEN2:
if (radeon_acpi_pcie_performance_request(rdev, PCIE_PERF_REQ_PECI_GEN2, false) == 0)
break;
#endif
/* fall through */
default:
si_pi->force_pcie_gen = si_get_current_pcie_speed(rdev);
break;


@ -52,12 +52,12 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
{
int i;
if (!(entity && rq_list && num_rq_list > 0 && rq_list[0]))
if (!(entity && rq_list && (num_rq_list == 0 || rq_list[0])))
return -EINVAL;
memset(entity, 0, sizeof(struct drm_sched_entity));
INIT_LIST_HEAD(&entity->list);
entity->rq = rq_list[0];
entity->rq = NULL;
entity->guilty = guilty;
entity->num_rq_list = num_rq_list;
entity->rq_list = kcalloc(num_rq_list, sizeof(struct drm_sched_rq *),
@ -67,6 +67,10 @@ int drm_sched_entity_init(struct drm_sched_entity *entity,
for (i = 0; i < num_rq_list; ++i)
entity->rq_list[i] = rq_list[i];
if (num_rq_list)
entity->rq = rq_list[0];
entity->last_scheduled = NULL;
spin_lock_init(&entity->rq_lock);
@ -165,6 +169,9 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
struct task_struct *last_user;
long ret = timeout;
if (!entity->rq)
return 0;
sched = entity->rq->sched;
/**
* The client will not queue more IBs during this fini, consume existing
@ -264,20 +271,24 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
*/
void drm_sched_entity_fini(struct drm_sched_entity *entity)
{
struct drm_gpu_scheduler *sched;
struct drm_gpu_scheduler *sched = NULL;
sched = entity->rq->sched;
drm_sched_rq_remove_entity(entity->rq, entity);
if (entity->rq) {
sched = entity->rq->sched;
drm_sched_rq_remove_entity(entity->rq, entity);
}
/* Consumption of existing IBs wasn't completed. Forcefully
* remove them here.
*/
if (spsc_queue_peek(&entity->job_queue)) {
/* Park the scheduler thread for a moment to make sure it isn't processing
* our entity.
*/
kthread_park(sched->thread);
kthread_unpark(sched->thread);
if (sched) {
/* Park the scheduler thread for a moment to make sure it isn't processing
* our entity.
*/
kthread_park(sched->thread);
kthread_unpark(sched->thread);
}
if (entity->dependency) {
dma_fence_remove_callback(entity->dependency,
&entity->cb);
@ -362,9 +373,11 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
for (i = 0; i < entity->num_rq_list; ++i)
drm_sched_entity_set_rq_priority(&entity->rq_list[i], priority);
drm_sched_rq_remove_entity(entity->rq, entity);
drm_sched_entity_set_rq_priority(&entity->rq, priority);
drm_sched_rq_add_entity(entity->rq, entity);
if (entity->rq) {
drm_sched_rq_remove_entity(entity->rq, entity);
drm_sched_entity_set_rq_priority(&entity->rq, priority);
drm_sched_rq_add_entity(entity->rq, entity);
}
spin_unlock(&entity->rq_lock);
}
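Taken together, these changes let an entity be initialized with zero run queues (entity->rq simply stays NULL) and make the flush, fini and set_priority paths tolerate that state. A minimal caller sketch, assuming the four-argument drm_sched_entity_init() visible in this hunk and the existing drm_sched_entity_destroy() wrapper around flush + fini:

	struct drm_sched_rq *rq_list[1];	/* array exists, but no entries are supplied */
	struct drm_sched_entity entity;
	int r;

	/* num_rq_list == 0 is now accepted; entity.rq is left NULL */
	r = drm_sched_entity_init(&entity, rq_list, 0, NULL);
	if (r)
		return r;

	/* teardown copes with the NULL rq: nothing to remove, no kthread to park */
	drm_sched_entity_destroy(&entity);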


@ -272,13 +272,14 @@ union drm_amdgpu_vm {
/* sched ioctl */
#define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE 1
#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE 2
struct drm_amdgpu_sched_in {
/* AMDGPU_SCHED_OP_* */
__u32 op;
__u32 fd;
__s32 priority;
__u32 flags;
__u32 ctx_id;
};
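The new op overrides the priority of a single context rather than every context owned by an fd. A hedged sketch of how privileged userspace might drive it, assuming the DRM_IOCTL_AMDGPU_SCHED wrapper, the union's in member carrying this struct, and libdrm's drmIoctl(); include paths and the AMDGPU_CTX_PRIORITY_HIGH value are taken from the uapi/libdrm headers as I understand them:

	#include <stdio.h>
	#include <stdint.h>
	#include <xf86drm.h>		/* drmIoctl() */
	#include <amdgpu_drm.h>		/* assuming libdrm's include dir is on the compile line */

	static int override_ctx_priority(int privileged_fd, int target_fd,
					 uint32_t target_ctx_id)
	{
		union drm_amdgpu_sched args = {
			.in = {
				.op       = AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE,
				.fd       = target_fd,		/* DRM fd of the process being adjusted */
				.ctx_id   = target_ctx_id,	/* context within that fd */
				.priority = AMDGPU_CTX_PRIORITY_HIGH,
			},
		};

		if (drmIoctl(privileged_fd, DRM_IOCTL_AMDGPU_SCHED, &args)) {
			perror("DRM_IOCTL_AMDGPU_SCHED");
			return -1;
		}
		return 0;
	}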
union drm_amdgpu_sched {