Merge branch 'drm-next-4.4' of git://people.freedesktop.org/~agd5f/linux into drm-next
- Updated register headers for GFX 8.1 for Stoney
- Add some new CZ revisions
- minor pageflip optimizations
- Fencing clean up
- Warning fix
- More fence cleanup
- oops fix
- Fiji fixes

* 'drm-next-4.4' of git://people.freedesktop.org/~agd5f/linux: (29 commits)
  drm/amdgpu: group together common fence implementation
  drm/amdgpu: remove AMDGPU_FENCE_OWNER_MOVE
  drm/amdgpu: remove now unused fence functions
  drm/amdgpu: fix fence fallback check
  drm/amdgpu: fix stoping the scheduler timeout
  drm/amdgpu: cleanup on error in amdgpu_cs_ioctl()
  drm/amdgpu: update Fiji's Golden setting
  drm/amdgpu: update Fiji's rev id
  drm/amdgpu: extract common code in vi_common_early_init
  drm/amd/scheduler: don't oops on failure to load
  drm/amdgpu: don't oops on failure to load (v2)
  drm/amdgpu: don't VT switch on suspend
  drm/amdgpu: Make amdgpu_mn functions inline
  drm/amdgpu: remove amdgpu_fence_ref/unref
  drm/amdgpu: use common fence for sync
  drm/amdgpu: use the new fence_is_later
  drm/amdgpu: use common fences for VMID management v2
  drm/amdgpu: move ring_from_fence to common code
  drm/amdgpu: switch to common fence_wait_any_timeout v2
  drm/amdgpu: remove unneeded fence functions
  ...
commit 793423ffcb
@@ -397,6 +397,104 @@ out:
}
EXPORT_SYMBOL(fence_default_wait);

static bool
fence_test_signaled_any(struct fence **fences, uint32_t count)
{
        int i;

        for (i = 0; i < count; ++i) {
                struct fence *fence = fences[i];
                if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
                        return true;
        }
        return false;
}

/**
 * fence_wait_any_timeout - sleep until any fence gets signaled
 * or until timeout elapses
 * @fences: [in] array of fences to wait on
 * @count: [in] number of fences to wait on
 * @intr: [in] if true, do an interruptible wait
 * @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if
 * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies
 * on success.
 *
 * Synchronous waits for the first fence in the array to be signaled. The
 * caller needs to hold a reference to all fences in the array, otherwise a
 * fence might be freed before return, resulting in undefined behavior.
 */
signed long
fence_wait_any_timeout(struct fence **fences, uint32_t count,
                       bool intr, signed long timeout)
{
        struct default_wait_cb *cb;
        signed long ret = timeout;
        unsigned i;

        if (WARN_ON(!fences || !count || timeout < 0))
                return -EINVAL;

        if (timeout == 0) {
                for (i = 0; i < count; ++i)
                        if (fence_is_signaled(fences[i]))
                                return 1;

                return 0;
        }

        cb = kcalloc(count, sizeof(struct default_wait_cb), GFP_KERNEL);
        if (cb == NULL) {
                ret = -ENOMEM;
                goto err_free_cb;
        }

        for (i = 0; i < count; ++i) {
                struct fence *fence = fences[i];

                if (fence->ops->wait != fence_default_wait) {
                        ret = -EINVAL;
                        goto fence_rm_cb;
                }

                cb[i].task = current;
                if (fence_add_callback(fence, &cb[i].base,
                                       fence_default_wait_cb)) {
                        /* This fence is already signaled */
                        goto fence_rm_cb;
                }
        }

        while (ret > 0) {
                if (intr)
                        set_current_state(TASK_INTERRUPTIBLE);
                else
                        set_current_state(TASK_UNINTERRUPTIBLE);

                if (fence_test_signaled_any(fences, count))
                        break;

                ret = schedule_timeout(ret);

                if (ret > 0 && intr && signal_pending(current))
                        ret = -ERESTARTSYS;
        }

        __set_current_state(TASK_RUNNING);

fence_rm_cb:
        while (i-- > 0)
                fence_remove_callback(fences[i], &cb[i].base);

err_free_cb:
        kfree(cb);

        return ret;
}
EXPORT_SYMBOL(fence_wait_any_timeout);

/**
 * fence_init - Initialize a custom fence.
 * @fence: [in] the fence to initialize
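A minimal usage sketch of the new export (the wrapper function, fence variables and 100 ms timeout below are illustrative, not part of this patch): the caller must hold a reference on every fence it passes in and supply the timeout in jiffies.

/* Illustrative only: wait up to 100 ms for whichever of two fences
 * signals first; references on 'a' and 'b' are assumed to be held
 * by the caller for the duration of the call. */
static signed long example_wait_first(struct fence *a, struct fence *b)
{
        struct fence *fences[2] = { a, b };

        /* > 0: remaining jiffies, 0: timed out,
         * -ERESTARTSYS: interrupted, -EINVAL: non-default wait op */
        return fence_wait_any_timeout(fences, 2, false,
                                      msecs_to_jiffies(100));
}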
|
||||
|
@ -405,7 +405,6 @@ struct amdgpu_fence_driver {
|
||||
/* some special values for the owner field */
|
||||
#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
|
||||
#define AMDGPU_FENCE_OWNER_VM ((void*)1ul)
|
||||
#define AMDGPU_FENCE_OWNER_MOVE ((void*)2ul)
|
||||
|
||||
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
|
||||
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
|
||||
@ -447,57 +446,11 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
|
||||
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
|
||||
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
|
||||
|
||||
signed long amdgpu_fence_wait_any(struct fence **array,
|
||||
uint32_t count,
|
||||
bool intr,
|
||||
signed long t);
|
||||
struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
|
||||
void amdgpu_fence_unref(struct amdgpu_fence **fence);
|
||||
|
||||
bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
|
||||
struct amdgpu_ring *ring);
|
||||
void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
|
||||
struct amdgpu_ring *ring);
|
||||
|
||||
static inline struct amdgpu_fence *amdgpu_fence_later(struct amdgpu_fence *a,
|
||||
struct amdgpu_fence *b)
|
||||
{
|
||||
if (!a) {
|
||||
return b;
|
||||
}
|
||||
|
||||
if (!b) {
|
||||
return a;
|
||||
}
|
||||
|
||||
BUG_ON(a->ring != b->ring);
|
||||
|
||||
if (a->seq > b->seq) {
|
||||
return a;
|
||||
} else {
|
||||
return b;
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool amdgpu_fence_is_earlier(struct amdgpu_fence *a,
|
||||
struct amdgpu_fence *b)
|
||||
{
|
||||
if (!a) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!b) {
|
||||
return true;
|
||||
}
|
||||
|
||||
BUG_ON(a->ring != b->ring);
|
||||
|
||||
return a->seq < b->seq;
|
||||
}
|
||||
|
||||
int amdgpu_user_fence_emit(struct amdgpu_ring *ring, struct amdgpu_user_fence *user,
|
||||
void *owner, struct amdgpu_fence **fence);
|
||||
|
||||
/*
|
||||
* TTM.
|
||||
*/
|
||||
@ -708,7 +661,7 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev,
|
||||
*/
|
||||
struct amdgpu_sync {
|
||||
struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
|
||||
struct amdgpu_fence *sync_to[AMDGPU_MAX_RINGS];
|
||||
struct fence *sync_to[AMDGPU_MAX_RINGS];
|
||||
DECLARE_HASHTABLE(fences, 4);
|
||||
struct fence *last_vm_update;
|
||||
};
|
||||
@ -974,7 +927,7 @@ struct amdgpu_vm_id {
|
||||
/* last flushed PD/PT update */
|
||||
struct fence *flushed_updates;
|
||||
/* last use of vmid */
|
||||
struct amdgpu_fence *last_id_use;
|
||||
struct fence *last_id_use;
|
||||
};
|
||||
|
||||
struct amdgpu_vm {
|
||||
@ -1007,7 +960,7 @@ struct amdgpu_vm {
|
||||
};
|
||||
|
||||
struct amdgpu_vm_manager {
|
||||
struct amdgpu_fence *active[AMDGPU_NUM_VM];
|
||||
struct fence *active[AMDGPU_NUM_VM];
|
||||
uint32_t max_pfn;
|
||||
/* number of VMIDs */
|
||||
unsigned nvm;
|
||||
@ -1235,6 +1188,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
|
||||
struct amdgpu_irq_src *irq_src, unsigned irq_type,
|
||||
enum amdgpu_ring_type ring_type);
|
||||
void amdgpu_ring_fini(struct amdgpu_ring *ring);
|
||||
struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f);
|
||||
|
||||
/*
|
||||
* CS.
|
||||
@ -1758,11 +1712,11 @@ void amdgpu_test_syncing(struct amdgpu_device *adev);
|
||||
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
|
||||
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
|
||||
#else
|
||||
static int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
|
||||
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
|
||||
{
|
||||
return -ENODEV;
|
||||
}
|
||||
static void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
|
||||
static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -876,8 +876,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
struct amdgpu_job *job;
|
||||
struct amdgpu_ring * ring = parser->ibs->ring;
|
||||
job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
|
||||
if (!job)
|
||||
return -ENOMEM;
|
||||
if (!job) {
|
||||
r = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
job->base.sched = &ring->sched;
|
||||
job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
|
||||
job->adev = parser->adev;
|
||||
|
@ -69,6 +69,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
|
||||
struct amdgpu_device *adev = ctx->adev;
|
||||
unsigned i, j;
|
||||
|
||||
if (!adev)
|
||||
return;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
|
||||
for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j)
|
||||
fence_put(ctx->rings[i].fences[j]);
|
||||
|
@ -207,6 +207,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
|
||||
}
|
||||
|
||||
info->par = rfbdev;
|
||||
info->skip_vt_switch = true;
|
||||
|
||||
ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
|
||||
if (ret) {
|
||||
|
@ -136,42 +136,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_fence_check_signaled - callback from fence_queue
|
||||
*
|
||||
* this function is called with fence_queue lock held, which is also used
|
||||
* for the fence locking itself, so unlocked variants are used for
|
||||
* fence_signal, and remove_wait_queue.
|
||||
*/
|
||||
static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
|
||||
{
|
||||
struct amdgpu_fence *fence;
|
||||
struct amdgpu_device *adev;
|
||||
u64 seq;
|
||||
int ret;
|
||||
|
||||
fence = container_of(wait, struct amdgpu_fence, fence_wake);
|
||||
adev = fence->ring->adev;
|
||||
|
||||
/*
|
||||
* We cannot use amdgpu_fence_process here because we're already
|
||||
* in the waitqueue, in a call from wake_up_all.
|
||||
*/
|
||||
seq = atomic64_read(&fence->ring->fence_drv.last_seq);
|
||||
if (seq >= fence->seq) {
|
||||
ret = fence_signal_locked(&fence->base);
|
||||
if (!ret)
|
||||
FENCE_TRACE(&fence->base, "signaled from irq context\n");
|
||||
else
|
||||
FENCE_TRACE(&fence->base, "was already signaled\n");
|
||||
|
||||
__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
|
||||
fence_put(&fence->base);
|
||||
} else
|
||||
FENCE_TRACE(&fence->base, "pending\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_fence_activity - check for fence activity
|
||||
*
|
||||
@ -305,47 +269,6 @@ static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool amdgpu_fence_is_signaled(struct fence *f)
|
||||
{
|
||||
struct amdgpu_fence *fence = to_amdgpu_fence(f);
|
||||
struct amdgpu_ring *ring = fence->ring;
|
||||
|
||||
if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
|
||||
return true;
|
||||
|
||||
amdgpu_fence_process(ring);
|
||||
|
||||
if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_fence_enable_signaling - enable signalling on fence
|
||||
* @fence: fence
|
||||
*
|
||||
* This function is called with fence_queue lock held, and adds a callback
|
||||
* to fence_queue that checks if this fence is signaled, and if so it
|
||||
* signals the fence and removes itself.
|
||||
*/
|
||||
static bool amdgpu_fence_enable_signaling(struct fence *f)
|
||||
{
|
||||
struct amdgpu_fence *fence = to_amdgpu_fence(f);
|
||||
struct amdgpu_ring *ring = fence->ring;
|
||||
|
||||
if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
|
||||
return false;
|
||||
|
||||
fence->fence_wake.flags = 0;
|
||||
fence->fence_wake.private = NULL;
|
||||
fence->fence_wake.func = amdgpu_fence_check_signaled;
|
||||
__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
|
||||
fence_get(f);
|
||||
FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal
|
||||
* @ring: ring to wait on for the seq number
|
||||
@ -367,6 +290,7 @@ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
|
||||
if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
|
||||
return 0;
|
||||
|
||||
amdgpu_fence_schedule_check(ring);
|
||||
wait_event(ring->fence_drv.fence_queue, (
|
||||
(signaled = amdgpu_fence_seq_signaled(ring, seq))));
|
||||
|
||||
@ -416,36 +340,6 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
|
||||
return amdgpu_fence_ring_wait_seq(ring, seq);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_fence_ref - take a ref on a fence
|
||||
*
|
||||
* @fence: amdgpu fence object
|
||||
*
|
||||
* Take a reference on a fence (all asics).
|
||||
* Returns the fence.
|
||||
*/
|
||||
struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence)
|
||||
{
|
||||
fence_get(&fence->base);
|
||||
return fence;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_fence_unref - remove a ref on a fence
|
||||
*
|
||||
* @fence: amdgpu fence object
|
||||
*
|
||||
* Remove a reference on a fence (all asics).
|
||||
*/
|
||||
void amdgpu_fence_unref(struct amdgpu_fence **fence)
|
||||
{
|
||||
struct amdgpu_fence *tmp = *fence;
|
||||
|
||||
*fence = NULL;
|
||||
if (tmp)
|
||||
fence_put(&tmp->base);
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_fence_count_emitted - get the count of emitted fences
|
||||
*
|
||||
@ -761,6 +655,115 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Common fence implementation
|
||||
*/
|
||||
|
||||
static const char *amdgpu_fence_get_driver_name(struct fence *fence)
|
||||
{
|
||||
return "amdgpu";
|
||||
}
|
||||
|
||||
static const char *amdgpu_fence_get_timeline_name(struct fence *f)
|
||||
{
|
||||
struct amdgpu_fence *fence = to_amdgpu_fence(f);
|
||||
return (const char *)fence->ring->name;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_fence_is_signaled - test if fence is signaled
|
||||
*
|
||||
* @f: fence to test
|
||||
*
|
||||
* Test the fence sequence number if it is already signaled. If it isn't
|
||||
* signaled start fence processing. Returns True if the fence is signaled.
|
||||
*/
|
||||
static bool amdgpu_fence_is_signaled(struct fence *f)
|
||||
{
|
||||
struct amdgpu_fence *fence = to_amdgpu_fence(f);
|
||||
struct amdgpu_ring *ring = fence->ring;
|
||||
|
||||
if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
|
||||
return true;
|
||||
|
||||
amdgpu_fence_process(ring);
|
||||
|
||||
if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_fence_check_signaled - callback from fence_queue
|
||||
*
|
||||
* this function is called with fence_queue lock held, which is also used
|
||||
* for the fence locking itself, so unlocked variants are used for
|
||||
* fence_signal, and remove_wait_queue.
|
||||
*/
|
||||
static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
|
||||
{
|
||||
struct amdgpu_fence *fence;
|
||||
struct amdgpu_device *adev;
|
||||
u64 seq;
|
||||
int ret;
|
||||
|
||||
fence = container_of(wait, struct amdgpu_fence, fence_wake);
|
||||
adev = fence->ring->adev;
|
||||
|
||||
/*
|
||||
* We cannot use amdgpu_fence_process here because we're already
|
||||
* in the waitqueue, in a call from wake_up_all.
|
||||
*/
|
||||
seq = atomic64_read(&fence->ring->fence_drv.last_seq);
|
||||
if (seq >= fence->seq) {
|
||||
ret = fence_signal_locked(&fence->base);
|
||||
if (!ret)
|
||||
FENCE_TRACE(&fence->base, "signaled from irq context\n");
|
||||
else
|
||||
FENCE_TRACE(&fence->base, "was already signaled\n");
|
||||
|
||||
__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
|
||||
fence_put(&fence->base);
|
||||
} else
|
||||
FENCE_TRACE(&fence->base, "pending\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* amdgpu_fence_enable_signaling - enable signalling on fence
|
||||
* @fence: fence
|
||||
*
|
||||
* This function is called with fence_queue lock held, and adds a callback
|
||||
* to fence_queue that checks if this fence is signaled, and if so it
|
||||
* signals the fence and removes itself.
|
||||
*/
|
||||
static bool amdgpu_fence_enable_signaling(struct fence *f)
|
||||
{
|
||||
struct amdgpu_fence *fence = to_amdgpu_fence(f);
|
||||
struct amdgpu_ring *ring = fence->ring;
|
||||
|
||||
if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
|
||||
return false;
|
||||
|
||||
fence->fence_wake.flags = 0;
|
||||
fence->fence_wake.private = NULL;
|
||||
fence->fence_wake.func = amdgpu_fence_check_signaled;
|
||||
__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
|
||||
fence_get(f);
|
||||
amdgpu_fence_schedule_check(ring);
|
||||
FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
|
||||
return true;
|
||||
}
|
||||
|
||||
const struct fence_ops amdgpu_fence_ops = {
|
||||
.get_driver_name = amdgpu_fence_get_driver_name,
|
||||
.get_timeline_name = amdgpu_fence_get_timeline_name,
|
||||
.enable_signaling = amdgpu_fence_enable_signaling,
|
||||
.signaled = amdgpu_fence_is_signaled,
|
||||
.wait = fence_default_wait,
|
||||
.release = NULL,
|
||||
};
|
||||
|
||||
/*
|
||||
* Fence debugfs
|
||||
@ -811,131 +814,3 @@ int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
|
||||
#endif
|
||||
}
|
||||
|
||||
static const char *amdgpu_fence_get_driver_name(struct fence *fence)
|
||||
{
|
||||
return "amdgpu";
|
||||
}
|
||||
|
||||
static const char *amdgpu_fence_get_timeline_name(struct fence *f)
|
||||
{
|
||||
struct amdgpu_fence *fence = to_amdgpu_fence(f);
|
||||
return (const char *)fence->ring->name;
|
||||
}
|
||||
|
||||
static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
|
||||
{
|
||||
return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
|
||||
}
|
||||
|
||||
static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count)
|
||||
{
|
||||
int idx;
|
||||
struct fence *fence;
|
||||
|
||||
for (idx = 0; idx < count; ++idx) {
|
||||
fence = fences[idx];
|
||||
if (fence) {
|
||||
if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
struct amdgpu_wait_cb {
|
||||
struct fence_cb base;
|
||||
struct task_struct *task;
|
||||
};
|
||||
|
||||
static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
|
||||
{
|
||||
struct amdgpu_wait_cb *wait =
|
||||
container_of(cb, struct amdgpu_wait_cb, base);
|
||||
wake_up_process(wait->task);
|
||||
}
|
||||
|
||||
static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
|
||||
signed long t)
|
||||
{
|
||||
return amdgpu_fence_wait_any(&f, 1, intr, t);
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait the fence array with timeout
|
||||
*
|
||||
* @array: the fence array with amdgpu fence pointer
|
||||
* @count: the number of the fence array
|
||||
* @intr: when sleep, set the current task interruptable or not
|
||||
* @t: timeout to wait
|
||||
*
|
||||
* It will return when any fence is signaled or timeout.
|
||||
*/
|
||||
signed long amdgpu_fence_wait_any(struct fence **array, uint32_t count,
|
||||
bool intr, signed long t)
|
||||
{
|
||||
struct amdgpu_wait_cb *cb;
|
||||
struct fence *fence;
|
||||
unsigned idx;
|
||||
|
||||
BUG_ON(!array);
|
||||
|
||||
cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL);
|
||||
if (cb == NULL) {
|
||||
t = -ENOMEM;
|
||||
goto err_free_cb;
|
||||
}
|
||||
|
||||
for (idx = 0; idx < count; ++idx) {
|
||||
fence = array[idx];
|
||||
if (fence) {
|
||||
cb[idx].task = current;
|
||||
if (fence_add_callback(fence,
|
||||
&cb[idx].base, amdgpu_fence_wait_cb)) {
|
||||
/* The fence is already signaled */
|
||||
goto fence_rm_cb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (t > 0) {
|
||||
if (intr)
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
else
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
|
||||
/*
|
||||
* amdgpu_test_signaled_any must be called after
|
||||
* set_current_state to prevent a race with wake_up_process
|
||||
*/
|
||||
if (amdgpu_test_signaled_any(array, count))
|
||||
break;
|
||||
|
||||
t = schedule_timeout(t);
|
||||
|
||||
if (t > 0 && intr && signal_pending(current))
|
||||
t = -ERESTARTSYS;
|
||||
}
|
||||
|
||||
__set_current_state(TASK_RUNNING);
|
||||
|
||||
fence_rm_cb:
|
||||
for (idx = 0; idx < count; ++idx) {
|
||||
fence = array[idx];
|
||||
if (fence && cb[idx].base.func)
|
||||
fence_remove_callback(fence, &cb[idx].base);
|
||||
}
|
||||
|
||||
err_free_cb:
|
||||
kfree(cb);
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
const struct fence_ops amdgpu_fence_ops = {
|
||||
.get_driver_name = amdgpu_fence_get_driver_name,
|
||||
.get_timeline_name = amdgpu_fence_get_timeline_name,
|
||||
.enable_signaling = amdgpu_fence_enable_signaling,
|
||||
.signaled = amdgpu_fence_is_signaled,
|
||||
.wait = amdgpu_fence_default_wait,
|
||||
.release = NULL,
|
||||
};
|
||||
|
@ -95,7 +95,8 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
|
||||
{
|
||||
amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
|
||||
amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
|
||||
amdgpu_fence_unref(&ib->fence);
|
||||
if (ib->fence)
|
||||
fence_put(&ib->fence->base);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -436,6 +436,30 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
        }
}

/**
 * amdgpu_ring_from_fence - get ring from fence
 *
 * @f: fence structure
 *
 * Extract the ring a fence belongs to. Handles both scheduler as
 * well as hardware fences.
 */
struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
{
        struct amdgpu_fence *a_fence;
        struct amd_sched_fence *s_fence;

        s_fence = to_amd_sched_fence(f);
        if (s_fence)
                return container_of(s_fence->sched, struct amdgpu_ring, sched);

        a_fence = to_amdgpu_fence(f);
        if (a_fence)
                return a_fence->ring;

        return NULL;
}

/*
 * Debugfs info
 */
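A small hedged sketch (the helper name example_ring_idx is invented) of the pattern the later amdgpu_sa.c hunks switch to, recovering a ring index from either a scheduler fence or a hardware fence:

/* Illustrative only: resolve a fence to its ring index, treating a
 * foreign fence (NULL ring) as ring 0, like the old SA-manager helper
 * this function replaces. */
static uint32_t example_ring_idx(struct fence *f)
{
        struct amdgpu_ring *ring = amdgpu_ring_from_fence(f);

        return ring ? ring->idx : 0;
}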
|
||||
|
@ -139,25 +139,6 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
|
||||
return r;
|
||||
}
|
||||
|
||||
static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f)
|
||||
{
|
||||
struct amdgpu_fence *a_fence;
|
||||
struct amd_sched_fence *s_fence;
|
||||
|
||||
s_fence = to_amd_sched_fence(f);
|
||||
if (s_fence) {
|
||||
struct amdgpu_ring *ring;
|
||||
|
||||
ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
|
||||
return ring->idx;
|
||||
}
|
||||
|
||||
a_fence = to_amdgpu_fence(f);
|
||||
if (a_fence)
|
||||
return a_fence->ring->idx;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
|
||||
{
|
||||
struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
|
||||
@ -318,7 +299,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
|
||||
}
|
||||
|
||||
if (best_bo) {
|
||||
uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence);
|
||||
uint32_t idx = amdgpu_ring_from_fence(best_bo->fence)->idx;
|
||||
++tries[idx];
|
||||
sa_manager->hole = best_bo->olist.prev;
|
||||
|
||||
@ -337,6 +318,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
|
||||
{
|
||||
struct fence *fences[AMDGPU_MAX_RINGS];
|
||||
unsigned tries[AMDGPU_MAX_RINGS];
|
||||
unsigned count;
|
||||
int i, r;
|
||||
signed long t;
|
||||
|
||||
@ -371,13 +353,18 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
|
||||
/* see if we can skip over some allocations */
|
||||
} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
|
||||
|
||||
for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i)
|
||||
if (fences[i])
|
||||
fences[count++] = fences[i];
|
||||
|
||||
if (count) {
|
||||
spin_unlock(&sa_manager->wq.lock);
|
||||
t = amdgpu_fence_wait_any(fences, AMDGPU_MAX_RINGS,
|
||||
false, MAX_SCHEDULE_TIMEOUT);
|
||||
t = fence_wait_any_timeout(fences, count, false,
|
||||
MAX_SCHEDULE_TIMEOUT);
|
||||
r = (t > 0) ? 0 : t;
|
||||
spin_lock(&sa_manager->wq.lock);
|
||||
} else {
|
||||
/* if we have nothing to wait for block */
|
||||
if (r == -ENOENT) {
|
||||
r = wait_event_interruptible_locked(
|
||||
sa_manager->wq,
|
||||
amdgpu_sa_event(sa_manager, size, align)
|
||||
@ -406,7 +393,7 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
|
||||
if (fence && !fence_is_signaled(fence)) {
|
||||
uint32_t idx;
|
||||
(*sa_bo)->fence = fence_get(fence);
|
||||
idx = amdgpu_sa_get_ring_from_fence(fence);
|
||||
idx = amdgpu_ring_from_fence(fence)->idx;
|
||||
list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
|
||||
} else {
|
||||
amdgpu_sa_bo_remove_locked(*sa_bo);
|
||||
|
@ -54,7 +54,8 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
|
||||
goto err;
|
||||
}
|
||||
|
||||
fence = amdgpu_fence_ref(job->ibs[job->num_ibs - 1].fence);
|
||||
fence = job->ibs[job->num_ibs - 1].fence;
|
||||
fence_get(&fence->base);
|
||||
|
||||
err:
|
||||
if (job->free_job)
|
||||
|
@@ -87,6 +87,15 @@ static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
        return false;
}

static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
{
        if (*keep && fence_is_later(*keep, fence))
                return;

        fence_put(*keep);
        *keep = fence_get(fence);
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
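A hedged illustration (the wrapper name example_track_latest is made up) of how the later hunks in this file use the helper: the stored slot ends up holding whichever fence is chronologically later, with references moved accordingly.

/* Illustrative wrapper: track the latest fence seen for a ring slot.
 * amdgpu_sync_keep_later() drops the stored reference and grabs one
 * on 'f' unless the stored fence is already later than 'f'. */
static void example_track_latest(struct amdgpu_sync *sync,
                                 struct amdgpu_ring *ring,
                                 struct fence *f)
{
        amdgpu_sync_keep_later(&sync->sync_to[ring->idx], f);
}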
|
||||
@ -99,35 +108,21 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
|
||||
{
|
||||
struct amdgpu_sync_entry *e;
|
||||
struct amdgpu_fence *fence;
|
||||
struct amdgpu_fence *other;
|
||||
struct fence *tmp, *later;
|
||||
|
||||
if (!f)
|
||||
return 0;
|
||||
|
||||
if (amdgpu_sync_same_dev(adev, f) &&
|
||||
amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) {
|
||||
if (sync->last_vm_update) {
|
||||
tmp = sync->last_vm_update;
|
||||
BUG_ON(f->context != tmp->context);
|
||||
later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp;
|
||||
sync->last_vm_update = fence_get(later);
|
||||
fence_put(tmp);
|
||||
} else
|
||||
sync->last_vm_update = fence_get(f);
|
||||
}
|
||||
amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
|
||||
amdgpu_sync_keep_later(&sync->last_vm_update, f);
|
||||
|
||||
fence = to_amdgpu_fence(f);
|
||||
if (!fence || fence->ring->adev != adev) {
|
||||
hash_for_each_possible(sync->fences, e, node, f->context) {
|
||||
struct fence *new;
|
||||
if (unlikely(e->fence->context != f->context))
|
||||
continue;
|
||||
new = fence_get(fence_later(e->fence, f));
|
||||
if (new) {
|
||||
fence_put(e->fence);
|
||||
e->fence = new;
|
||||
}
|
||||
|
||||
amdgpu_sync_keep_later(&e->fence, f);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -140,10 +135,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
|
||||
return 0;
|
||||
}
|
||||
|
||||
other = sync->sync_to[fence->ring->idx];
|
||||
sync->sync_to[fence->ring->idx] = amdgpu_fence_ref(
|
||||
amdgpu_fence_later(fence, other));
|
||||
amdgpu_fence_unref(&other);
|
||||
amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -199,8 +191,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
|
||||
* for other VM updates and moves.
|
||||
*/
|
||||
fence_owner = amdgpu_sync_get_owner(f);
|
||||
if ((owner != AMDGPU_FENCE_OWNER_MOVE) &&
|
||||
(fence_owner != AMDGPU_FENCE_OWNER_MOVE) &&
|
||||
if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
|
||||
(fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
|
||||
((owner == AMDGPU_FENCE_OWNER_VM) !=
|
||||
(fence_owner == AMDGPU_FENCE_OWNER_VM)))
|
||||
continue;
|
||||
@ -262,11 +254,11 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_fence *fence = sync->sync_to[i];
|
||||
struct fence *fence = sync->sync_to[i];
|
||||
if (!fence)
|
||||
continue;
|
||||
|
||||
r = fence_wait(&fence->base, false);
|
||||
r = fence_wait(fence, false);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
@ -291,9 +283,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
|
||||
int i, r;
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
struct amdgpu_fence *fence = sync->sync_to[i];
|
||||
struct amdgpu_semaphore *semaphore;
|
||||
struct amdgpu_ring *other = adev->rings[i];
|
||||
struct amdgpu_semaphore *semaphore;
|
||||
struct amdgpu_fence *fence;
|
||||
|
||||
if (!sync->sync_to[i])
|
||||
continue;
|
||||
|
||||
fence = to_amdgpu_fence(sync->sync_to[i]);
|
||||
|
||||
/* check if we really need to sync */
|
||||
if (!amdgpu_fence_need_sync(fence, ring))
|
||||
@ -378,7 +375,7 @@ void amdgpu_sync_free(struct amdgpu_device *adev,
|
||||
amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
|
||||
amdgpu_fence_unref(&sync->sync_to[i]);
|
||||
fence_put(sync->sync_to[i]);
|
||||
|
||||
fence_put(sync->last_vm_update);
|
||||
}
|
||||
|
@ -1041,7 +1041,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
|
||||
WARN_ON(ib->length_dw > num_dw);
|
||||
r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
|
||||
&amdgpu_vm_free_job,
|
||||
AMDGPU_FENCE_OWNER_MOVE,
|
||||
AMDGPU_FENCE_OWNER_UNDEFINED,
|
||||
fence);
|
||||
if (r)
|
||||
goto error_free;
|
||||
|
@ -135,7 +135,7 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
|
||||
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
||||
struct amdgpu_sync *sync)
|
||||
{
|
||||
struct amdgpu_fence *best[AMDGPU_MAX_RINGS] = {};
|
||||
struct fence *best[AMDGPU_MAX_RINGS] = {};
|
||||
struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
|
||||
struct amdgpu_device *adev = ring->adev;
|
||||
|
||||
@ -154,7 +154,8 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
||||
|
||||
/* skip over VMID 0, since it is the system VM */
|
||||
for (i = 1; i < adev->vm_manager.nvm; ++i) {
|
||||
struct amdgpu_fence *fence = adev->vm_manager.active[i];
|
||||
struct fence *fence = adev->vm_manager.active[i];
|
||||
struct amdgpu_ring *fring;
|
||||
|
||||
if (fence == NULL) {
|
||||
/* found a free one */
|
||||
@ -163,21 +164,23 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (amdgpu_fence_is_earlier(fence, best[fence->ring->idx])) {
|
||||
best[fence->ring->idx] = fence;
|
||||
choices[fence->ring == ring ? 0 : 1] = i;
|
||||
fring = amdgpu_ring_from_fence(fence);
|
||||
if (best[fring->idx] == NULL ||
|
||||
fence_is_later(best[fring->idx], fence)) {
|
||||
best[fring->idx] = fence;
|
||||
choices[fring == ring ? 0 : 1] = i;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 2; ++i) {
|
||||
if (choices[i]) {
|
||||
struct amdgpu_fence *fence;
|
||||
struct fence *fence;
|
||||
|
||||
fence = adev->vm_manager.active[choices[i]];
|
||||
vm_id->id = choices[i];
|
||||
|
||||
trace_amdgpu_vm_grab_id(choices[i], ring->idx);
|
||||
return amdgpu_sync_fence(ring->adev, sync, &fence->base);
|
||||
return amdgpu_sync_fence(ring->adev, sync, fence);
|
||||
}
|
||||
}
|
||||
|
||||
@ -246,11 +249,11 @@ void amdgpu_vm_fence(struct amdgpu_device *adev,
|
||||
unsigned ridx = fence->ring->idx;
|
||||
unsigned vm_id = vm->ids[ridx].id;
|
||||
|
||||
amdgpu_fence_unref(&adev->vm_manager.active[vm_id]);
|
||||
adev->vm_manager.active[vm_id] = amdgpu_fence_ref(fence);
|
||||
fence_put(adev->vm_manager.active[vm_id]);
|
||||
adev->vm_manager.active[vm_id] = fence_get(&fence->base);
|
||||
|
||||
amdgpu_fence_unref(&vm->ids[ridx].last_id_use);
|
||||
vm->ids[ridx].last_id_use = amdgpu_fence_ref(fence);
|
||||
fence_put(vm->ids[ridx].last_id_use);
|
||||
vm->ids[ridx].last_id_use = fence_get(&fence->base);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1313,7 +1316,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
|
||||
|
||||
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
|
||||
fence_put(vm->ids[i].flushed_updates);
|
||||
amdgpu_fence_unref(&vm->ids[i].last_id_use);
|
||||
fence_put(vm->ids[i].last_id_use);
|
||||
}
|
||||
|
||||
mutex_destroy(&vm->mutex);
|
||||
|
@ -262,46 +262,22 @@ static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
|
||||
* @crtc_id: crtc to cleanup pageflip on
|
||||
* @crtc_base: new address of the crtc (GPU MC address)
|
||||
*
|
||||
* Does the actual pageflip (evergreen+).
|
||||
* During vblank we take the crtc lock and wait for the update_pending
|
||||
* bit to go high, when it does, we release the lock, and allow the
|
||||
* double buffered update to take place.
|
||||
* Returns the current update pending status.
|
||||
* Triggers the actual pageflip by updating the primary
|
||||
* surface base address.
|
||||
*/
|
||||
static void dce_v10_0_page_flip(struct amdgpu_device *adev,
|
||||
int crtc_id, u64 crtc_base)
|
||||
{
|
||||
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
|
||||
u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
|
||||
int i;
|
||||
|
||||
/* Lock the graphics update lock */
|
||||
tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
|
||||
WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
|
||||
|
||||
/* update the scanout addresses */
|
||||
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
|
||||
upper_32_bits(crtc_base));
|
||||
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
|
||||
lower_32_bits(crtc_base));
|
||||
|
||||
/* update the primary scanout address */
|
||||
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
|
||||
upper_32_bits(crtc_base));
|
||||
/* writing to the low address triggers the update */
|
||||
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
|
||||
lower_32_bits(crtc_base));
|
||||
|
||||
/* Wait for update_pending to go high. */
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
|
||||
GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
|
||||
|
||||
/* Unlock the lock, so double-buffering can take place inside vblank */
|
||||
tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
|
||||
WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
|
||||
/* post the write */
|
||||
RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
|
||||
}
|
||||
|
||||
static int dce_v10_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
|
||||
|
@ -252,46 +252,22 @@ static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
|
||||
* @crtc_id: crtc to cleanup pageflip on
|
||||
* @crtc_base: new address of the crtc (GPU MC address)
|
||||
*
|
||||
* Does the actual pageflip (evergreen+).
|
||||
* During vblank we take the crtc lock and wait for the update_pending
|
||||
* bit to go high, when it does, we release the lock, and allow the
|
||||
* double buffered update to take place.
|
||||
* Returns the current update pending status.
|
||||
* Triggers the actual pageflip by updating the primary
|
||||
* surface base address.
|
||||
*/
|
||||
static void dce_v11_0_page_flip(struct amdgpu_device *adev,
|
||||
int crtc_id, u64 crtc_base)
|
||||
{
|
||||
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
|
||||
u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
|
||||
int i;
|
||||
|
||||
/* Lock the graphics update lock */
|
||||
tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
|
||||
WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
|
||||
|
||||
/* update the scanout addresses */
|
||||
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
|
||||
upper_32_bits(crtc_base));
|
||||
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
|
||||
lower_32_bits(crtc_base));
|
||||
|
||||
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
|
||||
upper_32_bits(crtc_base));
|
||||
/* writing to the low address triggers the update */
|
||||
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
|
||||
lower_32_bits(crtc_base));
|
||||
|
||||
/* Wait for update_pending to go high. */
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
|
||||
GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
|
||||
|
||||
/* Unlock the lock, so double-buffering can take place inside vblank */
|
||||
tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
|
||||
WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
|
||||
/* post the write */
|
||||
RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
|
||||
}
|
||||
|
||||
static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
|
||||
|
@ -211,46 +211,22 @@ static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
|
||||
* @crtc_id: crtc to cleanup pageflip on
|
||||
* @crtc_base: new address of the crtc (GPU MC address)
|
||||
*
|
||||
* Does the actual pageflip (evergreen+).
|
||||
* During vblank we take the crtc lock and wait for the update_pending
|
||||
* bit to go high, when it does, we release the lock, and allow the
|
||||
* double buffered update to take place.
|
||||
* Returns the current update pending status.
|
||||
* Triggers the actual pageflip by updating the primary
|
||||
* surface base address.
|
||||
*/
|
||||
static void dce_v8_0_page_flip(struct amdgpu_device *adev,
|
||||
int crtc_id, u64 crtc_base)
|
||||
{
|
||||
struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
|
||||
u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
|
||||
int i;
|
||||
|
||||
/* Lock the graphics update lock */
|
||||
tmp |= GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK;
|
||||
WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
|
||||
|
||||
/* update the scanout addresses */
|
||||
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
|
||||
upper_32_bits(crtc_base));
|
||||
WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
|
||||
(u32)crtc_base);
|
||||
|
||||
/* update the primary scanout addresses */
|
||||
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
|
||||
upper_32_bits(crtc_base));
|
||||
/* writing to the low address triggers the update */
|
||||
WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
|
||||
(u32)crtc_base);
|
||||
|
||||
/* Wait for update_pending to go high. */
|
||||
for (i = 0; i < adev->usec_timeout; i++) {
|
||||
if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
|
||||
GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
|
||||
|
||||
/* Unlock the lock, so double-buffering can take place inside vblank */
|
||||
tmp &= ~GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK;
|
||||
WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
|
||||
lower_32_bits(crtc_base));
|
||||
/* post the write */
|
||||
RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
|
||||
}
|
||||
|
||||
static int dce_v8_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
|
||||
|
@ -235,11 +235,13 @@ static const u32 fiji_golden_common_all[] =
|
||||
mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
|
||||
mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
|
||||
mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
|
||||
mmGB_ADDR_CONFIG, 0xffffffff, 0x12011003,
|
||||
mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
|
||||
mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
|
||||
mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
|
||||
mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
|
||||
mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
|
||||
mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
|
||||
mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
|
||||
mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
|
||||
};
|
||||
|
||||
static const u32 golden_settings_fiji_a10[] =
|
||||
@ -247,24 +249,26 @@ static const u32 golden_settings_fiji_a10[] =
|
||||
mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
|
||||
mmDB_DEBUG2, 0xf00fffff, 0x00000400,
|
||||
mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
|
||||
mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x00000100,
|
||||
mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
|
||||
mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
|
||||
mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
|
||||
mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
|
||||
mmTCC_CTRL, 0x00100000, 0xf30fff7f,
|
||||
mmTCC_CTRL, 0x00100000, 0xf31fff7f,
|
||||
mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
|
||||
mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
|
||||
mmTCP_CHAN_STEER_HI, 0xffffffff, 0x7d6cf5e4,
|
||||
mmTCP_CHAN_STEER_LO, 0xffffffff, 0x3928b1a0,
|
||||
mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
|
||||
};
|
||||
|
||||
static const u32 fiji_mgcg_cgcg_init[] =
|
||||
{
|
||||
mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffc0,
|
||||
mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
|
||||
mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
|
||||
mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
|
||||
mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
|
||||
mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
|
||||
mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
|
||||
mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
|
||||
mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
|
||||
mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
|
||||
mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
|
||||
mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
|
||||
@ -292,6 +296,10 @@ static const u32 fiji_mgcg_cgcg_init[] =
|
||||
mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
|
||||
mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
|
||||
mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
|
||||
mmPCIE_INDEX, 0xffffffff, 0x0140001c,
|
||||
mmPCIE_DATA, 0x000f0000, 0x00000000,
|
||||
mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
|
||||
mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
|
||||
mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
|
||||
};
|
||||
|
||||
@ -1031,6 +1039,8 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
|
||||
case 0x84:
|
||||
case 0xc8:
|
||||
case 0xcc:
|
||||
case 0xe1:
|
||||
case 0xe3:
|
||||
/* B10 */
|
||||
adev->gfx.config.max_cu_per_sh = 8;
|
||||
break;
|
||||
@ -1039,18 +1049,23 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
|
||||
case 0x85:
|
||||
case 0xc9:
|
||||
case 0xcd:
|
||||
case 0xe2:
|
||||
case 0xe4:
|
||||
/* B8 */
|
||||
adev->gfx.config.max_cu_per_sh = 6;
|
||||
break;
|
||||
case 0xc6:
|
||||
case 0xca:
|
||||
case 0xce:
|
||||
case 0x88:
|
||||
/* B6 */
|
||||
adev->gfx.config.max_cu_per_sh = 6;
|
||||
break;
|
||||
case 0xc7:
|
||||
case 0x87:
|
||||
case 0xcb:
|
||||
case 0xe5:
|
||||
case 0x89:
|
||||
default:
|
||||
/* B4 */
|
||||
adev->gfx.config.max_cu_per_sh = 4;
|
||||
|
@ -965,7 +965,7 @@ static int gmc_v7_0_sw_fini(void *handle)
|
||||
|
||||
if (adev->vm_manager.enabled) {
|
||||
for (i = 0; i < AMDGPU_NUM_VM; ++i)
|
||||
amdgpu_fence_unref(&adev->vm_manager.active[i]);
|
||||
fence_put(adev->vm_manager.active[i]);
|
||||
gmc_v7_0_vm_fini(adev);
|
||||
adev->vm_manager.enabled = false;
|
||||
}
|
||||
@ -1015,7 +1015,7 @@ static int gmc_v7_0_suspend(void *handle)
|
||||
|
||||
if (adev->vm_manager.enabled) {
|
||||
for (i = 0; i < AMDGPU_NUM_VM; ++i)
|
||||
amdgpu_fence_unref(&adev->vm_manager.active[i]);
|
||||
fence_put(adev->vm_manager.active[i]);
|
||||
gmc_v7_0_vm_fini(adev);
|
||||
adev->vm_manager.enabled = false;
|
||||
}
|
||||
|
@ -984,7 +984,7 @@ static int gmc_v8_0_sw_fini(void *handle)
|
||||
|
||||
if (adev->vm_manager.enabled) {
|
||||
for (i = 0; i < AMDGPU_NUM_VM; ++i)
|
||||
amdgpu_fence_unref(&adev->vm_manager.active[i]);
|
||||
fence_put(adev->vm_manager.active[i]);
|
||||
gmc_v8_0_vm_fini(adev);
|
||||
adev->vm_manager.enabled = false;
|
||||
}
|
||||
@ -1036,7 +1036,7 @@ static int gmc_v8_0_suspend(void *handle)
|
||||
|
||||
if (adev->vm_manager.enabled) {
|
||||
for (i = 0; i < AMDGPU_NUM_VM; ++i)
|
||||
amdgpu_fence_unref(&adev->vm_manager.active[i]);
|
||||
fence_put(adev->vm_manager.active[i]);
|
||||
gmc_v8_0_vm_fini(adev);
|
||||
adev->vm_manager.enabled = false;
|
||||
}
|
||||
|
@ -1413,17 +1413,18 @@ static int vi_common_early_init(void *handle)
|
||||
adev->cg_flags = 0;
|
||||
adev->pg_flags = 0;
|
||||
adev->external_rev_id = 0x1;
|
||||
if (amdgpu_smc_load_fw && smc_enabled)
|
||||
adev->firmware.smu_load = true;
|
||||
break;
|
||||
case CHIP_FIJI:
|
||||
adev->has_uvd = true;
|
||||
adev->cg_flags = 0;
|
||||
adev->pg_flags = 0;
|
||||
adev->external_rev_id = adev->rev_id + 0x3c;
|
||||
break;
|
||||
case CHIP_TONGA:
|
||||
adev->has_uvd = true;
|
||||
adev->cg_flags = 0;
|
||||
adev->pg_flags = 0;
|
||||
adev->external_rev_id = adev->rev_id + 0x14;
|
||||
if (amdgpu_smc_load_fw && smc_enabled)
|
||||
adev->firmware.smu_load = true;
|
||||
break;
|
||||
case CHIP_CARRIZO:
|
||||
case CHIP_STONEY:
|
||||
@ -1432,14 +1433,15 @@ static int vi_common_early_init(void *handle)
|
||||
/* Disable UVD pg */
|
||||
adev->pg_flags = /* AMDGPU_PG_SUPPORT_UVD | */AMDGPU_PG_SUPPORT_VCE;
|
||||
adev->external_rev_id = adev->rev_id + 0x1;
|
||||
if (amdgpu_smc_load_fw && smc_enabled)
|
||||
adev->firmware.smu_load = true;
|
||||
break;
|
||||
default:
|
||||
/* FIXME: not supported yet */
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (amdgpu_smc_load_fw && smc_enabled)
|
||||
adev->firmware.smu_load = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_1_d.h (new file, 2791 lines; diff suppressed because it is too large)
drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_1_enum.h (new file, 6808 lines; diff suppressed because it is too large)
drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_1_sh_mask.h (new file, 21368 lines; diff suppressed because it is too large)
@@ -222,6 +222,12 @@ amd_sched_entity_pop_job(struct amd_sched_entity *entity)

        while ((entity->dependency = sched->ops->dependency(sched_job))) {

                if (entity->dependency->context == entity->fence_context) {
                        /* We can ignore fences from ourself */
                        fence_put(entity->dependency);
                        continue;
                }

                if (fence_add_callback(entity->dependency, &entity->cb,
                                       amd_sched_entity_wakeup))
                        fence_put(entity->dependency);
@@ -332,7 +338,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
        atomic_dec(&sched->hw_rq_count);
        amd_sched_fence_signal(s_fence);
        if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
                cancel_delayed_work_sync(&s_fence->dwork);
                cancel_delayed_work(&s_fence->dwork);
                spin_lock_irqsave(&sched->fence_list_lock, flags);
                list_del_init(&s_fence->list);
                spin_unlock_irqrestore(&sched->fence_list_lock, flags);
@@ -462,5 +468,6 @@ int amd_sched_init(struct amd_gpu_scheduler *sched,
 */
void amd_sched_fini(struct amd_gpu_scheduler *sched)
{
        if (sched->thread)
                kthread_stop(sched->thread);
}
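A hedged sketch (the teardown wrapper name is illustrative) of why the new NULL check matters for the "don't oops on failure to load" fix in the shortlog: if scheduler init fails before the worker thread is created, teardown must not call kthread_stop() on a NULL thread pointer.

/* Illustrative teardown mirroring the hunk above: only stop the
 * scheduler thread if it was actually created, so tearing down a
 * partially initialized scheduler no longer oopses. */
static void example_sched_teardown(struct amd_gpu_scheduler *sched)
{
        if (sched->thread)
                kthread_stop(sched->thread);
}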
|
||||
|
@ -1404,44 +1404,20 @@ void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc)
|
||||
* @crtc_id: crtc to cleanup pageflip on
|
||||
* @crtc_base: new address of the crtc (GPU MC address)
|
||||
*
|
||||
* Does the actual pageflip (evergreen+).
|
||||
* During vblank we take the crtc lock and wait for the update_pending
|
||||
* bit to go high, when it does, we release the lock, and allow the
|
||||
* double buffered update to take place.
|
||||
* Returns the current update pending status.
|
||||
* Triggers the actual pageflip by updating the primary
|
||||
* surface base address (evergreen+).
|
||||
*/
|
||||
void evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
|
||||
{
|
||||
struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
|
||||
u32 tmp = RREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset);
|
||||
int i;
|
||||
|
||||
/* Lock the graphics update lock */
|
||||
tmp |= EVERGREEN_GRPH_UPDATE_LOCK;
|
||||
WREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
|
||||
|
||||
/* update the scanout addresses */
|
||||
WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset,
|
||||
upper_32_bits(crtc_base));
|
||||
WREG32(EVERGREEN_GRPH_SECONDARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
|
||||
(u32)crtc_base);
|
||||
|
||||
WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset,
|
||||
upper_32_bits(crtc_base));
|
||||
WREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset,
|
||||
(u32)crtc_base);
|
||||
|
||||
/* Wait for update_pending to go high. */
|
||||
for (i = 0; i < rdev->usec_timeout; i++) {
|
||||
if (RREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset) & EVERGREEN_GRPH_SURFACE_UPDATE_PENDING)
|
||||
break;
|
||||
udelay(1);
|
||||
}
|
||||
DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
|
||||
|
||||
/* Unlock the lock, so double-buffering can take place inside vblank */
|
||||
tmp &= ~EVERGREEN_GRPH_UPDATE_LOCK;
|
||||
WREG32(EVERGREEN_GRPH_UPDATE + radeon_crtc->crtc_offset, tmp);
|
||||
/* post the write */
|
||||
RREG32(EVERGREEN_GRPH_PRIMARY_SURFACE_ADDRESS + radeon_crtc->crtc_offset);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -279,6 +279,22 @@ fence_is_signaled(struct fence *fence)
        return false;
}

/**
 * fence_is_later - return if f1 is chronologically later than f2
 * @f1: [in] the first fence from the same context
 * @f2: [in] the second fence from the same context
 *
 * Returns true if f1 is chronologically later than f2. Both fences must be
 * from the same context, since a seqno is not re-used across contexts.
 */
static inline bool fence_is_later(struct fence *f1, struct fence *f2)
{
        if (WARN_ON(f1->context != f2->context))
                return false;

        return f1->seqno - f2->seqno < INT_MAX;
}

/**
 * fence_later - return the chronologically later fence
 * @f1: [in] the first fence from the same context
@@ -298,14 +314,15 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2)
 * set if enable_signaling wasn't called, and enabling that here is
 * overkill.
 */
        if (f2->seqno - f1->seqno <= INT_MAX)
                return fence_is_signaled(f2) ? NULL : f2;
        else
        if (fence_is_later(f1, f2))
                return fence_is_signaled(f1) ? NULL : f1;
        else
                return fence_is_signaled(f2) ? NULL : f2;
}

signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout);

signed long fence_wait_any_timeout(struct fence **fences, uint32_t count,
                                   bool intr, signed long timeout);

/**
 * fence_wait - sleep until the fence gets signaled
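A worked example of the unsigned seqno comparison in fence_is_later (the concrete numbers below are illustrative): because the test is an unsigned subtraction compared against INT_MAX, the ordering stays correct even across a 32-bit sequence-number wrap.

/* Illustration only: with seqno values 2 and 0xfffffffeU on the same
 * context, 2 - 0xfffffffeU == 4 in unsigned arithmetic, which is
 * < INT_MAX, so the fence with seqno 2 is still reported as the later
 * one even though its raw value is numerically smaller. */
static inline bool example_seqno_later(unsigned int s1, unsigned int s2)
{
        return s1 - s2 < INT_MAX;
}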
|
||||
|