diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3857ce097c84..824dfe14bcd0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -772,6 +772,13 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 	if (skip_rcs_switch(ppgtt, engine, to))
 		return 0;
 
+	/* Clear this page out of any CPU caches for coherent swap-in/out. */
+	if (!(vma->flags & I915_VMA_GLOBAL_BIND)) {
+		ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
+		if (ret)
+			return ret;
+	}
+
 	/* Trying to pin first makes error handling easier. */
 	ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL);
 	if (ret)
@@ -786,18 +793,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 	 */
 	from = engine->last_context;
 
-	/*
-	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
-	 * that thanks to write = false in this call and us not setting any gpu
-	 * write domains when putting a context object onto the active list
-	 * (when switching away from it), this won't block.
-	 *
-	 * XXX: We need a real interface to do this instead of trickery.
-	 */
-	ret = i915_gem_object_set_to_gtt_domain(vma->obj, false);
-	if (ret)
-		goto err;
-
 	if (needs_pd_load_pre(ppgtt, engine, to)) {
 		/* Older GENs and non render rings still want the load first,
 		 * "PP_DCLV followed by PP_DIR_BASE register through Load
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2318a27341c8..81dc69d1ff05 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2092,6 +2092,10 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx,
 		return 0;
 
 	if (ce->state) {
+		ret = i915_gem_object_set_to_gtt_domain(ce->state->obj, false);
+		if (ret)
+			goto error;
+
 		ret = i915_vma_pin(ce->state, 0, ctx->ggtt_alignment,
 				   PIN_GLOBAL | PIN_HIGH);
 		if (ret)