Merge tag 'drm-intel-testing-2017-04-03' of git://anongit.freedesktop.org/git/drm-intel into drm-next

Last 4.12 feature pile: GVT updates: - Add mdev attribute group for per-vgpu info - Time slice based vGPU scheduling QoS support (Gao Ping) - Initial KBL support for E3 server (Han Xu) - other misc. i915: - lots and lots of small fixes and improvements all over - refactor fw_domain code (Chris Wilson) - improve guc code (Oscar Mateo) - refactor cursor/sprite code, precompute more for less overhead in the critical path (Ville) - refactor guc/huc fw loading code a bit (Michal Wajdeczko) * tag 'drm-intel-testing-2017-04-03' of git://anongit.freedesktop.org/git/drm-intel: (121 commits) drm/i915: Update DRIVER_DATE to 20170403 drm/i915: Clear gt.active_requests before checking idle status drm/i915/uc: Drop use of MISSING_CASE on trivial enums drm/i915: make a few DDI functions static drm/i915: Combine reset_all_global_seqno() loops into one drm/i915: Remove redudant wait for each engine to idle from seqno wrap drm/i915: Wait for all engines to be idle as part of i915_gem_wait_for_idle() drm/i915: Move retire-requests into i915_gem_wait_for_idle() drm/i915/uc: Move fw path check to fetch_uc_fw() drm/i915/huc: Remove unused intel_huc_fini() drm/i915/uc: Add intel_uc_fw_fini() drm/i915/uc: Add intel_uc_fw_type_repr() drm/i915/uc: Move intel_uc_fw_status_repr() to intel_uc.h drivers: gpu: drm: i915L intel_lpe_audio: Fix kerneldoc comments drm/i915: Suppress busy status for engines if wedged drm/i915: Do request retirement before marking engines as wedged drm/i915: Drop verbose and archaic "ring" from our internal engine names drm/i915: Use a dummy timeline name for a signaled fence drm/i915: Ironlake do_idle_maps w/a may be called w/o struct_mutex drm/i915/guc: Take enable_guc_loading check out of GEM core code ...
2017-04-11 07:28:01 +10:00 · 2017-04-11 07:28:01 +10:00 · 2b2fc72aa5
commit 2b2fc72aa5
parent cdf5316bdb ba515d3407
60 changed files with 2633 additions and 2029 deletions
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@ -1215,7 +1215,7 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s,
 	if (!info->async_flip)
 		return 0;

-	if (IS_SKYLAKE(dev_priv)) {
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
 		stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0);
 		tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) &
 				GENMASK(12, 10)) >> 10;
@ -1243,7 +1243,7 @@ static int gen8_update_plane_mmio_from_mi_display_flip(

 	set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12),
 		      info->surf_val << 12);
-	if (IS_SKYLAKE(dev_priv)) {
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
 		set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0),
 			      info->stride_val);
 		set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10),
@ -1267,7 +1267,7 @@ static int decode_mi_display_flip(struct parser_exec_state *s,

 	if (IS_BROADWELL(dev_priv))
 		return gen8_decode_mi_display_flip(s, info);
-	if (IS_SKYLAKE(dev_priv))
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		return skl_decode_mi_display_flip(s, info);

 	return -ENODEV;
@ -1278,7 +1278,9 @@ static int check_mi_display_flip(struct parser_exec_state *s,
 {
 	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;

-	if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv))
+	if (IS_BROADWELL(dev_priv)
+		|| IS_SKYLAKE(dev_priv)
+		|| IS_KABYLAKE(dev_priv))
 		return gen8_check_mi_display_flip(s, info);
 	return -ENODEV;
 }
@ -1289,7 +1291,9 @@ static int update_plane_mmio_from_mi_display_flip(
 {
 	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;

-	if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv))
+	if (IS_BROADWELL(dev_priv)
+		|| IS_SKYLAKE(dev_priv)
+		|| IS_KABYLAKE(dev_priv))
 		return gen8_update_plane_mmio_from_mi_display_flip(s, info);
 	return -ENODEV;
 }
@ -1569,7 +1573,8 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
 {
 	struct intel_gvt *gvt = s->vgpu->gvt;

-	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
+	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
+		|| IS_KABYLAKE(gvt->dev_priv)) {
 		/* BDW decides privilege based on address space */
 		if (cmd_val(s, 0) & (1 << 8))
 			return 0;
@ -2604,6 +2609,9 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	unsigned long gma_head, gma_tail, gma_bottom, ring_size, ring_tail;
 	struct parser_exec_state s;
 	int ret = 0;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+				struct intel_vgpu_workload,
+				wa_ctx);

 	/* ring base is page aligned */
 	if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, GTT_PAGE_SIZE)))
@ -2618,14 +2626,14 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)

 	s.buf_type = RING_BUFFER_INSTRUCTION;
 	s.buf_addr_type = GTT_BUFFER;
-	s.vgpu = wa_ctx->workload->vgpu;
-	s.ring_id = wa_ctx->workload->ring_id;
+	s.vgpu = workload->vgpu;
+	s.ring_id = workload->ring_id;
 	s.ring_start = wa_ctx->indirect_ctx.guest_gma;
 	s.ring_size = ring_size;
 	s.ring_head = gma_head;
 	s.ring_tail = gma_tail;
 	s.rb_va = wa_ctx->indirect_ctx.shadow_va;
-	s.workload = wa_ctx->workload;
+	s.workload = workload;

 	ret = ip_gma_set(&s, gma_head);
 	if (ret)
@ -2708,12 +2716,15 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
 	int ctx_size = wa_ctx->indirect_ctx.size;
 	unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma;
-	struct intel_vgpu *vgpu = wa_ctx->workload->vgpu;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+					struct intel_vgpu_workload,
+					wa_ctx);
+	struct intel_vgpu *vgpu = workload->vgpu;
 	struct drm_i915_gem_object *obj;
 	int ret = 0;
 	void *map;

-	obj = i915_gem_object_create(wa_ctx->workload->vgpu->gvt->dev_priv,
+	obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv,
 				     roundup(ctx_size + CACHELINE_BYTES,
 					     PAGE_SIZE));
 	if (IS_ERR(obj))
@ -2733,8 +2744,8 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 		goto unmap_src;
 	}

-	ret = copy_gma_to_hva(wa_ctx->workload->vgpu,
-				wa_ctx->workload->vgpu->gtt.ggtt_mm,
+	ret = copy_gma_to_hva(workload->vgpu,
+				workload->vgpu->gtt.ggtt_mm,
 				guest_gma, guest_gma + ctx_size,
 				map);
 	if (ret < 0) {
@ -2772,7 +2783,10 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
 	int ret;
-	struct intel_vgpu *vgpu = wa_ctx->workload->vgpu;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+					struct intel_vgpu_workload,
+					wa_ctx);
+	struct intel_vgpu *vgpu = workload->vgpu;

 	if (wa_ctx->indirect_ctx.size == 0)
 		return 0;
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@ -161,8 +161,9 @@ static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = {

 #define DPCD_HEADER_SIZE        0xb

+/* let the virtual display supports DP1.2 */
 static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = {
-	0x11, 0x0a, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+	0x12, 0x014, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 };

 static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
@ -172,9 +173,20 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			SDE_PORTC_HOTPLUG_CPT |
 			SDE_PORTD_HOTPLUG_CPT);

-	if (IS_SKYLAKE(dev_priv))
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
 		vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT |
 				SDE_PORTE_HOTPLUG_SPT);
+		vgpu_vreg(vgpu, SKL_FUSE_STATUS) |=
+				SKL_FUSE_DOWNLOAD_STATUS |
+				SKL_FUSE_PG0_DIST_STATUS |
+				SKL_FUSE_PG1_DIST_STATUS |
+				SKL_FUSE_PG2_DIST_STATUS;
+		vgpu_vreg(vgpu, LCPLL1_CTL) |=
+				LCPLL_PLL_ENABLE |
+				LCPLL_PLL_LOCK;
+		vgpu_vreg(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE;
+
+	}

 	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
@ -191,7 +203,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
 	}

-	if (IS_SKYLAKE(dev_priv) &&
+	if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) &&
 			intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) {
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT;
 	}
@ -353,7 +365,7 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

-	if (IS_SKYLAKE(dev_priv))
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		clean_virtual_dp_monitor(vgpu, PORT_D);
 	else
 		clean_virtual_dp_monitor(vgpu, PORT_B);
@ -375,7 +387,7 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution)

 	intel_vgpu_init_i2c_edid(vgpu);

-	if (IS_SKYLAKE(dev_priv))
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D,
 						resolution);
 	else
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@ -394,9 +394,11 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)

 static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
-	int ring_id = wa_ctx->workload->ring_id;
-	struct i915_gem_context *shadow_ctx =
-		wa_ctx->workload->vgpu->shadow_ctx;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+					struct intel_vgpu_workload,
+					wa_ctx);
+	int ring_id = workload->ring_id;
+	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_gem_object *ctx_obj =
 		shadow_ctx->engine[ring_id].state->obj;
 	struct execlist_ring_context *shadow_ring_context;
@ -680,7 +682,6 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
 			CACHELINE_BYTES;
 		workload->wa_ctx.per_ctx.guest_gma =
 			per_ctx & PER_CTX_ADDR_MASK;
-		workload->wa_ctx.workload = workload;

 		WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1));
 	}
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@ -2220,7 +2220,8 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt)

 	gvt_dbg_core("init gtt\n");

-	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
+	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
+		|| IS_KABYLAKE(gvt->dev_priv)) {
 		gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
 		gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
 		gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table;
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@ -106,7 +106,8 @@ static void init_device_info(struct intel_gvt *gvt)
 	struct intel_gvt_device_info *info = &gvt->device_info;
 	struct pci_dev *pdev = gvt->dev_priv->drm.pdev;

-	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
+	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
+		|| IS_KABYLAKE(gvt->dev_priv)) {
 		info->max_support_vgpus = 8;
 		info->cfg_space_size = 256;
 		info->mmio_size = 2 * 1024 * 1024;
@ -143,6 +144,11 @@ static int gvt_service_thread(void *data)
 			intel_gvt_emulate_vblank(gvt);
 			mutex_unlock(&gvt->lock);
 		}
+
+		if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
+					(void *)&gvt->service_request)) {
+			intel_gvt_schedule(gvt);
+		}
 	}

 	return 0;
@ -196,6 +202,8 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv)

 	idr_destroy(&gvt->vgpu_idr);

+	intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
+
 	kfree(dev_priv->gvt);
 	dev_priv->gvt = NULL;
 }
@ -214,6 +222,7 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv)
 int intel_gvt_init_device(struct drm_i915_private *dev_priv)
 {
 	struct intel_gvt *gvt;
+	struct intel_vgpu *vgpu;
 	int ret;

 	/*
@ -286,6 +295,14 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
 		goto out_clean_types;
 	}

+	vgpu = intel_gvt_create_idle_vgpu(gvt);
+	if (IS_ERR(vgpu)) {
+		ret = PTR_ERR(vgpu);
+		gvt_err("failed to create idle vgpu\n");
+		goto out_clean_types;
+	}
+	gvt->idle_vgpu = vgpu;
+
 	gvt_dbg_core("gvt device initialization is done\n");
 	dev_priv->gvt = gvt;
 	return 0;
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@ -138,6 +138,10 @@ struct intel_vgpu_display {
 	struct intel_vgpu_sbi sbi;
 };

+struct vgpu_sched_ctl {
+	int weight;
+};
+
 struct intel_vgpu {
 	struct intel_gvt *gvt;
 	int id;
@ -147,6 +151,7 @@ struct intel_vgpu {
 	bool failsafe;
 	bool resetting;
 	void *sched_data;
+	struct vgpu_sched_ctl sched_ctl;

 	struct intel_vgpu_fence fence;
 	struct intel_vgpu_gm gm;
@ -160,6 +165,7 @@ struct intel_vgpu {
 	struct list_head workload_q_head[I915_NUM_ENGINES];
 	struct kmem_cache *workloads;
 	atomic_t running_workload_num;
+	ktime_t last_ctx_submit_time;
 	DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
 	struct i915_gem_context *shadow_ctx;

@ -215,6 +221,7 @@ struct intel_vgpu_type {
 	unsigned int low_gm_size;
 	unsigned int high_gm_size;
 	unsigned int fence;
+	unsigned int weight;
 	enum intel_vgpu_edid resolution;
 };

@ -236,6 +243,7 @@ struct intel_gvt {
 	DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS);
 	struct intel_vgpu_type *types;
 	unsigned int num_types;
+	struct intel_vgpu *idle_vgpu;

 	struct task_struct *service_thread;
 	wait_queue_head_t service_thread_wq;
@ -249,6 +257,7 @@ static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915)

 enum {
 	INTEL_GVT_REQUEST_EMULATE_VBLANK = 0,
+	INTEL_GVT_REQUEST_SCHED = 1,
 };

 static inline void intel_gvt_request_service(struct intel_gvt *gvt,
@ -322,6 +331,8 @@ struct intel_vgpu_creation_params {
 	__u64 resolution;
 	__s32 primary;
 	__u64 vgpu_id;
+
+	__u32 weight;
 };

 int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu,
@ -376,6 +387,8 @@ static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu,
 int intel_gvt_init_vgpu_types(struct intel_gvt *gvt);
 void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt);

+struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt);
+void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu);
 struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
 					 struct intel_vgpu_type *type);
 void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@ -68,6 +68,8 @@ unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt)
 		return D_BDW;
 	else if (IS_SKYLAKE(gvt->dev_priv))
 		return D_SKL;
+	else if (IS_KABYLAKE(gvt->dev_priv))
+		return D_KBL;

 	return 0;
 }
@ -234,7 +236,8 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu,
 	old = vgpu_vreg(vgpu, offset);
 	new = CALC_MODE_MASK_REG(old, *(u32 *)p_data);

-	if (IS_SKYLAKE(vgpu->gvt->dev_priv)) {
+	if (IS_SKYLAKE(vgpu->gvt->dev_priv)
+		|| IS_KABYLAKE(vgpu->gvt->dev_priv)) {
 		switch (offset) {
 		case FORCEWAKE_RENDER_GEN9_REG:
 			ack_reg_offset = FORCEWAKE_ACK_RENDER_GEN9_REG;
@ -823,8 +826,9 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu,
 	write_vreg(vgpu, offset, p_data, bytes);
 	data = vgpu_vreg(vgpu, offset);

-	if (IS_SKYLAKE(vgpu->gvt->dev_priv) &&
-	    offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) {
+	if ((IS_SKYLAKE(vgpu->gvt->dev_priv)
+		|| IS_KABYLAKE(vgpu->gvt->dev_priv))
+		&& offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) {
 		/* SKL DPB/C/D aux ctl register changed */
 		return 0;
 	} else if (IS_BROADWELL(vgpu->gvt->dev_priv) &&
@ -1303,7 +1307,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset,

 	switch (cmd) {
 	case GEN9_PCODE_READ_MEM_LATENCY:
-		if (IS_SKYLAKE(vgpu->gvt->dev_priv)) {
+		if (IS_SKYLAKE(vgpu->gvt->dev_priv)
+			 || IS_KABYLAKE(vgpu->gvt->dev_priv)) {
 			/**
 			 * "Read memory latency" command on gen9.
 			 * Below memory latency values are read
@ -1316,7 +1321,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset,
 		}
 		break;
 	case SKL_PCODE_CDCLK_CONTROL:
-		if (IS_SKYLAKE(vgpu->gvt->dev_priv))
+		if (IS_SKYLAKE(vgpu->gvt->dev_priv)
+			 || IS_KABYLAKE(vgpu->gvt->dev_priv))
 			*data0 = SKL_CDCLK_READY_FOR_CHANGE;
 		break;
 	case GEN6_PCODE_READ_RC6VIDS:
@ -1410,6 +1416,7 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,

 	execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data;
 	if (execlist->elsp_dwords.index == 3) {
+		vgpu->last_ctx_submit_time = ktime_get();
 		ret = intel_vgpu_submit_execlist(vgpu, ring_id);
 		if(ret)
 			gvt_vgpu_err("fail submit workload on ring %d\n",
@ -2584,219 +2591,232 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write);
 	MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL);

-	MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write);
-	MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write);
-	MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write);
+	MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+						dp_aux_ch_ctl_mmio_write);
+	MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+						dp_aux_ch_ctl_mmio_write);
+	MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+						dp_aux_ch_ctl_mmio_write);

-	MMIO_D(HSW_PWR_WELL_BIOS, D_SKL);
-	MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write);
+	MMIO_D(HSW_PWR_WELL_BIOS, D_SKL_PLUS);
+	MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL_PLUS, NULL,
+						skl_power_well_ctl_write);
+	MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL_PLUS, NULL, mailbox_write);

 	MMIO_D(0xa210, D_SKL_PLUS);
 	MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
 	MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
 	MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DH(0x4ddc, D_SKL, NULL, skl_misc_ctl_write);
-	MMIO_DH(0x42080, D_SKL, NULL, skl_misc_ctl_write);
-	MMIO_D(0x45504, D_SKL);
-	MMIO_D(0x45520, D_SKL);
-	MMIO_D(0x46000, D_SKL);
-	MMIO_DH(0x46010, D_SKL, NULL, skl_lcpll_write);
-	MMIO_DH(0x46014, D_SKL, NULL, skl_lcpll_write);
-	MMIO_D(0x6C040, D_SKL);
-	MMIO_D(0x6C048, D_SKL);
-	MMIO_D(0x6C050, D_SKL);
-	MMIO_D(0x6C044, D_SKL);
-	MMIO_D(0x6C04C, D_SKL);
-	MMIO_D(0x6C054, D_SKL);
-	MMIO_D(0x6c058, D_SKL);
-	MMIO_D(0x6c05c, D_SKL);
-	MMIO_DH(0X6c060, D_SKL, dpll_status_read, NULL);
+	MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write);
+	MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write);
+	MMIO_D(0x45504, D_SKL_PLUS);
+	MMIO_D(0x45520, D_SKL_PLUS);
+	MMIO_D(0x46000, D_SKL_PLUS);
+	MMIO_DH(0x46010, D_SKL | D_KBL, NULL, skl_lcpll_write);
+	MMIO_DH(0x46014, D_SKL | D_KBL, NULL, skl_lcpll_write);
+	MMIO_D(0x6C040, D_SKL | D_KBL);
+	MMIO_D(0x6C048, D_SKL | D_KBL);
+	MMIO_D(0x6C050, D_SKL | D_KBL);
+	MMIO_D(0x6C044, D_SKL | D_KBL);
+	MMIO_D(0x6C04C, D_SKL | D_KBL);
+	MMIO_D(0x6C054, D_SKL | D_KBL);
+	MMIO_D(0x6c058, D_SKL | D_KBL);
+	MMIO_D(0x6c05c, D_SKL | D_KBL);
+	MMIO_DH(0X6c060, D_SKL | D_KBL, dpll_status_read, NULL);

-	MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write);

-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write);

-	MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write);

-	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL, NULL, NULL);
-	MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL, NULL, NULL);
-	MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL, NULL, NULL);
+	MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL_PLUS, NULL, NULL);

-	MMIO_F(PLANE_WM(PIPE_A, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_A, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_A, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_A, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_A, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_A, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);

-	MMIO_F(PLANE_WM(PIPE_B, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_B, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_B, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_B, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_B, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_B, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);

-	MMIO_F(PLANE_WM(PIPE_C, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_C, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_C, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_C, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_C, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_C, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);

-	MMIO_F(CUR_WM(PIPE_A, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(CUR_WM(PIPE_B, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(CUR_WM(PIPE_C, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
+	MMIO_F(CUR_WM(PIPE_A, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(CUR_WM(PIPE_B, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(CUR_WM(PIPE_C, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL, NULL, NULL);
-	MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL, NULL, NULL);
-	MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL, NULL, NULL);
+	MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL_PLUS, NULL, NULL);

-	MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL_PLUS, NULL, NULL);

-	MMIO_D(0x70380, D_SKL);
-	MMIO_D(0x71380, D_SKL);
-	MMIO_D(0x72380, D_SKL);
-	MMIO_D(0x7039c, D_SKL);
+	MMIO_D(0x70380, D_SKL_PLUS);
+	MMIO_D(0x71380, D_SKL_PLUS);
+	MMIO_D(0x72380, D_SKL_PLUS);
+	MMIO_D(0x7039c, D_SKL_PLUS);

-	MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_D(0x8f074, D_SKL);
-	MMIO_D(0x8f004, D_SKL);
-	MMIO_D(0x8f034, D_SKL);
+	MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_D(0x8f074, D_SKL | D_KBL);
+	MMIO_D(0x8f004, D_SKL | D_KBL);
+	MMIO_D(0x8f034, D_SKL | D_KBL);

-	MMIO_D(0xb11c, D_SKL);
+	MMIO_D(0xb11c, D_SKL | D_KBL);

-	MMIO_D(0x51000, D_SKL);
-	MMIO_D(0x6c00c, D_SKL);
+	MMIO_D(0x51000, D_SKL | D_KBL);
+	MMIO_D(0x6c00c, D_SKL_PLUS);

-	MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL);
+	MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
+	MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);

-	MMIO_D(0xd08, D_SKL);
-	MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_D(0xd08, D_SKL_PLUS);
+	MMIO_DFH(0x20e0, D_SKL_PLUS, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(0x20ec, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);

 	/* TRTT */
-	MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write);
-	MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write);
+	MMIO_DFH(0x4de0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4de4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4de8, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4dec, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4df0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4df4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write);
+	MMIO_DH(0x4dfc, D_SKL | D_KBL, NULL, gen9_trtt_chicken_write);

-	MMIO_D(0x45008, D_SKL);
+	MMIO_D(0x45008, D_SKL | D_KBL);

-	MMIO_D(0x46430, D_SKL);
+	MMIO_D(0x46430, D_SKL | D_KBL);

-	MMIO_D(0x46520, D_SKL);
+	MMIO_D(0x46520, D_SKL | D_KBL);

-	MMIO_D(0xc403c, D_SKL);
-	MMIO_D(0xb004, D_SKL);
+	MMIO_D(0xc403c, D_SKL | D_KBL);
+	MMIO_D(0xb004, D_SKL_PLUS);
 	MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write);

-	MMIO_D(0x65900, D_SKL);
-	MMIO_D(0x1082c0, D_SKL);
-	MMIO_D(0x4068, D_SKL);
-	MMIO_D(0x67054, D_SKL);
-	MMIO_D(0x6e560, D_SKL);
-	MMIO_D(0x6e554, D_SKL);
-	MMIO_D(0x2b20, D_SKL);
-	MMIO_D(0x65f00, D_SKL);
-	MMIO_D(0x65f08, D_SKL);
-	MMIO_D(0x320f0, D_SKL);
+	MMIO_D(0x65900, D_SKL_PLUS);
+	MMIO_D(0x1082c0, D_SKL | D_KBL);
+	MMIO_D(0x4068, D_SKL | D_KBL);
+	MMIO_D(0x67054, D_SKL | D_KBL);
+	MMIO_D(0x6e560, D_SKL | D_KBL);
+	MMIO_D(0x6e554, D_SKL | D_KBL);
+	MMIO_D(0x2b20, D_SKL | D_KBL);
+	MMIO_D(0x65f00, D_SKL | D_KBL);
+	MMIO_D(0x65f08, D_SKL | D_KBL);
+	MMIO_D(0x320f0, D_SKL | D_KBL);

-	MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_D(0x70034, D_SKL);
-	MMIO_D(0x71034, D_SKL);
-	MMIO_D(0x72034, D_SKL);
+	MMIO_DFH(_REG_VCS2_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_REG_VECS_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_D(0x70034, D_SKL_PLUS);
+	MMIO_D(0x71034, D_SKL_PLUS);
+	MMIO_D(0x72034, D_SKL_PLUS);

-	MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL);
-	MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL);
-	MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL);
-	MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL);
-	MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL);
-	MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL);
+	MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL_PLUS);
+	MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL_PLUS);
+	MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL_PLUS);
+	MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL_PLUS);
+	MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL_PLUS);
+	MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL_PLUS);

-	MMIO_D(0x44500, D_SKL);
+	MMIO_D(0x44500, D_SKL_PLUS);
 	MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS,
+	MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL | D_KBL, F_MODE_MASK | F_CMD_ACCESS,
 		NULL, NULL);
+
+	MMIO_D(0x4ab8, D_KBL);
+	MMIO_D(0x940c, D_SKL_PLUS);
+	MMIO_D(0x2248, D_SKL_PLUS | D_KBL);
+	MMIO_D(0x4ab0, D_SKL | D_KBL);
+	MMIO_D(0x20d4, D_SKL | D_KBL);
+
 	return 0;
 }

@ -2873,7 +2893,8 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt)
 		ret = init_broadwell_mmio_info(gvt);
 		if (ret)
 			goto err;
-	} else if (IS_SKYLAKE(dev_priv)) {
+	} else if (IS_SKYLAKE(dev_priv)
+		|| IS_KABYLAKE(dev_priv)) {
 		ret = init_broadwell_mmio_info(gvt);
 		if (ret)
 			goto err;
--- a/drivers/gpu/drm/i915/gvt/interrupt.c
+++ b/drivers/gpu/drm/i915/gvt/interrupt.c
@ -580,7 +580,7 @@ static void gen8_init_irq(

 		SET_BIT_INFO(irq, 4, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
 		SET_BIT_INFO(irq, 5, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
-	} else if (IS_SKYLAKE(gvt->dev_priv)) {
+	} else if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) {
 		SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_DE_PORT);
 		SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_DE_PORT);
 		SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_DE_PORT);
@ -690,7 +690,8 @@ int intel_gvt_init_irq(struct intel_gvt *gvt)

 	gvt_dbg_core("init irq framework\n");

-	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
+	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
+		|| IS_KABYLAKE(gvt->dev_priv)) {
 		irq->ops = &gen8_irq_ops;
 		irq->irq_map = gen8_irq_map;
 	} else {
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@ -295,10 +295,12 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev,
 		return 0;

 	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
-		       "fence: %d\nresolution: %s\n",
+		       "fence: %d\nresolution: %s\n"
+		       "weight: %d\n",
 		       BYTES_TO_MB(type->low_gm_size),
 		       BYTES_TO_MB(type->high_gm_size),
-		       type->fence, vgpu_edid_str(type->resolution));
+		       type->fence, vgpu_edid_str(type->resolution),
+		       type->weight);
 }

 static MDEV_TYPE_ATTR_RO(available_instances);
@ -1146,8 +1148,40 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 	return 0;
 }

+static ssize_t
+vgpu_id_show(struct device *dev, struct device_attribute *attr,
+	     char *buf)
+{
+	struct mdev_device *mdev = mdev_from_dev(dev);
+
+	if (mdev) {
+		struct intel_vgpu *vgpu = (struct intel_vgpu *)
+			mdev_get_drvdata(mdev);
+		return sprintf(buf, "%d\n", vgpu->id);
+	}
+	return sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR_RO(vgpu_id);
+
+static struct attribute *intel_vgpu_attrs[] = {
+	&dev_attr_vgpu_id.attr,
+	NULL
+};
+
+static const struct attribute_group intel_vgpu_group = {
+	.name = "intel_vgpu",
+	.attrs = intel_vgpu_attrs,
+};
+
+static const struct attribute_group *intel_vgpu_groups[] = {
+	&intel_vgpu_group,
+	NULL,
+};
+
 static const struct mdev_parent_ops intel_vgpu_ops = {
 	.supported_type_groups	= intel_vgpu_type_groups,
+	.mdev_attr_groups       = intel_vgpu_groups,
 	.create			= intel_vgpu_create,
 	.remove			= intel_vgpu_remove,

@ -1339,13 +1373,6 @@ static int kvmgt_guest_init(struct mdev_device *mdev)

 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
 {
-	struct intel_vgpu *vgpu = info->vgpu;
-
-	if (!info) {
-		gvt_vgpu_err("kvmgt_guest_info invalid\n");
-		return false;
-	}
-
 	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
 	kvmgt_protect_table_destroy(info);
 	gvt_cache_destroy(info->vgpu);
--- a/drivers/gpu/drm/i915/gvt/mmio.h
+++ b/drivers/gpu/drm/i915/gvt/mmio.h
@ -44,20 +44,21 @@ struct intel_vgpu;
 #define D_HSW   (1 << 2)
 #define D_BDW   (1 << 3)
 #define D_SKL	(1 << 4)
+#define D_KBL	(1 << 5)

-#define D_GEN9PLUS	(D_SKL)
-#define D_GEN8PLUS	(D_BDW | D_SKL)
-#define D_GEN75PLUS	(D_HSW | D_BDW | D_SKL)
-#define D_GEN7PLUS	(D_IVB | D_HSW | D_BDW | D_SKL)
+#define D_GEN9PLUS	(D_SKL | D_KBL)
+#define D_GEN8PLUS	(D_BDW | D_SKL | D_KBL)
+#define D_GEN75PLUS	(D_HSW | D_BDW | D_SKL | D_KBL)
+#define D_GEN7PLUS	(D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)

-#define D_SKL_PLUS	(D_SKL)
-#define D_BDW_PLUS	(D_BDW | D_SKL)
-#define D_HSW_PLUS	(D_HSW | D_BDW | D_SKL)
-#define D_IVB_PLUS	(D_IVB | D_HSW | D_BDW | D_SKL)
+#define D_SKL_PLUS	(D_SKL | D_KBL)
+#define D_BDW_PLUS	(D_BDW | D_SKL | D_KBL)
+#define D_HSW_PLUS	(D_HSW | D_BDW | D_SKL | D_KBL)
+#define D_IVB_PLUS	(D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)

 #define D_PRE_BDW	(D_SNB | D_IVB | D_HSW)
 #define D_PRE_SKL	(D_SNB | D_IVB | D_HSW | D_BDW)
-#define D_ALL		(D_SNB | D_IVB | D_HSW | D_BDW | D_SKL)
+#define D_ALL		(D_SNB | D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)

 struct intel_gvt_mmio_info {
 	u32 offset;
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@ -126,6 +126,18 @@ static struct render_mmio gen9_render_mmio_list[] = {
 	{VCS2, _MMIO(0x1c028), 0xffff, false},

 	{VECS, _MMIO(0x1a028), 0xffff, false},
+
+	{RCS, _MMIO(0x7304), 0xffff, true},
+	{RCS, _MMIO(0x2248), 0x0, false},
+	{RCS, _MMIO(0x940c), 0x0, false},
+	{RCS, _MMIO(0x4ab8), 0x0, false},
+
+	{RCS, _MMIO(0x4ab0), 0x0, false},
+	{RCS, _MMIO(0x20d4), 0x0, false},
+
+	{RCS, _MMIO(0xb004), 0x0, false},
+	{RCS, _MMIO(0x20a0), 0x0, false},
+	{RCS, _MMIO(0x20e4), 0xffff, false},
 };

 static u32 gen9_render_mocs[I915_NUM_ENGINES][64];
@ -159,7 +171,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
 	 */
 	fw = intel_uncore_forcewake_for_reg(dev_priv, reg,
 					    FW_REG_READ | FW_REG_WRITE);
-	if (ring_id == RCS && IS_SKYLAKE(dev_priv))
+	if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
 		fw |= FORCEWAKE_RENDER;

 	intel_uncore_forcewake_get(dev_priv, fw);
@ -192,7 +204,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
 		return;

-	if (!IS_SKYLAKE(dev_priv))
+	if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
 		return;

 	offset.reg = regs[ring_id];
@ -230,7 +242,7 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
 		return;

-	if (!IS_SKYLAKE(dev_priv))
+	if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
 		return;

 	offset.reg = regs[ring_id];
@ -265,7 +277,8 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id)
 	u32 inhibit_mask =
 		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

-	if (IS_SKYLAKE(vgpu->gvt->dev_priv)) {
+	if (IS_SKYLAKE(vgpu->gvt->dev_priv)
+		|| IS_KABYLAKE(vgpu->gvt->dev_priv)) {
 		mmio = gen9_render_mmio_list;
 		array_size = ARRAY_SIZE(gen9_render_mmio_list);
 		load_mocs(vgpu, ring_id);
@ -312,7 +325,7 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id)
 	u32 v;
 	int i, array_size;

-	if (IS_SKYLAKE(dev_priv)) {
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
 		mmio = gen9_render_mmio_list;
 		array_size = ARRAY_SIZE(gen9_render_mmio_list);
 		restore_mocs(vgpu, ring_id);
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@ -47,11 +47,87 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
 	return false;
 }

+struct vgpu_sched_data {
+	struct list_head lru_list;
+	struct intel_vgpu *vgpu;
+
+	ktime_t sched_in_time;
+	ktime_t sched_out_time;
+	ktime_t sched_time;
+	ktime_t left_ts;
+	ktime_t allocated_ts;
+
+	struct vgpu_sched_ctl sched_ctl;
+};
+
+struct gvt_sched_data {
+	struct intel_gvt *gvt;
+	struct hrtimer timer;
+	unsigned long period;
+	struct list_head lru_runq_head;
+};
+
+static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu)
+{
+	ktime_t delta_ts;
+	struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data;
+
+	delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time;
+
+	vgpu_data->sched_time += delta_ts;
+	vgpu_data->left_ts -= delta_ts;
+}
+
+#define GVT_TS_BALANCE_PERIOD_MS 100
+#define GVT_TS_BALANCE_STAGE_NUM 10
+
+static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
+{
+	struct vgpu_sched_data *vgpu_data;
+	struct list_head *pos;
+	static uint64_t stage_check;
+	int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM;
+
+	/* The timeslice accumulation reset at stage 0, which is
+	 * allocated again without adding previous debt.
+	 */
+	if (stage == 0) {
+		int total_weight = 0;
+		ktime_t fair_timeslice;
+
+		list_for_each(pos, &sched_data->lru_runq_head) {
+			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+			total_weight += vgpu_data->sched_ctl.weight;
+		}
+
+		list_for_each(pos, &sched_data->lru_runq_head) {
+			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+			fair_timeslice = ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS) *
+						vgpu_data->sched_ctl.weight /
+						total_weight;
+
+			vgpu_data->allocated_ts = fair_timeslice;
+			vgpu_data->left_ts = vgpu_data->allocated_ts;
+		}
+	} else {
+		list_for_each(pos, &sched_data->lru_runq_head) {
+			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+
+			/* timeslice for next 100ms should add the left/debt
+			 * slice of previous stages.
+			 */
+			vgpu_data->left_ts += vgpu_data->allocated_ts;
+		}
+	}
+}
+
 static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 {
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
 	enum intel_engine_id i;
 	struct intel_engine_cs *engine;
+	struct vgpu_sched_data *vgpu_data;
+	ktime_t cur_time;

 	/* no target to schedule */
 	if (!scheduler->next_vgpu)
@ -77,6 +153,15 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 	gvt_dbg_sched("switch to next vgpu %d\n",
 			scheduler->next_vgpu->id);

+	cur_time = ktime_get();
+	if (scheduler->current_vgpu) {
+		vgpu_data = scheduler->current_vgpu->sched_data;
+		vgpu_data->sched_out_time = cur_time;
+		vgpu_update_timeslice(scheduler->current_vgpu);
+	}
+	vgpu_data = scheduler->next_vgpu->sched_data;
+	vgpu_data->sched_in_time = cur_time;
+
 	/* switch current vgpu */
 	scheduler->current_vgpu = scheduler->next_vgpu;
 	scheduler->next_vgpu = NULL;
@ -88,62 +173,61 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 		wake_up(&scheduler->waitq[i]);
 }

-struct tbs_vgpu_data {
-	struct list_head list;
-	struct intel_vgpu *vgpu;
-	/* put some per-vgpu sched stats here */
-};
-
-struct tbs_sched_data {
-	struct intel_gvt *gvt;
-	struct delayed_work work;
-	unsigned long period;
-	struct list_head runq_head;
-};
-
-#define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1))
-
-static void tbs_sched_func(struct work_struct *work)
+static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
 {
-	struct tbs_sched_data *sched_data = container_of(work,
-			struct tbs_sched_data, work.work);
-	struct tbs_vgpu_data *vgpu_data;
-
-	struct intel_gvt *gvt = sched_data->gvt;
-	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
-
+	struct vgpu_sched_data *vgpu_data;
 	struct intel_vgpu *vgpu = NULL;
-	struct list_head *pos, *head;
-
-	mutex_lock(&gvt->lock);
-
-	/* no vgpu or has already had a target */
-	if (list_empty(&sched_data->runq_head) || scheduler->next_vgpu)
-		goto out;
-
-	if (scheduler->current_vgpu) {
-		vgpu_data = scheduler->current_vgpu->sched_data;
-		head = &vgpu_data->list;
-	} else {
-		head = &sched_data->runq_head;
-	}
+	struct list_head *head = &sched_data->lru_runq_head;
+	struct list_head *pos;

 	/* search a vgpu with pending workload */
 	list_for_each(pos, head) {
-		if (pos == &sched_data->runq_head)
-			continue;

-		vgpu_data = container_of(pos, struct tbs_vgpu_data, list);
+		vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
 		if (!vgpu_has_pending_workload(vgpu_data->vgpu))
 			continue;

-		vgpu = vgpu_data->vgpu;
-		break;
+		/* Return the vGPU only if it has time slice left */
+		if (vgpu_data->left_ts > 0) {
+			vgpu = vgpu_data->vgpu;
+			break;
+		}
 	}

+	return vgpu;
+}
+
+/* in nanosecond */
+#define GVT_DEFAULT_TIME_SLICE 1000000
+
+static void tbs_sched_func(struct gvt_sched_data *sched_data)
+{
+	struct intel_gvt *gvt = sched_data->gvt;
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	struct vgpu_sched_data *vgpu_data;
+	struct intel_vgpu *vgpu = NULL;
+	static uint64_t timer_check;
+
+	if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS))
+		gvt_balance_timeslice(sched_data);
+
+	/* no active vgpu or has already had a target */
+	if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
+		goto out;
+
+	vgpu = find_busy_vgpu(sched_data);
 	if (vgpu) {
 		scheduler->next_vgpu = vgpu;
+
+		/* Move the last used vGPU to the tail of lru_list */
+		vgpu_data = vgpu->sched_data;
+		list_del_init(&vgpu_data->lru_list);
+		list_add_tail(&vgpu_data->lru_list,
+				&sched_data->lru_runq_head);
+
 		gvt_dbg_sched("pick next vgpu %d\n", vgpu->id);
+	} else {
+		scheduler->next_vgpu = gvt->idle_vgpu;
 	}
 out:
 	if (scheduler->next_vgpu) {
@ -151,34 +235,49 @@ out:
 				scheduler->next_vgpu->id);
 		try_to_schedule_next_vgpu(gvt);
 	}
+}

-	/*
-	 * still have vgpu on runq
-	 * or last schedule haven't finished due to running workload
-	 */
-	if (!list_empty(&sched_data->runq_head) || scheduler->next_vgpu)
-		schedule_delayed_work(&sched_data->work, sched_data->period);
+void intel_gvt_schedule(struct intel_gvt *gvt)
+{
+	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;

+	mutex_lock(&gvt->lock);
+	tbs_sched_func(sched_data);
 	mutex_unlock(&gvt->lock);
 }

+static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data)
+{
+	struct gvt_sched_data *data;
+
+	data = container_of(timer_data, struct gvt_sched_data, timer);
+
+	intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED);
+
+	hrtimer_add_expires_ns(&data->timer, data->period);
+
+	return HRTIMER_RESTART;
+}
+
 static int tbs_sched_init(struct intel_gvt *gvt)
 {
 	struct intel_gvt_workload_scheduler *scheduler =
 		&gvt->scheduler;

-	struct tbs_sched_data *data;
+	struct gvt_sched_data *data;

 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;

-	INIT_LIST_HEAD(&data->runq_head);
-	INIT_DELAYED_WORK(&data->work, tbs_sched_func);
+	INIT_LIST_HEAD(&data->lru_runq_head);
+	hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	data->timer.function = tbs_timer_fn;
 	data->period = GVT_DEFAULT_TIME_SLICE;
 	data->gvt = gvt;

 	scheduler->sched_data = data;
+
 	return 0;
 }

@ -186,25 +285,28 @@ static void tbs_sched_clean(struct intel_gvt *gvt)
 {
 	struct intel_gvt_workload_scheduler *scheduler =
 		&gvt->scheduler;
-	struct tbs_sched_data *data = scheduler->sched_data;
+	struct gvt_sched_data *data = scheduler->sched_data;
+
+	hrtimer_cancel(&data->timer);

-	cancel_delayed_work(&data->work);
 	kfree(data);
 	scheduler->sched_data = NULL;
 }

 static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
 {
-	struct tbs_vgpu_data *data;
+	struct vgpu_sched_data *data;

 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;

+	data->sched_ctl.weight = vgpu->sched_ctl.weight;
 	data->vgpu = vgpu;
-	INIT_LIST_HEAD(&data->list);
+	INIT_LIST_HEAD(&data->lru_list);

 	vgpu->sched_data = data;
+
 	return 0;
 }

@ -216,21 +318,24 @@ static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)

 static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
 {
-	struct tbs_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
-	struct tbs_vgpu_data *vgpu_data = vgpu->sched_data;
+	struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
+	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

-	if (!list_empty(&vgpu_data->list))
+	if (!list_empty(&vgpu_data->lru_list))
 		return;

-	list_add_tail(&vgpu_data->list, &sched_data->runq_head);
-	schedule_delayed_work(&sched_data->work, 0);
+	list_add_tail(&vgpu_data->lru_list, &sched_data->lru_runq_head);
+
+	if (!hrtimer_active(&sched_data->timer))
+		hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
+			sched_data->period), HRTIMER_MODE_ABS);
 }

 static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
 {
-	struct tbs_vgpu_data *vgpu_data = vgpu->sched_data;
+	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;

-	list_del_init(&vgpu_data->list);
+	list_del_init(&vgpu_data->lru_list);
 }

 static struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
--- a/drivers/gpu/drm/i915/gvt/sched_policy.h
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.h
@ -43,6 +43,8 @@ struct intel_gvt_sched_policy_ops {
 	void (*stop_schedule)(struct intel_vgpu *vgpu);
 };

+void intel_gvt_schedule(struct intel_gvt *gvt);
+
 int intel_gvt_init_sched_policy(struct intel_gvt *gvt);

 void intel_gvt_clean_sched_policy(struct intel_gvt *gvt);
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@ -448,7 +448,8 @@ static int workload_thread(void *priv)
 	struct intel_vgpu_workload *workload = NULL;
 	struct intel_vgpu *vgpu = NULL;
 	int ret;
-	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv);
+	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv)
+			|| IS_KABYLAKE(gvt->dev_priv);
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);

 	kfree(p);
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@ -67,7 +67,6 @@ struct shadow_per_ctx {
 };

 struct intel_shadow_wa_ctx {
-	struct intel_vgpu_workload *workload;
 	struct shadow_indirect_ctx indirect_ctx;
 	struct shadow_per_ctx per_ctx;

--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@ -64,18 +64,28 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
 	WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 }

+#define VGPU_MAX_WEIGHT 16
+#define VGPU_WEIGHT(vgpu_num)	\
+	(VGPU_MAX_WEIGHT / (vgpu_num))
+
 static struct {
 	unsigned int low_mm;
 	unsigned int high_mm;
 	unsigned int fence;
+
+	/* A vGPU with a weight of 8 will get twice as much GPU as a vGPU
+	 * with a weight of 4 on a contended host, different vGPU type has
+	 * different weight set. Legal weights range from 1 to 16.
+	 */
+	unsigned int weight;
 	enum intel_vgpu_edid edid;
 	char *name;
 } vgpu_types[] = {
 /* Fixed vGPU type table */
-	{ MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" },
-	{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" },
-	{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" },
-	{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" },
+	{ MB_TO_BYTES(64), MB_TO_BYTES(384), 4, VGPU_WEIGHT(8), GVT_EDID_1024_768, "8" },
+	{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, VGPU_WEIGHT(4), GVT_EDID_1920_1200, "4" },
+	{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, VGPU_WEIGHT(2), GVT_EDID_1920_1200, "2" },
+	{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, VGPU_WEIGHT(1), GVT_EDID_1920_1200, "1" },
 };

 /**
@ -120,6 +130,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 		gvt->types[i].low_gm_size = vgpu_types[i].low_mm;
 		gvt->types[i].high_gm_size = vgpu_types[i].high_mm;
 		gvt->types[i].fence = vgpu_types[i].fence;
+
+		if (vgpu_types[i].weight < 1 ||
+					vgpu_types[i].weight > VGPU_MAX_WEIGHT)
+			return -EINVAL;
+
+		gvt->types[i].weight = vgpu_types[i].weight;
 		gvt->types[i].resolution = vgpu_types[i].edid;
 		gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm,
 						   high_avail / vgpu_types[i].high_mm);
@ -131,11 +147,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 			sprintf(gvt->types[i].name, "GVTg_V5_%s",
 						vgpu_types[i].name);

-		gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n",
+		gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u weight %u res %s\n",
 			     i, gvt->types[i].name,
 			     gvt->types[i].avail_instance,
 			     gvt->types[i].low_gm_size,
 			     gvt->types[i].high_gm_size, gvt->types[i].fence,
+			     gvt->types[i].weight,
 			     vgpu_edid_str(gvt->types[i].resolution));
 	}

@ -216,6 +233,59 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
 	mutex_unlock(&gvt->lock);
 }

+#define IDLE_VGPU_IDR 0
+
+/**
+ * intel_gvt_create_idle_vgpu - create an idle virtual GPU
+ * @gvt: GVT device
+ *
+ * This function is called when user wants to create an idle virtual GPU.
+ *
+ * Returns:
+ * pointer to intel_vgpu, error pointer if failed.
+ */
+struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt)
+{
+	struct intel_vgpu *vgpu;
+	enum intel_engine_id i;
+	int ret;
+
+	vgpu = vzalloc(sizeof(*vgpu));
+	if (!vgpu)
+		return ERR_PTR(-ENOMEM);
+
+	vgpu->id = IDLE_VGPU_IDR;
+	vgpu->gvt = gvt;
+
+	for (i = 0; i < I915_NUM_ENGINES; i++)
+		INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
+
+	ret = intel_vgpu_init_sched_policy(vgpu);
+	if (ret)
+		goto out_free_vgpu;
+
+	vgpu->active = false;
+
+	return vgpu;
+
+out_free_vgpu:
+	vfree(vgpu);
+	return ERR_PTR(ret);
+}
+
+/**
+ * intel_gvt_destroy_vgpu - destroy an idle virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to destroy an idle virtual GPU.
+ *
+ */
+void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu)
+{
+	intel_vgpu_clean_sched_policy(vgpu);
+	vfree(vgpu);
+}
+
 static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 		struct intel_vgpu_creation_params *param)
 {
@ -232,13 +302,15 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,

 	mutex_lock(&gvt->lock);

-	ret = idr_alloc(&gvt->vgpu_idr, vgpu, 1, GVT_MAX_VGPU, GFP_KERNEL);
+	ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU,
+		GFP_KERNEL);
 	if (ret < 0)
 		goto out_free_vgpu;

 	vgpu->id = ret;
 	vgpu->handle = param->handle;
 	vgpu->gvt = gvt;
+	vgpu->sched_ctl.weight = param->weight;
 	bitmap_zero(vgpu->tlb_handle_pending, I915_NUM_ENGINES);

 	intel_vgpu_init_cfg_space(vgpu, param->primary);
@ -325,6 +397,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	param.low_gm_sz = type->low_gm_size;
 	param.high_gm_sz = type->high_gm_size;
 	param.fence_sz = type->fence;
+	param.weight = type->weight;
 	param.resolution = type->resolution;

 	/* XXX current param based on MB */
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@ -1012,9 +1012,12 @@ static int gpu_state_release(struct inode *inode, struct file *file)

 static int i915_gpu_info_open(struct inode *inode, struct file *file)
 {
+	struct drm_i915_private *i915 = inode->i_private;
 	struct i915_gpu_state *gpu;

-	gpu = i915_capture_gpu_state(inode->i_private);
+	intel_runtime_pm_get(i915);
+	gpu = i915_capture_gpu_state(i915);
+	intel_runtime_pm_put(i915);
 	if (!gpu)
 		return -ENOMEM;

@ -1459,16 +1462,14 @@ static int ironlake_drpc_info(struct seq_file *m)

 static int i915_forcewake_domains(struct seq_file *m, void *data)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	struct drm_i915_private *i915 = node_to_i915(m->private);
 	struct intel_uncore_forcewake_domain *fw_domain;
+	unsigned int tmp;

-	spin_lock_irq(&dev_priv->uncore.lock);
-	for_each_fw_domain(fw_domain, dev_priv) {
+	for_each_fw_domain(fw_domain, i915, tmp)
 		seq_printf(m, "%s.wake_count = %u\n",
 			   intel_uncore_forcewake_domain_to_str(fw_domain->id),
-			   fw_domain->wake_count);
-	}
-	spin_unlock_irq(&dev_priv->uncore.lock);
+			   READ_ONCE(fw_domain->wake_count));

 	return 0;
 }
@ -1938,9 +1939,8 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)

 static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
 {
-	seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)",
-		   ring->space, ring->head, ring->tail,
-		   ring->last_retired_head);
+	seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u)",
+		   ring->space, ring->head, ring->tail);
 }

 static int i915_context_status(struct seq_file *m, void *unused)
@ -2474,9 +2474,9 @@ static void i915_guc_client_info(struct seq_file *m,
 	enum intel_engine_id id;
 	uint64_t tot = 0;

-	seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
-		client->priority, client->ctx_index, client->proc_desc_offset);
-	seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n",
+	seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n",
+		client->priority, client->stage_id, client->proc_desc_offset);
+	seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n",
 		client->doorbell_id, client->doorbell_offset, client->doorbell_cookie);
 	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
 		client->wq_size, client->wq_offset, client->wq_tail);
@ -2511,7 +2511,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
 	}

 	seq_printf(m, "Doorbell map:\n");
-	seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc->doorbell_bitmap);
+	seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
 	seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);

 	seq_printf(m, "GuC total action count: %llu\n", guc->action_count);
@ -4129,7 +4129,9 @@ i915_wedged_get(void *data, u64 *val)
 static int
 i915_wedged_set(void *data, u64 val)
 {
-	struct drm_i915_private *dev_priv = data;
+	struct drm_i915_private *i915 = data;
+	struct intel_engine_cs *engine;
+	unsigned int tmp;

 	/*
 	 * There is no safeguard against this debugfs entry colliding
@ -4139,13 +4141,17 @@ i915_wedged_set(void *data, u64 val)
 	 * while it is writing to 'i915_wedged'
 	 */

-	if (i915_reset_backoff(&dev_priv->gpu_error))
+	if (i915_reset_backoff(&i915->gpu_error))
 		return -EAGAIN;

-	i915_handle_error(dev_priv, val,
-			  "Manually setting wedged to %llu", val);
+	for_each_engine_masked(engine, i915, val, tmp) {
+		engine->hangcheck.seqno = intel_engine_get_seqno(engine);
+		engine->hangcheck.stalled = true;
+	}

-	wait_on_bit(&dev_priv->gpu_error.flags,
+	i915_handle_error(i915, val, "Manually setting wedged to %llu", val);
+
+	wait_on_bit(&i915->gpu_error.flags,
 		    I915_RESET_HANDOFF,
 		    TASK_UNINTERRUPTIBLE);

@ -4173,10 +4179,6 @@ fault_irq_set(struct drm_i915_private *i915,
 	if (err)
 		goto err_unlock;

-	/* Retire to kick idle work */
-	i915_gem_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
-
 	*irq = val;
 	mutex_unlock(&i915->drm.struct_mutex);

@ -4280,7 +4282,7 @@ i915_drop_caches_set(void *data, u64 val)
 			goto unlock;
 	}

-	if (val & (DROP_RETIRE | DROP_ACTIVE))
+	if (val & DROP_RETIRE)
 		i915_gem_retire_requests(dev_priv);

 	lockdep_set_current_reclaim_state(GFP_KERNEL);
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@ -549,6 +549,7 @@ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
 static void i915_gem_fini(struct drm_i915_private *dev_priv)
 {
 	mutex_lock(&dev_priv->drm.struct_mutex);
+	intel_uc_fini_hw(dev_priv);
 	i915_gem_cleanup_engines(dev_priv);
 	i915_gem_context_fini(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
@ -609,7 +610,7 @@ static int i915_load_modeset_init(struct drm_device *dev)

 	ret = i915_gem_init(dev_priv);
 	if (ret)
-		goto cleanup_irq;
+		goto cleanup_uc;

 	intel_modeset_gem_init(dev);

@ -631,9 +632,9 @@ cleanup_gem:
 	if (i915_gem_suspend(dev_priv))
 		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
 	i915_gem_fini(dev_priv);
+cleanup_uc:
+	intel_uc_fini_fw(dev_priv);
 cleanup_irq:
-	intel_guc_fini(dev_priv);
-	intel_huc_fini(dev_priv);
 	drm_irq_uninstall(dev);
 	intel_teardown_gmbus(dev_priv);
 cleanup_csr:
@ -1351,9 +1352,8 @@ void i915_driver_unload(struct drm_device *dev)
 	/* Flush any outstanding unpin_work. */
 	drain_workqueue(dev_priv->wq);

-	intel_guc_fini(dev_priv);
-	intel_huc_fini(dev_priv);
 	i915_gem_fini(dev_priv);
+	intel_uc_fini_fw(dev_priv);
 	intel_fbc_cleanup_cfb(dev_priv);

 	intel_power_domains_fini(dev_priv);
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@ -79,26 +79,8 @@

 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20170320"
-#define DRIVER_TIMESTAMP	1489994464
-
-#undef WARN_ON
-/* Many gcc seem to no see through this and fall over :( */
-#if 0
-#define WARN_ON(x) ({ \
-	bool __i915_warn_cond = (x); \
-	if (__builtin_constant_p(__i915_warn_cond)) \
-		BUILD_BUG_ON(__i915_warn_cond); \
-	WARN(__i915_warn_cond, "WARN_ON(" #x ")"); })
-#else
-#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
-#endif
-
-#undef WARN_ON_ONCE
-#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")")
-
-#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \
-			     (long) (x), __func__);
+#define DRIVER_DATE		"20170403"
+#define DRIVER_TIMESTAMP	1491198738

 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
 * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@ -703,9 +685,9 @@ enum forcewake_domain_id {
 };

 enum forcewake_domains {
-	FORCEWAKE_RENDER = (1 << FW_DOMAIN_ID_RENDER),
-	FORCEWAKE_BLITTER = (1 << FW_DOMAIN_ID_BLITTER),
-	FORCEWAKE_MEDIA	= (1 << FW_DOMAIN_ID_MEDIA),
+	FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER),
+	FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER),
+	FORCEWAKE_MEDIA	= BIT(FW_DOMAIN_ID_MEDIA),
 	FORCEWAKE_ALL = (FORCEWAKE_RENDER |
 			 FORCEWAKE_BLITTER |
 			 FORCEWAKE_MEDIA)
@ -732,21 +714,25 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,

 struct intel_uncore_funcs {
 	void (*force_wake_get)(struct drm_i915_private *dev_priv,
-							enum forcewake_domains domains);
+			       enum forcewake_domains domains);
 	void (*force_wake_put)(struct drm_i915_private *dev_priv,
-							enum forcewake_domains domains);
+			       enum forcewake_domains domains);

-	uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
-	uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
-	uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
-	uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
+	uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv,
+			       i915_reg_t r, bool trace);
+	uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv,
+			       i915_reg_t r, bool trace);
+	uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv,
+			       i915_reg_t r, bool trace);
+	uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv,
+			       i915_reg_t r, bool trace);

-	void (*mmio_writeb)(struct drm_i915_private *dev_priv, i915_reg_t r,
-				uint8_t val, bool trace);
-	void (*mmio_writew)(struct drm_i915_private *dev_priv, i915_reg_t r,
-				uint16_t val, bool trace);
-	void (*mmio_writel)(struct drm_i915_private *dev_priv, i915_reg_t r,
-				uint32_t val, bool trace);
+	void (*mmio_writeb)(struct drm_i915_private *dev_priv,
+			    i915_reg_t r, uint8_t val, bool trace);
+	void (*mmio_writew)(struct drm_i915_private *dev_priv,
+			    i915_reg_t r, uint16_t val, bool trace);
+	void (*mmio_writel)(struct drm_i915_private *dev_priv,
+			    i915_reg_t r, uint32_t val, bool trace);
 };

 struct intel_forcewake_range {
@ -770,32 +756,35 @@ struct intel_uncore {
 	enum forcewake_domains fw_domains;
 	enum forcewake_domains fw_domains_active;

+	u32 fw_set;
+	u32 fw_clear;
+	u32 fw_reset;
+
 	struct intel_uncore_forcewake_domain {
-		struct drm_i915_private *i915;
 		enum forcewake_domain_id id;
 		enum forcewake_domains mask;
 		unsigned wake_count;
 		struct hrtimer timer;
 		i915_reg_t reg_set;
-		u32 val_set;
-		u32 val_clear;
 		i915_reg_t reg_ack;
-		i915_reg_t reg_post;
-		u32 val_reset;
 	} fw_domain[FW_DOMAIN_ID_COUNT];

 	int unclaimed_mmio_check;
 };

-/* Iterate over initialised fw domains */
-#define for_each_fw_domain_masked(domain__, mask__, dev_priv__) \
-	for ((domain__) = &(dev_priv__)->uncore.fw_domain[0]; \
-	     (domain__) < &(dev_priv__)->uncore.fw_domain[FW_DOMAIN_ID_COUNT]; \
-	     (domain__)++) \
-		for_each_if ((mask__) & (domain__)->mask)
+#define __mask_next_bit(mask) ({					\
+	int __idx = ffs(mask) - 1;					\
+	mask &= ~BIT(__idx);						\
+	__idx;								\
+})

-#define for_each_fw_domain(domain__, dev_priv__) \
-	for_each_fw_domain_masked(domain__, FORCEWAKE_ALL, dev_priv__)
+/* Iterate over initialised fw domains */
+#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \
+	for (tmp__ = (mask__); \
+	     tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
+
+#define for_each_fw_domain(domain__, dev_priv__, tmp__) \
+	for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__)

 #define CSR_VERSION(major, minor)	((major) << 16 | (minor))
 #define CSR_VERSION_MAJOR(version)	((version) >> 16)
@ -846,6 +835,7 @@ struct intel_csr {
 	func(has_resource_streamer); \
 	func(has_runtime_pm); \
 	func(has_snoop); \
+	func(unfenced_needs_alignment); \
 	func(cursor_needs_physical); \
 	func(hws_needs_physical); \
 	func(overlay_needs_physical); \
@ -2578,12 +2568,6 @@ static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc)
 	     (id__)++) \
 		for_each_if ((engine__) = (dev_priv__)->engine[(id__)])

-#define __mask_next_bit(mask) ({					\
-	int __idx = ffs(mask) - 1;					\
-	mask &= ~BIT(__idx);						\
-	__idx;								\
-})
-
 /* Iterator over subset of engines selected by mask */
 #define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
 	for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask;	\
@ -3956,14 +3940,14 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)

 #define __raw_read(x, s) \
-static inline uint##x##_t __raw_i915_read##x(struct drm_i915_private *dev_priv, \
+static inline uint##x##_t __raw_i915_read##x(const struct drm_i915_private *dev_priv, \
 					     i915_reg_t reg) \
 { \
 	return read##s(dev_priv->regs + i915_mmio_reg_offset(reg)); \
 }

 #define __raw_write(x, s) \
-static inline void __raw_i915_write##x(struct drm_i915_private *dev_priv, \
+static inline void __raw_i915_write##x(const struct drm_i915_private *dev_priv, \
 				       i915_reg_t reg, uint##x##_t val) \
 { \
 	write##s(val, dev_priv->regs + i915_mmio_reg_offset(reg)); \
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@ -2321,7 +2321,7 @@ rebuild_st:
 	st->nents = 0;
 	for (i = 0; i < page_count; i++) {
 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-		if (IS_ERR(page)) {
+		if (unlikely(IS_ERR(page))) {
 			i915_gem_shrink(dev_priv,
 					page_count,
 					I915_SHRINK_BOUND |
@ -2329,12 +2329,21 @@ rebuild_st:
 					I915_SHRINK_PURGEABLE);
 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
 		}
-		if (IS_ERR(page)) {
+		if (unlikely(IS_ERR(page))) {
+			gfp_t reclaim;
+
 			/* We've tried hard to allocate the memory by reaping
 			 * our own buffer, now let the real VM do its job and
 			 * go down in flames if truly OOM.
+			 *
+			 * However, since graphics tend to be disposable,
+			 * defer the oom here by reporting the ENOMEM back
+			 * to userspace.
 			 */
-			page = shmem_read_mapping_page(mapping, i);
+			reclaim = mapping_gfp_constraint(mapping, 0);
+			reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
+
+			page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
 			if (IS_ERR(page)) {
 				ret = PTR_ERR(page);
 				goto err_sg;
@ -2989,10 +2998,15 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);

+	/* Retire completed requests first so the list of inflight/incomplete
+	 * requests is accurate and we don't try and mark successful requests
+	 * as in error during __i915_gem_set_wedged_BKL().
+	 */
+	i915_gem_retire_requests(dev_priv);
+
 	stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);

 	i915_gem_context_lost(dev_priv);
-	i915_gem_retire_requests(dev_priv);

 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 }
@ -3098,9 +3112,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	 * Wait for last execlists context complete, but bail out in case a
 	 * new request is submitted.
 	 */
-	wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
-		 intel_engines_are_idle(dev_priv),
-		 10);
+	wait_for(intel_engines_are_idle(dev_priv), 10);
 	if (READ_ONCE(dev_priv->gt.active_requests))
 		return;

@ -3259,6 +3271,29 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
 	return 0;
 }

+static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms)
+{
+	return wait_for(intel_engine_is_idle(engine), timeout_ms);
+}
+
+static int wait_for_engines(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		if (GEM_WARN_ON(wait_for_engine(engine, 50))) {
+			i915_gem_set_wedged(i915);
+			return -EIO;
+		}
+
+		GEM_BUG_ON(intel_engine_get_seqno(engine) !=
+			   intel_engine_last_submit(engine));
+	}
+
+	return 0;
+}
+
 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 {
 	int ret;
@ -3273,13 +3308,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 			if (ret)
 				return ret;
 		}
+
+		i915_gem_retire_requests(i915);
+		GEM_BUG_ON(i915->gt.active_requests);
+
+		ret = wait_for_engines(i915);
 	} else {
 		ret = wait_for_timeline(&i915->gt.global_timeline, flags);
-		if (ret)
-			return ret;
 	}

-	return 0;
+	return ret;
 }

 /** Flushes the GTT write domain for the object if it's dirty. */
@ -3307,8 +3345,14 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 	 * system agents we cannot reproduce this behaviour).
 	 */
 	wmb();
-	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
-		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+		if (intel_runtime_pm_get_if_in_use(dev_priv)) {
+			spin_lock_irq(&dev_priv->uncore.lock);
+			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+			spin_unlock_irq(&dev_priv->uncore.lock);
+			intel_runtime_pm_put(dev_priv);
+		}
+	}

 	intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));

@ -4408,9 +4452,6 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto err_unlock;

-	i915_gem_retire_requests(dev_priv);
-	GEM_BUG_ON(dev_priv->gt.active_requests);
-
 	assert_kernel_context_is_current(dev_priv);
 	i915_gem_context_lost(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@ -168,7 +168,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,

 		i915_sw_fence_await_reservation(&clflush->wait,
 						obj->resv, NULL,
-						false, I915_FENCE_TIMEOUT,
+						true, I915_FENCE_TIMEOUT,
 						GFP_KERNEL);

 		reservation_object_lock(obj->resv, NULL);
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@ -576,25 +576,25 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
 }

 static inline int
-mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
+mi_set_context(struct drm_i915_gem_request *req, u32 flags)
 {
 	struct drm_i915_private *dev_priv = req->i915;
 	struct intel_engine_cs *engine = req->engine;
 	enum intel_engine_id id;
-	u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT;
 	const int num_rings =
-		/* Use an extended w/a on ivb+ if signalling from other rings */
-		i915.semaphores ?
+		/* Use an extended w/a on gen7 if signalling from other rings */
+		(i915.semaphores && INTEL_GEN(dev_priv) == 7) ?
 		INTEL_INFO(dev_priv)->num_rings - 1 :
 		0;
 	int len;
+	u32 *cs;

-	/* These flags are for resource streamer on HSW+ */
+	flags |= MI_MM_SPACE_GTT;
 	if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
-		flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
-	else if (INTEL_GEN(dev_priv) < 8)
-		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
-
+		/* These flags are for resource streamer on HSW+ */
+		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
+	else
+		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;

 	len = 4;
 	if (INTEL_GEN(dev_priv) >= 7)
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@ -196,7 +196,6 @@ search_again:
 	if (ret)
 		return ret;

-	i915_gem_retire_requests(dev_priv);
 	goto search_again;

 found:
@ -383,7 +382,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 		if (ret)
 			return ret;

-		i915_gem_retire_requests(dev_priv);
 		WARN_ON(!list_empty(&vm->active_list));
 	}

--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@ -890,6 +890,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
 	struct list_head ordered_vmas;
 	struct list_head pinned_vmas;
 	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
+	bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment;
 	int retry;

 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
@ -910,7 +911,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
 		if (!has_fenced_gpu_access)
 			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
 		need_fence =
-			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+			(entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+			 needs_unfenced_map) &&
 			i915_gem_object_is_tiled(obj);
 		need_mappable = need_fence || need_reloc_mappable(vma);

--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@ -2364,7 +2364,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;

 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
+		if (i915_gem_wait_for_idle(dev_priv, 0)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@ -37,6 +37,17 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence)

 static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
 {
+	/* The timeline struct (as part of the ppgtt underneath a context)
+	 * may be freed when the request is no longer in use by the GPU.
+	 * We could extend the life of a context to beyond that of all
+	 * fences, possibly keeping the hw resource around indefinitely,
+	 * or we just give them a false name. Since
+	 * dma_fence_ops.get_timeline_name is a debug feature, the occasional
+	 * lie seems justifiable.
+	 */
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return "signaled";
+
 	return to_request(fence)->timeline->common->name;
 }

@ -180,7 +191,6 @@ i915_priotree_init(struct i915_priotree *pt)

 static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 {
-	struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	int ret;
@ -192,15 +202,10 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 	if (ret)
 		return ret;

-	i915_gem_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests > 1);
-
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
 	for_each_engine(engine, i915, id) {
-		struct intel_timeline *tl = &timeline->engine[id];
-
-		if (wait_for(intel_engine_is_idle(engine), 50))
-			return -EBUSY;
+		struct i915_gem_timeline *timeline;
+		struct intel_timeline *tl = engine->timeline;

 		if (!i915_seqno_passed(seqno, tl->seqno)) {
 			/* spin until threads are complete */
@ -211,14 +216,10 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 		/* Finally reset hw state */
 		tl->seqno = seqno;
 		intel_engine_init_global_seqno(engine, seqno);
-	}

-	list_for_each_entry(timeline, &i915->gt.timelines, link) {
-		for_each_engine(engine, i915, id) {
-			struct intel_timeline *tl = &timeline->engine[id];
-
-			memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno));
-		}
+		list_for_each_entry(timeline, &i915->gt.timelines, link)
+			memset(timeline->engine[id].sync_seqno, 0,
+			       sizeof(timeline->engine[id].sync_seqno));
 	}

 	return 0;
@ -295,7 +296,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	 * completion order.
 	 */
 	list_del(&request->ring_link);
-	request->ring->last_retired_head = request->postfix;
+	request->ring->head = request->postfix;
 	if (!--request->i915->gt.active_requests) {
 		GEM_BUG_ON(!request->i915->gt.awake);
 		mod_delayed_work(request->i915->wq,
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@ -1742,8 +1742,8 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
 			I915_WRITE(SOFT_SCRATCH(15), msg & ~flush);

 			/* Handle flush interrupt in bottom half */
-			queue_work(dev_priv->guc.log.flush_wq,
-				   &dev_priv->guc.log.flush_work);
+			queue_work(dev_priv->guc.log.runtime.flush_wq,
+				   &dev_priv->guc.log.runtime.flush_work);

 			dev_priv->guc.log.flush_interrupt_count++;
 		} else {
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@ -61,6 +61,7 @@
 	.has_overlay = 1, .overlay_needs_physical = 1, \
 	.has_gmch_display = 1, \
 	.hws_needs_physical = 1, \
+	.unfenced_needs_alignment = 1, \
 	.ring_mask = RENDER_RING, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	CURSOR_OFFSETS
@ -102,6 +103,7 @@ static const struct intel_device_info intel_i915g_info = {
 	.platform = INTEL_I915G, .cursor_needs_physical = 1,
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };

 static const struct intel_device_info intel_i915gm_info = {
@ -113,6 +115,7 @@ static const struct intel_device_info intel_i915gm_info = {
 	.supports_tv = 1,
 	.has_fbc = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };

 static const struct intel_device_info intel_i945g_info = {
@ -121,6 +124,7 @@ static const struct intel_device_info intel_i945g_info = {
 	.has_hotplug = 1, .cursor_needs_physical = 1,
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };

 static const struct intel_device_info intel_i945gm_info = {
@ -131,6 +135,7 @@ static const struct intel_device_info intel_i945gm_info = {
 	.supports_tv = 1,
 	.has_fbc = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };

 static const struct intel_device_info intel_g33_info = {
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@ -1705,7 +1705,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	 */
 	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
 		ret = -ENODEV;
-		goto err_alloc;
+		goto err_flags;
 	}

 	list_add(&stream->link, &dev_priv->perf.streams);
@ -1728,6 +1728,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,

 err_open:
 	list_del(&stream->link);
+err_flags:
 	if (stream->ops->destroy)
 		stream->ops->destroy(stream);
 err_alloc:
@ -1793,6 +1794,11 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		if (ret)
 			return ret;

+		if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
+			DRM_DEBUG("Unknown i915 perf property ID\n");
+			return -EINVAL;
+		}
+
 		switch ((enum drm_i915_perf_property_id)id) {
 		case DRM_I915_PERF_PROP_CTX_HANDLE:
 			props->single_context = 1;
@ -1862,9 +1868,8 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			props->oa_periodic = true;
 			props->oa_period_exponent = value;
 			break;
-		default:
+		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
-			DRM_DEBUG("Unknown i915 perf property ID\n");
 			return -EINVAL;
 		}

--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@ -7829,7 +7829,14 @@ enum {
 #define  TRANS_DDI_EDP_INPUT_B_ONOFF	(5<<12)
 #define  TRANS_DDI_EDP_INPUT_C_ONOFF	(6<<12)
 #define  TRANS_DDI_DP_VC_PAYLOAD_ALLOC	(1<<8)
+#define  TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1<<7)
+#define  TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ (1<<6)
 #define  TRANS_DDI_BFI_ENABLE		(1<<4)
+#define  TRANS_DDI_HIGH_TMDS_CHAR_RATE	(1<<4)
+#define  TRANS_DDI_HDMI_SCRAMBLING	(1<<0)
+#define  TRANS_DDI_HDMI_SCRAMBLING_MASK (TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE \
+					| TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ \
+					| TRANS_DDI_HDMI_SCRAMBLING)

 /* DisplayPort Transport Control */
 #define _DP_TP_CTL_A			0x64040
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@ -25,6 +25,24 @@
 #ifndef __I915_UTILS_H
 #define __I915_UTILS_H

+#undef WARN_ON
+/* Many gcc seem to no see through this and fall over :( */
+#if 0
+#define WARN_ON(x) ({ \
+	bool __i915_warn_cond = (x); \
+	if (__builtin_constant_p(__i915_warn_cond)) \
+		BUILD_BUG_ON(__i915_warn_cond); \
+	WARN(__i915_warn_cond, "WARN_ON(" #x ")"); })
+#else
+#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
+#endif
+
+#undef WARN_ON_ONCE
+#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")")
+
+#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \
+			     (long)(x), __func__)
+
 #if GCC_VERSION >= 70000
 #define add_overflows(A, B) \
 	__builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0)
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@ -47,11 +47,12 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b)
 unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	unsigned long flags;
 	unsigned int result;

-	spin_lock_irq(&b->irq_lock);
+	spin_lock_irqsave(&b->irq_lock, flags);
 	result = __intel_breadcrumbs_wakeup(b);
-	spin_unlock_irq(&b->irq_lock);
+	spin_unlock_irqrestore(&b->irq_lock, flags);

 	return result;
 }
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@ -1442,16 +1442,33 @@ static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state,
 	if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
 		pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95);

-	/* BSpec says "Do not use DisplayPort with CDCLK less than
-	 * 432 MHz, audio enabled, port width x4, and link rate
-	 * HBR2 (5.4 GHz), or else there may be audio corruption or
-	 * screen corruption."
+	/* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz,
+	 * audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else
+	 * there may be audio corruption or screen corruption." This cdclk
+	 * restriction for GLK is 316.8 MHz and since GLK can output two
+	 * pixels per clock, the pixel rate becomes 2 * 316.8 MHz.
 	 */
 	if (intel_crtc_has_dp_encoder(crtc_state) &&
 	    crtc_state->has_audio &&
 	    crtc_state->port_clock >= 540000 &&
-	    crtc_state->lane_count == 4)
-		pixel_rate = max(432000, pixel_rate);
+	    crtc_state->lane_count == 4) {
+		if (IS_GEMINILAKE(dev_priv))
+			pixel_rate = max(2 * 316800, pixel_rate);
+		else
+			pixel_rate = max(432000, pixel_rate);
+	}
+
+	/* According to BSpec, "The CD clock frequency must be at least twice
+	 * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default.
+	 * The check for GLK has to be adjusted as the platform can output
+	 * two pixels per clock.
+	 */
+	if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) {
+		if (IS_GEMINILAKE(dev_priv))
+			pixel_rate = max(2 * 2 * 96000, pixel_rate);
+		else
+			pixel_rate = max(2 * 96000, pixel_rate);
+	}

 	return pixel_rate;
 }
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@ -49,7 +49,7 @@ MODULE_FIRMWARE(I915_CSR_SKL);
 MODULE_FIRMWARE(I915_CSR_BXT);
 #define BXT_CSR_VERSION_REQUIRED	CSR_VERSION(1, 7)

-#define FIRMWARE_URL  "https://01.org/linuxgraphics/intel-linux-graphics-firmwares"
+#define FIRMWARE_URL  "https://01.org/linuxgraphics/downloads/firmware"



--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@ -539,7 +539,7 @@ intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv,
 * values in advance. This function programs the correct values for
 * DP/eDP/FDI use cases.
 */
-void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder)
+static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	u32 iboost_bit = 0;
@ -806,7 +806,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc,
 		   DP_TP_CTL_ENABLE);
 }

-void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder)
+static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
 	struct intel_digital_port *intel_dig_port =
@ -837,7 +837,8 @@ intel_ddi_get_crtc_encoder(struct intel_crtc *crtc)
 	return ret;
 }

-static struct intel_encoder *
+/* Finds the only possible encoder associated with the given CRTC. */
+struct intel_encoder *
 intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
@ -1127,72 +1128,6 @@ void intel_ddi_clock_get(struct intel_encoder *encoder,
 		bxt_ddi_clock_get(encoder, pipe_config);
 }

-static bool
-hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
-		   struct intel_crtc_state *crtc_state,
-		   struct intel_encoder *encoder)
-{
-	struct intel_shared_dpll *pll;
-
-	pll = intel_get_shared_dpll(intel_crtc, crtc_state,
-				    encoder);
-	if (!pll)
-		DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
-				 pipe_name(intel_crtc->pipe));
-
-	return pll;
-}
-
-static bool
-skl_ddi_pll_select(struct intel_crtc *intel_crtc,
-		   struct intel_crtc_state *crtc_state,
-		   struct intel_encoder *encoder)
-{
-	struct intel_shared_dpll *pll;
-
-	pll = intel_get_shared_dpll(intel_crtc, crtc_state, encoder);
-	if (pll == NULL) {
-		DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
-				 pipe_name(intel_crtc->pipe));
-		return false;
-	}
-
-	return true;
-}
-
-static bool
-bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
-		   struct intel_crtc_state *crtc_state,
-		   struct intel_encoder *encoder)
-{
-	return !!intel_get_shared_dpll(intel_crtc, crtc_state, encoder);
-}
-
-/*
- * Tries to find a *shared* PLL for the CRTC and store it in
- * intel_crtc->ddi_pll_sel.
- *
- * For private DPLLs, compute_config() should do the selection for us. This
- * function should be folded into compute_config() eventually.
- */
-bool intel_ddi_pll_select(struct intel_crtc *intel_crtc,
-			  struct intel_crtc_state *crtc_state)
-{
-	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
-	struct intel_encoder *encoder =
-		intel_ddi_get_crtc_new_encoder(crtc_state);
-
-	if (IS_GEN9_BC(dev_priv))
-		return skl_ddi_pll_select(intel_crtc, crtc_state,
-					  encoder);
-	else if (IS_GEN9_LP(dev_priv))
-		return bxt_ddi_pll_select(intel_crtc, crtc_state,
-					  encoder);
-	else
-		return hsw_ddi_pll_select(intel_crtc, crtc_state,
-					  encoder);
-}
-
 void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
@ -1309,6 +1244,11 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
 			temp |= TRANS_DDI_MODE_SELECT_HDMI;
 		else
 			temp |= TRANS_DDI_MODE_SELECT_DVI;
+
+		if (crtc_state->hdmi_scrambling)
+			temp |= TRANS_DDI_HDMI_SCRAMBLING_MASK;
+		if (crtc_state->hdmi_high_tmds_clock_ratio)
+			temp |= TRANS_DDI_HIGH_TMDS_CHAR_RATE;
 	} else if (type == INTEL_OUTPUT_ANALOG) {
 		temp |= TRANS_DDI_MODE_SELECT_FDI;
 		temp |= (crtc_state->fdi_lanes - 1) << 1;
@ -1676,8 +1616,8 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp)
 	return DDI_BUF_TRANS_SELECT(level);
 }

-void intel_ddi_clk_select(struct intel_encoder *encoder,
-			  struct intel_shared_dpll *pll)
+static void intel_ddi_clk_select(struct intel_encoder *encoder,
+				 struct intel_shared_dpll *pll)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = intel_ddi_get_encoder_port(encoder);
@ -1881,6 +1821,12 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder,
 	if (type == INTEL_OUTPUT_HDMI) {
 		struct intel_digital_port *intel_dig_port =
 			enc_to_dig_port(encoder);
+		bool clock_ratio = pipe_config->hdmi_high_tmds_clock_ratio;
+		bool scrambling = pipe_config->hdmi_scrambling;
+
+		intel_hdmi_handle_sink_scrambling(intel_encoder,
+						  conn_state->connector,
+						  clock_ratio, scrambling);

 		/* In HDMI/DVI mode, the port width, and swing/emphasis values
 		 * are ignored so nothing special needs to be done besides
@ -1914,6 +1860,12 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder,
 	if (old_crtc_state->has_audio)
 		intel_audio_codec_disable(intel_encoder);

+	if (type == INTEL_OUTPUT_HDMI) {
+		intel_hdmi_handle_sink_scrambling(intel_encoder,
+						  old_conn_state->connector,
+						  false, false);
+	}
+
 	if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);

@ -2040,6 +1992,12 @@ void intel_ddi_get_config(struct intel_encoder *encoder,

 		if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config))
 			pipe_config->has_infoframe = true;
+
+		if ((temp & TRANS_DDI_HDMI_SCRAMBLING_MASK) ==
+			TRANS_DDI_HDMI_SCRAMBLING_MASK)
+			pipe_config->hdmi_scrambling = true;
+		if (temp & TRANS_DDI_HIGH_TMDS_CHAR_RATE)
+			pipe_config->hdmi_high_tmds_clock_ratio = true;
 		/* fall through */
 	case TRANS_DDI_MODE_SELECT_DVI:
 		pipe_config->lane_count = 4;
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@ -1997,7 +1997,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int plane)
 	unsigned int cpp = fb->format->cpp[plane];

 	switch (fb->modifier) {
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 		return cpp;
 	case I915_FORMAT_MOD_X_TILED:
 		if (IS_GEN2(dev_priv))
@ -2033,7 +2033,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int plane)
 static unsigned int
 intel_tile_height(const struct drm_framebuffer *fb, int plane)
 {
-	if (fb->modifier == DRM_FORMAT_MOD_NONE)
+	if (fb->modifier == DRM_FORMAT_MOD_LINEAR)
 		return 1;
 	else
 		return intel_tile_size(to_i915(fb->dev)) /
@ -2107,7 +2107,7 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
 		return 4096;

 	switch (fb->modifier) {
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 		return intel_linear_alignment(dev_priv);
 	case I915_FORMAT_MOD_X_TILED:
 		if (INTEL_GEN(dev_priv) >= 9)
@ -2290,7 +2290,7 @@ static u32 intel_adjust_tile_offset(int *x, int *y,

 	WARN_ON(new_offset > old_offset);

-	if (fb->modifier != DRM_FORMAT_MOD_NONE) {
+	if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
 		unsigned int tile_size, tile_width, tile_height;
 		unsigned int pitch_tiles;

@ -2345,7 +2345,7 @@ static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv,
 	if (alignment)
 		alignment--;

-	if (fb_modifier != DRM_FORMAT_MOD_NONE) {
+	if (fb_modifier != DRM_FORMAT_MOD_LINEAR) {
 		unsigned int tile_size, tile_width, tile_height;
 		unsigned int tile_rows, tiles, pitch_tiles;

@ -2471,7 +2471,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv,
 						    DRM_ROTATE_0, tile_size);
 		offset /= tile_size;

-		if (fb->modifier != DRM_FORMAT_MOD_NONE) {
+		if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
 			unsigned int tile_width, tile_height;
 			unsigned int pitch_tiles;
 			struct drm_rect r;
@ -2803,7 +2803,7 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane,
 	int cpp = fb->format->cpp[plane];

 	switch (fb->modifier) {
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 	case I915_FORMAT_MOD_X_TILED:
 		switch (cpp) {
 		case 8:
@ -2962,28 +2962,27 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
 	return 0;
 }

-static void i9xx_update_primary_plane(struct drm_plane *primary,
-				      const struct intel_crtc_state *crtc_state,
-				      const struct intel_plane_state *plane_state)
+static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *plane_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(primary->dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	int plane = intel_crtc->plane;
-	u32 linear_offset;
-	u32 dspcntr;
-	i915_reg_t reg = DSPCNTR(plane);
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
 	unsigned int rotation = plane_state->base.rotation;
-	int x = plane_state->base.src.x1 >> 16;
-	int y = plane_state->base.src.y1 >> 16;
-	unsigned long irqflags;
+	u32 dspcntr;

-	dspcntr = DISPPLANE_GAMMA_ENABLE;
+	dspcntr = DISPLAY_PLANE_ENABLE | DISPPLANE_GAMMA_ENABLE;

-	dspcntr |= DISPLAY_PLANE_ENABLE;
+	if (IS_G4X(dev_priv) || IS_GEN5(dev_priv) ||
+	    IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv))
+		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
+
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+		dspcntr |= DISPPLANE_PIPE_CSC_ENABLE;

 	if (INTEL_GEN(dev_priv) < 4) {
-		if (intel_crtc->pipe == PIPE_B)
+		if (crtc->pipe == PIPE_B)
 			dspcntr |= DISPPLANE_SEL_PIPE_B;
 	}

@ -3010,7 +3009,8 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 		dspcntr |= DISPPLANE_RGBX101010;
 		break;
 	default:
-		BUG();
+		MISSING_CASE(fb->format->format);
+		return 0;
 	}

 	if (INTEL_GEN(dev_priv) >= 4 &&
@ -3023,25 +3023,66 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 	if (rotation & DRM_REFLECT_X)
 		dspcntr |= DISPPLANE_MIRROR;

-	if (IS_G4X(dev_priv))
-		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
+	return dspcntr;
+}

-	intel_add_fb_offsets(&x, &y, plane_state, 0);
+int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	int src_x = plane_state->base.src.x1 >> 16;
+	int src_y = plane_state->base.src.y1 >> 16;
+	u32 offset;
+
+	intel_add_fb_offsets(&src_x, &src_y, plane_state, 0);

 	if (INTEL_GEN(dev_priv) >= 4)
-		intel_crtc->dspaddr_offset =
-			intel_compute_tile_offset(&x, &y, plane_state, 0);
+		offset = intel_compute_tile_offset(&src_x, &src_y,
+						   plane_state, 0);
+	else
+		offset = 0;

-	if (rotation & DRM_ROTATE_180) {
-		x += crtc_state->pipe_src_w - 1;
-		y += crtc_state->pipe_src_h - 1;
-	} else if (rotation & DRM_REFLECT_X) {
-		x += crtc_state->pipe_src_w - 1;
+	/* HSW/BDW do this automagically in hardware */
+	if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) {
+		unsigned int rotation = plane_state->base.rotation;
+		int src_w = drm_rect_width(&plane_state->base.src) >> 16;
+		int src_h = drm_rect_height(&plane_state->base.src) >> 16;
+
+		if (rotation & DRM_ROTATE_180) {
+			src_x += src_w - 1;
+			src_y += src_h - 1;
+		} else if (rotation & DRM_REFLECT_X) {
+			src_x += src_w - 1;
+		}
 	}

+	plane_state->main.offset = offset;
+	plane_state->main.x = src_x;
+	plane_state->main.y = src_y;
+
+	return 0;
+}
+
+static void i9xx_update_primary_plane(struct drm_plane *primary,
+				      const struct intel_crtc_state *crtc_state,
+				      const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(primary->dev);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	int plane = intel_crtc->plane;
+	u32 linear_offset;
+	u32 dspcntr = plane_state->ctl;
+	i915_reg_t reg = DSPCNTR(plane);
+	int x = plane_state->main.x;
+	int y = plane_state->main.y;
+	unsigned long irqflags;
+
 	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);

-	if (INTEL_GEN(dev_priv) < 4)
+	if (INTEL_GEN(dev_priv) >= 4)
+		intel_crtc->dspaddr_offset = plane_state->main.offset;
+	else
 		intel_crtc->dspaddr_offset = linear_offset;

 	intel_crtc->adjusted_x = x;
@ -3068,7 +3109,12 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 	I915_WRITE_FW(reg, dspcntr);

 	I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]);
-	if (INTEL_GEN(dev_priv) >= 4) {
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
+		I915_WRITE_FW(DSPSURF(plane),
+			      intel_plane_ggtt_offset(plane_state) +
+			      intel_crtc->dspaddr_offset);
+		I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x);
+	} else if (INTEL_GEN(dev_priv) >= 4) {
 		I915_WRITE_FW(DSPSURF(plane),
 			      intel_plane_ggtt_offset(plane_state) +
 			      intel_crtc->dspaddr_offset);
@ -3105,101 +3151,10 @@ static void i9xx_disable_primary_plane(struct drm_plane *primary,
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }

-static void ironlake_update_primary_plane(struct drm_plane *primary,
-					  const struct intel_crtc_state *crtc_state,
-					  const struct intel_plane_state *plane_state)
-{
-	struct drm_device *dev = primary->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	int plane = intel_crtc->plane;
-	u32 linear_offset;
-	u32 dspcntr;
-	i915_reg_t reg = DSPCNTR(plane);
-	unsigned int rotation = plane_state->base.rotation;
-	int x = plane_state->base.src.x1 >> 16;
-	int y = plane_state->base.src.y1 >> 16;
-	unsigned long irqflags;
-
-	dspcntr = DISPPLANE_GAMMA_ENABLE;
-	dspcntr |= DISPLAY_PLANE_ENABLE;
-
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		dspcntr |= DISPPLANE_PIPE_CSC_ENABLE;
-
-	switch (fb->format->format) {
-	case DRM_FORMAT_C8:
-		dspcntr |= DISPPLANE_8BPP;
-		break;
-	case DRM_FORMAT_RGB565:
-		dspcntr |= DISPPLANE_BGRX565;
-		break;
-	case DRM_FORMAT_XRGB8888:
-		dspcntr |= DISPPLANE_BGRX888;
-		break;
-	case DRM_FORMAT_XBGR8888:
-		dspcntr |= DISPPLANE_RGBX888;
-		break;
-	case DRM_FORMAT_XRGB2101010:
-		dspcntr |= DISPPLANE_BGRX101010;
-		break;
-	case DRM_FORMAT_XBGR2101010:
-		dspcntr |= DISPPLANE_RGBX101010;
-		break;
-	default:
-		BUG();
-	}
-
-	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
-		dspcntr |= DISPPLANE_TILED;
-
-	if (rotation & DRM_ROTATE_180)
-		dspcntr |= DISPPLANE_ROTATE_180;
-
-	if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv))
-		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
-
-	intel_add_fb_offsets(&x, &y, plane_state, 0);
-
-	intel_crtc->dspaddr_offset =
-		intel_compute_tile_offset(&x, &y, plane_state, 0);
-
-	/* HSW+ does this automagically in hardware */
-	if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) &&
-	    rotation & DRM_ROTATE_180) {
-		x += crtc_state->pipe_src_w - 1;
-		y += crtc_state->pipe_src_h - 1;
-	}
-
-	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
-
-	intel_crtc->adjusted_x = x;
-	intel_crtc->adjusted_y = y;
-
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
-	I915_WRITE_FW(reg, dspcntr);
-
-	I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]);
-	I915_WRITE_FW(DSPSURF(plane),
-		      intel_plane_ggtt_offset(plane_state) +
-		      intel_crtc->dspaddr_offset);
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
-		I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x);
-	} else {
-		I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x);
-		I915_WRITE_FW(DSPLINOFF(plane), linear_offset);
-	}
-	POSTING_READ_FW(reg);
-
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
-}
-
 static u32
 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane)
 {
-	if (fb->modifier == DRM_FORMAT_MOD_NONE)
+	if (fb->modifier == DRM_FORMAT_MOD_LINEAR)
 		return 64;
 	else
 		return intel_tile_width_bytes(fb, plane);
@ -3254,7 +3209,7 @@ u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
 	return stride;
 }

-u32 skl_plane_ctl_format(uint32_t pixel_format)
+static u32 skl_plane_ctl_format(uint32_t pixel_format)
 {
 	switch (pixel_format) {
 	case DRM_FORMAT_C8:
@ -3295,10 +3250,10 @@ u32 skl_plane_ctl_format(uint32_t pixel_format)
 	return 0;
 }

-u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
+static u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
 {
 	switch (fb_modifier) {
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 		break;
 	case I915_FORMAT_MOD_X_TILED:
 		return PLANE_CTL_TILED_X;
@ -3313,7 +3268,7 @@ u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
 	return 0;
 }

-u32 skl_plane_ctl_rotation(unsigned int rotation)
+static u32 skl_plane_ctl_rotation(unsigned int rotation)
 {
 	switch (rotation) {
 	case DRM_ROTATE_0:
@ -3335,30 +3290,15 @@ u32 skl_plane_ctl_rotation(unsigned int rotation)
 	return 0;
 }

-static void skylake_update_primary_plane(struct drm_plane *plane,
-					 const struct intel_crtc_state *crtc_state,
-					 const struct intel_plane_state *plane_state)
+u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
+		  const struct intel_plane_state *plane_state)
 {
-	struct drm_device *dev = plane->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	enum plane_id plane_id = to_intel_plane(plane)->id;
-	enum pipe pipe = to_intel_plane(plane)->pipe;
-	u32 plane_ctl;
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
 	unsigned int rotation = plane_state->base.rotation;
-	u32 stride = skl_plane_stride(fb, 0, rotation);
-	u32 surf_addr = plane_state->main.offset;
-	int scaler_id = plane_state->scaler_id;
-	int src_x = plane_state->main.x;
-	int src_y = plane_state->main.y;
-	int src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	int src_h = drm_rect_height(&plane_state->base.src) >> 16;
-	int dst_x = plane_state->base.dst.x1;
-	int dst_y = plane_state->base.dst.y1;
-	int dst_w = drm_rect_width(&plane_state->base.dst);
-	int dst_h = drm_rect_height(&plane_state->base.dst);
-	unsigned long irqflags;
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	u32 plane_ctl;

 	plane_ctl = PLANE_CTL_ENABLE;

@ -3373,6 +3313,39 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
 	plane_ctl |= skl_plane_ctl_tiling(fb->modifier);
 	plane_ctl |= skl_plane_ctl_rotation(rotation);

+	if (key->flags & I915_SET_COLORKEY_DESTINATION)
+		plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION;
+	else if (key->flags & I915_SET_COLORKEY_SOURCE)
+		plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE;
+
+	return plane_ctl;
+}
+
+static void skylake_update_primary_plane(struct drm_plane *plane,
+					 const struct intel_crtc_state *crtc_state,
+					 const struct intel_plane_state *plane_state)
+{
+	struct drm_device *dev = plane->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	enum plane_id plane_id = to_intel_plane(plane)->id;
+	enum pipe pipe = to_intel_plane(plane)->pipe;
+	u32 plane_ctl = plane_state->ctl;
+	unsigned int rotation = plane_state->base.rotation;
+	u32 stride = skl_plane_stride(fb, 0, rotation);
+	u32 surf_addr = plane_state->main.offset;
+	int scaler_id = plane_state->scaler_id;
+	int src_x = plane_state->main.x;
+	int src_y = plane_state->main.y;
+	int src_w = drm_rect_width(&plane_state->base.src) >> 16;
+	int src_h = drm_rect_height(&plane_state->base.src) >> 16;
+	int dst_x = plane_state->base.dst.x1;
+	int dst_y = plane_state->base.dst.y1;
+	int dst_w = drm_rect_width(&plane_state->base.dst);
+	int dst_h = drm_rect_height(&plane_state->base.dst);
+	unsigned long irqflags;
+
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@ -6317,6 +6290,17 @@ intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den)
 static void compute_m_n(unsigned int m, unsigned int n,
 			uint32_t *ret_m, uint32_t *ret_n)
 {
+	/*
+	 * Reduce M/N as much as possible without loss in precision. Several DP
+	 * dongles in particular seem to be fussy about too large *link* M/N
+	 * values. The passed in values are more likely to have the least
+	 * significant bits zero than M after rounding below, so do this first.
+	 */
+	while ((m & 1) == 0 && (n & 1) == 0) {
+		m >>= 1;
+		n >>= 1;
+	}
+
 	*ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX);
 	*ret_m = div_u64((uint64_t) m * *ret_n, n);
 	intel_reduce_m_n_ratio(ret_m, ret_n);
@ -8406,7 +8390,7 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 	tiling = val & PLANE_CTL_TILED_MASK;
 	switch (tiling) {
 	case PLANE_CTL_TILED_LINEAR:
-		fb->modifier = DRM_FORMAT_MOD_NONE;
+		fb->modifier = DRM_FORMAT_MOD_LINEAR;
 		break;
 	case PLANE_CTL_TILED_X:
 		plane_config->tiling = I915_TILING_X;
@ -8862,8 +8846,14 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc,
 				      struct intel_crtc_state *crtc_state)
 {
 	if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) {
-		if (!intel_ddi_pll_select(crtc, crtc_state))
+		struct intel_encoder *encoder =
+			intel_ddi_get_crtc_new_encoder(crtc_state);
+
+		if (!intel_get_shared_dpll(crtc, crtc_state, encoder)) {
+			DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
+					 pipe_name(crtc->pipe));
 			return -EINVAL;
+		}
 	}

 	crtc->lowfreq_avail = false;
@ -9159,6 +9149,31 @@ out:
 	return active;
 }

+static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state,
+			   const struct intel_plane_state *plane_state)
+{
+	unsigned int width = plane_state->base.crtc_w;
+	unsigned int stride = roundup_pow_of_two(width) * 4;
+
+	switch (stride) {
+	default:
+		WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n",
+			  width, stride);
+		stride = 256;
+		/* fallthrough */
+	case 256:
+	case 512:
+	case 1024:
+	case 2048:
+		break;
+	}
+
+	return CURSOR_ENABLE |
+		CURSOR_GAMMA_ENABLE |
+		CURSOR_FORMAT_ARGB |
+		CURSOR_STRIDE(stride);
+}
+
 static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
 			       const struct intel_plane_state *plane_state)
 {
@ -9170,26 +9185,8 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
 	if (plane_state && plane_state->base.visible) {
 		unsigned int width = plane_state->base.crtc_w;
 		unsigned int height = plane_state->base.crtc_h;
-		unsigned int stride = roundup_pow_of_two(width) * 4;
-
-		switch (stride) {
-		default:
-			WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n",
-				  width, stride);
-			stride = 256;
-			/* fallthrough */
-		case 256:
-		case 512:
-		case 1024:
-		case 2048:
-			break;
-		}
-
-		cntl |= CURSOR_ENABLE |
-			CURSOR_GAMMA_ENABLE |
-			CURSOR_FORMAT_ARGB |
-			CURSOR_STRIDE(stride);

+		cntl = plane_state->ctl;
 		size = (height << 12) | width;
 	}

@ -9222,6 +9219,43 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
 	}
 }

+static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
+			   const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	enum pipe pipe = crtc->pipe;
+	u32 cntl;
+
+	cntl = MCURSOR_GAMMA_ENABLE;
+
+	if (HAS_DDI(dev_priv))
+		cntl |= CURSOR_PIPE_CSC_ENABLE;
+
+	cntl |= pipe << 28; /* Connect to correct pipe */
+
+	switch (plane_state->base.crtc_w) {
+	case 64:
+		cntl |= CURSOR_MODE_64_ARGB_AX;
+		break;
+	case 128:
+		cntl |= CURSOR_MODE_128_ARGB_AX;
+		break;
+	case 256:
+		cntl |= CURSOR_MODE_256_ARGB_AX;
+		break;
+	default:
+		MISSING_CASE(plane_state->base.crtc_w);
+		return 0;
+	}
+
+	if (plane_state->base.rotation & DRM_ROTATE_180)
+		cntl |= CURSOR_ROTATE_180;
+
+	return cntl;
+}
+
 static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base,
 			       const struct intel_plane_state *plane_state)
 {
@ -9231,30 +9265,8 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base,
 	int pipe = intel_crtc->pipe;
 	uint32_t cntl = 0;

-	if (plane_state && plane_state->base.visible) {
-		cntl = MCURSOR_GAMMA_ENABLE;
-		switch (plane_state->base.crtc_w) {
-			case 64:
-				cntl |= CURSOR_MODE_64_ARGB_AX;
-				break;
-			case 128:
-				cntl |= CURSOR_MODE_128_ARGB_AX;
-				break;
-			case 256:
-				cntl |= CURSOR_MODE_256_ARGB_AX;
-				break;
-			default:
-				MISSING_CASE(plane_state->base.crtc_w);
-				return;
-		}
-		cntl |= pipe << 28; /* Connect to correct pipe */
-
-		if (HAS_DDI(dev_priv))
-			cntl |= CURSOR_PIPE_CSC_ENABLE;
-
-		if (plane_state->base.rotation & DRM_ROTATE_180)
-			cntl |= CURSOR_ROTATE_180;
-	}
+	if (plane_state && plane_state->base.visible)
+		cntl = plane_state->ctl;

 	if (intel_crtc->cursor_cntl != cntl) {
 		I915_WRITE_FW(CURCNTR(pipe), cntl);
@ -10354,7 +10366,7 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
 	ctl = I915_READ(PLANE_CTL(pipe, 0));
 	ctl &= ~PLANE_CTL_TILED_MASK;
 	switch (fb->modifier) {
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 		break;
 	case I915_FORMAT_MOD_X_TILED:
 		ctl |= PLANE_CTL_TILED_X;
@ -11709,6 +11721,9 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 	if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) ||
 	    IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		PIPE_CONF_CHECK_I(limited_color_range);
+
+	PIPE_CONF_CHECK_I(hdmi_scrambling);
+	PIPE_CONF_CHECK_I(hdmi_high_tmds_clock_ratio);
 	PIPE_CONF_CHECK_I(has_infoframe);

 	PIPE_CONF_CHECK_I(has_audio);
@ -13344,6 +13359,14 @@ intel_check_primary_plane(struct drm_plane *plane,
 		ret = skl_check_plane_surface(state);
 		if (ret)
 			return ret;
+
+		state->ctl = skl_plane_ctl(crtc_state, state);
+	} else {
+		ret = i9xx_check_plane_surface(state);
+		if (ret)
+			return ret;
+
+		state->ctl = i9xx_plane_ctl(crtc_state, state);
 	}

 	return 0;
@ -13603,12 +13626,6 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)

 		primary->update_plane = skylake_update_primary_plane;
 		primary->disable_plane = skylake_disable_primary_plane;
-	} else if (HAS_PCH_SPLIT(dev_priv)) {
-		intel_primary_formats = i965_primary_formats;
-		num_formats = ARRAY_SIZE(i965_primary_formats);
-
-		primary->update_plane = ironlake_update_primary_plane;
-		primary->disable_plane = i9xx_disable_primary_plane;
 	} else if (INTEL_GEN(dev_priv) >= 4) {
 		intel_primary_formats = i965_primary_formats;
 		num_formats = ARRAY_SIZE(i965_primary_formats);
@ -13680,6 +13697,7 @@ intel_check_cursor_plane(struct drm_plane *plane,
 			 struct intel_crtc_state *crtc_state,
 			 struct intel_plane_state *state)
 {
+	struct drm_i915_private *dev_priv = to_i915(plane->dev);
 	struct drm_framebuffer *fb = state->base.fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	enum pipe pipe = to_intel_plane(plane)->pipe;
@ -13699,7 +13717,7 @@ intel_check_cursor_plane(struct drm_plane *plane,
 		return 0;

 	/* Check for which cursor types we support */
-	if (!cursor_size_ok(to_i915(plane->dev), state->base.crtc_w,
+	if (!cursor_size_ok(dev_priv, state->base.crtc_w,
 			    state->base.crtc_h)) {
 		DRM_DEBUG("Cursor dimension %dx%d not supported\n",
 			  state->base.crtc_w, state->base.crtc_h);
@ -13712,7 +13730,7 @@ intel_check_cursor_plane(struct drm_plane *plane,
 		return -ENOMEM;
 	}

-	if (fb->modifier != DRM_FORMAT_MOD_NONE) {
+	if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
 		DRM_DEBUG_KMS("cursor cannot be tiled\n");
 		return -EINVAL;
 	}
@ -13727,12 +13745,17 @@ intel_check_cursor_plane(struct drm_plane *plane,
 	 * display power well must be turned off and on again.
 	 * Refuse the put the cursor into that compromised position.
 	 */
-	if (IS_CHERRYVIEW(to_i915(plane->dev)) && pipe == PIPE_C &&
+	if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C &&
 	    state->base.visible && state->base.crtc_x < 0) {
 		DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
 		return -EINVAL;
 	}

+	if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
+		state->ctl = i845_cursor_ctl(crtc_state, state);
+	else
+		state->ctl = i9xx_cursor_ctl(crtc_state, state);
+
 	return 0;
 }

@ -14368,7 +14391,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 				      mode_cmd->modifier[0]);
 			goto err;
 		}
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 	case I915_FORMAT_MOD_X_TILED:
 		break;
 	default:
@ -14391,7 +14414,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 					   mode_cmd->pixel_format);
 	if (mode_cmd->pitches[0] > pitch_limit) {
 		DRM_DEBUG_KMS("%s pitch (%u) must be at most %d\n",
-			      mode_cmd->modifier[0] != DRM_FORMAT_MOD_NONE ?
+			      mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ?
 			      "tiled" : "linear",
 			      mode_cmd->pitches[0], pitch_limit);
 		goto err;
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@ -398,6 +398,9 @@ struct intel_plane_state {
 		int x, y;
 	} aux;

+	/* plane control register */
+	u32 ctl;
+
 	/*
 	 * scaler_id
 	 *    = -1 : not using a scaler
@ -729,6 +732,12 @@ struct intel_crtc_state {

 	/* bitmask of visible planes (enum plane_id) */
 	u8 active_planes;
+
+	/* HDMI scrambling status */
+	bool hdmi_scrambling;
+
+	/* HDMI High TMDS char rate ratio */
+	bool hdmi_high_tmds_clock_ratio;
 };

 struct intel_crtc {
@ -1220,12 +1229,9 @@ void intel_crt_init(struct drm_i915_private *dev_priv);
 void intel_crt_reset(struct drm_encoder *encoder);

 /* intel_ddi.c */
-void intel_ddi_clk_select(struct intel_encoder *encoder,
-			  struct intel_shared_dpll *pll);
 void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder,
 				struct intel_crtc_state *old_crtc_state,
 				struct drm_connector_state *old_conn_state);
-void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder);
 void hsw_fdi_link_train(struct intel_crtc *crtc,
 			const struct intel_crtc_state *crtc_state);
 void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port);
@ -1236,8 +1242,8 @@ void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv,
 				       enum transcoder cpu_transcoder);
 void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state);
 void intel_ddi_disable_pipe_clock(const  struct intel_crtc_state *crtc_state);
-bool intel_ddi_pll_select(struct intel_crtc *crtc,
-			  struct intel_crtc_state *crtc_state);
+struct intel_encoder *
+intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state);
 void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state);
 void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp);
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
@ -1246,7 +1252,6 @@ bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
 void intel_ddi_get_config(struct intel_encoder *encoder,
 			  struct intel_crtc_state *pipe_config);

-void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder);
 void intel_ddi_clock_get(struct intel_encoder *encoder,
 			 struct intel_crtc_state *pipe_config);
 void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state,
@ -1445,12 +1450,12 @@ static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state)
 	return i915_ggtt_offset(state->vma);
 }

-u32 skl_plane_ctl_format(uint32_t pixel_format);
-u32 skl_plane_ctl_tiling(uint64_t fb_modifier);
-u32 skl_plane_ctl_rotation(unsigned int rotation);
+u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
+		  const struct intel_plane_state *plane_state);
 u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
 		     unsigned int rotation);
 int skl_check_plane_surface(struct intel_plane_state *plane_state);
+int i9xx_check_plane_surface(struct intel_plane_state *plane_state);

 /* intel_csr.c */
 void intel_csr_ucode_init(struct drm_i915_private *);
@ -1620,6 +1625,10 @@ struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder);
 bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 			       struct intel_crtc_state *pipe_config,
 			       struct drm_connector_state *conn_state);
+void intel_hdmi_handle_sink_scrambling(struct intel_encoder *intel_encoder,
+				       struct drm_connector *connector,
+				       bool high_tmds_clock_ratio,
+				       bool scrambling);
 void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable);


--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@ -36,45 +36,45 @@ static const struct engine_info {
 	int (*init_execlists)(struct intel_engine_cs *engine);
 } intel_engines[] = {
 	[RCS] = {
-		.name = "render ring",
-		.exec_id = I915_EXEC_RENDER,
+		.name = "rcs",
 		.hw_id = RCS_HW,
+		.exec_id = I915_EXEC_RENDER,
 		.mmio_base = RENDER_RING_BASE,
 		.irq_shift = GEN8_RCS_IRQ_SHIFT,
 		.init_execlists = logical_render_ring_init,
 		.init_legacy = intel_init_render_ring_buffer,
 	},
 	[BCS] = {
-		.name = "blitter ring",
-		.exec_id = I915_EXEC_BLT,
+		.name = "bcs",
 		.hw_id = BCS_HW,
+		.exec_id = I915_EXEC_BLT,
 		.mmio_base = BLT_RING_BASE,
 		.irq_shift = GEN8_BCS_IRQ_SHIFT,
 		.init_execlists = logical_xcs_ring_init,
 		.init_legacy = intel_init_blt_ring_buffer,
 	},
 	[VCS] = {
-		.name = "bsd ring",
-		.exec_id = I915_EXEC_BSD,
+		.name = "vcs",
 		.hw_id = VCS_HW,
+		.exec_id = I915_EXEC_BSD,
 		.mmio_base = GEN6_BSD_RING_BASE,
 		.irq_shift = GEN8_VCS1_IRQ_SHIFT,
 		.init_execlists = logical_xcs_ring_init,
 		.init_legacy = intel_init_bsd_ring_buffer,
 	},
 	[VCS2] = {
-		.name = "bsd2 ring",
-		.exec_id = I915_EXEC_BSD,
+		.name = "vcs2",
 		.hw_id = VCS2_HW,
+		.exec_id = I915_EXEC_BSD,
 		.mmio_base = GEN8_BSD2_RING_BASE,
 		.irq_shift = GEN8_VCS2_IRQ_SHIFT,
 		.init_execlists = logical_xcs_ring_init,
 		.init_legacy = intel_init_bsd2_ring_buffer,
 	},
 	[VECS] = {
-		.name = "video enhancement ring",
-		.exec_id = I915_EXEC_VEBOX,
+		.name = "vecs",
 		.hw_id = VECS_HW,
+		.exec_id = I915_EXEC_VEBOX,
 		.mmio_base = VEBOX_RING_BASE,
 		.irq_shift = GEN8_VECS_IRQ_SHIFT,
 		.init_execlists = logical_xcs_ring_init,
@ -242,12 +242,12 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 		void *semaphores;

 		/* Semaphores are in noncoherent memory, flush to be safe */
-		semaphores = kmap(page);
+		semaphores = kmap_atomic(page);
 		memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
 		       0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
 		drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
 				       I915_NUM_ENGINES * gen8_semaphore_seqno_size);
-		kunmap(page);
+		kunmap_atomic(semaphores);
 	}

 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
@ -1111,6 +1111,15 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;

+	if (READ_ONCE(dev_priv->gt.active_requests))
+		return false;
+
+	/* If the driver is wedged, HW state may be very inconsistent and
+	 * report that it is still busy, even though we have stopped using it.
+	 */
+	if (i915_terminally_wedged(&dev_priv->gpu_error))
+		return true;
+
 	for_each_engine(engine, dev_priv, id) {
 		if (!intel_engine_is_idle(engine))
 			return false;
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@ -26,14 +26,14 @@
 #define GFXCORE_FAMILY_GEN9		12
 #define GFXCORE_FAMILY_UNKNOWN		0x7fffffff

-#define GUC_CTX_PRIORITY_KMD_HIGH	0
-#define GUC_CTX_PRIORITY_HIGH		1
-#define GUC_CTX_PRIORITY_KMD_NORMAL	2
-#define GUC_CTX_PRIORITY_NORMAL		3
-#define GUC_CTX_PRIORITY_NUM		4
+#define GUC_CLIENT_PRIORITY_KMD_HIGH	0
+#define GUC_CLIENT_PRIORITY_HIGH	1
+#define GUC_CLIENT_PRIORITY_KMD_NORMAL	2
+#define GUC_CLIENT_PRIORITY_NORMAL	3
+#define GUC_CLIENT_PRIORITY_NUM		4

-#define GUC_MAX_GPU_CONTEXTS		1024
-#define	GUC_INVALID_CTX_ID		GUC_MAX_GPU_CONTEXTS
+#define GUC_MAX_STAGE_DESCRIPTORS	1024
+#define	GUC_INVALID_STAGE_ID		GUC_MAX_STAGE_DESCRIPTORS

 #define GUC_RENDER_ENGINE		0
 #define GUC_VIDEO_ENGINE		1
@ -68,14 +68,14 @@
 #define GUC_DOORBELL_ENABLED		1
 #define GUC_DOORBELL_DISABLED		0

-#define GUC_CTX_DESC_ATTR_ACTIVE	(1 << 0)
-#define GUC_CTX_DESC_ATTR_PENDING_DB	(1 << 1)
-#define GUC_CTX_DESC_ATTR_KERNEL	(1 << 2)
-#define GUC_CTX_DESC_ATTR_PREEMPT	(1 << 3)
-#define GUC_CTX_DESC_ATTR_RESET		(1 << 4)
-#define GUC_CTX_DESC_ATTR_WQLOCKED	(1 << 5)
-#define GUC_CTX_DESC_ATTR_PCH		(1 << 6)
-#define GUC_CTX_DESC_ATTR_TERMINATED	(1 << 7)
+#define GUC_STAGE_DESC_ATTR_ACTIVE	BIT(0)
+#define GUC_STAGE_DESC_ATTR_PENDING_DB	BIT(1)
+#define GUC_STAGE_DESC_ATTR_KERNEL	BIT(2)
+#define GUC_STAGE_DESC_ATTR_PREEMPT	BIT(3)
+#define GUC_STAGE_DESC_ATTR_RESET	BIT(4)
+#define GUC_STAGE_DESC_ATTR_WQLOCKED	BIT(5)
+#define GUC_STAGE_DESC_ATTR_PCH		BIT(6)
+#define GUC_STAGE_DESC_ATTR_TERMINATED	BIT(7)

 /* The guc control data is 10 DWORDs */
 #define GUC_CTL_CTXINFO			0
@ -241,8 +241,8 @@ union guc_doorbell_qw {
 	u64 value_qw;
 } __packed;

-#define GUC_MAX_DOORBELLS		256
-#define GUC_INVALID_DOORBELL_ID		(GUC_MAX_DOORBELLS)
+#define GUC_NUM_DOORBELLS	256
+#define GUC_DOORBELL_INVALID	(GUC_NUM_DOORBELLS)

 #define GUC_DB_SIZE			(PAGE_SIZE)
 #define GUC_WQ_SIZE			(PAGE_SIZE * 2)
@ -251,12 +251,12 @@ union guc_doorbell_qw {
 struct guc_wq_item {
 	u32 header;
 	u32 context_desc;
-	u32 ring_tail;
+	u32 submit_element_info;
 	u32 fence_id;
 } __packed;

 struct guc_process_desc {
-	u32 context_id;
+	u32 stage_id;
 	u64 db_base_addr;
 	u32 head;
 	u32 tail;
@ -278,7 +278,7 @@ struct guc_execlist_context {
 	u32 context_desc;
 	u32 context_id;
 	u32 ring_status;
-	u32 ring_lcra;
+	u32 ring_lrca;
 	u32 ring_begin;
 	u32 ring_end;
 	u32 ring_next_free_location;
@ -289,10 +289,18 @@ struct guc_execlist_context {
 	u16 engine_submit_queue_count;
 } __packed;

-/*Context descriptor for communicating between uKernel and Driver*/
-struct guc_context_desc {
+/*
+ * This structure describes a stage set arranged for a particular communication
+ * between uKernel (GuC) and Driver (KMD). Technically, this is known as a
+ * "GuC Context descriptor" in the specs, but we use the term "stage descriptor"
+ * to avoid confusion with all the other things already named "context" in the
+ * driver. A static pool of these descriptors are stored inside a GEM object
+ * (stage_desc_pool) which is held for the entire lifetime of our interaction
+ * with the GuC, being allocated before the GuC is loaded with its firmware.
+ */
+struct guc_stage_desc {
 	u32 sched_common_area;
-	u32 context_id;
+	u32 stage_id;
 	u32 pas_id;
 	u8 engines_used;
 	u64 db_trigger_cpu;
@ -359,7 +367,7 @@ struct guc_policy {
 } __packed;

 struct guc_policies {
-	struct guc_policy policy[GUC_CTX_PRIORITY_NUM][GUC_MAX_ENGINES_NUM];
+	struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINES_NUM];

 	/* In micro seconds. How much time to allow before DPC processing is
 	 * called back via interrupt (to prevent DPC queue drain starving).
@ -401,16 +409,17 @@ struct guc_mmio_regset {
 	u32 number_of_registers;
 } __packed;

+/* MMIO registers that are set as non privileged */
+struct mmio_white_list {
+	u32 mmio_start;
+	u32 offsets[GUC_MMIO_WHITE_LIST_MAX];
+	u32 count;
+} __packed;
+
 struct guc_mmio_reg_state {
 	struct guc_mmio_regset global_reg;
 	struct guc_mmio_regset engine_reg[GUC_MAX_ENGINES_NUM];
-
-	/* MMIO registers that are set as non privileged */
-	struct __packed {
-		u32 mmio_start;
-		u32 offsets[GUC_MMIO_WHITE_LIST_MAX];
-		u32 count;
-	} mmio_white_list[GUC_MAX_ENGINES_NUM];
+	struct mmio_white_list white_list[GUC_MAX_ENGINES_NUM];
 } __packed;

 /* GuC Additional Data Struct */
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@ -73,22 +73,6 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
 #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
 MODULE_FIRMWARE(I915_KBL_GUC_UCODE);

-/* User-friendly representation of an enum */
-const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
-{
-	switch (status) {
-	case INTEL_UC_FIRMWARE_FAIL:
-		return "FAIL";
-	case INTEL_UC_FIRMWARE_NONE:
-		return "NONE";
-	case INTEL_UC_FIRMWARE_PENDING:
-		return "PENDING";
-	case INTEL_UC_FIRMWARE_SUCCESS:
-		return "SUCCESS";
-	default:
-		return "UNKNOWN!";
-	}
-};

 static u32 get_gttype(struct drm_i915_private *dev_priv)
 {
@ -148,16 +132,14 @@ static void guc_params_init(struct drm_i915_private *dev_priv)
 	} else
 		params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED;

-	if (guc->ads_vma) {
-		u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT;
-		params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
-		params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
-	}
-
 	/* If GuC submission is enabled, set up additional parameters here */
 	if (i915.enable_guc_submission) {
-		u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool_vma);
-		u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16;
+		u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT;
+		u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool);
+		u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16;
+
+		params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
+		params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;

 		pgs >>= PAGE_SHIFT;
 		params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) |
@ -430,24 +412,3 @@ int intel_guc_select_fw(struct intel_guc *guc)

 	return 0;
 }
-
-/**
- * intel_guc_fini() - clean up all allocated resources
- * @dev_priv:	i915 device private
- */
-void intel_guc_fini(struct drm_i915_private *dev_priv)
-{
-	struct intel_uc_fw *guc_fw = &dev_priv->guc.fw;
-	struct drm_i915_gem_object *obj;
-
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_guc_submission_disable(dev_priv);
-	i915_guc_submission_fini(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
-	obj = fetch_and_zero(&guc_fw->obj);
-	if (obj)
-		i915_gem_object_put(obj);
-
-	guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
-}
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@ -66,7 +66,6 @@ static int guc_log_control(struct intel_guc *guc, u32 control_val)
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }

-
 /*
 * Sub buffer switch callback. Called whenever relay has to switch to a new
 * sub buffer, relay stays on the same sub buffer if 0 is returned.
@ -139,45 +138,15 @@ static struct rchan_callbacks relay_callbacks = {
 	.remove_buf_file = remove_buf_file_callback,
 };

-static void guc_log_remove_relay_file(struct intel_guc *guc)
-{
-	relay_close(guc->log.relay_chan);
-}
-
-static int guc_log_create_relay_channel(struct intel_guc *guc)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct rchan *guc_log_relay_chan;
-	size_t n_subbufs, subbuf_size;
-
-	/* Keep the size of sub buffers same as shared log buffer */
-	subbuf_size = guc->log.vma->obj->base.size;
-
-	/* Store up to 8 snapshots, which is large enough to buffer sufficient
-	 * boot time logs and provides enough leeway to User, in terms of
-	 * latency, for consuming the logs from relay. Also doesn't take
-	 * up too much memory.
-	 */
-	n_subbufs = 8;
-
-	guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size,
-					n_subbufs, &relay_callbacks, dev_priv);
-	if (!guc_log_relay_chan) {
-		DRM_ERROR("Couldn't create relay chan for GuC logging\n");
-		return -ENOMEM;
-	}
-
-	GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
-	guc->log.relay_chan = guc_log_relay_chan;
-	return 0;
-}
-
-static int guc_log_create_relay_file(struct intel_guc *guc)
+static int guc_log_relay_file_create(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct dentry *log_dir;
 	int ret;

+	if (i915.guc_log_level < 0)
+		return 0;
+
 	/* For now create the log file in /sys/kernel/debug/dri/0 dir */
 	log_dir = dev_priv->drm.primary->debugfs_root;

@ -197,8 +166,8 @@ static int guc_log_create_relay_file(struct intel_guc *guc)
 		return -ENODEV;
 	}

-	ret = relay_late_setup_files(guc->log.relay_chan, "guc_log", log_dir);
-	if (ret) {
+	ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir);
+	if (ret < 0 && ret != -EEXIST) {
 		DRM_ERROR("Couldn't associate relay chan with file %d\n", ret);
 		return ret;
 	}
@ -214,15 +183,15 @@ static void guc_move_to_next_buf(struct intel_guc *guc)
 	smp_wmb();

 	/* All data has been written, so now move the offset of sub buffer. */
-	relay_reserve(guc->log.relay_chan, guc->log.vma->obj->base.size);
+	relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size);

 	/* Switch to the next sub buffer */
-	relay_flush(guc->log.relay_chan);
+	relay_flush(guc->log.runtime.relay_chan);
 }

 static void *guc_get_write_buffer(struct intel_guc *guc)
 {
-	if (!guc->log.relay_chan)
+	if (!guc->log.runtime.relay_chan)
 		return NULL;

 	/* Just get the base address of a new sub buffer and copy data into it
@ -233,7 +202,7 @@ static void *guc_get_write_buffer(struct intel_guc *guc)
 	 * done without using relay_reserve() along with relay_write(). So its
 	 * better to use relay_reserve() alone.
 	 */
-	return relay_reserve(guc->log.relay_chan, 0);
+	return relay_reserve(guc->log.runtime.relay_chan, 0);
 }

 static bool guc_check_log_buf_overflow(struct intel_guc *guc,
@ -284,11 +253,11 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
 	void *src_data, *dst_data;
 	bool new_overflow;

-	if (WARN_ON(!guc->log.buf_addr))
+	if (WARN_ON(!guc->log.runtime.buf_addr))
 		return;

 	/* Get the pointer to shared GuC log buffer */
-	log_buf_state = src_data = guc->log.buf_addr;
+	log_buf_state = src_data = guc->log.runtime.buf_addr;

 	/* Get the pointer to local buffer to store the logs */
 	log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc);
@ -371,153 +340,113 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
 	}
 }

-static void guc_log_cleanup(struct intel_guc *guc)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	/* First disable the flush interrupt */
-	gen9_disable_guc_interrupts(dev_priv);
-
-	if (guc->log.flush_wq)
-		destroy_workqueue(guc->log.flush_wq);
-
-	guc->log.flush_wq = NULL;
-
-	if (guc->log.relay_chan)
-		guc_log_remove_relay_file(guc);
-
-	guc->log.relay_chan = NULL;
-
-	if (guc->log.buf_addr)
-		i915_gem_object_unpin_map(guc->log.vma->obj);
-
-	guc->log.buf_addr = NULL;
-}
-
 static void capture_logs_work(struct work_struct *work)
 {
 	struct intel_guc *guc =
-		container_of(work, struct intel_guc, log.flush_work);
+		container_of(work, struct intel_guc, log.runtime.flush_work);

 	guc_log_capture_logs(guc);
 }

-static int guc_log_create_extras(struct intel_guc *guc)
+static bool guc_log_has_runtime(struct intel_guc *guc)
+{
+	return guc->log.runtime.buf_addr != NULL;
+}
+
+static int guc_log_runtime_create(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	void *vaddr;
-	int ret;
+	struct rchan *guc_log_relay_chan;
+	size_t n_subbufs, subbuf_size;
+	int ret = 0;

 	lockdep_assert_held(&dev_priv->drm.struct_mutex);

-	/* Nothing to do */
-	if (i915.guc_log_level < 0)
-		return 0;
+	GEM_BUG_ON(guc_log_has_runtime(guc));

-	if (!guc->log.buf_addr) {
-		/* Create a WC (Uncached for read) vmalloc mapping of log
-		 * buffer pages, so that we can directly get the data
-		 * (up-to-date) from memory.
-		 */
-		vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC);
-		if (IS_ERR(vaddr)) {
-			ret = PTR_ERR(vaddr);
-			DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
-			return ret;
-		}
-
-		guc->log.buf_addr = vaddr;
+	/* Create a WC (Uncached for read) vmalloc mapping of log
+	 * buffer pages, so that we can directly get the data
+	 * (up-to-date) from memory.
+	 */
+	vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC);
+	if (IS_ERR(vaddr)) {
+		DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
+		return PTR_ERR(vaddr);
 	}

-	if (!guc->log.relay_chan) {
-		/* Create a relay channel, so that we have buffers for storing
-		 * the GuC firmware logs, the channel will be linked with a file
-		 * later on when debugfs is registered.
-		 */
-		ret = guc_log_create_relay_channel(guc);
-		if (ret)
-			return ret;
+	guc->log.runtime.buf_addr = vaddr;
+
+	 /* Keep the size of sub buffers same as shared log buffer */
+	subbuf_size = guc->log.vma->obj->base.size;
+
+	/* Store up to 8 snapshots, which is large enough to buffer sufficient
+	 * boot time logs and provides enough leeway to User, in terms of
+	 * latency, for consuming the logs from relay. Also doesn't take
+	 * up too much memory.
+	 */
+	n_subbufs = 8;
+
+	/* Create a relay channel, so that we have buffers for storing
+	 * the GuC firmware logs, the channel will be linked with a file
+	 * later on when debugfs is registered.
+	 */
+	guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size,
+					n_subbufs, &relay_callbacks, dev_priv);
+	if (!guc_log_relay_chan) {
+		DRM_ERROR("Couldn't create relay chan for GuC logging\n");
+
+		ret = -ENOMEM;
+		goto err_vaddr;
 	}

-	if (!guc->log.flush_wq) {
-		INIT_WORK(&guc->log.flush_work, capture_logs_work);
+	GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
+	guc->log.runtime.relay_chan = guc_log_relay_chan;

-		 /*
-		 * GuC log buffer flush work item has to do register access to
-		 * send the ack to GuC and this work item, if not synced before
-		 * suspend, can potentially get executed after the GFX device is
-		 * suspended.
-		 * By marking the WQ as freezable, we don't have to bother about
-		 * flushing of this work item from the suspend hooks, the pending
-		 * work item if any will be either executed before the suspend
-		 * or scheduled later on resume. This way the handling of work
-		 * item can be kept same between system suspend & rpm suspend.
-		 */
-		guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log",
-							    WQ_HIGHPRI | WQ_FREEZABLE);
-		if (guc->log.flush_wq == NULL) {
-			DRM_ERROR("Couldn't allocate the wq for GuC logging\n");
-			return -ENOMEM;
-		}
+	INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
+
+	/*
+	 * GuC log buffer flush work item has to do register access to
+	 * send the ack to GuC and this work item, if not synced before
+	 * suspend, can potentially get executed after the GFX device is
+	 * suspended.
+	 * By marking the WQ as freezable, we don't have to bother about
+	 * flushing of this work item from the suspend hooks, the pending
+	 * work item if any will be either executed before the suspend
+	 * or scheduled later on resume. This way the handling of work
+	 * item can be kept same between system suspend & rpm suspend.
+	 */
+	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
+						WQ_HIGHPRI | WQ_FREEZABLE);
+	if (!guc->log.runtime.flush_wq) {
+		DRM_ERROR("Couldn't allocate the wq for GuC logging\n");
+		ret = -ENOMEM;
+		goto err_relaychan;
 	}

 	return 0;
+
+err_relaychan:
+	relay_close(guc->log.runtime.relay_chan);
+err_vaddr:
+	i915_gem_object_unpin_map(guc->log.vma->obj);
+	guc->log.runtime.buf_addr = NULL;
+	return ret;
 }

-void intel_guc_log_create(struct intel_guc *guc)
+static void guc_log_runtime_destroy(struct intel_guc *guc)
 {
-	struct i915_vma *vma;
-	unsigned long offset;
-	uint32_t size, flags;
+	/*
+	 * It's possible that the runtime stuff was never allocated because
+	 * guc_log_level was < 0 at the time
+	 **/
+	if (!guc_log_has_runtime(guc))
+		return;

-	if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX)
-		i915.guc_log_level = GUC_LOG_VERBOSITY_MAX;
-
-	/* The first page is to save log buffer state. Allocate one
-	 * extra page for others in case for overlap */
-	size = (1 + GUC_LOG_DPC_PAGES + 1 +
-		GUC_LOG_ISR_PAGES + 1 +
-		GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
-
-	vma = guc->log.vma;
-	if (!vma) {
-		/* We require SSE 4.1 for fast reads from the GuC log buffer and
-		 * it should be present on the chipsets supporting GuC based
-		 * submisssions.
-		 */
-		if (WARN_ON(!i915_has_memcpy_from_wc())) {
-			/* logging will not be enabled */
-			i915.guc_log_level = -1;
-			return;
-		}
-
-		vma = intel_guc_allocate_vma(guc, size);
-		if (IS_ERR(vma)) {
-			/* logging will be off */
-			i915.guc_log_level = -1;
-			return;
-		}
-
-		guc->log.vma = vma;
-
-		if (guc_log_create_extras(guc)) {
-			guc_log_cleanup(guc);
-			i915_vma_unpin_and_release(&guc->log.vma);
-			i915.guc_log_level = -1;
-			return;
-		}
-	}
-
-	/* each allocated unit is a page */
-	flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL |
-		(GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) |
-		(GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
-		(GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
-
-	offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */
-	guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
+	destroy_workqueue(guc->log.runtime.flush_wq);
+	relay_close(guc->log.runtime.relay_chan);
+	i915_gem_object_unpin_map(guc->log.vma->obj);
+	guc->log.runtime.buf_addr = NULL;
 }

 static int guc_log_late_setup(struct intel_guc *guc)
@ -527,24 +456,25 @@ static int guc_log_late_setup(struct intel_guc *guc)

 	lockdep_assert_held(&dev_priv->drm.struct_mutex);

-	if (i915.guc_log_level < 0)
-		return -EINVAL;
+	if (!guc_log_has_runtime(guc)) {
+		/* If log_level was set as -1 at boot time, then setup needed to
+		 * handle log buffer flush interrupts would not have been done yet,
+		 * so do that now.
+		 */
+		ret = guc_log_runtime_create(guc);
+		if (ret)
+			goto err;
+	}

-	/* If log_level was set as -1 at boot time, then setup needed to
-	 * handle log buffer flush interrupts would not have been done yet,
-	 * so do that now.
-	 */
-	ret = guc_log_create_extras(guc);
+	ret = guc_log_relay_file_create(guc);
 	if (ret)
-		goto err;
-
-	ret = guc_log_create_relay_file(guc);
-	if (ret)
-		goto err;
+		goto err_runtime;

 	return 0;
+
+err_runtime:
+	guc_log_runtime_destroy(guc);
 err:
-	guc_log_cleanup(guc);
 	/* logging will remain off */
 	i915.guc_log_level = -1;
 	return ret;
@ -577,7 +507,7 @@ static void guc_flush_logs(struct intel_guc *guc)
 	/* Before initiating the forceful flush, wait for any pending/ongoing
 	 * flush to complete otherwise forceful flush may not actually happen.
 	 */
-	flush_work(&guc->log.flush_work);
+	flush_work(&guc->log.runtime.flush_work);

 	/* Ask GuC to update the log buffer state */
 	guc_log_flush(guc);
@ -586,6 +516,72 @@ static void guc_flush_logs(struct intel_guc *guc)
 	guc_log_capture_logs(guc);
 }

+int intel_guc_log_create(struct intel_guc *guc)
+{
+	struct i915_vma *vma;
+	unsigned long offset;
+	uint32_t size, flags;
+	int ret;
+
+	GEM_BUG_ON(guc->log.vma);
+
+	if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX)
+		i915.guc_log_level = GUC_LOG_VERBOSITY_MAX;
+
+	/* The first page is to save log buffer state. Allocate one
+	 * extra page for others in case for overlap */
+	size = (1 + GUC_LOG_DPC_PAGES + 1 +
+		GUC_LOG_ISR_PAGES + 1 +
+		GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
+
+	/* We require SSE 4.1 for fast reads from the GuC log buffer and
+	 * it should be present on the chipsets supporting GuC based
+	 * submisssions.
+	 */
+	if (WARN_ON(!i915_has_memcpy_from_wc())) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	vma = intel_guc_allocate_vma(guc, size);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err;
+	}
+
+	guc->log.vma = vma;
+
+	if (i915.guc_log_level >= 0) {
+		ret = guc_log_runtime_create(guc);
+		if (ret < 0)
+			goto err_vma;
+	}
+
+	/* each allocated unit is a page */
+	flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL |
+		(GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) |
+		(GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
+		(GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
+
+	offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */
+	guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
+
+	return 0;
+
+err_vma:
+	i915_vma_unpin_and_release(&guc->log.vma);
+err:
+	/* logging will be off */
+	i915.guc_log_level = -1;
+	return ret;
+}
+
+void intel_guc_log_destroy(struct intel_guc *guc)
+{
+	guc_log_runtime_destroy(guc);
+	i915_vma_unpin_and_release(&guc->log.vma);
+}
+
 int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
 {
 	struct intel_guc *guc = &dev_priv->guc;
@ -609,17 +605,22 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
 		return ret;
 	}

-	i915.guc_log_level = log_param.verbosity;
+	if (log_param.logging_enabled) {
+		i915.guc_log_level = log_param.verbosity;

-	/* If log_level was set as -1 at boot time, then the relay channel file
-	 * wouldn't have been created by now and interrupts also would not have
-	 * been enabled.
-	 */
-	if (!dev_priv->guc.log.relay_chan) {
+		/* If log_level was set as -1 at boot time, then the relay channel file
+		 * wouldn't have been created by now and interrupts also would not have
+		 * been enabled. Try again now, just in case.
+		 */
 		ret = guc_log_late_setup(guc);
-		if (!ret)
-			gen9_enable_guc_interrupts(dev_priv);
-	} else if (!log_param.logging_enabled) {
+		if (ret < 0) {
+			DRM_DEBUG_DRIVER("GuC log late setup failed %d\n", ret);
+			return ret;
+		}
+
+		/* GuC logging is currently the only user of Guc2Host interrupts */
+		gen9_enable_guc_interrupts(dev_priv);
+	} else {
 		/* Once logging is disabled, GuC won't generate logs & send an
 		 * interrupt. But there could be some data in the log buffer
 		 * which is yet to be captured. So request GuC to update the log
@ -629,9 +630,6 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)

 		/* As logging is disabled, update log level to reflect that */
 		i915.guc_log_level = -1;
-	} else {
-		/* In case interrupts were disabled, enable them now */
-		gen9_enable_guc_interrupts(dev_priv);
 	}

 	return ret;
@ -639,7 +637,7 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)

 void i915_guc_log_register(struct drm_i915_private *dev_priv)
 {
-	if (!i915.enable_guc_submission)
+	if (!i915.enable_guc_submission || i915.guc_log_level < 0)
 		return;

 	mutex_lock(&dev_priv->drm.struct_mutex);
@ -653,6 +651,8 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
 		return;

 	mutex_lock(&dev_priv->drm.struct_mutex);
-	guc_log_cleanup(&dev_priv->guc);
+	/* GuC logging is currently the only user of Guc2Host interrupts */
+	gen9_disable_guc_interrupts(dev_priv);
+	guc_log_runtime_destroy(&dev_priv->guc);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@ -45,6 +45,8 @@ static bool is_supported_device(struct drm_i915_private *dev_priv)
 		return true;
 	if (IS_SKYLAKE(dev_priv))
 		return true;
+	if (IS_KABYLAKE(dev_priv) && INTEL_DEVID(dev_priv) == 0x591D)
+		return true;
 	return false;
 }

--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@ -34,6 +34,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_scdc_helper.h>
 #include "intel_drv.h"
 #include <drm/i915_drm.h>
 #include <drm/intel_lpe_audio.h>
@ -1208,6 +1209,8 @@ static int intel_hdmi_source_max_tmds_clock(struct drm_i915_private *dev_priv)
 {
 	if (IS_G4X(dev_priv))
 		return 165000;
+	else if (IS_GEMINILAKE(dev_priv))
+		return 594000;
 	else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
 		return 300000;
 	else
@ -1334,6 +1337,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct drm_scdc *scdc = &conn_state->connector->display_info.hdmi.scdc;
 	int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock;
 	int clock_12bpc = clock_8bpc * 3 / 2;
 	int desired_bpp;
@ -1403,6 +1407,16 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,

 	pipe_config->lane_count = 4;

+	if (scdc->scrambling.supported && IS_GEMINILAKE(dev_priv)) {
+		if (scdc->scrambling.low_rates)
+			pipe_config->hdmi_scrambling = true;
+
+		if (pipe_config->port_clock > 340000) {
+			pipe_config->hdmi_scrambling = true;
+			pipe_config->hdmi_high_tmds_clock_ratio = true;
+		}
+	}
+
 	return true;
 }

@ -1812,6 +1826,57 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c
 	intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE;
 }

+/*
+ * intel_hdmi_handle_sink_scrambling: handle sink scrambling/clock ratio setup
+ * @encoder: intel_encoder
+ * @connector: drm_connector
+ * @high_tmds_clock_ratio = bool to indicate if the function needs to set
+ *  or reset the high tmds clock ratio for scrambling
+ * @scrambling: bool to Indicate if the function needs to set or reset
+ *  sink scrambling
+ *
+ * This function handles scrambling on HDMI 2.0 capable sinks.
+ * If required clock rate is > 340 Mhz && scrambling is supported by sink
+ * it enables scrambling. This should be called before enabling the HDMI
+ * 2.0 port, as the sink can choose to disable the scrambling if it doesn't
+ * detect a scrambled clock within 100 ms.
+ */
+void intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
+				       struct drm_connector *connector,
+				       bool high_tmds_clock_ratio,
+				       bool scrambling)
+{
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct drm_i915_private *dev_priv = connector->dev->dev_private;
+	struct drm_scrambling *sink_scrambling =
+				&connector->display_info.hdmi.scdc.scrambling;
+	struct i2c_adapter *adptr = intel_gmbus_get_adapter(dev_priv,
+							   intel_hdmi->ddc_bus);
+	bool ret;
+
+	if (!sink_scrambling->supported)
+		return;
+
+	DRM_DEBUG_KMS("Setting sink scrambling for enc:%s connector:%s\n",
+		      encoder->base.name, connector->name);
+
+	/* Set TMDS bit clock ratio to 1/40 or 1/10 */
+	ret = drm_scdc_set_high_tmds_clock_ratio(adptr, high_tmds_clock_ratio);
+	if (!ret) {
+		DRM_ERROR("Set TMDS ratio failed\n");
+		return;
+	}
+
+	/* Enable/disable sink scrambling */
+	ret = drm_scdc_set_scrambling(adptr, scrambling);
+	if (!ret) {
+		DRM_ERROR("Set sink scrambling failed\n");
+		return;
+	}
+
+	DRM_DEBUG_KMS("sink scrambling handled\n");
+}
+
 static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv,
 			     enum port port)
 {
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@ -250,24 +250,6 @@ fail:
 	return err;
 }

-/**
- * intel_huc_fini() - clean up resources allocated for HuC
- * @dev_priv: the drm_i915_private device
- *
- * Cleans up by releasing the huc firmware GEM obj.
- */
-void intel_huc_fini(struct drm_i915_private *dev_priv)
-{
-	struct intel_uc_fw *huc_fw = &dev_priv->huc.fw;
-	struct drm_i915_gem_object *obj;
-
-	obj = fetch_and_zero(&huc_fw->obj);
-	if (obj)
-		i915_gem_object_put(obj);
-
-	huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
-}
-
 /**
 * intel_guc_auth_huc() - authenticate ucode
 * @dev_priv: the drm_i915_device
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@ -331,6 +331,7 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
 * audio driver and i915
 * @dev_priv: the i915 drm device private data
 * @eld : ELD data
+ * @pipe: pipe id
 * @port: port id
 * @tmds_clk_speed: tmds clock frequency in Hz
 *
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@ -326,7 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;

-	GEM_BUG_ON(!IS_ALIGNED(rq->tail, 8));
+	assert_ring_tail_valid(rq->ring, rq->tail);
 	reg_state[CTX_RING_TAIL+1] = rq->tail;

 	/* True 32b PPGTT with dynamic page allocation: update PDP
@ -399,22 +399,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *last;
 	struct execlist_port *port = engine->execlist_port;
-	unsigned long flags;
 	struct rb_node *rb;
 	bool submit = false;

-	/* After execlist_first is updated, the tasklet will be rescheduled.
-	 *
-	 * If we are currently running (inside the tasklet) and a third
-	 * party queues a request and so updates engine->execlist_first under
-	 * the spinlock (which we have elided), it will atomically set the
-	 * TASKLET_SCHED flag causing the us to be re-executed and pick up
-	 * the change in state (the update to TASKLET_SCHED incurs a memory
-	 * barrier making this cross-cpu checking safe).
-	 */
-	if (!READ_ONCE(engine->execlist_first))
-		return;
-
 	last = port->request;
 	if (last)
 		/* WaIdleLiteRestore:bdw,skl
@ -448,7 +435,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */

-	spin_lock_irqsave(&engine->timeline->lock, flags);
+	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
 	while (rb) {
 		struct drm_i915_gem_request *cursor =
@ -500,7 +487,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		i915_gem_request_assign(&port->request, last);
 		engine->execlist_first = rb;
 	}
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	spin_unlock_irq(&engine->timeline->lock);

 	if (submit)
 		execlists_submit_ports(engine);
@ -530,24 +517,36 @@ static void intel_lrc_irq_handler(unsigned long data)

 	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);

-	while (test_and_clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
+	/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
+	 * imposing the cost of a locked atomic transaction when submitting a
+	 * new request (outside of the context-switch interrupt).
+	 */
+	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
 		u32 __iomem *csb_mmio =
 			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
 		u32 __iomem *buf =
 			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
-		unsigned int csb, head, tail;
+		unsigned int head, tail;

-		csb = readl(csb_mmio);
-		head = GEN8_CSB_READ_PTR(csb);
-		tail = GEN8_CSB_WRITE_PTR(csb);
-		if (head == tail)
-			break;
+		/* The write will be ordered by the uncached read (itself
+		 * a memory barrier), so we do not need another in the form
+		 * of a locked instruction. The race between the interrupt
+		 * handler and the split test/clear is harmless as we order
+		 * our clear before the CSB read. If the interrupt arrived
+		 * first between the test and the clear, we read the updated
+		 * CSB and clear the bit. If the interrupt arrives as we read
+		 * the CSB or later (i.e. after we had cleared the bit) the bit
+		 * is set and we do a new loop.
+		 */
+		__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+		head = readl(csb_mmio);
+		tail = GEN8_CSB_WRITE_PTR(head);
+		head = GEN8_CSB_READ_PTR(head);
+		while (head != tail) {
+			unsigned int status;

-		if (tail < head)
-			tail += GEN8_CSB_ENTRIES;
-		do {
-			unsigned int idx = ++head % GEN8_CSB_ENTRIES;
-			unsigned int status = readl(buf + 2 * idx);
+			if (++head == GEN8_CSB_ENTRIES)
+				head = 0;

 			/* We are flying near dragons again.
 			 *
@ -566,11 +565,12 @@ static void intel_lrc_irq_handler(unsigned long data)
 			 * status notifier.
 			 */

+			status = readl(buf + 2 * head);
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;

 			/* Check the context/desc id for this event matches */
-			GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) !=
+			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
 					 port[0].context_id);

 			GEM_BUG_ON(port[0].count == 0);
@ -588,10 +588,9 @@ static void intel_lrc_irq_handler(unsigned long data)

 			GEM_BUG_ON(port[0].count == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-		} while (head < tail);
+		}

-		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
-				     GEN8_CSB_WRITE_PTR(csb) << 8),
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
 		       csb_mmio);
 	}

@ -647,15 +646,14 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 static struct intel_engine_cs *
 pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
 {
-	struct intel_engine_cs *engine;
+	struct intel_engine_cs *engine =
+		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
+
+	GEM_BUG_ON(!locked);

-	engine = container_of(pt,
-			      struct drm_i915_gem_request,
-			      priotree)->engine;
 	if (engine != locked) {
-		if (locked)
-			spin_unlock_irq(&locked->timeline->lock);
-		spin_lock_irq(&engine->timeline->lock);
+		spin_unlock(&locked->timeline->lock);
+		spin_lock(&engine->timeline->lock);
 	}

 	return engine;
@ -663,7 +661,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)

 static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 {
-	struct intel_engine_cs *engine = NULL;
+	struct intel_engine_cs *engine;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
 	LIST_HEAD(dfs);
@ -697,26 +695,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
 		struct i915_priotree *pt = dep->signaler;

-		list_for_each_entry(p, &pt->signalers_list, signal_link)
+		/* Within an engine, there can be no cycle, but we may
+		 * refer to the same dependency chain multiple times
+		 * (redundant dependencies are not eliminated) and across
+		 * engines.
+		 */
+		list_for_each_entry(p, &pt->signalers_list, signal_link) {
+			GEM_BUG_ON(p->signaler->priority < pt->priority);
 			if (prio > READ_ONCE(p->signaler->priority))
 				list_move_tail(&p->dfs_link, &dfs);
+		}

 		list_safe_reset_next(dep, p, dfs_link);
-		if (!RB_EMPTY_NODE(&pt->node))
-			continue;
-
-		engine = pt_lock_engine(pt, engine);
-
-		/* If it is not already in the rbtree, we can update the
-		 * priority inplace and skip over it (and its dependencies)
-		 * if it is referenced *again* as we descend the dfs.
-		 */
-		if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
-			pt->priority = prio;
-			list_del_init(&dep->dfs_link);
-		}
 	}

+	engine = request->engine;
+	spin_lock_irq(&engine->timeline->lock);
+
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
 		struct i915_priotree *pt = dep->signaler;
@ -728,16 +723,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 		if (prio <= pt->priority)
 			continue;

-		GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));
-
 		pt->priority = prio;
-		rb_erase(&pt->node, &engine->execlist_queue);
-		if (insert_request(pt, &engine->execlist_queue))
-			engine->execlist_first = &pt->node;
+		if (!RB_EMPTY_NODE(&pt->node)) {
+			rb_erase(&pt->node, &engine->execlist_queue);
+			if (insert_request(pt, &engine->execlist_queue))
+				engine->execlist_first = &pt->node;
+		}
 	}

-	if (engine)
-		spin_unlock_irq(&engine->timeline->lock);
+	spin_unlock_irq(&engine->timeline->lock);

 	/* XXX Do we need to preempt to make room for us and our deps? */
 }
@ -1255,7 +1249,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;

 	request->ring->head = request->postfix;
-	request->ring->last_retired_head = -1;
 	intel_ring_update_space(request->ring);

 	/* Catch up with any missed context-switch interrupts */
@ -1268,8 +1261,10 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	GEM_BUG_ON(request->ctx != port[0].request->ctx);

 	/* Reset WaIdleLiteRestore:bdw,skl as well */
-	request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
-	GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
+	request->tail =
+		intel_ring_wrap(request->ring,
+				request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
+	assert_ring_tail_valid(request->ring, request->tail);
 }

 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
@ -1480,7 +1475,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 	request->tail = intel_ring_offset(request, cs);
-	GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
+	assert_ring_tail_valid(request->ring, request->tail);

 	gen8_emit_wa_tail(request, cs);
 }
@ -1508,7 +1503,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 	request->tail = intel_ring_offset(request, cs);
-	GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
+	assert_ring_tail_valid(request->ring, request->tail);

 	gen8_emit_wa_tail(request, cs);
 }
@ -1575,6 +1570,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
 	engine->schedule = execlists_schedule;
+	engine->irq_tasklet.func = intel_lrc_irq_handler;
 }

 static void
@ -2041,7 +2037,6 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 			i915_gem_object_unpin_map(ce->state->obj);

 			ce->ring->head = ce->ring->tail = 0;
-			ce->ring->last_retired_head = -1;
 			intel_ring_update_space(ce->ring);
 		}
 	}
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@ -920,6 +920,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
 	char buf[sizeof(OPREGION_SIGNATURE)];
 	int err = 0;
 	void *base;
+	const void *vbt;
+	u32 vbt_size;

 	BUILD_BUG_ON(sizeof(struct opregion_header) != 0x100);
 	BUILD_BUG_ON(sizeof(struct opregion_acpi) != 0x100);
@ -972,45 +974,46 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
 	if (mboxes & MBOX_ASLE_EXT)
 		DRM_DEBUG_DRIVER("ASLE extension supported\n");

-	if (!dmi_check_system(intel_no_opregion_vbt)) {
-		const void *vbt = NULL;
-		u32 vbt_size = 0;
-
-		if (opregion->header->opregion_ver >= 2 && opregion->asle &&
-		    opregion->asle->rvda && opregion->asle->rvds) {
-			opregion->rvda = memremap(opregion->asle->rvda,
-						  opregion->asle->rvds,
-						  MEMREMAP_WB);
-			vbt = opregion->rvda;
-			vbt_size = opregion->asle->rvds;
-		}
+	if (dmi_check_system(intel_no_opregion_vbt))
+		goto out;

+	if (opregion->header->opregion_ver >= 2 && opregion->asle &&
+	    opregion->asle->rvda && opregion->asle->rvds) {
+		opregion->rvda = memremap(opregion->asle->rvda,
+					  opregion->asle->rvds,
+					  MEMREMAP_WB);
+		vbt = opregion->rvda;
+		vbt_size = opregion->asle->rvds;
 		if (intel_bios_is_valid_vbt(vbt, vbt_size)) {
 			DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (RVDA)\n");
 			opregion->vbt = vbt;
 			opregion->vbt_size = vbt_size;
+			goto out;
 		} else {
-			vbt = base + OPREGION_VBT_OFFSET;
-			/*
-			 * The VBT specification says that if the ASLE ext
-			 * mailbox is not used its area is reserved, but
-			 * on some CHT boards the VBT extends into the
-			 * ASLE ext area. Allow this even though it is
-			 * against the spec, so we do not end up rejecting
-			 * the VBT on those boards (and end up not finding the
-			 * LCD panel because of this).
-			 */
-			vbt_size = (mboxes & MBOX_ASLE_EXT) ?
-				OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE;
-			vbt_size -= OPREGION_VBT_OFFSET;
-			if (intel_bios_is_valid_vbt(vbt, vbt_size)) {
-				DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n");
-				opregion->vbt = vbt;
-				opregion->vbt_size = vbt_size;
-			}
+			DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (RVDA)\n");
 		}
 	}

+	vbt = base + OPREGION_VBT_OFFSET;
+	/*
+	 * The VBT specification says that if the ASLE ext mailbox is not used
+	 * its area is reserved, but on some CHT boards the VBT extends into the
+	 * ASLE ext area. Allow this even though it is against the spec, so we
+	 * do not end up rejecting the VBT on those boards (and end up not
+	 * finding the LCD panel because of this).
+	 */
+	vbt_size = (mboxes & MBOX_ASLE_EXT) ?
+		OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE;
+	vbt_size -= OPREGION_VBT_OFFSET;
+	if (intel_bios_is_valid_vbt(vbt, vbt_size)) {
+		DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n");
+		opregion->vbt = vbt;
+		opregion->vbt_size = vbt_size;
+	} else {
+		DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (Mailbox #4)\n");
+	}
+
+out:
 	return 0;

 err_out:
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@ -655,6 +655,29 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
 	return wm_size;
 }

+static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
+				   const struct intel_plane_state *plane_state)
+{
+	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+
+	/* FIXME check the 'enable' instead */
+	if (!crtc_state->base.active)
+		return false;
+
+	/*
+	 * Treat cursor with fb as always visible since cursor updates
+	 * can happen faster than the vrefresh rate, and the current
+	 * watermark code doesn't handle that correctly. Cursor updates
+	 * which set/clear the fb or change the cursor size are going
+	 * to get throttled by intel_legacy_cursor_update() to work
+	 * around this problem with the watermark code.
+	 */
+	if (plane->id == PLANE_CURSOR)
+		return plane_state->base.fb != NULL;
+	else
+		return plane_state->base.visible;
+}
+
 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
 {
 	struct intel_crtc *crtc, *enabled = NULL;
@ -1961,7 +1984,7 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
 	uint32_t method1, method2;
 	int cpp;

-	if (!cstate->base.active || !pstate->base.visible)
+	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;

 	cpp = pstate->base.fb->format->cpp[0];
@ -1990,7 +2013,7 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
 	uint32_t method1, method2;
 	int cpp;

-	if (!cstate->base.active || !pstate->base.visible)
+	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;

 	cpp = pstate->base.fb->format->cpp[0];
@ -2013,15 +2036,7 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
 {
 	int cpp;

-	/*
-	 * Treat cursor with fb as always visible since cursor updates
-	 * can happen faster than the vrefresh rate, and the current
-	 * watermark code doesn't handle that correctly. Cursor updates
-	 * which set/clear the fb or change the cursor size are going
-	 * to get throttled by intel_legacy_cursor_update() to work
-	 * around this problem with the watermark code.
-	 */
-	if (!cstate->base.active || !pstate->base.fb)
+	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;

 	cpp = pstate->base.fb->format->cpp[0];
@ -2038,7 +2053,7 @@ static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
 {
 	int cpp;

-	if (!cstate->base.active || !pstate->base.visible)
+	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;

 	cpp = pstate->base.fb->format->cpp[0];
@ -3346,19 +3361,29 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 * Caller should take care of dividing & rounding off the value.
 */
 static uint32_t
-skl_plane_downscale_amount(const struct intel_plane_state *pstate)
+skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
+			   const struct intel_plane_state *pstate)
 {
+	struct intel_plane *plane = to_intel_plane(pstate->base.plane);
 	uint32_t downscale_h, downscale_w;
 	uint32_t src_w, src_h, dst_w, dst_h;

-	if (WARN_ON(!pstate->base.visible))
+	if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
 		return DRM_PLANE_HELPER_NO_SCALING;

 	/* n.b., src is 16.16 fixed point, dst is whole integer */
-	src_w = drm_rect_width(&pstate->base.src);
-	src_h = drm_rect_height(&pstate->base.src);
-	dst_w = drm_rect_width(&pstate->base.dst);
-	dst_h = drm_rect_height(&pstate->base.dst);
+	if (plane->id == PLANE_CURSOR) {
+		src_w = pstate->base.src_w;
+		src_h = pstate->base.src_h;
+		dst_w = pstate->base.crtc_w;
+		dst_h = pstate->base.crtc_h;
+	} else {
+		src_w = drm_rect_width(&pstate->base.src);
+		src_h = drm_rect_height(&pstate->base.src);
+		dst_w = drm_rect_width(&pstate->base.dst);
+		dst_h = drm_rect_height(&pstate->base.dst);
+	}
+
 	if (drm_rotation_90_or_270(pstate->base.rotation))
 		swap(dst_w, dst_h);

@ -3374,6 +3399,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 			     const struct drm_plane_state *pstate,
 			     int y)
 {
+	struct intel_plane *plane = to_intel_plane(pstate->plane);
 	struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
 	uint32_t down_scale_amount, data_rate;
 	uint32_t width = 0, height = 0;
@ -3386,7 +3412,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 	fb = pstate->fb;
 	format = fb->format->format;

-	if (pstate->plane->type == DRM_PLANE_TYPE_CURSOR)
+	if (plane->id == PLANE_CURSOR)
 		return 0;
 	if (y && format != DRM_FORMAT_NV12)
 		return 0;
@ -3410,7 +3436,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 		data_rate = width * height * fb->format->cpp[0];
 	}

-	down_scale_amount = skl_plane_downscale_amount(intel_pstate);
+	down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);

 	return (uint64_t)data_rate * down_scale_amount >> 16;
 }
@ -3702,7 +3728,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst
 	uint64_t pixel_rate;

 	/* Shouldn't reach here on disabled planes... */
-	if (WARN_ON(!pstate->base.visible))
+	if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
 		return 0;

 	/*
@ -3710,7 +3736,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst
 	 * with additional adjustments for plane-specific scaling.
 	 */
 	adjusted_pixel_rate = cstate->pixel_rate;
-	downscale_amount = skl_plane_downscale_amount(pstate);
+	downscale_amount = skl_plane_downscale_amount(cstate, pstate);

 	pixel_rate = adjusted_pixel_rate * downscale_amount >> 16;
 	WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0));
@ -3727,6 +3753,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 				uint8_t *out_lines, /* out */
 				bool *enabled /* out */)
 {
+	struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
 	struct drm_plane_state *pstate = &intel_pstate->base;
 	struct drm_framebuffer *fb = pstate->fb;
 	uint32_t latency = dev_priv->wm.skl_latency[level];
@ -3746,7 +3773,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
 	bool y_tiled, x_tiled;

-	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) {
+	if (latency == 0 ||
+	    !intel_wm_plane_visible(cstate, intel_pstate)) {
 		*enabled = false;
 		return 0;
 	}
@ -3762,8 +3790,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	if (apply_memory_bw_wa && x_tiled)
 		latency += 15;

-	width = drm_rect_width(&intel_pstate->base.src) >> 16;
-	height = drm_rect_height(&intel_pstate->base.src) >> 16;
+	if (plane->id == PLANE_CURSOR) {
+		width = intel_pstate->base.crtc_w;
+		height = intel_pstate->base.crtc_h;
+	} else {
+		width = drm_rect_width(&intel_pstate->base.src) >> 16;
+		height = drm_rect_height(&intel_pstate->base.src) >> 16;
+	}

 	if (drm_rotation_90_or_270(pstate->rotation))
 		swap(width, height);
@ -8055,7 +8088,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
 	case GEN6_PCODE_TIMEOUT:
 		return -ETIMEDOUT;
 	default:
-		MISSING_CASE(flags)
+		MISSING_CASE(flags);
 		return 0;
 	}
 }
@ -8355,6 +8388,7 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
 			     const i915_reg_t reg)
 {
 	u32 lower, upper, tmp;
+	int loop = 2;

 	/* The register accessed do not need forcewake. We borrow
 	 * uncore lock to prevent concurrent access to range reg.
@ -8383,7 +8417,7 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
 		I915_WRITE_FW(VLV_COUNTER_CONTROL,
 			      _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
 		upper = I915_READ_FW(reg);
-	} while (upper != tmp);
+	} while (upper != tmp && --loop);

 	/* Everywhere else we always use VLV_COUNTER_CONTROL with the
 	 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@ -49,13 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)

 void intel_ring_update_space(struct intel_ring *ring)
 {
-	if (ring->last_retired_head != -1) {
-		ring->head = ring->last_retired_head;
-		ring->last_retired_head = -1;
-	}
-
-	ring->space = __intel_ring_space(ring->head & HEAD_ADDR,
-					 ring->tail, ring->size);
+	ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
 }

 static int
@ -618,12 +612,8 @@ static void reset_ring_common(struct intel_engine_cs *engine,
 		}

 		/* If the rq hung, jump to its breadcrumb and skip the batch */
-		if (request->fence.error == -EIO) {
-			struct intel_ring *ring = request->ring;
-
-			ring->head = request->postfix;
-			ring->last_retired_head = -1;
-		}
+		if (request->fence.error == -EIO)
+			request->ring->head = request->postfix;
 	} else {
 		engine->legacy_active_context = NULL;
 	}
@ -784,7 +774,7 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)

 	i915_gem_request_submit(request);

-	GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
+	assert_ring_tail_valid(request->ring, request->tail);
 	I915_WRITE_TAIL(request->engine, request->tail);
 }

@ -796,7 +786,7 @@ static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
 	*cs++ = MI_USER_INTERRUPT;

 	req->tail = intel_ring_offset(req, cs);
-	GEM_BUG_ON(!IS_ALIGNED(req->tail, 8));
+	assert_ring_tail_valid(req->ring, req->tail);
 }

 static const int i9xx_emit_breadcrumb_sz = 4;
@ -835,7 +825,7 @@ static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
 	*cs++ = MI_NOOP;

 	req->tail = intel_ring_offset(req, cs);
-	GEM_BUG_ON(!IS_ALIGNED(req->tail, 8));
+	assert_ring_tail_valid(req->ring, req->tail);
 }

 static const int gen8_render_emit_breadcrumb_sz = 8;
@ -1392,7 +1382,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 	if (IS_I830(engine->i915) || IS_I845G(engine->i915))
 		ring->effective_size -= 2 * CACHELINE_BYTES;

-	ring->last_retired_head = -1;
 	intel_ring_update_space(ring);

 	vma = intel_ring_create_vma(engine->i915, size);
@ -1451,6 +1440,8 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
 		ret = context_pin(ctx);
 		if (ret)
 			goto error;
+
+		ce->state->obj->mm.dirty = true;
 	}

 	/* The kernel context is only used as a placeholder for flushing the
@ -1571,10 +1562,8 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;

-	for_each_engine(engine, dev_priv, id) {
+	for_each_engine(engine, dev_priv, id)
 		engine->buffer->head = engine->buffer->tail;
-		engine->buffer->last_retired_head = -1;
-	}
 }

 static int ring_request_alloc(struct drm_i915_gem_request *request)
@ -2128,7 +2117,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)

 			num_rings =
 				hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
-			engine->emit_breadcrumb_sz += num_rings * 6;
+			engine->emit_breadcrumb_sz += num_rings * 8;
 		}
 	} else if (INTEL_GEN(dev_priv) >= 6) {
 		engine->init_context = intel_rcs_ctx_init;
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@ -149,16 +149,6 @@ struct intel_ring {
 	int space;
 	int size;
 	int effective_size;
-
-	/** We track the position of the requests in the ring buffer, and
-	 * when each is retired we increment last_retired_head as the GPU
-	 * must have finished processing the request and so we know we
-	 * can advance the ringbuffer up to that position.
-	 *
-	 * last_retired_head is set to -1 after the value is consumed so
-	 * we can detect new retirements.
-	 */
-	u32 last_retired_head;
 };

 struct i915_gem_context;
@ -442,18 +432,10 @@ struct intel_engine_cs {
 	u32 (*get_cmd_length_mask)(u32 cmd_header);
 };

-static inline unsigned
+static inline unsigned int
 intel_engine_flag(const struct intel_engine_cs *engine)
 {
-	return 1 << engine->id;
-}
-
-static inline void
-intel_flush_status_page(struct intel_engine_cs *engine, int reg)
-{
-	mb();
-	clflush(&engine->status_page.page_addr[reg]);
-	mb();
+	return BIT(engine->id);
 }

 static inline u32
@ -464,14 +446,22 @@ intel_read_status_page(struct intel_engine_cs *engine, int reg)
 }

 static inline void
-intel_write_status_page(struct intel_engine_cs *engine,
-			int reg, u32 value)
+intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 {
-	mb();
-	clflush(&engine->status_page.page_addr[reg]);
-	engine->status_page.page_addr[reg] = value;
-	clflush(&engine->status_page.page_addr[reg]);
-	mb();
+	/* Writing into the status page should be done sparingly. Since
+	 * we do when we are uncertain of the device state, we take a bit
+	 * of extra paranoia to try and ensure that the HWS takes the value
+	 * we give and that it doesn't end up trapped inside the CPU!
+	 */
+	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		mb();
+		clflush(&engine->status_page.page_addr[reg]);
+		engine->status_page.page_addr[reg] = value;
+		clflush(&engine->status_page.page_addr[reg]);
+		mb();
+	} else {
+		WRITE_ONCE(engine->status_page.page_addr[reg], value);
+	}
 }

 /*
@ -525,12 +515,29 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
 }

 static inline u32
-intel_ring_offset(struct drm_i915_gem_request *req, void *addr)
+intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+	return pos & (ring->size - 1);
+}
+
+static inline u32
+intel_ring_offset(const struct drm_i915_gem_request *req, void *addr)
 {
 	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
 	u32 offset = addr - req->ring->vaddr;
 	GEM_BUG_ON(offset > req->ring->size);
-	return offset & (req->ring->size - 1);
+	return intel_ring_wrap(req->ring, offset);
+}
+
+static inline void
+assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
+{
+	/* We could combine these into a single tail operation, but keeping
+	 * them as seperate tests will help identify the cause should one
+	 * ever fire.
+	 */
+	GEM_BUG_ON(!IS_ALIGNED(tail, 8));
+	GEM_BUG_ON(tail >= ring->size);
 }

 void intel_ring_update_space(struct intel_ring *ring);
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@ -2840,8 +2840,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
 {
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	struct device *kdev = &pdev->dev;
+	int ret;

-	pm_runtime_get_sync(kdev);
+	ret = pm_runtime_get_sync(kdev);
+	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);

 	atomic_inc(&dev_priv->pm.wakeref_count);
 	assert_rpm_wakelock_held(dev_priv);
@ -2871,7 +2873,8 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
 		 * function, since the power state is undefined. This applies
 		 * atm to the late/early system suspend/resume handlers.
 		 */
-		WARN_ON_ONCE(ret < 0);
+		WARN_ONCE(ret < 0,
+			  "pm_runtime_get_if_in_use() failed: %d\n", ret);
 		if (ret <= 0)
 			return false;
 	}
@ -2955,8 +2958,11 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 	 * platforms without RPM support.
 	 */
 	if (!HAS_RUNTIME_PM(dev_priv)) {
+		int ret;
+
 		pm_runtime_dont_use_autosuspend(kdev);
-		pm_runtime_get_sync(kdev);
+		ret = pm_runtime_get_sync(kdev);
+		WARN(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
 	} else {
 		pm_runtime_use_autosuspend(kdev);
 	}
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@ -217,7 +217,7 @@ skl_update_plane(struct drm_plane *drm_plane,
 	struct drm_framebuffer *fb = plane_state->base.fb;
 	enum plane_id plane_id = intel_plane->id;
 	enum pipe pipe = intel_plane->pipe;
-	u32 plane_ctl;
+	u32 plane_ctl = plane_state->ctl;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	u32 surf_addr = plane_state->main.offset;
 	unsigned int rotation = plane_state->base.rotation;
@ -232,24 +232,6 @@ skl_update_plane(struct drm_plane *drm_plane,
 	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
 	unsigned long irqflags;

-	plane_ctl = PLANE_CTL_ENABLE;
-
-	if (!IS_GEMINILAKE(dev_priv)) {
-		plane_ctl |=
-			PLANE_CTL_PIPE_GAMMA_ENABLE |
-			PLANE_CTL_PIPE_CSC_ENABLE |
-			PLANE_CTL_PLANE_GAMMA_DISABLE;
-	}
-
-	plane_ctl |= skl_plane_ctl_format(fb->format->format);
-	plane_ctl |= skl_plane_ctl_tiling(fb->modifier);
-	plane_ctl |= skl_plane_ctl_rotation(rotation);
-
-	if (key->flags & I915_SET_COLORKEY_DESTINATION)
-		plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION;
-	else if (key->flags & I915_SET_COLORKEY_SOURCE)
-		plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE;
-
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@ -361,32 +343,15 @@ chv_update_csc(struct intel_plane *intel_plane, uint32_t format)
 	I915_WRITE_FW(SPCSCCROCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0));
 }

-static void
-vlv_update_plane(struct drm_plane *dplane,
-		 const struct intel_crtc_state *crtc_state,
-		 const struct intel_plane_state *plane_state)
+static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *plane_state)
 {
-	struct drm_device *dev = dplane->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_plane *intel_plane = to_intel_plane(dplane);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	enum pipe pipe = intel_plane->pipe;
-	enum plane_id plane_id = intel_plane->id;
-	u32 sprctl;
-	u32 sprsurf_offset, linear_offset;
+	const struct drm_framebuffer *fb = plane_state->base.fb;
 	unsigned int rotation = plane_state->base.rotation;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
-	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
-	uint32_t x = plane_state->base.src.x1 >> 16;
-	uint32_t y = plane_state->base.src.y1 >> 16;
-	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
-	unsigned long irqflags;
+	u32 sprctl;

-	sprctl = SP_ENABLE;
+	sprctl = SP_ENABLE | SP_GAMMA_ENABLE;

 	switch (fb->format->format) {
 	case DRM_FORMAT_YUYV:
@ -423,20 +388,10 @@ vlv_update_plane(struct drm_plane *dplane,
 		sprctl |= SP_FORMAT_RGBA8888;
 		break;
 	default:
-		/*
-		 * If we get here one of the upper layers failed to filter
-		 * out the unsupported plane formats
-		 */
-		BUG();
-		break;
+		MISSING_CASE(fb->format->format);
+		return 0;
 	}

-	/*
-	 * Enable gamma to match primary/cursor plane behaviour.
-	 * FIXME should be user controllable via propertiesa.
-	 */
-	sprctl |= SP_GAMMA_ENABLE;
-
 	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
 		sprctl |= SP_TILED;

@ -449,22 +404,36 @@ vlv_update_plane(struct drm_plane *dplane,
 	if (key->flags & I915_SET_COLORKEY_SOURCE)
 		sprctl |= SP_SOURCE_KEY;

+	return sprctl;
+}
+
+static void
+vlv_update_plane(struct drm_plane *dplane,
+		 const struct intel_crtc_state *crtc_state,
+		 const struct intel_plane_state *plane_state)
+{
+	struct drm_device *dev = dplane->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *intel_plane = to_intel_plane(dplane);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	enum pipe pipe = intel_plane->pipe;
+	enum plane_id plane_id = intel_plane->id;
+	u32 sprctl = plane_state->ctl;
+	u32 sprsurf_offset = plane_state->main.offset;
+	u32 linear_offset;
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	int crtc_x = plane_state->base.dst.x1;
+	int crtc_y = plane_state->base.dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
+	uint32_t x = plane_state->main.x;
+	uint32_t y = plane_state->main.y;
+	unsigned long irqflags;
+
 	/* Sizes are 0 based */
-	src_w--;
-	src_h--;
 	crtc_w--;
 	crtc_h--;

-	intel_add_fb_offsets(&x, &y, plane_state, 0);
-	sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
-
-	if (rotation & DRM_ROTATE_180) {
-		x += src_w;
-		y += src_h;
-	} else if (rotation & DRM_REFLECT_X) {
-		x += src_w;
-	}
-
 	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);

 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@ -516,31 +485,23 @@ vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }

-static void
-ivb_update_plane(struct drm_plane *plane,
-		 const struct intel_crtc_state *crtc_state,
-		 const struct intel_plane_state *plane_state)
+static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *plane_state)
 {
-	struct drm_device *dev = plane->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_plane *intel_plane = to_intel_plane(plane);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	enum pipe pipe = intel_plane->pipe;
-	u32 sprctl, sprscale = 0;
-	u32 sprsurf_offset, linear_offset;
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
 	unsigned int rotation = plane_state->base.rotation;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
-	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
-	uint32_t x = plane_state->base.src.x1 >> 16;
-	uint32_t y = plane_state->base.src.y1 >> 16;
-	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
-	unsigned long irqflags;
+	u32 sprctl;

-	sprctl = SPRITE_ENABLE;
+	sprctl = SPRITE_ENABLE | SPRITE_GAMMA_ENABLE;
+
+	if (IS_IVYBRIDGE(dev_priv))
+		sprctl |= SPRITE_TRICKLE_FEED_DISABLE;
+
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+		sprctl |= SPRITE_PIPE_CSC_ENABLE;

 	switch (fb->format->format) {
 	case DRM_FORMAT_XBGR8888:
@ -562,34 +523,48 @@ ivb_update_plane(struct drm_plane *plane,
 		sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_VYUY;
 		break;
 	default:
-		BUG();
+		MISSING_CASE(fb->format->format);
+		return 0;
 	}

-	/*
-	 * Enable gamma to match primary/cursor plane behaviour.
-	 * FIXME should be user controllable via propertiesa.
-	 */
-	sprctl |= SPRITE_GAMMA_ENABLE;
-
 	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
 		sprctl |= SPRITE_TILED;

 	if (rotation & DRM_ROTATE_180)
 		sprctl |= SPRITE_ROTATE_180;

-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		sprctl &= ~SPRITE_TRICKLE_FEED_DISABLE;
-	else
-		sprctl |= SPRITE_TRICKLE_FEED_DISABLE;
-
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		sprctl |= SPRITE_PIPE_CSC_ENABLE;
-
 	if (key->flags & I915_SET_COLORKEY_DESTINATION)
 		sprctl |= SPRITE_DEST_KEY;
 	else if (key->flags & I915_SET_COLORKEY_SOURCE)
 		sprctl |= SPRITE_SOURCE_KEY;

+	return sprctl;
+}
+
+static void
+ivb_update_plane(struct drm_plane *plane,
+		 const struct intel_crtc_state *crtc_state,
+		 const struct intel_plane_state *plane_state)
+{
+	struct drm_device *dev = plane->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	enum pipe pipe = intel_plane->pipe;
+	u32 sprctl = plane_state->ctl, sprscale = 0;
+	u32 sprsurf_offset = plane_state->main.offset;
+	u32 linear_offset;
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	int crtc_x = plane_state->base.dst.x1;
+	int crtc_y = plane_state->base.dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
+	uint32_t x = plane_state->main.x;
+	uint32_t y = plane_state->main.y;
+	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
+	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
+	unsigned long irqflags;
+
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@ -599,16 +574,6 @@ ivb_update_plane(struct drm_plane *plane,
 	if (crtc_w != src_w || crtc_h != src_h)
 		sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h;

-	intel_add_fb_offsets(&x, &y, plane_state, 0);
-	sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
-
-	/* HSW+ does this automagically in hardware */
-	if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) &&
-	    rotation & DRM_ROTATE_180) {
-		x += src_w;
-		y += src_h;
-	}
-
 	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);

 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@ -664,31 +629,20 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }

-static void
-ilk_update_plane(struct drm_plane *plane,
-		 const struct intel_crtc_state *crtc_state,
-		 const struct intel_plane_state *plane_state)
+static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *plane_state)
 {
-	struct drm_device *dev = plane->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_plane *intel_plane = to_intel_plane(plane);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	int pipe = intel_plane->pipe;
-	u32 dvscntr, dvsscale;
-	u32 dvssurf_offset, linear_offset;
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
 	unsigned int rotation = plane_state->base.rotation;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
-	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
-	uint32_t x = plane_state->base.src.x1 >> 16;
-	uint32_t y = plane_state->base.src.y1 >> 16;
-	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
-	unsigned long irqflags;
+	u32 dvscntr;

-	dvscntr = DVS_ENABLE;
+	dvscntr = DVS_ENABLE | DVS_GAMMA_ENABLE;
+
+	if (IS_GEN6(dev_priv))
+		dvscntr |= DVS_TRICKLE_FEED_DISABLE;

 	switch (fb->format->format) {
 	case DRM_FORMAT_XBGR8888:
@ -710,47 +664,57 @@ ilk_update_plane(struct drm_plane *plane,
 		dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_VYUY;
 		break;
 	default:
-		BUG();
+		MISSING_CASE(fb->format->format);
+		return 0;
 	}

-	/*
-	 * Enable gamma to match primary/cursor plane behaviour.
-	 * FIXME should be user controllable via propertiesa.
-	 */
-	dvscntr |= DVS_GAMMA_ENABLE;
-
 	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
 		dvscntr |= DVS_TILED;

 	if (rotation & DRM_ROTATE_180)
 		dvscntr |= DVS_ROTATE_180;

-	if (IS_GEN6(dev_priv))
-		dvscntr |= DVS_TRICKLE_FEED_DISABLE; /* must disable */
-
 	if (key->flags & I915_SET_COLORKEY_DESTINATION)
 		dvscntr |= DVS_DEST_KEY;
 	else if (key->flags & I915_SET_COLORKEY_SOURCE)
 		dvscntr |= DVS_SOURCE_KEY;

+	return dvscntr;
+}
+
+static void
+ilk_update_plane(struct drm_plane *plane,
+		 const struct intel_crtc_state *crtc_state,
+		 const struct intel_plane_state *plane_state)
+{
+	struct drm_device *dev = plane->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	int pipe = intel_plane->pipe;
+	u32 dvscntr = plane_state->ctl, dvsscale = 0;
+	u32 dvssurf_offset = plane_state->main.offset;
+	u32 linear_offset;
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	int crtc_x = plane_state->base.dst.x1;
+	int crtc_y = plane_state->base.dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
+	uint32_t x = plane_state->main.x;
+	uint32_t y = plane_state->main.y;
+	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
+	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
+	unsigned long irqflags;
+
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
 	crtc_w--;
 	crtc_h--;

-	dvsscale = 0;
 	if (crtc_w != src_w || crtc_h != src_h)
 		dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h;

-	intel_add_fb_offsets(&x, &y, plane_state, 0);
-	dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
-
-	if (rotation & DRM_ROTATE_180) {
-		x += src_w;
-		y += src_h;
-	}
-
 	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);

 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@ -981,6 +945,26 @@ intel_check_sprite_plane(struct drm_plane *plane,
 		ret = skl_check_plane_surface(state);
 		if (ret)
 			return ret;
+
+		state->ctl = skl_plane_ctl(crtc_state, state);
+	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+		ret = i9xx_check_plane_surface(state);
+		if (ret)
+			return ret;
+
+		state->ctl = vlv_sprite_ctl(crtc_state, state);
+	} else if (INTEL_GEN(dev_priv) >= 7) {
+		ret = i9xx_check_plane_surface(state);
+		if (ret)
+			return ret;
+
+		state->ctl = ivb_sprite_ctl(crtc_state, state);
+	} else {
+		ret = i9xx_check_plane_surface(state);
+		if (ret)
+			return ret;
+
+		state->ctl = ilk_sprite_ctl(crtc_state, state);
 	}

 	return 0;
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@ -26,6 +26,19 @@
 #include "intel_uc.h"
 #include <linux/firmware.h>

+/* Cleans up uC firmware by releasing the firmware GEM obj.
+ */
+static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
+{
+	struct drm_i915_gem_object *obj;
+
+	obj = fetch_and_zero(&uc_fw->obj);
+	if (obj)
+		i915_gem_object_put(obj);
+
+	uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
+}
+
 /* Reset GuC providing us with fresh state for both GuC and HuC.
 */
 static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv)
@ -83,194 +96,14 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv)

 void intel_uc_init_early(struct drm_i915_private *dev_priv)
 {
-	mutex_init(&dev_priv->guc.send_mutex);
+	struct intel_guc *guc = &dev_priv->guc;
+
+	mutex_init(&guc->send_mutex);
+	guc->send = intel_guc_send_mmio;
 }

-void intel_uc_init_fw(struct drm_i915_private *dev_priv)
-{
-	if (dev_priv->huc.fw.path)
-		intel_uc_prepare_fw(dev_priv, &dev_priv->huc.fw);
-
-	if (dev_priv->guc.fw.path)
-		intel_uc_prepare_fw(dev_priv, &dev_priv->guc.fw);
-}
-
-int intel_uc_init_hw(struct drm_i915_private *dev_priv)
-{
-	int ret, attempts;
-
-	/* GuC not enabled, nothing to do */
-	if (!i915.enable_guc_loading)
-		return 0;
-
-	gen9_reset_guc_interrupts(dev_priv);
-
-	/* We need to notify the guc whenever we change the GGTT */
-	i915_ggtt_enable_guc(dev_priv);
-
-	if (i915.enable_guc_submission) {
-		ret = i915_guc_submission_init(dev_priv);
-		if (ret)
-			goto err;
-	}
-
-	/* WaEnableuKernelHeaderValidFix:skl */
-	/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
-	if (IS_GEN9(dev_priv))
-		attempts = 3;
-	else
-		attempts = 1;
-
-	while (attempts--) {
-		/*
-		 * Always reset the GuC just before (re)loading, so
-		 * that the state and timing are fairly predictable
-		 */
-		ret = __intel_uc_reset_hw(dev_priv);
-		if (ret)
-			goto err_submission;
-
-		intel_huc_init_hw(&dev_priv->huc);
-		ret = intel_guc_init_hw(&dev_priv->guc);
-		if (ret == 0 || ret != -EAGAIN)
-			break;
-
-		DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and "
-				 "retry %d more time(s)\n", ret, attempts);
-	}
-
-	/* Did we succeded or run out of retries? */
-	if (ret)
-		goto err_submission;
-
-	intel_guc_auth_huc(dev_priv);
-	if (i915.enable_guc_submission) {
-		if (i915.guc_log_level >= 0)
-			gen9_enable_guc_interrupts(dev_priv);
-
-		ret = i915_guc_submission_enable(dev_priv);
-		if (ret)
-			goto err_submission;
-	}
-
-	return 0;
-
-	/*
-	 * We've failed to load the firmware :(
-	 *
-	 * Decide whether to disable GuC submission and fall back to
-	 * execlist mode, and whether to hide the error by returning
-	 * zero or to return -EIO, which the caller will treat as a
-	 * nonfatal error (i.e. it doesn't prevent driver load, but
-	 * marks the GPU as wedged until reset).
-	 */
-err_submission:
-	if (i915.enable_guc_submission)
-		i915_guc_submission_fini(dev_priv);
-
-err:
-	i915_ggtt_disable_guc(dev_priv);
-
-	DRM_ERROR("GuC init failed\n");
-	if (i915.enable_guc_loading > 1 || i915.enable_guc_submission > 1)
-		ret = -EIO;
-	else
-		ret = 0;
-
-	if (i915.enable_guc_submission) {
-		i915.enable_guc_submission = 0;
-		DRM_NOTE("Falling back from GuC submission to execlist mode\n");
-	}
-
-	return ret;
-}
-
-/*
- * Read GuC command/status register (SOFT_SCRATCH_0)
- * Return true if it contains a response rather than a command
- */
-static bool intel_guc_recv(struct intel_guc *guc, u32 *status)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-
-	u32 val = I915_READ(SOFT_SCRATCH(0));
-	*status = val;
-	return INTEL_GUC_RECV_IS_RESPONSE(val);
-}
-
-int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	u32 status;
-	int i;
-	int ret;
-
-	if (WARN_ON(len < 1 || len > 15))
-		return -EINVAL;
-
-	mutex_lock(&guc->send_mutex);
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
-
-	dev_priv->guc.action_count += 1;
-	dev_priv->guc.action_cmd = action[0];
-
-	for (i = 0; i < len; i++)
-		I915_WRITE(SOFT_SCRATCH(i), action[i]);
-
-	POSTING_READ(SOFT_SCRATCH(i - 1));
-
-	I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
-
-	/*
-	 * Fast commands should complete in less than 10us, so sample quickly
-	 * up to that length of time, then switch to a slower sleep-wait loop.
-	 * No inte_guc_send command should ever take longer than 10ms.
-	 */
-	ret = wait_for_us(intel_guc_recv(guc, &status), 10);
-	if (ret)
-		ret = wait_for(intel_guc_recv(guc, &status), 10);
-	if (status != INTEL_GUC_STATUS_SUCCESS) {
-		/*
-		 * Either the GuC explicitly returned an error (which
-		 * we convert to -EIO here) or no response at all was
-		 * received within the timeout limit (-ETIMEDOUT)
-		 */
-		if (ret != -ETIMEDOUT)
-			ret = -EIO;
-
-		DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;"
-			 " ret=%d status=0x%08X response=0x%08X\n",
-			 action[0], ret, status, I915_READ(SOFT_SCRATCH(15)));
-
-		dev_priv->guc.action_fail += 1;
-		dev_priv->guc.action_err = ret;
-	}
-	dev_priv->guc.action_status = status;
-
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	mutex_unlock(&guc->send_mutex);
-
-	return ret;
-}
-
-int intel_guc_sample_forcewake(struct intel_guc *guc)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	u32 action[2];
-
-	action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE;
-	/* WaRsDisableCoarsePowerGating:skl,bxt */
-	if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
-		action[1] = 0;
-	else
-		/* bit 0 and 1 are for Render and Media domain separately */
-		action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA;
-
-	return intel_guc_send(guc, action, ARRAY_SIZE(action));
-}
-
-void intel_uc_prepare_fw(struct drm_i915_private *dev_priv,
-			 struct intel_uc_fw *uc_fw)
+static void fetch_uc_fw(struct drm_i915_private *dev_priv,
+			struct intel_uc_fw *uc_fw)
 {
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	struct drm_i915_gem_object *obj;
@ -279,6 +112,9 @@ void intel_uc_prepare_fw(struct drm_i915_private *dev_priv,
 	size_t size;
 	int err;

+	if (!uc_fw->path)
+		return;
+
 	uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING;

 	DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n",
@ -291,7 +127,7 @@ void intel_uc_prepare_fw(struct drm_i915_private *dev_priv,
 		goto fail;

 	DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n",
-		uc_fw->path, fw);
+			 uc_fw->path, fw);

 	/* Check the size of the blob before examining buffer contents */
 	if (fw->size < sizeof(struct uc_css_header)) {
@ -304,7 +140,7 @@ void intel_uc_prepare_fw(struct drm_i915_private *dev_priv,
 	/* Firmware bits always start from header */
 	uc_fw->header_offset = 0;
 	uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw -
-		css->key_size_dw - css->exponent_size_dw) * sizeof(u32);
+			      css->key_size_dw - css->exponent_size_dw) * sizeof(u32);

 	if (uc_fw->header_size != sizeof(struct uc_css_header)) {
 		DRM_NOTE("CSS header definition mismatch\n");
@ -362,19 +198,21 @@ void intel_uc_prepare_fw(struct drm_i915_private *dev_priv,
 	}

 	if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) {
-		DRM_NOTE("Skipping uC firmware version check\n");
+		DRM_NOTE("Skipping %s firmware version check\n",
+			 intel_uc_fw_type_repr(uc_fw->type));
 	} else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
 		   uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
-		DRM_NOTE("uC firmware version %d.%d, required %d.%d\n",
-			uc_fw->major_ver_found, uc_fw->minor_ver_found,
-			uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+		DRM_NOTE("%s firmware version %d.%d, required %d.%d\n",
+			 intel_uc_fw_type_repr(uc_fw->type),
+			 uc_fw->major_ver_found, uc_fw->minor_ver_found,
+			 uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
 		err = -ENOEXEC;
 		goto fail;
 	}

 	DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n",
-			uc_fw->major_ver_found, uc_fw->minor_ver_found,
-			uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+			 uc_fw->major_ver_found, uc_fw->minor_ver_found,
+			 uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);

 	obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
 	if (IS_ERR(obj)) {
@ -386,7 +224,7 @@ void intel_uc_prepare_fw(struct drm_i915_private *dev_priv,
 	uc_fw->size = fw->size;

 	DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n",
-			uc_fw->obj);
+			 uc_fw->obj);

 	release_firmware(fw);
 	uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS;
@ -396,8 +234,214 @@ fail:
 	DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n",
 		 uc_fw->path, err);
 	DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n",
-		err, fw, uc_fw->obj);
+			 err, fw, uc_fw->obj);

 	release_firmware(fw);		/* OK even if fw is NULL */
 	uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL;
 }
+
+void intel_uc_init_fw(struct drm_i915_private *dev_priv)
+{
+	fetch_uc_fw(dev_priv, &dev_priv->huc.fw);
+	fetch_uc_fw(dev_priv, &dev_priv->guc.fw);
+}
+
+void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
+{
+	__intel_uc_fw_fini(&dev_priv->guc.fw);
+	__intel_uc_fw_fini(&dev_priv->huc.fw);
+}
+
+int intel_uc_init_hw(struct drm_i915_private *dev_priv)
+{
+	int ret, attempts;
+
+	if (!i915.enable_guc_loading)
+		return 0;
+
+	gen9_reset_guc_interrupts(dev_priv);
+
+	/* We need to notify the guc whenever we change the GGTT */
+	i915_ggtt_enable_guc(dev_priv);
+
+	if (i915.enable_guc_submission) {
+		/*
+		 * This is stuff we need to have available at fw load time
+		 * if we are planning to enable submission later
+		 */
+		ret = i915_guc_submission_init(dev_priv);
+		if (ret)
+			goto err_guc;
+	}
+
+	/* WaEnableuKernelHeaderValidFix:skl */
+	/* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */
+	if (IS_GEN9(dev_priv))
+		attempts = 3;
+	else
+		attempts = 1;
+
+	while (attempts--) {
+		/*
+		 * Always reset the GuC just before (re)loading, so
+		 * that the state and timing are fairly predictable
+		 */
+		ret = __intel_uc_reset_hw(dev_priv);
+		if (ret)
+			goto err_submission;
+
+		intel_huc_init_hw(&dev_priv->huc);
+		ret = intel_guc_init_hw(&dev_priv->guc);
+		if (ret == 0 || ret != -EAGAIN)
+			break;
+
+		DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and "
+				 "retry %d more time(s)\n", ret, attempts);
+	}
+
+	/* Did we succeded or run out of retries? */
+	if (ret)
+		goto err_submission;
+
+	intel_guc_auth_huc(dev_priv);
+	if (i915.enable_guc_submission) {
+		if (i915.guc_log_level >= 0)
+			gen9_enable_guc_interrupts(dev_priv);
+
+		ret = i915_guc_submission_enable(dev_priv);
+		if (ret)
+			goto err_interrupts;
+	}
+
+	return 0;
+
+	/*
+	 * We've failed to load the firmware :(
+	 *
+	 * Decide whether to disable GuC submission and fall back to
+	 * execlist mode, and whether to hide the error by returning
+	 * zero or to return -EIO, which the caller will treat as a
+	 * nonfatal error (i.e. it doesn't prevent driver load, but
+	 * marks the GPU as wedged until reset).
+	 */
+err_interrupts:
+	gen9_disable_guc_interrupts(dev_priv);
+err_submission:
+	if (i915.enable_guc_submission)
+		i915_guc_submission_fini(dev_priv);
+err_guc:
+	i915_ggtt_disable_guc(dev_priv);
+
+	DRM_ERROR("GuC init failed\n");
+	if (i915.enable_guc_loading > 1 || i915.enable_guc_submission > 1)
+		ret = -EIO;
+	else
+		ret = 0;
+
+	if (i915.enable_guc_submission) {
+		i915.enable_guc_submission = 0;
+		DRM_NOTE("Falling back from GuC submission to execlist mode\n");
+	}
+
+	return ret;
+}
+
+void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
+{
+	if (!i915.enable_guc_loading)
+		return;
+
+	if (i915.enable_guc_submission) {
+		i915_guc_submission_disable(dev_priv);
+		gen9_disable_guc_interrupts(dev_priv);
+		i915_guc_submission_fini(dev_priv);
+	}
+	i915_ggtt_disable_guc(dev_priv);
+}
+
+/*
+ * Read GuC command/status register (SOFT_SCRATCH_0)
+ * Return true if it contains a response rather than a command
+ */
+static bool guc_recv(struct intel_guc *guc, u32 *status)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+
+	u32 val = I915_READ(SOFT_SCRATCH(0));
+	*status = val;
+	return INTEL_GUC_RECV_IS_RESPONSE(val);
+}
+
+/*
+ * This function implements the MMIO based host to GuC interface.
+ */
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	u32 status;
+	int i;
+	int ret;
+
+	if (WARN_ON(len < 1 || len > 15))
+		return -EINVAL;
+
+	mutex_lock(&guc->send_mutex);
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER);
+
+	dev_priv->guc.action_count += 1;
+	dev_priv->guc.action_cmd = action[0];
+
+	for (i = 0; i < len; i++)
+		I915_WRITE(SOFT_SCRATCH(i), action[i]);
+
+	POSTING_READ(SOFT_SCRATCH(i - 1));
+
+	I915_WRITE(GUC_SEND_INTERRUPT, GUC_SEND_TRIGGER);
+
+	/*
+	 * Fast commands should complete in less than 10us, so sample quickly
+	 * up to that length of time, then switch to a slower sleep-wait loop.
+	 * No inte_guc_send command should ever take longer than 10ms.
+	 */
+	ret = wait_for_us(guc_recv(guc, &status), 10);
+	if (ret)
+		ret = wait_for(guc_recv(guc, &status), 10);
+	if (status != INTEL_GUC_STATUS_SUCCESS) {
+		/*
+		 * Either the GuC explicitly returned an error (which
+		 * we convert to -EIO here) or no response at all was
+		 * received within the timeout limit (-ETIMEDOUT)
+		 */
+		if (ret != -ETIMEDOUT)
+			ret = -EIO;
+
+		DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;"
+			 " ret=%d status=0x%08X response=0x%08X\n",
+			 action[0], ret, status, I915_READ(SOFT_SCRATCH(15)));
+
+		dev_priv->guc.action_fail += 1;
+		dev_priv->guc.action_err = ret;
+	}
+	dev_priv->guc.action_status = status;
+
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER);
+	mutex_unlock(&guc->send_mutex);
+
+	return ret;
+}
+
+int intel_guc_sample_forcewake(struct intel_guc *guc)
+{
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	u32 action[2];
+
+	action[0] = INTEL_GUC_ACTION_SAMPLE_FORCEWAKE;
+	/* WaRsDisableCoarsePowerGating:skl,bxt */
+	if (!intel_enable_rc6() || NEEDS_WaRsDisableCoarsePowerGating(dev_priv))
+		action[1] = 0;
+	else
+		/* bit 0 and 1 are for Render and Media domain separately */
+		action[1] = GUC_FORCEWAKE_RENDER | GUC_FORCEWAKE_MEDIA;
+
+	return intel_guc_send(guc, action, ARRAY_SIZE(action));
+}
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@ -34,7 +34,9 @@ struct drm_i915_gem_request;

 /*
 * This structure primarily describes the GEM object shared with the GuC.
- * The GEM object is held for the entire lifetime of our interaction with
+ * The specs sometimes refer to this object as a "GuC context", but we use
+ * the term "client" to avoid confusion with hardware contexts. This
+ * GEM object is held for the entire lifetime of our interaction with
 * the GuC, being allocated before the GuC is loaded with its firmware.
 * Because there's no way to update the address used by the GuC after
 * initialisation, the shared object must stay pinned into the GGTT as
@ -44,7 +46,7 @@ struct drm_i915_gem_request;
 *
 * The single GEM object described here is actually made up of several
 * separate areas, as far as the GuC is concerned. The first page (kept
- * kmap'd) includes the "process decriptor" which holds sequence data for
+ * kmap'd) includes the "process descriptor" which holds sequence data for
 * the doorbell, and one cacheline which actually *is* the doorbell; a
 * write to this will "ring the doorbell" (i.e. send an interrupt to the
 * GuC). The subsequent  pages of the client object constitute the work
@ -72,13 +74,12 @@ struct i915_guc_client {

 	uint32_t engines;		/* bitmap of (host) engine ids	*/
 	uint32_t priority;
-	uint32_t ctx_index;
+	u32 stage_id;
 	uint32_t proc_desc_offset;

-	uint32_t doorbell_offset;
-	uint32_t doorbell_cookie;
-	uint16_t doorbell_id;
-	uint16_t padding[3];		/* Maintain alignment		*/
+	u16 doorbell_id;
+	unsigned long doorbell_offset;
+	u32 doorbell_cookie;

 	spinlock_t wq_lock;
 	uint32_t wq_offset;
@ -100,11 +101,40 @@ enum intel_uc_fw_status {
 	INTEL_UC_FIRMWARE_SUCCESS
 };

+/* User-friendly representation of an enum */
+static inline
+const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
+{
+	switch (status) {
+	case INTEL_UC_FIRMWARE_FAIL:
+		return "FAIL";
+	case INTEL_UC_FIRMWARE_NONE:
+		return "NONE";
+	case INTEL_UC_FIRMWARE_PENDING:
+		return "PENDING";
+	case INTEL_UC_FIRMWARE_SUCCESS:
+		return "SUCCESS";
+	}
+	return "<invalid>";
+}
+
 enum intel_uc_fw_type {
 	INTEL_UC_FW_TYPE_GUC,
 	INTEL_UC_FW_TYPE_HUC
 };

+/* User-friendly representation of an enum */
+static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type)
+{
+	switch (type) {
+	case INTEL_UC_FW_TYPE_GUC:
+		return "GuC";
+	case INTEL_UC_FW_TYPE_HUC:
+		return "HuC";
+	}
+	return "uC";
+}
+
 /*
 * This structure encapsulates all the data needed during the process
 * of fetching, caching, and loading the firmware image into the GuC.
@ -133,11 +163,13 @@ struct intel_uc_fw {
 struct intel_guc_log {
 	uint32_t flags;
 	struct i915_vma *vma;
-	void *buf_addr;
-	struct workqueue_struct *flush_wq;
-	struct work_struct flush_work;
-	struct rchan *relay_chan;
-
+	/* The runtime stuff gets created only when GuC logging gets enabled */
+	struct {
+		void *buf_addr;
+		struct workqueue_struct *flush_wq;
+		struct work_struct flush_work;
+		struct rchan *relay_chan;
+	} runtime;
 	/* logging related stats */
 	u32 capture_miss_count;
 	u32 flush_interrupt_count;
@ -154,12 +186,13 @@ struct intel_guc {
 	bool interrupts_enabled;

 	struct i915_vma *ads_vma;
-	struct i915_vma *ctx_pool_vma;
-	struct ida ctx_ids;
+	struct i915_vma *stage_desc_pool;
+	void *stage_desc_pool_vaddr;
+	struct ida stage_ids;

 	struct i915_guc_client *execbuf_client;

-	DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS);
+	DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
 	uint32_t db_cacheline;		/* Cyclic counter mod pagesize	*/

 	/* Action status & statistics */
@ -174,6 +207,9 @@ struct intel_guc {

 	/* To serialize the intel_guc_send actions */
 	struct mutex send_mutex;
+
+	/* GuC's FW specific send function */
+	int (*send)(struct intel_guc *guc, const u32 *data, u32 len);
 };

 struct intel_huc {
@ -187,17 +223,19 @@ struct intel_huc {
 void intel_uc_sanitize_options(struct drm_i915_private *dev_priv);
 void intel_uc_init_early(struct drm_i915_private *dev_priv);
 void intel_uc_init_fw(struct drm_i915_private *dev_priv);
+void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
 int intel_uc_init_hw(struct drm_i915_private *dev_priv);
-void intel_uc_prepare_fw(struct drm_i915_private *dev_priv,
-			 struct intel_uc_fw *uc_fw);
-int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len);
+void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
 int intel_guc_sample_forcewake(struct intel_guc *guc);
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
+static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
+{
+	return guc->send(guc, action, len);
+}

 /* intel_guc_loader.c */
 int intel_guc_select_fw(struct intel_guc *guc);
 int intel_guc_init_hw(struct intel_guc *guc);
-void intel_guc_fini(struct drm_i915_private *dev_priv);
-const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status);
 int intel_guc_suspend(struct drm_i915_private *dev_priv);
 int intel_guc_resume(struct drm_i915_private *dev_priv);
 u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
@ -212,10 +250,11 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
 struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);

 /* intel_guc_log.c */
-void intel_guc_log_create(struct intel_guc *guc);
+int intel_guc_log_create(struct intel_guc *guc);
+void intel_guc_log_destroy(struct intel_guc *guc);
+int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
 void i915_guc_log_register(struct drm_i915_private *dev_priv);
 void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
-int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);

 static inline u32 guc_ggtt_offset(struct i915_vma *vma)
 {
@ -227,7 +266,6 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)

 /* intel_huc.c */
 void intel_huc_select_fw(struct intel_huc *huc);
-void intel_huc_fini(struct drm_i915_private  *dev_priv);
 int intel_huc_init_hw(struct intel_huc *huc);
 void intel_guc_auth_huc(struct drm_i915_private *dev_priv);

--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@ -52,10 +52,10 @@ intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id)
 }

 static inline void
-fw_domain_reset(const struct intel_uncore_forcewake_domain *d)
+fw_domain_reset(struct drm_i915_private *i915,
+		const struct intel_uncore_forcewake_domain *d)
 {
-	WARN_ON(!i915_mmio_reg_valid(d->reg_set));
-	__raw_i915_write32(d->i915, d->reg_set, d->val_reset);
+	__raw_i915_write32(i915, d->reg_set, i915->uncore.fw_reset);
 }

 static inline void
@ -69,9 +69,10 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
 }

 static inline void
-fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d)
+fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
+			 const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(d->i915, d->reg_ack) &
+	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
 			     FORCEWAKE_KERNEL) == 0,
 			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
@ -79,15 +80,17 @@ fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d)
 }

 static inline void
-fw_domain_get(const struct intel_uncore_forcewake_domain *d)
+fw_domain_get(struct drm_i915_private *i915,
+	      const struct intel_uncore_forcewake_domain *d)
 {
-	__raw_i915_write32(d->i915, d->reg_set, d->val_set);
+	__raw_i915_write32(i915, d->reg_set, i915->uncore.fw_set);
 }

 static inline void
-fw_domain_wait_ack(const struct intel_uncore_forcewake_domain *d)
+fw_domain_wait_ack(const struct drm_i915_private *i915,
+		   const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(d->i915, d->reg_ack) &
+	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
 			     FORCEWAKE_KERNEL),
 			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
@ -95,72 +98,59 @@ fw_domain_wait_ack(const struct intel_uncore_forcewake_domain *d)
 }

 static inline void
-fw_domain_put(const struct intel_uncore_forcewake_domain *d)
+fw_domain_put(const struct drm_i915_private *i915,
+	      const struct intel_uncore_forcewake_domain *d)
 {
-	__raw_i915_write32(d->i915, d->reg_set, d->val_clear);
-}
-
-static inline void
-fw_domain_posting_read(const struct intel_uncore_forcewake_domain *d)
-{
-	/* something from same cacheline, but not from the set register */
-	if (i915_mmio_reg_valid(d->reg_post))
-		__raw_posting_read(d->i915, d->reg_post);
+	__raw_i915_write32(i915, d->reg_set, i915->uncore.fw_clear);
 }

 static void
-fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains)
+fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *d;
+	unsigned int tmp;

-	for_each_fw_domain_masked(d, fw_domains, dev_priv) {
-		fw_domain_wait_ack_clear(d);
-		fw_domain_get(d);
+	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
+
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
+		fw_domain_wait_ack_clear(i915, d);
+		fw_domain_get(i915, d);
 	}

-	for_each_fw_domain_masked(d, fw_domains, dev_priv)
-		fw_domain_wait_ack(d);
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
+		fw_domain_wait_ack(i915, d);

-	dev_priv->uncore.fw_domains_active |= fw_domains;
+	i915->uncore.fw_domains_active |= fw_domains;
 }

 static void
-fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains)
+fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *d;
+	unsigned int tmp;

-	for_each_fw_domain_masked(d, fw_domains, dev_priv) {
-		fw_domain_put(d);
-		fw_domain_posting_read(d);
-	}
+	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);

-	dev_priv->uncore.fw_domains_active &= ~fw_domains;
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
+		fw_domain_put(i915, d);
+
+	i915->uncore.fw_domains_active &= ~fw_domains;
 }

 static void
-fw_domains_posting_read(struct drm_i915_private *dev_priv)
+fw_domains_reset(struct drm_i915_private *i915,
+		 enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *d;
+	unsigned int tmp;

-	/* No need to do for all, just do for first found */
-	for_each_fw_domain(d, dev_priv) {
-		fw_domain_posting_read(d);
-		break;
-	}
-}
-
-static void
-fw_domains_reset(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains)
-{
-	struct intel_uncore_forcewake_domain *d;
-
-	if (dev_priv->uncore.fw_domains == 0)
+	if (!fw_domains)
 		return;

-	for_each_fw_domain_masked(d, fw_domains, dev_priv)
-		fw_domain_reset(d);
+	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);

-	fw_domains_posting_read(dev_priv);
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
+		fw_domain_reset(i915, d);
 }

 static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv)
@ -236,7 +226,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
 {
 	struct intel_uncore_forcewake_domain *domain =
 	       container_of(timer, struct intel_uncore_forcewake_domain, timer);
-	struct drm_i915_private *dev_priv = domain->i915;
+	struct drm_i915_private *dev_priv =
+		container_of(domain, struct drm_i915_private, uncore.fw_domain[domain->id]);
 	unsigned long irqflags;

 	assert_rpm_device_not_suspended(dev_priv);
@ -266,9 +257,11 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv,
 	 * timers are run before holding.
 	 */
 	while (1) {
+		unsigned int tmp;
+
 		active_domains = 0;

-		for_each_fw_domain(domain, dev_priv) {
+		for_each_fw_domain(domain, dev_priv, tmp) {
 			if (hrtimer_cancel(&domain->timer) == 0)
 				continue;

@ -277,7 +270,7 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv,

 		spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);

-		for_each_fw_domain(domain, dev_priv) {
+		for_each_fw_domain(domain, dev_priv, tmp) {
 			if (hrtimer_active(&domain->timer))
 				active_domains |= domain->mask;
 		}
@ -300,7 +293,7 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv,
 	if (fw)
 		dev_priv->uncore.funcs.force_wake_put(dev_priv, fw);

-	fw_domains_reset(dev_priv, FORCEWAKE_ALL);
+	fw_domains_reset(dev_priv, dev_priv->uncore.fw_domains);

 	if (restore) { /* If reset with a user forcewake, try to restore */
 		if (fw)
@ -457,13 +450,13 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
 					 enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *domain;
+	unsigned int tmp;

 	fw_domains &= dev_priv->uncore.fw_domains;

-	for_each_fw_domain_masked(domain, fw_domains, dev_priv) {
+	for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp)
 		if (domain->wake_count++)
 			fw_domains &= ~domain->mask;
-	}

 	if (fw_domains)
 		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
@ -520,10 +513,11 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
 					 enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *domain;
+	unsigned int tmp;

 	fw_domains &= dev_priv->uncore.fw_domains;

-	for_each_fw_domain_masked(domain, fw_domains, dev_priv) {
+	for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) {
 		if (WARN_ON(domain->wake_count == 0))
 			continue;

@ -928,8 +922,11 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv,
 					enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *domain;
+	unsigned int tmp;

-	for_each_fw_domain_masked(domain, fw_domains, dev_priv)
+	GEM_BUG_ON(fw_domains & ~dev_priv->uncore.fw_domains);
+
+	for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp)
 		fw_domain_arm_timer(domain);

 	dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
@ -1141,41 +1138,27 @@ static void fw_domain_init(struct drm_i915_private *dev_priv,

 	WARN_ON(d->wake_count);

+	WARN_ON(!i915_mmio_reg_valid(reg_set));
+	WARN_ON(!i915_mmio_reg_valid(reg_ack));
+
 	d->wake_count = 0;
 	d->reg_set = reg_set;
 	d->reg_ack = reg_ack;

-	if (IS_GEN6(dev_priv)) {
-		d->val_reset = 0;
-		d->val_set = FORCEWAKE_KERNEL;
-		d->val_clear = 0;
-	} else {
-		/* WaRsClearFWBitsAtReset:bdw,skl */
-		d->val_reset = _MASKED_BIT_DISABLE(0xffff);
-		d->val_set = _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL);
-		d->val_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL);
-	}
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		d->reg_post = FORCEWAKE_ACK_VLV;
-	else if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv) || IS_GEN8(dev_priv))
-		d->reg_post = ECOBUS;
-
-	d->i915 = dev_priv;
 	d->id = domain_id;

 	BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER));
 	BUILD_BUG_ON(FORCEWAKE_BLITTER != (1 << FW_DOMAIN_ID_BLITTER));
 	BUILD_BUG_ON(FORCEWAKE_MEDIA != (1 << FW_DOMAIN_ID_MEDIA));

-	d->mask = 1 << domain_id;
+	d->mask = BIT(domain_id);

 	hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	d->timer.function = intel_uncore_fw_release_timer;

-	dev_priv->uncore.fw_domains |= (1 << domain_id);
+	dev_priv->uncore.fw_domains |= BIT(domain_id);

-	fw_domain_reset(d);
+	fw_domain_reset(dev_priv, d);
 }

 static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
@ -1183,6 +1166,17 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 	if (INTEL_GEN(dev_priv) <= 5 || intel_vgpu_active(dev_priv))
 		return;

+	if (IS_GEN6(dev_priv)) {
+		dev_priv->uncore.fw_reset = 0;
+		dev_priv->uncore.fw_set = FORCEWAKE_KERNEL;
+		dev_priv->uncore.fw_clear = 0;
+	} else {
+		/* WaRsClearFWBitsAtReset:bdw,skl */
+		dev_priv->uncore.fw_reset = _MASKED_BIT_DISABLE(0xffff);
+		dev_priv->uncore.fw_set = _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL);
+		dev_priv->uncore.fw_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL);
+	}
+
 	if (IS_GEN9(dev_priv)) {
 		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
 		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
@ -1246,9 +1240,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 			       FORCEWAKE_MT, FORCEWAKE_MT_ACK);

 		spin_lock_irq(&dev_priv->uncore.lock);
-		fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_ALL);
+		fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER);
 		ecobus = __raw_i915_read32(dev_priv, ECOBUS);
-		fw_domains_put_with_fifo(dev_priv, FORCEWAKE_ALL);
+		fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER);
 		spin_unlock_irq(&dev_priv->uncore.lock);

 		if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@ -291,8 +291,6 @@ static int begin_live_test(struct live_test *t,
 		return err;
 	}

-	i915_gem_retire_requests(i915);
-
 	i915->gpu_error.missed_irq_rings = 0;
 	t->reset_count = i915_reset_count(&i915->gpu_error);

@ -303,7 +301,9 @@ static int end_live_test(struct live_test *t)
 {
 	struct drm_i915_private *i915 = t->i915;

-	if (wait_for(intel_engines_are_idle(i915), 1)) {
+	i915_gem_retire_requests(i915);
+
+	if (wait_for(intel_engines_are_idle(i915), 10)) {
 		pr_err("%s(%s): GPU not idle\n", t->func, t->name);
 		return -EIO;
 	}
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@ -235,7 +235,6 @@ static void hang_fini(struct hang *h)
 	i915_gem_object_put(h->hws);

 	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
-	i915_gem_retire_requests(h->i915);
 }

 static int igt_hang_sanitycheck(void *arg)
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@ -118,7 +118,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	ring->vaddr = (void *)(ring + 1);

 	INIT_LIST_HEAD(&ring->request_list);
-	ring->last_retired_head = -1;
 	intel_ring_update_space(ring);

 	return ring;
--- a/drivers/gpu/drm/i915/selftests/scatterlist.c
+++ b/drivers/gpu/drm/i915/selftests/scatterlist.c
@ -189,6 +189,13 @@ static unsigned int random(unsigned long n,
 	return 1 + (prandom_u32_state(rnd) % 1024);
 }

+static inline bool page_contiguous(struct page *first,
+				   struct page *last,
+				   unsigned long npages)
+{
+	return first + npages == last;
+}
+
 static int alloc_table(struct pfn_table *pt,
 		       unsigned long count, unsigned long max,
 		       npages_fn_t npages_fn,
@ -216,7 +223,9 @@ static int alloc_table(struct pfn_table *pt,
 		unsigned long npages = npages_fn(n, count, rnd);

 		/* Nobody expects the Sparse Memmap! */
-		if (pfn_to_page(pfn + npages) != pfn_to_page(pfn) + npages) {
+		if (!page_contiguous(pfn_to_page(pfn),
+				     pfn_to_page(pfn + npages),
+				     npages)) {
 			sg_free_table(&pt->st);
 			return -ENOSPC;
 		}