46 files changed, 880 insertions, 397 deletions
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 0761a03cdbb2..2bf9670ba29b 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -184,19 +184,27 @@ EXPORT_SYMBOL(drm_mm_get_block_generic);
  * -ENOSPC if no suitable free area is available. The preallocated memory node
  * must be cleared.
  */
-int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node,
-		       unsigned long size, unsigned alignment)
+int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
+			       unsigned long size, unsigned alignment,
+			       unsigned long color)
 {
 	struct drm_mm_node *hole_node;
 
-	hole_node = drm_mm_search_free(mm, size, alignment, false);
+	hole_node = drm_mm_search_free_generic(mm, size, alignment,
+					       color, 0);
 	if (!hole_node)
 		return -ENOSPC;
 
-	drm_mm_insert_helper(hole_node, node, size, alignment, 0);
-
+	drm_mm_insert_helper(hole_node, node, size, alignment, color);
 	return 0;
 }
+EXPORT_SYMBOL(drm_mm_insert_node_generic);
+
+int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node,
+		       unsigned long size, unsigned alignment)
+{
+	return drm_mm_insert_node_generic(mm, node, size, alignment, 0);
+}
 EXPORT_SYMBOL(drm_mm_insert_node);
 
 static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
@@ -275,22 +283,31 @@ EXPORT_SYMBOL(drm_mm_get_block_range_generic);
  * -ENOSPC if no suitable free area is available. This is for range
  * restricted allocations. The preallocated memory node must be cleared.
  */
-int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node,
-				unsigned long size, unsigned alignment,
-				unsigned long start, unsigned long end)
+int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
+					unsigned long size, unsigned alignment, unsigned long color,
+					unsigned long start, unsigned long end)
 {
 	struct drm_mm_node *hole_node;
 
-	hole_node = drm_mm_search_free_in_range(mm, size, alignment,
-						start, end, false);
+	hole_node = drm_mm_search_free_in_range_generic(mm,
+							size, alignment, color,
+							start, end, 0);
 	if (!hole_node)
 		return -ENOSPC;
 
-	drm_mm_insert_helper_range(hole_node, node, size, alignment, 0,
+	drm_mm_insert_helper_range(hole_node, node,
+				   size, alignment, color,
 				   start, end);
-
 	return 0;
 }
+EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic);
+
+int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node,
+				unsigned long size, unsigned alignment,
+				unsigned long start, unsigned long end)
+{
+	return drm_mm_insert_node_in_range_generic(mm, node, size, alignment, 0, start, end);
+}
 EXPORT_SYMBOL(drm_mm_insert_node_in_range);
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8f63cd5de4b4..99daa896105d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -989,6 +989,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_SECURE_BATCHES:
 		value = capable(CAP_SYS_ADMIN);
 		break;
+	case I915_PARAM_HAS_PINNED_BATCHES:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 				 param->param);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 557843dd4b2e..ed3059575576 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -780,6 +780,7 @@ typedef struct drm_i915_private {
 		struct i915_hw_ppgtt *aliasing_ppgtt;
 
 		struct shrinker inactive_shrinker;
+		bool shrinker_no_lock_stealing;
 
 		/**
 		 * List of objects currently involved in rendering.
@@ -1100,6 +1101,7 @@ struct drm_i915_gem_object {
 	 */
 	atomic_t pending_flip;
 };
+#define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base)
 
 #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base)
 
@@ -1166,6 +1168,9 @@ struct drm_i915_file_private {
 #define IS_IVB_GT1(dev)		((dev)->pci_device == 0x0156 || \
 				 (dev)->pci_device == 0x0152 ||	\
 				 (dev)->pci_device == 0x015a)
+#define IS_SNB_GT1(dev)		((dev)->pci_device == 0x0102 || \
+				 (dev)->pci_device == 0x0106 ||	\
+				 (dev)->pci_device == 0x010A)
 #define IS_VALLEYVIEW(dev)	(INTEL_INFO(dev)->is_valleyview)
 #define IS_HASWELL(dev)	(INTEL_INFO(dev)->is_haswell)
 #define IS_MOBILE(dev)		(INTEL_INFO(dev)->is_mobile)
@@ -1196,6 +1201,9 @@ struct drm_i915_file_private {
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
 
+/* Early gen2 have a totally busted CS tlb and require pinned batches. */
+#define HAS_BROKEN_CS_TLB(dev)		(IS_I830(dev) || IS_845G(dev))
+
 /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
  * rows, which changed the alignment requirements and fence programming.
  */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 742206e45103..da3c82e301b1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1517,9 +1517,11 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 	if (obj->base.map_list.map)
 		return 0;
 
+	dev_priv->mm.shrinker_no_lock_stealing = true;
+
 	ret = drm_gem_create_mmap_offset(&obj->base);
 	if (ret != -ENOSPC)
-		return ret;
+		goto out;
 
 	/* Badly fragmented mmap space? The only way we can recover
 	 * space is by destroying unwanted objects. We can't randomly release
@@ -1531,10 +1533,14 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
 	i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT);
 	ret = drm_gem_create_mmap_offset(&obj->base);
 	if (ret != -ENOSPC)
-		return ret;
+		goto out;
 
 	i915_gem_shrink_all(dev_priv);
-	return drm_gem_create_mmap_offset(&obj->base);
+	ret = drm_gem_create_mmap_offset(&obj->base);
+out:
+	dev_priv->mm.shrinker_no_lock_stealing = false;
+
+	return ret;
 }
 
 static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
@@ -2890,7 +2896,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 {
 	struct drm_device *dev = obj->base.dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	struct drm_mm_node *free_space;
+	struct drm_mm_node *node;
 	u32 size, fence_size, fence_alignment, unfenced_alignment;
 	bool mappable, fenceable;
 	int ret;
@@ -2936,66 +2942,54 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 
 	i915_gem_object_pin_pages(obj);
 
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (node == NULL) {
+		i915_gem_object_unpin_pages(obj);
+		return -ENOMEM;
+	}
+
  search_free:
 	if (map_and_fenceable)
-		free_space = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space,
-							       size, alignment, obj->cache_level,
-							       0, dev_priv->mm.gtt_mappable_end,
-							       false);
+		ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node,
+							  size, alignment, obj->cache_level,
+							  0, dev_priv->mm.gtt_mappable_end);
 	else
-		free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space,
-						      size, alignment, obj->cache_level,
-						      false);
-
-	if (free_space != NULL) {
-		if (map_and_fenceable)
-			free_space =
-				drm_mm_get_block_range_generic(free_space,
-							       size, alignment, obj->cache_level,
-							       0, dev_priv->mm.gtt_mappable_end,
-							       false);
-		else
-			free_space =
-				drm_mm_get_block_generic(free_space,
-							 size, alignment, obj->cache_level,
-							 false);
-	}
-	if (free_space == NULL) {
+		ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node,
+						 size, alignment, obj->cache_level);
+	if (ret) {
 		ret = i915_gem_evict_something(dev, size, alignment,
 					       obj->cache_level,
 					       map_and_fenceable,
 					       nonblocking);
-		if (ret) {
-			i915_gem_object_unpin_pages(obj);
-			return ret;
-		}
+		if (ret == 0)
+			goto search_free;
 
-		goto search_free;
+		i915_gem_object_unpin_pages(obj);
+		kfree(node);
+		return ret;
 	}
-	if (WARN_ON(!i915_gem_valid_gtt_space(dev,
-					      free_space,
-					      obj->cache_level))) {
+	if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) {
 		i915_gem_object_unpin_pages(obj);
-		drm_mm_put_block(free_space);
+		drm_mm_put_block(node);
 		return -EINVAL;
 	}
 
 	ret = i915_gem_gtt_prepare_object(obj);
 	if (ret) {
 		i915_gem_object_unpin_pages(obj);
-		drm_mm_put_block(free_space);
+		drm_mm_put_block(node);
 		return ret;
 	}
 
 	list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list);
 	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
 
-	obj->gtt_space = free_space;
-	obj->gtt_offset = free_space->start;
+	obj->gtt_space = node;
+	obj->gtt_offset = node->start;
 
 	fenceable =
-		free_space->size == fence_size &&
-		(free_space->start & (fence_alignment - 1)) == 0;
+		node->size == fence_size &&
+		(node->start & (fence_alignment - 1)) == 0;
 
 	mappable =
 		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
@@ -4392,6 +4386,9 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
 		if (!mutex_is_locked_by(&dev->struct_mutex, current))
 			return 0;
 
+		if (dev_priv->mm.shrinker_no_lock_stealing)
+			return 0;
+
 		unlock = false;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 773ef77b6c22..7be4241e8242 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -226,7 +226,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
 {
 	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 
-	return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, 0600);
+	return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, flags);
 }
 
 static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index ee8f97f0539e..d6a994a07393 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -808,6 +808,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 		flags |= I915_DISPATCH_SECURE;
 	}
+	if (args->flags & I915_EXEC_IS_PINNED)
+		flags |= I915_DISPATCH_PINNED;
 
 	switch (args->flags & I915_EXEC_RING_MASK) {
 	case I915_EXEC_DEFAULT:
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index a4dc97f8b9f0..2220dec3e5d9 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1087,6 +1087,18 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv,
 	if (!ring->get_seqno)
 		return NULL;
 
+	if (HAS_BROKEN_CS_TLB(dev_priv->dev)) {
+		u32 acthd = I915_READ(ACTHD);
+
+		if (WARN_ON(ring->id != RCS))
+			return NULL;
+
+		obj = ring->private;
+		if (acthd >= obj->gtt_offset &&
+		    acthd < obj->gtt_offset + obj->base.size)
+			return i915_error_object_create(dev_priv, obj);
+	}
+
 	seqno = ring->get_seqno(ring, false);
 	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
 		if (obj->ring != ring)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 3f75cfaf1c3f..186ee5c85b51 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -517,6 +517,7 @@
  * the enables for writing to the corresponding low bit.
  */
 #define _3D_CHICKEN	0x02084
+#define  _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB	(1 << 10)
 #define _3D_CHICKEN2	0x0208c
 /* Disables pipelining of read flushes past the SF-WIZ interface.
  * Required on all Ironlake steppings according to the B-Spec, but the
@@ -532,7 +533,8 @@
 # define MI_FLUSH_ENABLE				(1 << 12)
 
 #define GEN6_GT_MODE	0x20d0
-#define   GEN6_GT_MODE_HI	(1 << 9)
+#define   GEN6_GT_MODE_HI				(1 << 9)
+#define   GEN6_TD_FOUR_ROW_DISPATCH_DISABLE		(1 << 5)
 
 #define GFX_MODE	0x02520
 #define GFX_MODE_GEN7	0x0229c
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 5d127e068950..a9fb046b94a1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -8144,10 +8144,6 @@ intel_modeset_stage_output_state(struct drm_device *dev,
 			DRM_DEBUG_KMS("encoder changed, full mode switch\n");
 			config->mode_changed = true;
 		}
-
-		/* Disable all disconnected encoders. */
-		if (connector->base.status == connector_status_disconnected)
-			connector->new_encoder = NULL;
 	}
 	/* connector->new_encoder is now updated for all connectors. */
 
@@ -9167,6 +9163,23 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder)
 	 * the crtc fixup. */
 }
 
+static void i915_redisable_vga(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 vga_reg;
+
+	if (HAS_PCH_SPLIT(dev))
+		vga_reg = CPU_VGACNTRL;
+	else
+		vga_reg = VGACNTRL;
+
+	if (I915_READ(vga_reg) != VGA_DISP_DISABLE) {
+		DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n");
+		I915_WRITE(vga_reg, VGA_DISP_DISABLE);
+		POSTING_READ(vga_reg);
+	}
+}
+
 /* Scan out the current hw modeset state, sanitizes it and maps it into the drm
  * and i915 state tracking structures. */
 void intel_modeset_setup_hw_state(struct drm_device *dev,
@@ -9275,6 +9288,8 @@ void intel_modeset_setup_hw_state(struct drm_device *dev,
 			intel_set_mode(&crtc->base, &crtc->base.mode,
 				       crtc->base.x, crtc->base.y, crtc->base.fb);
 		}
+
+		i915_redisable_vga(dev);
 	} else {
 		intel_modeset_update_staged_output_state(dev);
 	}
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 496caa73eb70..e6f54ffab3ba 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -405,7 +405,7 @@ void intel_update_fbc(struct drm_device *dev)
 	 *   - going to an unsupported config (interlace, pixel multiply, etc.)
 	 */
 	list_for_each_entry(tmp_crtc, &dev->mode_config.crtc_list, head) {
-		if (tmp_crtc->enabled &&
+		if (to_intel_crtc(tmp_crtc)->active &&
 		    !to_intel_crtc(tmp_crtc)->primary_disabled &&
 		    tmp_crtc->fb) {
 			if (crtc) {
@@ -992,7 +992,7 @@ static struct drm_crtc *single_enabled_crtc(struct drm_device *dev)
 	struct drm_crtc *crtc, *enabled = NULL;
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		if (crtc->enabled && crtc->fb) {
+		if (to_intel_crtc(crtc)->active && crtc->fb) {
 			if (enabled)
 				return NULL;
 			enabled = crtc;
@@ -1086,7 +1086,7 @@ static bool g4x_compute_wm0(struct drm_device *dev,
 	int entries, tlb_miss;
 
 	crtc = intel_get_crtc_for_plane(dev, plane);
-	if (crtc->fb == NULL || !crtc->enabled) {
+	if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) {
 		*cursor_wm = cursor->guard_size;
 		*plane_wm = display->guard_size;
 		return false;
@@ -1215,7 +1215,7 @@ static bool vlv_compute_drain_latency(struct drm_device *dev,
 	int entries;
 
 	crtc = intel_get_crtc_for_plane(dev, plane);
-	if (crtc->fb == NULL || !crtc->enabled)
+	if (crtc->fb == NULL || !to_intel_crtc(crtc)->active)
 		return false;
 
 	clock = crtc->mode.clock;	/* VESA DOT Clock */
@@ -1286,6 +1286,7 @@ static void valleyview_update_wm(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int planea_wm, planeb_wm, cursora_wm, cursorb_wm;
 	int plane_sr, cursor_sr;
+	int ignore_plane_sr, ignore_cursor_sr;
 	unsigned int enabled = 0;
 
 	vlv_update_drain_latency(dev);
@@ -1302,17 +1303,23 @@ static void valleyview_update_wm(struct drm_device *dev)
 			    &planeb_wm, &cursorb_wm))
 		enabled |= 2;
 
-	plane_sr = cursor_sr = 0;
 	if (single_plane_enabled(enabled) &&
 	    g4x_compute_srwm(dev, ffs(enabled) - 1,
 			     sr_latency_ns,
 			     &valleyview_wm_info,
 			     &valleyview_cursor_wm_info,
-			     &plane_sr, &cursor_sr))
+			     &plane_sr, &ignore_cursor_sr) &&
+	    g4x_compute_srwm(dev, ffs(enabled) - 1,
+			     2*sr_latency_ns,
+			     &valleyview_wm_info,
+			     &valleyview_cursor_wm_info,
+			     &ignore_plane_sr, &cursor_sr)) {
 		I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN);
-	else
+	} else {
 		I915_WRITE(FW_BLC_SELF_VLV,
 			   I915_READ(FW_BLC_SELF_VLV) & ~FW_CSPWRDWNEN);
+		plane_sr = cursor_sr = 0;
+	}
 
 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
 		      planea_wm, cursora_wm,
@@ -1352,17 +1359,18 @@ static void g4x_update_wm(struct drm_device *dev)
 			    &planeb_wm, &cursorb_wm))
 		enabled |= 2;
 
-	plane_sr = cursor_sr = 0;
 	if (single_plane_enabled(enabled) &&
 	    g4x_compute_srwm(dev, ffs(enabled) - 1,
 			     sr_latency_ns,
 			     &g4x_wm_info,
 			     &g4x_cursor_wm_info,
-			     &plane_sr, &cursor_sr))
+			     &plane_sr, &cursor_sr)) {
 		I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN);
-	else
+	} else {
 		I915_WRITE(FW_BLC_SELF,
 			   I915_READ(FW_BLC_SELF) & ~FW_BLC_SELF_EN);
+		plane_sr = cursor_sr = 0;
+	}
 
 	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
 		      planea_wm, cursora_wm,
@@ -1468,7 +1476,7 @@ static void i9xx_update_wm(struct drm_device *dev)
 
 	fifo_size = dev_priv->display.get_fifo_size(dev, 0);
 	crtc = intel_get_crtc_for_plane(dev, 0);
-	if (crtc->enabled && crtc->fb) {
+	if (to_intel_crtc(crtc)->active && crtc->fb) {
 		int cpp = crtc->fb->bits_per_pixel / 8;
 		if (IS_GEN2(dev))
 			cpp = 4;
@@ -1482,7 +1490,7 @@ static void i9xx_update_wm(struct drm_device *dev)
 
 	fifo_size = dev_priv->display.get_fifo_size(dev, 1);
 	crtc = intel_get_crtc_for_plane(dev, 1);
-	if (crtc->enabled && crtc->fb) {
+	if (to_intel_crtc(crtc)->active && crtc->fb) {
 		int cpp = crtc->fb->bits_per_pixel / 8;
 		if (IS_GEN2(dev))
 			cpp = 4;
@@ -1811,8 +1819,110 @@ static void sandybridge_update_wm(struct drm_device *dev)
 		enabled |= 2;
 	}
 
-	if ((dev_priv->num_pipe == 3) &&
-	    g4x_compute_wm0(dev, 2,
+	/*
+	 * Calculate and update the self-refresh watermark only when one
+	 * display plane is used.
+	 *
+	 * SNB support 3 levels of watermark.
+	 *
+	 * WM1/WM2/WM2 watermarks have to be enabled in the ascending order,
+	 * and disabled in the descending order
+	 *
+	 */
+	I915_WRITE(WM3_LP_ILK, 0);
+	I915_WRITE(WM2_LP_ILK, 0);
+	I915_WRITE(WM1_LP_ILK, 0);
+
+	if (!single_plane_enabled(enabled) ||
+	    dev_priv->sprite_scaling_enabled)
+		return;
+	enabled = ffs(enabled) - 1;
+
+	/* WM1 */
+	if (!ironlake_compute_srwm(dev, 1, enabled,
+				   SNB_READ_WM1_LATENCY() * 500,
+				   &sandybridge_display_srwm_info,
+				   &sandybridge_cursor_srwm_info,
+				   &fbc_wm, &plane_wm, &cursor_wm))
+		return;
+
+	I915_WRITE(WM1_LP_ILK,
+		   WM1_LP_SR_EN |
+		   (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (fbc_wm << WM1_LP_FBC_SHIFT) |
+		   (plane_wm << WM1_LP_SR_SHIFT) |
+		   cursor_wm);
+
+	/* WM2 */
+	if (!ironlake_compute_srwm(dev, 2, enabled,
+				   SNB_READ_WM2_LATENCY() * 500,
+				   &sandybridge_display_srwm_info,
+				   &sandybridge_cursor_srwm_info,
+				   &fbc_wm, &plane_wm, &cursor_wm))
+		return;
+
+	I915_WRITE(WM2_LP_ILK,
+		   WM2_LP_EN |
+		   (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (fbc_wm << WM1_LP_FBC_SHIFT) |
+		   (plane_wm << WM1_LP_SR_SHIFT) |
+		   cursor_wm);
+
+	/* WM3 */
+	if (!ironlake_compute_srwm(dev, 3, enabled,
+				   SNB_READ_WM3_LATENCY() * 500,
+				   &sandybridge_display_srwm_info,
+				   &sandybridge_cursor_srwm_info,
+				   &fbc_wm, &plane_wm, &cursor_wm))
+		return;
+
+	I915_WRITE(WM3_LP_ILK,
+		   WM3_LP_EN |
+		   (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) |
+		   (fbc_wm << WM1_LP_FBC_SHIFT) |
+		   (plane_wm << WM1_LP_SR_SHIFT) |
+		   cursor_wm);
+}
+
+static void ivybridge_update_wm(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int latency = SNB_READ_WM0_LATENCY() * 100;	/* In unit 0.1us */
+	u32 val;
+	int fbc_wm, plane_wm, cursor_wm;
+	int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm;
+	unsigned int enabled;
+
+	enabled = 0;
+	if (g4x_compute_wm0(dev, 0,
+			    &sandybridge_display_wm_info, latency,
+			    &sandybridge_cursor_wm_info, latency,
+			    &plane_wm, &cursor_wm)) {
+		val = I915_READ(WM0_PIPEA_ILK);
+		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
+		I915_WRITE(WM0_PIPEA_ILK, val |
+			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
+		DRM_DEBUG_KMS("FIFO watermarks For pipe A -"
+			      " plane %d, " "cursor: %d\n",
+			      plane_wm, cursor_wm);
+		enabled |= 1;
+	}
+
+	if (g4x_compute_wm0(dev, 1,
+			    &sandybridge_display_wm_info, latency,
+			    &sandybridge_cursor_wm_info, latency,
+			    &plane_wm, &cursor_wm)) {
+		val = I915_READ(WM0_PIPEB_ILK);
+		val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK);
+		I915_WRITE(WM0_PIPEB_ILK, val |
+			   ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm));
+		DRM_DEBUG_KMS("FIFO watermarks For pipe B -"
+			      " plane %d, cursor: %d\n",
+			      plane_wm, cursor_wm);
+		enabled |= 2;
+	}
+
+	if (g4x_compute_wm0(dev, 2,
 			    &sandybridge_display_wm_info, latency,
 			    &sandybridge_cursor_wm_info, latency,
 			    &plane_wm, &cursor_wm)) {
@@ -1875,12 +1985,17 @@ static void sandybridge_update_wm(struct drm_device *dev)
 		   (plane_wm << WM1_LP_SR_SHIFT) |
 		   cursor_wm);
 
-	/* WM3 */
+	/* WM3, note we have to correct the cursor latency */
 	if (!ironlake_compute_srwm(dev, 3, enabled,
 				   SNB_READ_WM3_LATENCY() * 500,
 				   &sandybridge_display_srwm_info,
 				   &sandybridge_cursor_srwm_info,
-				   &fbc_wm, &plane_wm, &cursor_wm))
+				   &fbc_wm, &plane_wm, &ignore_cursor_wm) ||
+	    !ironlake_compute_srwm(dev, 3, enabled,
+				   2 * SNB_READ_WM3_LATENCY() * 500,
+				   &sandybridge_display_srwm_info,
+				   &sandybridge_cursor_srwm_info,
+				   &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm))
 		return;
 
 	I915_WRITE(WM3_LP_ILK,
@@ -1929,7 +2044,7 @@ sandybridge_compute_sprite_wm(struct drm_device *dev, int plane,
 	int entries, tlb_miss;
 
 	crtc = intel_get_crtc_for_plane(dev, plane);
-	if (crtc->fb == NULL || !crtc->enabled) {
+	if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) {
 		*sprite_wm = display->guard_size;
 		return false;
 	}
@@ -3471,6 +3586,15 @@ static void gen6_init_clock_gating(struct drm_device *dev)
 		   I915_READ(ILK_DISPLAY_CHICKEN2) |
 		   ILK_ELPIN_409_SELECT);
 
+	/* WaDisableHiZPlanesWhenMSAAEnabled */
+	I915_WRITE(_3D_CHICKEN,
+		   _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB));
+
+	/* WaSetupGtModeTdRowDispatch */
+	if (IS_SNB_GT1(dev))
+		I915_WRITE(GEN6_GT_MODE,
+			   _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE));
+
 	I915_WRITE(WM3_LP_ILK, 0);
 	I915_WRITE(WM2_LP_ILK, 0);
 	I915_WRITE(WM1_LP_ILK, 0);
@@ -3999,7 +4123,7 @@ void intel_init_pm(struct drm_device *dev)
 		} else if (IS_IVYBRIDGE(dev)) {
 			/* FIXME: detect B0+ stepping and use auto training */
 			if (SNB_READ_WM0_LATENCY()) {
-				dev_priv->display.update_wm = sandybridge_update_wm;
+				dev_priv->display.update_wm = ivybridge_update_wm;
 				dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm;
 			} else {
 				DRM_DEBUG_KMS("Failed to read display plane latency. "
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2346b920bd86..ae253e04c391 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -547,9 +547,14 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 
 static void render_ring_cleanup(struct intel_ring_buffer *ring)
 {
+	struct drm_device *dev = ring->dev;
+
 	if (!ring->private)
 		return;
 
+	if (HAS_BROKEN_CS_TLB(dev))
+		drm_gem_object_unreference(to_gem_object(ring->private));
+
 	cleanup_pipe_control(ring);
 }
 
@@ -969,6 +974,8 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
+#define I830_BATCH_LIMIT (256*1024)
 static int
 i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
 				u32 offset, u32 len,
@@ -976,15 +983,47 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring,
 {
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
-	if (ret)
-		return ret;
+	if (flags & I915_DISPATCH_PINNED) {
+		ret = intel_ring_begin(ring, 4);
+		if (ret)
+			return ret;
 
-	intel_ring_emit(ring, MI_BATCH_BUFFER);
-	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
-	intel_ring_emit(ring, offset + len - 8);
-	intel_ring_emit(ring, 0);
-	intel_ring_advance(ring);
+		intel_ring_emit(ring, MI_BATCH_BUFFER);
+		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, offset + len - 8);
+		intel_ring_emit(ring, MI_NOOP);
+		intel_ring_advance(ring);
+	} else {
+		struct drm_i915_gem_object *obj = ring->private;
+		u32 cs_offset = obj->gtt_offset;
+
+		if (len > I830_BATCH_LIMIT)
+			return -ENOSPC;
+
+		ret = intel_ring_begin(ring, 9+3);
+		if (ret)
+			return ret;
+		/* Blit the batch (which has now all relocs applied) to the stable batch
+		 * scratch bo area (so that the CS never stumbles over its tlb
+		 * invalidation bug) ... */
+		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD |
+				XY_SRC_COPY_BLT_WRITE_ALPHA |
+				XY_SRC_COPY_BLT_WRITE_RGB);
+		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024);
+		intel_ring_emit(ring, cs_offset);
+		intel_ring_emit(ring, 0);
+		intel_ring_emit(ring, 4096);
+		intel_ring_emit(ring, offset);
+		intel_ring_emit(ring, MI_FLUSH);
+
+		/* ... and execute it. */
+		intel_ring_emit(ring, MI_BATCH_BUFFER);
+		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
+		intel_ring_emit(ring, cs_offset + len - 8);
+		intel_ring_advance(ring);
+	}
 
 	return 0;
 }
@@ -1596,6 +1635,27 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	ring->init = init_render_ring;
 	ring->cleanup = render_ring_cleanup;
 
+	/* Workaround batchbuffer to combat CS tlb bug. */
+	if (HAS_BROKEN_CS_TLB(dev)) {
+		struct drm_i915_gem_object *obj;
+		int ret;
+
+		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT);
+		if (obj == NULL) {
+			DRM_ERROR("Failed to allocate batch bo\n");
+			return -ENOMEM;
+		}
+
+		ret = i915_gem_object_pin(obj, 0, true, false);
+		if (ret != 0) {
+			drm_gem_object_unreference(&obj->base);
+			DRM_ERROR("Failed to ping batch bo\n");
+			return ret;
+		}
+
+		ring->private = obj;
+	}
+
 	return intel_init_ring_buffer(dev, ring);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 526182ed0c6d..6af87cd05725 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -94,6 +94,7 @@ struct  intel_ring_buffer {
 					       u32 offset, u32 length,
 					       unsigned flags);
 #define I915_DISPATCH_SECURE 0x1
+#define I915_DISPATCH_PINNED 0x2
 	void		(*cleanup)(struct intel_ring_buffer *ring);
 	int		(*sync_to)(struct intel_ring_buffer *ring,
 				   struct intel_ring_buffer *to,
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc
index 7b715fda2763..62ab231cd6b6 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc
@@ -57,6 +57,11 @@ chipsets:
 .b16 #nve4_gpc_mmio_tail
 .b16 #nve4_tpc_mmio_head
 .b16 #nve4_tpc_mmio_tail
+.b8  0xe6 0 0 0
+.b16 #nve4_gpc_mmio_head
+.b16 #nve4_gpc_mmio_tail
+.b16 #nve4_tpc_mmio_head
+.b16 #nve4_tpc_mmio_tail
 .b8  0 0 0 0
 
 // GPC mmio lists
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h
index 26c2165bad0f..09ee4702c8b2 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h
@@ -34,13 +34,16 @@ uint32_t nve0_grgpc_data[] = {
 	0x00000000,
 /* 0x0064: chipsets */
 	0x000000e4,
-	0x01040080,
-	0x014c0104,
+	0x0110008c,
+	0x01580110,
 	0x000000e7,
-	0x01040080,
-	0x014c0104,
+	0x0110008c,
+	0x01580110,
+	0x000000e6,
+	0x0110008c,
+	0x01580110,
 	0x00000000,
-/* 0x0080: nve4_gpc_mmio_head */
+/* 0x008c: nve4_gpc_mmio_head */
 	0x00000380,
 	0x04000400,
 	0x0800040c,
@@ -74,8 +77,8 @@ uint32_t nve0_grgpc_data[] = {
 	0x14003100,
 	0x000031d0,
 	0x040031e0,
-/* 0x0104: nve4_gpc_mmio_tail */
-/* 0x0104: nve4_tpc_mmio_head */
+/* 0x0110: nve4_gpc_mmio_tail */
+/* 0x0110: nve4_tpc_mmio_head */
 	0x00000048,
 	0x00000064,
 	0x00000088,
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc
index acfc457654bd..0bcfa4d447e5 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc
@@ -754,6 +754,16 @@ ctx_mmio_exec:
 //		on load it means: "a save preceeded this load"
 //
 ctx_xfer:
+	// according to mwk, some kind of wait for idle
+	mov $r15 0xc00
+	shl b32 $r15 6
+	mov $r14 4
+	iowr I[$r15 + 0x200] $r14
+	ctx_xfer_idle:
+		iord $r14 I[$r15 + 0x000]
+		and $r14 0x2000
+		bra ne #ctx_xfer_idle
+
 	bra not $p1 #ctx_xfer_pre
 	bra $p2 #ctx_xfer_pre_load
 	ctx_xfer_pre:
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h
index 85a8d556f484..bb03d2a1d57b 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h
@@ -799,79 +799,80 @@ uint32_t nvc0_grhub_code[] = {
 	0x01fa0613,
 	0xf803f806,
 /* 0x0829: ctx_xfer */
-	0x0611f400,
-/* 0x082f: ctx_xfer_pre */
-	0xf01102f4,
-	0x21f510f7,
-	0x21f50698,
-	0x11f40631,
-/* 0x083d: ctx_xfer_pre_load */
-	0x02f7f01c,
-	0x065721f5,
-	0x066621f5,
-	0x067821f5,
-	0x21f5f4bd,
-	0x21f50657,
-/* 0x0856: ctx_xfer_exec */
-	0x019806b8,
-	0x1427f116,
-	0x0624b604,
-	0xf10020d0,
-	0xf0a500e7,
-	0x1fb941e3,
-	0x8d21f402,
-	0xf004e0b6,
-	0x2cf001fc,
-	0x0124b602,
-	0xf405f2fd,
-	0x17f18d21,
-	0x13f04afc,
-	0x0c27f002,
-	0xf50012d0,
-	0xf1020721,
-	0xf047fc27,
-	0x20d00223,
-	0x012cf000,
-	0xd00320b6,
-	0xacf00012,
-	0x06a5f001,
-	0x9800b7f0,
-	0x0d98140c,
-	0x00e7f015,
-	0x015c21f5,
-	0xf508a7f0,
-	0xf5010321,
-	0xf4020721,
-	0xa7f02201,
-	0xc921f40c,
-	0x0a1017f1,
-	0xf00614b6,
-	0x12d00527,
-/* 0x08dd: ctx_xfer_post_save_wait */
-	0x0012cf00,
-	0xf40522fd,
-	0x02f4fa1b,
-/* 0x08e9: ctx_xfer_post */
-	0x02f7f032,
-	0x065721f5,
-	0x21f5f4bd,
-	0x21f50698,
-	0x21f50226,
-	0xf4bd0666,
-	0x065721f5,
-	0x981011f4,
-	0x11fd8001,
-	0x070bf405,
-	0x07df21f5,
-/* 0x0914: ctx_xfer_no_post_mmio */
-	0x064921f5,
-/* 0x0918: ctx_xfer_done */
-	0x000000f8,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
+	0x00f7f100,
+	0x06f4b60c,
+	0xd004e7f0,
+/* 0x0836: ctx_xfer_idle */
+	0xfecf80fe,
+	0x00e4f100,
+	0xf91bf420,
+	0xf40611f4,
+/* 0x0846: ctx_xfer_pre */
+	0xf7f01102,
+	0x9821f510,
+	0x3121f506,
+	0x1c11f406,
+/* 0x0854: ctx_xfer_pre_load */
+	0xf502f7f0,
+	0xf5065721,
+	0xf5066621,
+	0xbd067821,
+	0x5721f5f4,
+	0xb821f506,
+/* 0x086d: ctx_xfer_exec */
+	0x16019806,
+	0x041427f1,
+	0xd00624b6,
+	0xe7f10020,
+	0xe3f0a500,
+	0x021fb941,
+	0xb68d21f4,
+	0xfcf004e0,
+	0x022cf001,
+	0xfd0124b6,
+	0x21f405f2,
+	0xfc17f18d,
+	0x0213f04a,
+	0xd00c27f0,
+	0x21f50012,
+	0x27f10207,
+	0x23f047fc,
+	0x0020d002,
+	0xb6012cf0,
+	0x12d00320,
+	0x01acf000,
+	0xf006a5f0,
+	0x0c9800b7,
+	0x150d9814,
+	0xf500e7f0,
+	0xf0015c21,
+	0x21f508a7,
+	0x21f50103,
+	0x01f40207,
+	0x0ca7f022,
+	0xf1c921f4,
+	0xb60a1017,
+	0x27f00614,
+	0x0012d005,
+/* 0x08f4: ctx_xfer_post_save_wait */
+	0xfd0012cf,
+	0x1bf40522,
+	0x3202f4fa,
+/* 0x0900: ctx_xfer_post */
+	0xf502f7f0,
+	0xbd065721,
+	0x9821f5f4,
+	0x2621f506,
+	0x6621f502,
+	0xf5f4bd06,
+	0xf4065721,
+	0x01981011,
+	0x0511fd80,
+	0xf5070bf4,
+/* 0x092b: ctx_xfer_no_post_mmio */
+	0xf507df21,
+/* 0x092f: ctx_xfer_done */
+	0xf8064921,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc
index 138eeaa28665..7fe9d7cf486b 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc
@@ -44,6 +44,9 @@ chipsets:
 .b8  0xe7 0 0 0
 .b16 #nve4_hub_mmio_head
 .b16 #nve4_hub_mmio_tail
+.b8  0xe6 0 0 0
+.b16 #nve4_hub_mmio_head
+.b16 #nve4_hub_mmio_tail
 .b8  0 0 0 0
 
 nve4_hub_mmio_head:
@@ -680,6 +683,16 @@ ctx_mmio_exec:
 //		on load it means: "a save preceeded this load"
 //
 ctx_xfer:
+	// according to mwk, some kind of wait for idle
+	mov $r15 0xc00
+	shl b32 $r15 6
+	mov $r14 4
+	iowr I[$r15 + 0x200] $r14
+	ctx_xfer_idle:
+		iord $r14 I[$r15 + 0x000]
+		and $r14 0x2000
+		bra ne #ctx_xfer_idle
+
 	bra not $p1 #ctx_xfer_pre
 	bra $p2 #ctx_xfer_pre_load
 	ctx_xfer_pre:
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h
index decf0c60ca3b..e3421af68ab9 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h
@@ -30,11 +30,13 @@ uint32_t nve0_grhub_data[] = {
 	0x00000000,
 /* 0x005c: chipsets */
 	0x000000e4,
-	0x013c0070,
+	0x01440078,
 	0x000000e7,
-	0x013c0070,
+	0x01440078,
+	0x000000e6,
+	0x01440078,
 	0x00000000,
-/* 0x0070: nve4_hub_mmio_head */
+/* 0x0078: nve4_hub_mmio_head */
 	0x0417e91c,
 	0x04400204,
 	0x18404010,
@@ -86,9 +88,7 @@ uint32_t nve0_grhub_data[] = {
 	0x00408840,
 	0x08408900,
 	0x00408980,
-/* 0x013c: nve4_hub_mmio_tail */
-	0x00000000,
-	0x00000000,
+/* 0x0144: nve4_hub_mmio_tail */
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -781,77 +781,78 @@ uint32_t nve0_grhub_code[] = {
 	0x0613f002,
 	0xf80601fa,
 /* 0x07fb: ctx_xfer */
-	0xf400f803,
-	0x02f40611,
-/* 0x0801: ctx_xfer_pre */
-	0x10f7f00d,
-	0x067221f5,
-/* 0x080b: ctx_xfer_pre_load */
-	0xf01c11f4,
-	0x21f502f7,
-	0x21f50631,
-	0x21f50640,
-	0xf4bd0652,
-	0x063121f5,
-	0x069221f5,
-/* 0x0824: ctx_xfer_exec */
-	0xf1160198,
-	0xb6041427,
-	0x20d00624,
-	0x00e7f100,
-	0x41e3f0a5,
-	0xf4021fb9,
-	0xe0b68d21,
-	0x01fcf004,
-	0xb6022cf0,
-	0xf2fd0124,
-	0x8d21f405,
-	0x4afc17f1,
-	0xf00213f0,
-	0x12d00c27,
-	0x0721f500,
-	0xfc27f102,
-	0x0223f047,
-	0xf00020d0,
-	0x20b6012c,
-	0x0012d003,
-	0xf001acf0,
-	0xb7f006a5,
-	0x140c9800,
-	0xf0150d98,
-	0x21f500e7,
-	0xa7f0015c,
-	0x0321f508,
-	0x0721f501,
-	0x2201f402,
-	0xf40ca7f0,
-	0x17f1c921,
-	0x14b60a10,
-	0x0527f006,
-/* 0x08ab: ctx_xfer_post_save_wait */
-	0xcf0012d0,
-	0x22fd0012,
-	0xfa1bf405,
-/* 0x08b7: ctx_xfer_post */
-	0xf02e02f4,
-	0x21f502f7,
-	0xf4bd0631,
-	0x067221f5,
-	0x022621f5,
-	0x064021f5,
-	0x21f5f4bd,
-	0x11f40631,
-	0x80019810,
-	0xf40511fd,
-	0x21f5070b,
-/* 0x08e2: ctx_xfer_no_post_mmio */
-/* 0x08e2: ctx_xfer_done */
-	0x00f807b1,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
+	0xf100f803,
+	0xb60c00f7,
+	0xe7f006f4,
+	0x80fed004,
+/* 0x0808: ctx_xfer_idle */
+	0xf100fecf,
+	0xf42000e4,
+	0x11f4f91b,
+	0x0d02f406,
+/* 0x0818: ctx_xfer_pre */
+	0xf510f7f0,
+	0xf4067221,
+/* 0x0822: ctx_xfer_pre_load */
+	0xf7f01c11,
+	0x3121f502,
+	0x4021f506,
+	0x5221f506,
+	0xf5f4bd06,
+	0xf5063121,
+/* 0x083b: ctx_xfer_exec */
+	0x98069221,
+	0x27f11601,
+	0x24b60414,
+	0x0020d006,
+	0xa500e7f1,
+	0xb941e3f0,
+	0x21f4021f,
+	0x04e0b68d,
+	0xf001fcf0,
+	0x24b6022c,
+	0x05f2fd01,
+	0xf18d21f4,
+	0xf04afc17,
+	0x27f00213,
+	0x0012d00c,
+	0x020721f5,
+	0x47fc27f1,
+	0xd00223f0,
+	0x2cf00020,
+	0x0320b601,
+	0xf00012d0,
+	0xa5f001ac,
+	0x00b7f006,
+	0x98140c98,
+	0xe7f0150d,
+	0x5c21f500,
+	0x08a7f001,
+	0x010321f5,
+	0x020721f5,
+	0xf02201f4,
+	0x21f40ca7,
+	0x1017f1c9,
+	0x0614b60a,
+	0xd00527f0,
+/* 0x08c2: ctx_xfer_post_save_wait */
+	0x12cf0012,
+	0x0522fd00,
+	0xf4fa1bf4,
+/* 0x08ce: ctx_xfer_post */
+	0xf7f02e02,
+	0x3121f502,
+	0xf5f4bd06,
+	0xf5067221,
+	0xf5022621,
+	0xbd064021,
+	0x3121f5f4,
+	0x1011f406,
+	0xfd800198,
+	0x0bf40511,
+	0xb121f507,
+/* 0x08f9: ctx_xfer_no_post_mmio */
+/* 0x08f9: ctx_xfer_done */
+	0x0000f807,
 	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
index 47a02081d708..45aff5f5085a 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c
@@ -516,18 +516,9 @@ nvc0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 {
 	struct nouveau_device *device = nv_device(parent);
 	struct nvc0_graph_priv *priv;
-	bool enable = true;
 	int ret, i;
 
-	switch (device->chipset) {
-	case 0xd9: /* known broken without binary driver firmware */
-		enable = false;
-		break;
-	default:
-		break;
-	}
-
-	ret = nouveau_graph_create(parent, engine, oclass, enable, &priv);
+	ret = nouveau_graph_create(parent, engine, oclass, true, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
index 18d2210e12eb..a1e78de46456 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h
@@ -121,6 +121,7 @@ nvc0_graph_class(void *obj)
 		return 0x9297;
 	case 0xe4:
 	case 0xe7:
+	case 0xe6:
 		return 0xa097;
 	default:
 		return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
index 539d4c72f192..9f82e9702b46 100644
--- a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c
@@ -203,7 +203,7 @@ nve0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct nvc0_graph_priv *priv;
 	int ret, i;
 
-	ret = nouveau_graph_create(parent, engine, oclass, false, &priv);
+	ret = nouveau_graph_create(parent, engine, oclass, true, &priv);
 	*pobject = nv_object(priv);
 	if (ret)
 		return ret;
@@ -252,6 +252,7 @@ nve0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 			priv->magic_not_rop_nr = 1;
 		break;
 	case 0xe7:
+	case 0xe6:
 		priv->magic_not_rop_nr = 1;
 		break;
 	default:
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios.h
index d145b25e6be4..5bd1ca8cd20d 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bios.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios.h
@@ -17,6 +17,7 @@ struct nouveau_bios {
 		u8 chip;
 		u8 minor;
 		u8 micro;
+		u8 patch;
 	} version;
 };
 
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/gpio.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/gpio.h
index 2bf178082a36..e6563b5cb08e 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/gpio.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/gpio.h
@@ -25,9 +25,11 @@ struct dcb_gpio_func {
 	u8 param;
 };
 
-u16 dcb_gpio_table(struct nouveau_bios *);
-u16 dcb_gpio_entry(struct nouveau_bios *, int idx, int ent, u8 *ver);
-int dcb_gpio_parse(struct nouveau_bios *, int idx, u8 func, u8 line,
+u16 dcb_gpio_table(struct nouveau_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+u16 dcb_gpio_entry(struct nouveau_bios *, int idx, int ent, u8 *ver, u8 *len);
+u16 dcb_gpio_parse(struct nouveau_bios *, int idx, int ent, u8 *ver, u8 *len,
 		   struct dcb_gpio_func *);
+u16 dcb_gpio_match(struct nouveau_bios *, int idx, u8 func, u8 line,
+		   u8 *ver, u8 *len, struct dcb_gpio_func *);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/init.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/init.h
index e69a8bdc6e97..ca2f6bf37f46 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/init.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/init.h
@@ -13,6 +13,7 @@ struct nvbios_init {
 	u32 nested;
 	u16 repeat;
 	u16 repend;
+	u32 ramcfg;
 };
 
 int nvbios_exec(struct nvbios_init *);
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h b/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h
index 9ea2b12cc15d..b75e8f18e52c 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h
@@ -11,7 +11,7 @@ struct nouveau_gpio {
 	struct nouveau_subdev base;
 
 	/* hardware interfaces */
-	void (*reset)(struct nouveau_gpio *);
+	void (*reset)(struct nouveau_gpio *, u8 func);
 	int  (*drive)(struct nouveau_gpio *, int line, int dir, int out);
 	int  (*sense)(struct nouveau_gpio *, int line);
 	void (*irq_enable)(struct nouveau_gpio *, int line, bool);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c
index dd111947eb86..f621f69fa1a2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c
@@ -447,6 +447,7 @@ nouveau_bios_ctor(struct nouveau_object *parent,
 		bios->version.chip  = nv_ro08(bios, bit_i.offset + 2);
 		bios->version.minor = nv_ro08(bios, bit_i.offset + 1);
 		bios->version.micro = nv_ro08(bios, bit_i.offset + 0);
+		bios->version.patch = nv_ro08(bios, bit_i.offset + 4);
 	} else
 	if (bmp_version(bios)) {
 		bios->version.major = nv_ro08(bios, bios->bmp_offset + 13);
@@ -455,9 +456,9 @@ nouveau_bios_ctor(struct nouveau_object *parent,
 		bios->version.micro = nv_ro08(bios, bios->bmp_offset + 10);
 	}
 
-	nv_info(bios, "version %02x.%02x.%02x.%02x\n",
+	nv_info(bios, "version %02x.%02x.%02x.%02x.%02x\n",
 		bios->version.major, bios->version.chip,
-		bios->version.minor, bios->version.micro);
+		bios->version.minor, bios->version.micro, bios->version.patch);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c b/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c
index c90d4aa3ae4f..c84e93fa6d95 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c
@@ -27,84 +27,105 @@
 #include <subdev/bios/gpio.h>
 
 u16
-dcb_gpio_table(struct nouveau_bios *bios)
+dcb_gpio_table(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
-	u8  ver, hdr, cnt, len;
-	u16 dcb = dcb_table(bios, &ver, &hdr, &cnt, &len);
+	u16 data = 0x0000;
+	u16 dcb = dcb_table(bios, ver, hdr, cnt, len);
 	if (dcb) {
-		if (ver >= 0x30 && hdr >= 0x0c)
-			return nv_ro16(bios, dcb + 0x0a);
-		if (ver >= 0x22 && nv_ro08(bios, dcb - 1) >= 0x13)
-			return nv_ro16(bios, dcb - 0x0f);
+		if (*ver >= 0x30 && *hdr >= 0x0c)
+			data = nv_ro16(bios, dcb + 0x0a);
+		else
+		if (*ver >= 0x22 && nv_ro08(bios, dcb - 1) >= 0x13)
+			data = nv_ro16(bios, dcb - 0x0f);
+
+		if (data) {
+			*ver = nv_ro08(bios, data + 0x00);
+			if (*ver < 0x30) {
+				*hdr = 3;
+				*cnt = nv_ro08(bios, data + 0x02);
+				*len = nv_ro08(bios, data + 0x01);
+			} else
+			if (*ver <= 0x41) {
+				*hdr = nv_ro08(bios, data + 0x01);
+				*cnt = nv_ro08(bios, data + 0x02);
+				*len = nv_ro08(bios, data + 0x03);
+			} else {
+				data = 0x0000;
+			}
+		}
 	}
-	return 0x0000;
+	return data;
 }
 
 u16
-dcb_gpio_entry(struct nouveau_bios *bios, int idx, int ent, u8 *ver)
+dcb_gpio_entry(struct nouveau_bios *bios, int idx, int ent, u8 *ver, u8 *len)
 {
-	u16 gpio = dcb_gpio_table(bios);
-	if (gpio) {
-		*ver = nv_ro08(bios, gpio);
-		if (*ver < 0x30 && ent < nv_ro08(bios, gpio + 2))
-			return gpio + 3 + (ent * nv_ro08(bios, gpio + 1));
-		else if (ent < nv_ro08(bios, gpio + 2))
-			return gpio + nv_ro08(bios, gpio + 1) +
-			       (ent * nv_ro08(bios, gpio + 3));
-	}
+	u8  hdr, cnt;
+	u16 gpio = !idx ? dcb_gpio_table(bios, ver, &hdr, &cnt, len) : 0x0000;
+	if (gpio && ent < cnt)
+		return gpio + hdr + (ent * *len);
 	return 0x0000;
 }
 
-int
-dcb_gpio_parse(struct nouveau_bios *bios, int idx, u8 func, u8 line,
+u16
+dcb_gpio_parse(struct nouveau_bios *bios, int idx, int ent, u8 *ver, u8 *len,
 	       struct dcb_gpio_func *gpio)
 {
-	u8  ver, hdr, cnt, len;
-	u16 entry;
-	int i = -1;
-
-	while ((entry = dcb_gpio_entry(bios, idx, ++i, &ver))) {
-		if (ver < 0x40) {
-			u16 data = nv_ro16(bios, entry);
+	u16 data = dcb_gpio_entry(bios, idx, ent, ver, len);
+	if (data) {
+		if (*ver < 0x40) {
+			u16 info = nv_ro16(bios, data);
 			*gpio = (struct dcb_gpio_func) {
-				.line = (data & 0x001f) >> 0,
-				.func = (data & 0x07e0) >> 5,
-				.log[0] = (data & 0x1800) >> 11,
-				.log[1] = (data & 0x6000) >> 13,
-				.param = !!(data & 0x8000),
+				.line = (info & 0x001f) >> 0,
+				.func = (info & 0x07e0) >> 5,
+				.log[0] = (info & 0x1800) >> 11,
+				.log[1] = (info & 0x6000) >> 13,
+				.param = !!(info & 0x8000),
 			};
 		} else
-		if (ver < 0x41) {
-			u32 data = nv_ro32(bios, entry);
+		if (*ver < 0x41) {
+			u32 info = nv_ro32(bios, data);
 			*gpio = (struct dcb_gpio_func) {
-				.line = (data & 0x0000001f) >> 0,
-				.func = (data & 0x0000ff00) >> 8,
-				.log[0] = (data & 0x18000000) >> 27,
-				.log[1] = (data & 0x60000000) >> 29,
-				.param = !!(data & 0x80000000),
+				.line = (info & 0x0000001f) >> 0,
+				.func = (info & 0x0000ff00) >> 8,
+				.log[0] = (info & 0x18000000) >> 27,
+				.log[1] = (info & 0x60000000) >> 29,
+				.param = !!(info & 0x80000000),
 			};
 		} else {
-			u32 data = nv_ro32(bios, entry + 0);
-			u8 data1 = nv_ro32(bios, entry + 4);
+			u32 info = nv_ro32(bios, data + 0);
+			u8 info1 = nv_ro32(bios, data + 4);
 			*gpio = (struct dcb_gpio_func) {
-				.line = (data & 0x0000003f) >> 0,
-				.func = (data & 0x0000ff00) >> 8,
-				.log[0] = (data1 & 0x30) >> 4,
-				.log[1] = (data1 & 0xc0) >> 6,
-				.param = !!(data & 0x80000000),
+				.line = (info & 0x0000003f) >> 0,
+				.func = (info & 0x0000ff00) >> 8,
+				.log[0] = (info1 & 0x30) >> 4,
+				.log[1] = (info1 & 0xc0) >> 6,
+				.param = !!(info & 0x80000000),
 			};
 		}
+	}
+
+	return data;
+}
 
+u16
+dcb_gpio_match(struct nouveau_bios *bios, int idx, u8 func, u8 line,
+	       u8 *ver, u8 *len, struct dcb_gpio_func *gpio)
+{
+	u8  hdr, cnt, i = 0;
+	u16 data;
+
+	while ((data = dcb_gpio_parse(bios, idx, i++, ver, len, gpio))) {
 		if ((line == 0xff || line == gpio->line) &&
 		    (func == 0xff || func == gpio->func))
-			return 0;
+			return data;
 	}
 
 	/* DCB 2.2, fixed TVDAC GPIO data */
-	if ((entry = dcb_table(bios, &ver, &hdr, &cnt, &len))) {
-		if (ver >= 0x22 && ver < 0x30 && func == DCB_GPIO_TVDAC0) {
-			u8 conf = nv_ro08(bios, entry - 5);
-			u8 addr = nv_ro08(bios, entry - 4);
+	if ((data = dcb_table(bios, ver, &hdr, &cnt, len))) {
+		if (*ver >= 0x22 && *ver < 0x30 && func == DCB_GPIO_TVDAC0) {
+			u8 conf = nv_ro08(bios, data - 5);
+			u8 addr = nv_ro08(bios, data - 4);
 			if (conf & 0x01) {
 				*gpio = (struct dcb_gpio_func) {
 					.func = DCB_GPIO_TVDAC0,
@@ -112,10 +133,11 @@ dcb_gpio_parse(struct nouveau_bios *bios, int idx, u8 func, u8 line,
 					.log[0] = !!(conf & 0x02),
 					.log[1] =  !(conf & 0x02),
 				};
-				return 0;
+				*ver = 0x00;
+				return data;
 			}
 		}
 	}
 
-	return -EINVAL;
+	return 0x0000;
 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
index ae168bbb86d8..2917d552689b 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c
@@ -2,11 +2,12 @@
 #include <core/device.h>
 
 #include <subdev/bios.h>
-#include <subdev/bios/conn.h>
 #include <subdev/bios/bmp.h>
 #include <subdev/bios/bit.h>
+#include <subdev/bios/conn.h>
 #include <subdev/bios/dcb.h>
 #include <subdev/bios/dp.h>
+#include <subdev/bios/gpio.h>
 #include <subdev/bios/init.h>
 #include <subdev/devinit.h>
 #include <subdev/clock.h>
@@ -410,9 +411,25 @@ init_ram_restrict_group_count(struct nvbios_init *init)
 }
 
 static u8
+init_ram_restrict_strap(struct nvbios_init *init)
+{
+	/* This appears to be the behaviour of the VBIOS parser, and *is*
+	 * important to cache the NV_PEXTDEV_BOOT0 on later chipsets to
+	 * avoid fucking up the memory controller (somehow) by reading it
+	 * on every INIT_RAM_RESTRICT_ZM_GROUP opcode.
+	 *
+	 * Preserving the non-caching behaviour on earlier chipsets just
+	 * in case *not* re-reading the strap causes similar breakage.
+	 */
+	if (!init->ramcfg || init->bios->version.major < 0x70)
+		init->ramcfg = init_rd32(init, 0x101000);
+	return (init->ramcfg & 0x00000003c) >> 2;
+}
+
+static u8
 init_ram_restrict(struct nvbios_init *init)
 {
-	u32 strap = (init_rd32(init, 0x101000) & 0x0000003c) >> 2;
+	u8  strap = init_ram_restrict_strap(init);
 	u16 table = init_ram_restrict_table(init);
 	if (table)
 		return nv_ro08(init->bios, table + strap);
@@ -1781,7 +1798,7 @@ init_gpio(struct nvbios_init *init)
 	init->offset += 1;
 
 	if (init_exec(init) && gpio && gpio->reset)
-		gpio->reset(gpio);
+		gpio->reset(gpio, DCB_GPIO_UNUSED);
 }
 
 /**
@@ -1995,6 +2012,47 @@ init_i2c_long_if(struct nvbios_init *init)
 	init_exec_set(init, false);
 }
 
+/**
+ * INIT_GPIO_NE - opcode 0xa9
+ *
+ */
+static void
+init_gpio_ne(struct nvbios_init *init)
+{
+	struct nouveau_bios *bios = init->bios;
+	struct nouveau_gpio *gpio = nouveau_gpio(bios);
+	struct dcb_gpio_func func;
+	u8 count = nv_ro08(bios, init->offset + 1);
+	u8 idx = 0, ver, len;
+	u16 data, i;
+
+	trace("GPIO_NE\t");
+	init->offset += 2;
+
+	for (i = init->offset; i < init->offset + count; i++)
+		cont("0x%02x ", nv_ro08(bios, i));
+	cont("\n");
+
+	while ((data = dcb_gpio_parse(bios, 0, idx++, &ver, &len, &func))) {
+		if (func.func != DCB_GPIO_UNUSED) {
+			for (i = init->offset; i < init->offset + count; i++) {
+				if (func.func == nv_ro08(bios, i))
+					break;
+			}
+
+			trace("\tFUNC[0x%02x]", func.func);
+			if (i == (init->offset + count)) {
+				cont(" *");
+				if (init_exec(init) && gpio && gpio->reset)
+					gpio->reset(gpio, func.func);
+			}
+			cont("\n");
+		}
+	}
+
+	init->offset += count;
+}
+
 static struct nvbios_init_opcode {
 	void (*exec)(struct nvbios_init *);
 } init_opcode[] = {
@@ -2059,6 +2117,7 @@ static struct nvbios_init_opcode {
 	[0x98] = { init_auxch },
 	[0x99] = { init_zm_auxch },
 	[0x9a] = { init_i2c_long_if },
+	[0xa9] = { init_gpio_ne },
 };
 
 #define init_opcode_nr (sizeof(init_opcode) / sizeof(init_opcode[0]))
diff --git a/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c b/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c
index 9b7881e76634..03a652876e73 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c
@@ -109,6 +109,34 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
 		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
 		break;
+	case 0xe6:
+		device->cname = "GK106";
+		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] = &nvd0_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_I2C    ] = &nouveau_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
+		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv50_therm_oclass;
+		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
+		device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass;
+		device->oclass[NVDEV_SUBDEV_MC     ] = &nvc0_mc_oclass;
+		device->oclass[NVDEV_SUBDEV_TIMER  ] = &nv04_timer_oclass;
+		device->oclass[NVDEV_SUBDEV_FB     ] = &nvc0_fb_oclass;
+		device->oclass[NVDEV_SUBDEV_LTCG   ] = &nvc0_ltcg_oclass;
+		device->oclass[NVDEV_SUBDEV_IBUS   ] = &nve0_ibus_oclass;
+		device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv50_instmem_oclass;
+		device->oclass[NVDEV_SUBDEV_VM     ] = &nvc0_vmmgr_oclass;
+		device->oclass[NVDEV_SUBDEV_BAR    ] = &nvc0_bar_oclass;
+		device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvd0_dmaeng_oclass;
+		device->oclass[NVDEV_ENGINE_FIFO   ] = &nve0_fifo_oclass;
+		device->oclass[NVDEV_ENGINE_SW     ] = &nvc0_software_oclass;
+		device->oclass[NVDEV_ENGINE_GR     ] = &nve0_graph_oclass;
+		device->oclass[NVDEV_ENGINE_DISP   ] = &nve0_disp_oclass;
+		device->oclass[NVDEV_ENGINE_COPY0  ] = &nve0_copy0_oclass;
+		device->oclass[NVDEV_ENGINE_COPY1  ] = &nve0_copy1_oclass;
+		device->oclass[NVDEV_ENGINE_BSP    ] = &nve0_bsp_oclass;
+		device->oclass[NVDEV_ENGINE_VP     ] = &nve0_vp_oclass;
+		device->oclass[NVDEV_ENGINE_PPP    ] = &nvc0_ppp_oclass;
+		break;
 	default:
 		nv_fatal(device, "unknown Kepler chipset\n");
 		return -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c
index acf818c58bf0..9fb0f9b92d49 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c
@@ -43,10 +43,15 @@ static int
 nouveau_gpio_find(struct nouveau_gpio *gpio, int idx, u8 tag, u8 line,
 		  struct dcb_gpio_func *func)
 {
+	struct nouveau_bios *bios = nouveau_bios(gpio);
+	u8  ver, len;
+	u16 data;
+
 	if (line == 0xff && tag == 0xff)
 		return -EINVAL;
 
-	if (!dcb_gpio_parse(nouveau_bios(gpio), idx, tag, line, func))
+	data = dcb_gpio_match(bios, idx, tag, line, &ver, &len, func);
+	if (data)
 		return 0;
 
 	/* Apple iMac G4 NV18 */
@@ -265,7 +270,7 @@ nouveau_gpio_init(struct nouveau_gpio *gpio)
 	int ret = nouveau_subdev_init(&gpio->base);
 	if (ret == 0 && gpio->reset) {
 		if (dmi_check_system(gpio_reset_ids))
-			gpio->reset(gpio);
+			gpio->reset(gpio, DCB_GPIO_UNUSED);
 	}
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c
index f3502c961cd9..bf13a1200f26 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c
@@ -29,15 +29,15 @@ struct nv50_gpio_priv {
 };
 
 static void
-nv50_gpio_reset(struct nouveau_gpio *gpio)
+nv50_gpio_reset(struct nouveau_gpio *gpio, u8 match)
 {
 	struct nouveau_bios *bios = nouveau_bios(gpio);
 	struct nv50_gpio_priv *priv = (void *)gpio;
+	u8 ver, len;
 	u16 entry;
-	u8 ver;
 	int ent = -1;
 
-	while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver))) {
+	while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver, &len))) {
 		static const u32 regs[] = { 0xe100, 0xe28c };
 		u32 data = nv_ro32(bios, entry);
 		u8  line =   (data & 0x0000001f);
@@ -48,7 +48,8 @@ nv50_gpio_reset(struct nouveau_gpio *gpio)
 		u32 val = (unk1 << 16) | unk0;
 		u32 reg = regs[line >> 4]; line &= 0x0f;
 
-		if (func == 0xff)
+		if ( func  == DCB_GPIO_UNUSED ||
+		    (match != DCB_GPIO_UNUSED && match != func))
 			continue;
 
 		gpio->set(gpio, 0, func, line, defs);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c
index 8d18fcad26e0..83e8b8f16e6a 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c
@@ -29,15 +29,15 @@ struct nvd0_gpio_priv {
 };
 
 static void
-nvd0_gpio_reset(struct nouveau_gpio *gpio)
+nvd0_gpio_reset(struct nouveau_gpio *gpio, u8 match)
 {
 	struct nouveau_bios *bios = nouveau_bios(gpio);
 	struct nvd0_gpio_priv *priv = (void *)gpio;
+	u8 ver, len;
 	u16 entry;
-	u8 ver;
 	int ent = -1;
 
-	while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver))) {
+	while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver, &len))) {
 		u32 data = nv_ro32(bios, entry);
 		u8  line =   (data & 0x0000003f);
 		u8  defs = !!(data & 0x00000080);
@@ -45,7 +45,8 @@ nvd0_gpio_reset(struct nouveau_gpio *gpio)
 		u8  unk0 =   (data & 0x00ff0000) >> 16;
 		u8  unk1 =   (data & 0x1f000000) >> 24;
 
-		if (func == 0xff)
+		if ( func  == DCB_GPIO_UNUSED ||
+		    (match != DCB_GPIO_UNUSED && match != func))
 			continue;
 
 		gpio->set(gpio, 0, func, line, defs);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
index 93e3ddf7303a..e286e132c7e7 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c
@@ -260,7 +260,7 @@ nouveau_mxm_create_(struct nouveau_object *parent,
 
 	data = mxm_table(bios, &ver, &len);
 	if (!data || !(ver = nv_ro08(bios, data))) {
-		nv_info(mxm, "no VBIOS data, nothing to do\n");
+		nv_debug(mxm, "no VBIOS data, nothing to do\n");
 		return 0;
 	}
 
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 74c6b42d2597..7a445666e71f 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2654,6 +2654,35 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			ib[idx+4] = upper_32_bits(offset) & 0xff;
 		}
 		break;
+	case PACKET3_MEM_WRITE:
+	{
+		u64 offset;
+
+		if (pkt->count != 3) {
+			DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+			return -EINVAL;
+		}
+		r = evergreen_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+			return -EINVAL;
+		}
+		offset = radeon_get_ib_value(p, idx+0);
+		offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+		if (offset & 0x7) {
+			DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
+			return -EINVAL;
+		}
+		if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+			DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+				  offset + 8, radeon_bo_size(reloc->robj));
+			return -EINVAL;
+		}
+		offset += reloc->lobj.gpu_offset;
+		ib[idx+0] = offset;
+		ib[idx+1] = upper_32_bits(offset) & 0xff;
+		break;
+	}
 	case PACKET3_COPY_DW:
 		if (pkt->count != 4) {
 			DRM_ERROR("bad COPY_DW (invalid count)\n");
@@ -3287,6 +3316,7 @@ static bool evergreen_vm_reg_valid(u32 reg)
 
 	/* check config regs */
 	switch (reg) {
+	case WAIT_UNTIL:
 	case GRBM_GFX_INDEX:
 	case CP_STRMOUT_CNTL:
 	case CP_COHER_CNTL:
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 0be768be530c..9ea13d07cc55 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -2294,6 +2294,35 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 			ib[idx+4] = upper_32_bits(offset) & 0xff;
 		}
 		break;
+	case PACKET3_MEM_WRITE:
+	{
+		u64 offset;
+
+		if (pkt->count != 3) {
+			DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+			return -EINVAL;
+		}
+		r = r600_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+			return -EINVAL;
+		}
+		offset = radeon_get_ib_value(p, idx+0);
+		offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+		if (offset & 0x7) {
+			DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n");
+			return -EINVAL;
+		}
+		if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+			DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+				  offset + 8, radeon_bo_size(reloc->robj));
+			return -EINVAL;
+		}
+		offset += reloc->lobj.gpu_offset;
+		ib[idx+0] = offset;
+		ib[idx+1] = upper_32_bits(offset) & 0xff;
+		break;
+	}
 	case PACKET3_COPY_DW:
 		if (pkt->count != 4) {
 			DRM_ERROR("bad COPY_DW (invalid count)\n");
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5dc744d43d12..9b9422c4403a 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -225,12 +225,13 @@ struct radeon_fence {
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
 int radeon_fence_driver_init(struct radeon_device *rdev);
 void radeon_fence_driver_fini(struct radeon_device *rdev);
+void radeon_fence_driver_force_completion(struct radeon_device *rdev);
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
 void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
 int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
-void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
 int radeon_fence_wait_any(struct radeon_device *rdev,
 			  struct radeon_fence **fences,
 			  bool intr);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 49b06590001e..cd756262924d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1164,6 +1164,7 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 	struct drm_crtc *crtc;
 	struct drm_connector *connector;
 	int i, r;
+	bool force_completion = false;
 
 	if (dev == NULL || dev->dev_private == NULL) {
 		return -ENODEV;
@@ -1206,8 +1207,16 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 
 	mutex_lock(&rdev->ring_lock);
 	/* wait for gpu to finish processing current batch */
-	for (i = 0; i < RADEON_NUM_RINGS; i++)
-		radeon_fence_wait_empty_locked(rdev, i);
+	for (i = 0; i < RADEON_NUM_RINGS; i++) {
+		r = radeon_fence_wait_empty_locked(rdev, i);
+		if (r) {
+			/* delay GPU reset to resume */
+			force_completion = true;
+		}
+	}
+	if (force_completion) {
+		radeon_fence_driver_force_completion(rdev);
+	}
 	mutex_unlock(&rdev->ring_lock);
 
 	radeon_save_bios_scratch_regs(rdev);
@@ -1338,7 +1347,6 @@ retry:
 	}
 
 	radeon_restore_bios_scratch_regs(rdev);
-	drm_helper_resume_force_mode(rdev->ddev);
 
 	if (!r) {
 		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -1358,11 +1366,14 @@ retry:
 			}
 		}
 	} else {
+		radeon_fence_driver_force_completion(rdev);
 		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 			kfree(ring_data[i]);
 		}
 	}
 
+	drm_helper_resume_force_mode(rdev->ddev);
+
 	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 9b1a727d3c9e..ff7593498a74 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -68,9 +68,10 @@
  *   2.25.0 - eg+: new info request for num SE and num SH
  *   2.26.0 - r600-eg: fix htile size computation
  *   2.27.0 - r600-SI: Add CS ioctl support for async DMA
+ *   2.28.0 - r600-eg: Add MEM_WRITE packet support
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	27
+#define KMS_DRIVER_MINOR	28
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 410a975a8eec..34356252567a 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -609,26 +609,20 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
  * Returns 0 if the fences have passed, error for all other cases.
  * Caller must hold ring lock.
  */
-void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
+	int r;
 
-	while(1) {
-		int r;
-		r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+	r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+	if (r) {
 		if (r == -EDEADLK) {
-			mutex_unlock(&rdev->ring_lock);
-			r = radeon_gpu_reset(rdev);
-			mutex_lock(&rdev->ring_lock);
-			if (!r)
-				continue;
-		}
-		if (r) {
-			dev_err(rdev->dev, "error waiting for ring to become"
-				" idle (%d)\n", r);
+			return -EDEADLK;
 		}
-		return;
+		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
+			ring, r);
 	}
+	return 0;
 }
 
 /**
@@ -854,13 +848,17 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
  */
 void radeon_fence_driver_fini(struct radeon_device *rdev)
 {
-	int ring;
+	int ring, r;
 
 	mutex_lock(&rdev->ring_lock);
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 		if (!rdev->fence_drv[ring].initialized)
 			continue;
-		radeon_fence_wait_empty_locked(rdev, ring);
+		r = radeon_fence_wait_empty_locked(rdev, ring);
+		if (r) {
+			/* no need to trigger GPU reset as we are unloading */
+			radeon_fence_driver_force_completion(rdev);
+		}
 		wake_up_all(&rdev->fence_queue);
 		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 		rdev->fence_drv[ring].initialized = false;
@@ -868,6 +866,25 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 	mutex_unlock(&rdev->ring_lock);
 }
 
+/**
+ * radeon_fence_driver_force_completion - force all fence waiter to complete
+ *
+ * @rdev: radeon device pointer
+ *
+ * In case of GPU reset failure make sure no process keep waiting on fence
+ * that will never complete.
+ */
+void radeon_fence_driver_force_completion(struct radeon_device *rdev)
+{
+	int ring;
+
+	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
+		if (!rdev->fence_drv[ring].initialized)
+			continue;
+		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
+	}
+}
+
 
 /*
  * Fence debugfs
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index aa14dbb7e4fb..0bfa656aa87d 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -234,7 +234,7 @@ static void radeon_set_power_state(struct radeon_device *rdev)
 
 static void radeon_pm_set_clocks(struct radeon_device *rdev)
 {
-	int i;
+	int i, r;
 
 	/* no need to take locks, etc. if nothing's going to change */
 	if ((rdev->pm.requested_clock_mode_index == rdev->pm.current_clock_mode_index) &&
@@ -248,8 +248,17 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
 	/* wait for the rings to drain */
 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
 		struct radeon_ring *ring = &rdev->ring[i];
-		if (ring->ready)
-			radeon_fence_wait_empty_locked(rdev, i);
+		if (!ring->ready) {
+			continue;
+		}
+		r = radeon_fence_wait_empty_locked(rdev, i);
+		if (r) {
+			/* needs a GPU reset dont reset here */
+			mutex_unlock(&rdev->ring_lock);
+			up_write(&rdev->pm.mclk_lock);
+			mutex_unlock(&rdev->ddev->struct_mutex);
+			return;
+		}
 	}
 
 	radeon_unmap_vram_bos(rdev);
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 074410371e2a..656b2e3334a6 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -102,12 +102,12 @@ static int tegra_dc_set_timings(struct tegra_dc *dc,
 		((mode->hsync_end - mode->hsync_start) <<  0);
 	tegra_dc_writel(dc, value, DC_DISP_SYNC_WIDTH);
 
-	value = ((mode->vsync_start - mode->vdisplay) << 16) |
-		((mode->hsync_start - mode->hdisplay) <<  0);
-	tegra_dc_writel(dc, value, DC_DISP_BACK_PORCH);
-
 	value = ((mode->vtotal - mode->vsync_end) << 16) |
 		((mode->htotal - mode->hsync_end) <<  0);
+	tegra_dc_writel(dc, value, DC_DISP_BACK_PORCH);
+
+	value = ((mode->vsync_start - mode->vdisplay) << 16) |
+		((mode->hsync_start - mode->hdisplay) <<  0);
 	tegra_dc_writel(dc, value, DC_DISP_FRONT_PORCH);
 
 	value = (mode->vdisplay << 16) | mode->hdisplay;
@@ -221,8 +221,7 @@ static int tegra_crtc_mode_set(struct drm_crtc *crtc,
 	win.stride = crtc->fb->pitches[0];
 
 	/* program window registers */
-	value = tegra_dc_readl(dc, DC_CMD_DISPLAY_WINDOW_HEADER);
-	value |= WINDOW_A_SELECT;
+	value = WINDOW_A_SELECT;
 	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
 
 	tegra_dc_writel(dc, win.fmt, DC_WIN_COLOR_DEPTH);
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 3a843a77ddc7..741b5dc2742c 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -204,24 +204,6 @@ extern int tegra_output_parse_dt(struct tegra_output *output);
 extern int tegra_output_init(struct drm_device *drm, struct tegra_output *output);
 extern int tegra_output_exit(struct tegra_output *output);
 
-/* from gem.c */
-extern struct tegra_gem_object *tegra_gem_alloc(struct drm_device *drm,
-						size_t size);
-extern int tegra_gem_handle_create(struct drm_device *drm,
-				   struct drm_file *file, size_t size,
-				   unsigned long flags, uint32_t *handle);
-extern int tegra_gem_dumb_create(struct drm_file *file, struct drm_device *drm,
-				 struct drm_mode_create_dumb *args);
-extern int tegra_gem_dumb_map_offset(struct drm_file *file,
-				     struct drm_device *drm, uint32_t handle,
-				     uint64_t *offset);
-extern int tegra_gem_dumb_destroy(struct drm_file *file,
-				  struct drm_device *drm, uint32_t handle);
-extern int tegra_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-extern int tegra_gem_init_object(struct drm_gem_object *obj);
-extern void tegra_gem_free_object(struct drm_gem_object *obj);
-extern struct vm_operations_struct tegra_gem_vm_ops;
-
 /* from fb.c */
 extern int tegra_drm_fb_init(struct drm_device *drm);
 extern void tegra_drm_fb_exit(struct drm_device *drm);
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index ab4016412bbf..e060c7e6434d 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -149,7 +149,7 @@ struct tmds_config {
 };
 
 static const struct tmds_config tegra2_tmds_config[] = {
-	{ /* 480p modes */
+	{ /* slow pixel clock modes */
 		.pclk = 27000000,
 		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
 			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(0) |
@@ -163,21 +163,8 @@ static const struct tmds_config tegra2_tmds_config[] = {
 			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_7_125_mA) |
 			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_7_125_mA) |
 			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_7_125_mA),
-	}, { /* 720p modes */
-		.pclk = 74250000,
-		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
-			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(1) |
-			SOR_PLL_TX_REG_LOAD(3),
-		.pll1 = SOR_PLL_TMDS_TERM_ENABLE | SOR_PLL_PE_EN,
-		.pe_current = PE_CURRENT0(PE_CURRENT_6_0_mA) |
-			PE_CURRENT1(PE_CURRENT_6_0_mA) |
-			PE_CURRENT2(PE_CURRENT_6_0_mA) |
-			PE_CURRENT3(PE_CURRENT_6_0_mA),
-		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_7_125_mA) |
-			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_7_125_mA) |
-			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_7_125_mA) |
-			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_7_125_mA),
-	}, { /* 1080p modes */
+	},
+	{ /* high pixel clock modes */
 		.pclk = UINT_MAX,
 		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
 			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(1) |
@@ -479,7 +466,7 @@ static void tegra_hdmi_setup_avi_infoframe(struct tegra_hdmi *hdmi,
 		return;
 	}
 
-	h_front_porch = mode->htotal - mode->hsync_end;
+	h_front_porch = mode->hsync_start - mode->hdisplay;
 	memset(&frame, 0, sizeof(frame));
 	frame.r = HDMI_AVI_R_SAME;
 
@@ -634,8 +621,8 @@ static int tegra_output_hdmi_enable(struct tegra_output *output)
 
 	pclk = mode->clock * 1000;
 	h_sync_width = mode->hsync_end - mode->hsync_start;
-	h_front_porch = mode->htotal - mode->hsync_end;
-	h_back_porch = mode->hsync_start - mode->hdisplay;
+	h_back_porch = mode->htotal - mode->hsync_end;
+	h_front_porch = mode->hsync_start - mode->hdisplay;
 
 	err = regulator_enable(hdmi->vdd);
 	if (err < 0) {
diff --git a/drivers/gpu/drm/tegra/host1x.c b/drivers/gpu/drm/tegra/host1x.c
index bdb97a564d82..5d17b113a6fc 100644
--- a/drivers/gpu/drm/tegra/host1x.c
+++ b/drivers/gpu/drm/tegra/host1x.c
@@ -239,6 +239,8 @@ int host1x_register_client(struct host1x *host1x, struct host1x_client *client)
 		}
 	}
 
+	client->host1x = host1x;
+
 	return 0;
 }
 
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 06d7f798a08c..0f4a366f6fa6 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -158,12 +158,29 @@ static inline struct drm_mm_node *drm_mm_get_block_atomic_range(
 	return drm_mm_get_block_range_generic(parent, size, alignment, 0,
 						start, end, 1);
 }
-extern int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node,
-			      unsigned long size, unsigned alignment);
+
+extern int drm_mm_insert_node(struct drm_mm *mm,
+			      struct drm_mm_node *node,
+			      unsigned long size,
+			      unsigned alignment);
 extern int drm_mm_insert_node_in_range(struct drm_mm *mm,
 				       struct drm_mm_node *node,
-				       unsigned long size, unsigned alignment,
-				       unsigned long start, unsigned long end);
+				       unsigned long size,
+				       unsigned alignment,
+				       unsigned long start,
+				       unsigned long end);
+extern int drm_mm_insert_node_generic(struct drm_mm *mm,
+				      struct drm_mm_node *node,
+				      unsigned long size,
+				      unsigned alignment,
+				      unsigned long color);
+extern int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
+				       struct drm_mm_node *node,
+				       unsigned long size,
+				       unsigned alignment,
+				       unsigned long color,
+				       unsigned long start,
+				       unsigned long end);
 extern void drm_mm_put_block(struct drm_mm_node *cur);
 extern void drm_mm_remove_node(struct drm_mm_node *node);
 extern void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index b746a3cf5fa9..c4d2e9c74002 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -307,6 +307,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_PRIME_VMAP_FLUSH	 21
 #define I915_PARAM_RSVD_FOR_FUTURE_USE	 22
 #define I915_PARAM_HAS_SECURE_BATCHES	 23
+#define I915_PARAM_HAS_PINNED_BATCHES	 24
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -677,6 +678,15 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_SECURE		(1<<9)
 
+/** Inform the kernel that the batch is and will always be pinned. This
+ * negates the requirement for a workaround to be performed to avoid
+ * an incoherent CS (such as can be found on 830/845). If this flag is
+ * not passed, the kernel will endeavour to make sure the batch is
+ * coherent with the CS before execution. If this flag is passed,
+ * userspace assumes the responsibility for ensuring the same.
+ */
+#define I915_EXEC_IS_PINNED		(1<<10)
+
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
 	(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK