Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm')
74 files changed, 3614 insertions, 2319 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/Kbuild
index 2832147b676c..e664378f6eda 100644
--- a/drivers/gpu/drm/nouveau/nvkm/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/Kbuild
@@ -1,3 +1,4 @@
 include $(src)/nvkm/core/Kbuild
+include $(src)/nvkm/falcon/Kbuild
 include $(src)/nvkm/subdev/Kbuild
 include $(src)/nvkm/engine/Kbuild
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/client.c b/drivers/gpu/drm/nouveau/nvkm/core/client.c
index e1943910858e..0d3a896892b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/client.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/client.c
@@ -31,6 +31,43 @@
 #include <nvif/if0000.h>
 #include <nvif/unpack.h>
 
+static int
+nvkm_uclient_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		 struct nvkm_object **pobject)
+{
+	union {
+		struct nvif_client_v0 v0;
+	} *args = argv;
+	struct nvkm_client *client;
+	int ret = -ENOSYS;
+
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))) {
+		args->v0.name[sizeof(args->v0.name) - 1] = 0;
+		ret = nvkm_client_new(args->v0.name, args->v0.device, NULL,
+				      NULL, oclass->client->ntfy, &client);
+		if (ret)
+			return ret;
+	} else
+		return ret;
+
+	client->object.client = oclass->client;
+	client->object.handle = oclass->handle;
+	client->object.route = oclass->route;
+	client->object.token = oclass->token;
+	client->object.object = oclass->object;
+	client->debug = oclass->client->debug;
+	*pobject = &client->object;
+	return 0;
+}
+
+const struct nvkm_sclass
+nvkm_uclient_sclass = {
+	.oclass = NVIF_CLASS_CLIENT,
+	.minver = 0,
+	.maxver = 0,
+	.ctor = nvkm_uclient_new,
+};
+
 struct nvkm_client_notify {
 	struct nvkm_client *client;
 	struct nvkm_notify n;
@@ -138,17 +175,30 @@ nvkm_client_notify_new(struct nvkm_object *object,
 	return ret;
 }
 
+static const struct nvkm_object_func nvkm_client;
+struct nvkm_client *
+nvkm_client_search(struct nvkm_client *client, u64 handle)
+{
+	struct nvkm_object *object;
+
+	object = nvkm_object_search(client, handle, &nvkm_client);
+	if (IS_ERR(object))
+		return (void *)object;
+
+	return nvkm_client(object);
+}
+
 static int
-nvkm_client_mthd_devlist(struct nvkm_object *object, void *data, u32 size)
+nvkm_client_mthd_devlist(struct nvkm_client *client, void *data, u32 size)
 {
 	union {
-		struct nv_client_devlist_v0 v0;
+		struct nvif_client_devlist_v0 v0;
 	} *args = data;
 	int ret = -ENOSYS;
 
-	nvif_ioctl(object, "client devlist size %d\n", size);
+	nvif_ioctl(&client->object, "client devlist size %d\n", size);
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
-		nvif_ioctl(object, "client devlist vers %d count %d\n",
+		nvif_ioctl(&client->object, "client devlist vers %d count %d\n",
 			   args->v0.version, args->v0.count);
 		if (size == sizeof(args->v0.device[0]) * args->v0.count) {
 			ret = nvkm_device_list(args->v0.device, args->v0.count);
@@ -167,9 +217,10 @@ nvkm_client_mthd_devlist(struct nvkm_object *object, void *data, u32 size)
 static int
 nvkm_client_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
+	struct nvkm_client *client = nvkm_client(object);
 	switch (mthd) {
-	case NV_CLIENT_DEVLIST:
-		return nvkm_client_mthd_devlist(object, data, size);
+	case NVIF_CLIENT_V0_DEVLIST:
+		return nvkm_client_mthd_devlist(client, data, size);
 	default:
 		break;
 	}
@@ -190,7 +241,8 @@ nvkm_client_child_get(struct nvkm_object *object, int index,
 	const struct nvkm_sclass *sclass;
 
 	switch (index) {
-	case 0: sclass = &nvkm_udevice_sclass; break;
+	case 0: sclass = &nvkm_uclient_sclass; break;
+	case 1: sclass = &nvkm_udevice_sclass; break;
 	default:
 		return -EINVAL;
 	}
@@ -200,110 +252,54 @@ nvkm_client_child_get(struct nvkm_object *object, int index,
 	return 0;
 }
 
-static const struct nvkm_object_func
-nvkm_client_object_func = {
-	.mthd = nvkm_client_mthd,
-	.sclass = nvkm_client_child_get,
-};
-
-void
-nvkm_client_remove(struct nvkm_client *client, struct nvkm_object *object)
-{
-	if (!RB_EMPTY_NODE(&object->node))
-		rb_erase(&object->node, &client->objroot);
-}
-
-bool
-nvkm_client_insert(struct nvkm_client *client, struct nvkm_object *object)
-{
-	struct rb_node **ptr = &client->objroot.rb_node;
-	struct rb_node *parent = NULL;
-
-	while (*ptr) {
-		struct nvkm_object *this =
-			container_of(*ptr, typeof(*this), node);
-		parent = *ptr;
-		if (object->object < this->object)
-			ptr = &parent->rb_left;
-		else
-		if (object->object > this->object)
-			ptr = &parent->rb_right;
-		else
-			return false;
-	}
-
-	rb_link_node(&object->node, parent, ptr);
-	rb_insert_color(&object->node, &client->objroot);
-	return true;
-}
-
-struct nvkm_object *
-nvkm_client_search(struct nvkm_client *client, u64 handle)
-{
-	struct rb_node *node = client->objroot.rb_node;
-	while (node) {
-		struct nvkm_object *object =
-			container_of(node, typeof(*object), node);
-		if (handle < object->object)
-			node = node->rb_left;
-		else
-		if (handle > object->object)
-			node = node->rb_right;
-		else
-			return object;
-	}
-	return NULL;
-}
-
-int
-nvkm_client_fini(struct nvkm_client *client, bool suspend)
+static int
+nvkm_client_fini(struct nvkm_object *object, bool suspend)
 {
-	struct nvkm_object *object = &client->object;
+	struct nvkm_client *client = nvkm_client(object);
 	const char *name[2] = { "fini", "suspend" };
 	int i;
 	nvif_debug(object, "%s notify\n", name[suspend]);
 	for (i = 0; i < ARRAY_SIZE(client->notify); i++)
 		nvkm_client_notify_put(client, i);
-	return nvkm_object_fini(&client->object, suspend);
-}
-
-int
-nvkm_client_init(struct nvkm_client *client)
-{
-	return nvkm_object_init(&client->object);
+	return 0;
 }
 
-void
-nvkm_client_del(struct nvkm_client **pclient)
+static void *
+nvkm_client_dtor(struct nvkm_object *object)
 {
-	struct nvkm_client *client = *pclient;
+	struct nvkm_client *client = nvkm_client(object);
 	int i;
-	if (client) {
-		nvkm_client_fini(client, false);
-		for (i = 0; i < ARRAY_SIZE(client->notify); i++)
-			nvkm_client_notify_del(client, i);
-		nvkm_object_dtor(&client->object);
-		kfree(*pclient);
-		*pclient = NULL;
-	}
+	for (i = 0; i < ARRAY_SIZE(client->notify); i++)
+		nvkm_client_notify_del(client, i);
+	return client;
 }
 
+static const struct nvkm_object_func
+nvkm_client = {
+	.dtor = nvkm_client_dtor,
+	.fini = nvkm_client_fini,
+	.mthd = nvkm_client_mthd,
+	.sclass = nvkm_client_child_get,
+};
+
 int
 nvkm_client_new(const char *name, u64 device, const char *cfg,
-		const char *dbg, struct nvkm_client **pclient)
+		const char *dbg,
+		int (*ntfy)(const void *, u32, const void *, u32),
+		struct nvkm_client **pclient)
 {
-	struct nvkm_oclass oclass = {};
+	struct nvkm_oclass oclass = { .base = nvkm_uclient_sclass };
 	struct nvkm_client *client;
 
 	if (!(client = *pclient = kzalloc(sizeof(*client), GFP_KERNEL)))
		return -ENOMEM;
 	oclass.client = client;
 
-	nvkm_object_ctor(&nvkm_client_object_func, &oclass, &client->object);
+	nvkm_object_ctor(&nvkm_client, &oclass, &client->object);
 	snprintf(client->name, sizeof(client->name), "%s", name);
 	client->device = device;
 	client->debug = nvkm_dbgopt(dbg, "CLIENT");
 	client->objroot = RB_ROOT;
-	client->dmaroot = RB_ROOT;
+	client->ntfy = ntfy;
 	return 0;
 }
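For orientation, a minimal sketch of a call site for the reworked nvkm_client_new() above; the callback name and body here are hypothetical, and error handling is elided:

	/* ntfy() is invoked for notify messages routed back to this client;
	 * the exact return-value contract lives in the nvkm notify code. */
	static int
	example_ntfy(const void *reply, u32 reply_size, const void *data, u32 size)
	{
		return 0; /* hypothetical: consume the notify */
	}

	struct nvkm_client *client;
	int ret = nvkm_client_new("example", device, NULL, NULL,
				  example_ntfy, &client);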
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
index ee8e5831fe37..b6c916954a10 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
@@ -27,6 +27,14 @@
 
 #include <subdev/fb.h>
 
+bool
+nvkm_engine_chsw_load(struct nvkm_engine *engine)
+{
+	if (engine->func->chsw_load)
+		return engine->func->chsw_load(engine);
+	return false;
+}
+
 void
 nvkm_engine_unref(struct nvkm_engine **pengine)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
index b0db51847c36..be19bbe56bba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
@@ -29,7 +29,8 @@
 #include <nvif/ioctl.h>
 
 static int
-nvkm_ioctl_nop(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_nop(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_nop_v0 v0;
@@ -46,7 +47,8 @@ nvkm_ioctl_nop(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_sclass(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_sclass(struct nvkm_client *client,
+		  struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_sclass_v0 v0;
@@ -78,12 +80,12 @@ nvkm_ioctl_sclass(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
+nvkm_ioctl_new(struct nvkm_client *client,
+	       struct nvkm_object *parent, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_new_v0 v0;
 	} *args = data;
-	struct nvkm_client *client = parent->client;
 	struct nvkm_object *object = NULL;
 	struct nvkm_oclass oclass;
 	int ret = -ENOSYS, i = 0;
@@ -104,9 +106,11 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 
 	do {
 		memset(&oclass, 0x00, sizeof(oclass));
-		oclass.client = client;
 		oclass.handle = args->v0.handle;
+		oclass.route = args->v0.route;
+		oclass.token = args->v0.token;
 		oclass.object = args->v0.object;
+		oclass.client = client;
 		oclass.parent = parent;
 		ret = parent->func->sclass(parent, i++, &oclass);
 		if (ret)
@@ -125,10 +129,7 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 	ret = nvkm_object_init(object);
 	if (ret == 0) {
 		list_add(&object->head, &parent->tree);
-		object->route = args->v0.route;
-		object->token = args->v0.token;
-		object->object = args->v0.object;
-		if (nvkm_client_insert(client, object)) {
+		if (nvkm_object_insert(object)) {
 			client->data = object;
 			return 0;
 		}
@@ -142,7 +143,8 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_del(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_del(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_del none;
@@ -156,11 +158,12 @@ nvkm_ioctl_del(struct nvkm_object *object, void *data, u32 size)
 		nvkm_object_del(&object);
 	}
 
-	return ret;
+	return ret ? ret : 1;
 }
 
 static int
-nvkm_ioctl_mthd(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_mthd(struct nvkm_client *client,
+		struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_mthd_v0 v0;
@@ -179,7 +182,8 @@ nvkm_ioctl_mthd(struct nvkm_object *object, void *data, u32 size)
 
 static int
-nvkm_ioctl_rd(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_rd(struct nvkm_client *client,
+	      struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_rd_v0 v0;
@@ -218,7 +222,8 @@ nvkm_ioctl_rd(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_wr(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_wr(struct nvkm_client *client,
+	      struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_wr_v0 v0;
@@ -246,7 +251,8 @@ nvkm_ioctl_wr(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_map(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_map(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_map_v0 v0;
@@ -264,7 +270,8 @@ nvkm_ioctl_map(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_unmap(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_unmap(struct nvkm_client *client,
+		 struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_unmap none;
@@ -280,7 +287,8 @@ nvkm_ioctl_unmap(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_new(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_new(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_ntfy_new_v0 v0;
@@ -306,9 +314,9 @@ nvkm_ioctl_ntfy_new(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_del(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_del(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_del_v0 v0;
 	} *args = data;
@@ -325,9 +333,9 @@ nvkm_ioctl_ntfy_del(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_get(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_get(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_get_v0 v0;
 	} *args = data;
@@ -344,9 +352,9 @@ nvkm_ioctl_ntfy_get(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_put(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_put(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_put_v0 v0;
 	} *args = data;
@@ -364,7 +372,7 @@ nvkm_ioctl_ntfy_put(struct nvkm_object *object, void *data, u32 size)
 
 static struct {
 	int version;
-	int (*func)(struct nvkm_object *, void *, u32);
+	int (*func)(struct nvkm_client *, struct nvkm_object *, void *, u32);
 }
 nvkm_ioctl_v0[] = {
 	{ 0x00, nvkm_ioctl_nop },
@@ -389,13 +397,10 @@ nvkm_ioctl_path(struct nvkm_client *client, u64 handle, u32 type,
 	struct nvkm_object *object;
 	int ret;
 
-	if (handle)
-		object = nvkm_client_search(client, handle);
-	else
-		object = &client->object;
-	if (unlikely(!object)) {
+	object = nvkm_object_search(client, handle, NULL);
+	if (IS_ERR(object)) {
 		nvif_ioctl(&client->object, "object not found\n");
-		return -ENOENT;
+		return PTR_ERR(object);
 	}
 
 	if (owner != NVIF_IOCTL_V0_OWNER_ANY && owner != object->route) {
@@ -407,7 +412,7 @@ nvkm_ioctl_path(struct nvkm_client *client, u64 handle, u32 type,
 
 	if (ret = -EINVAL, type < ARRAY_SIZE(nvkm_ioctl_v0)) {
 		if (nvkm_ioctl_v0[type].version == 0)
-			ret = nvkm_ioctl_v0[type].func(object, data, size);
+			ret = nvkm_ioctl_v0[type].func(client, object, data, size);
 	}
 
 	return ret;
@@ -436,12 +441,13 @@ nvkm_ioctl(struct nvkm_client *client, bool supervisor,
 					&args->v0.route, &args->v0.token);
 	}
 
-	nvif_ioctl(object, "return %d\n", ret);
-	if (hack) {
-		*hack = client->data;
-		client->data = NULL;
+	if (ret != 1) {
+		nvif_ioctl(object, "return %d\n", ret);
+		if (hack) {
+			*hack = client->data;
+			client->data = NULL;
+		}
 	}
-
 	client->super = false;
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/mm.c b/drivers/gpu/drm/nouveau/nvkm/core/mm.c
index 09a1eee8fd33..fd19d652a7ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/mm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/mm.c
@@ -147,6 +147,7 @@ nvkm_mm_head(struct nvkm_mm *mm, u8 heap, u8 type, u32 size_max, u32 size_min,
 		if (!this)
 			return -ENOMEM;
 
+		this->next = NULL;
 		this->type = type;
 		list_del(&this->fl_entry);
 		*pnode = this;
@@ -225,6 +226,7 @@ nvkm_mm_tail(struct nvkm_mm *mm, u8 heap, u8 type, u32 size_max, u32 size_min,
 		if (!this)
 			return -ENOMEM;
 
+		this->next = NULL;
 		this->type = type;
 		list_del(&this->fl_entry);
 		*pnode = this;
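The object.c hunk that follows centralises the per-client rb-tree bookkeeping that client.c and dma/base.c used to duplicate. The resulting lookup pattern, sketched here (the func pointer is whatever type the caller expects, or NULL for any; ERR_PTR semantics replace the old NULL returns):

	const struct nvkm_object_func *func = NULL; /* or &some_object_func */
	struct nvkm_object *object = nvkm_object_search(client, handle, func);
	if (IS_ERR(object))
		return PTR_ERR(object); /* -ENOENT, or -EINVAL on type mismatch */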
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c
index 67aa7223dcd7..89d2e9da11c7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/object.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c
@@ -25,6 +25,65 @@
 #include <core/client.h>
 #include <core/engine.h>
 
+struct nvkm_object *
+nvkm_object_search(struct nvkm_client *client, u64 handle,
+		   const struct nvkm_object_func *func)
+{
+	struct nvkm_object *object;
+
+	if (handle) {
+		struct rb_node *node = client->objroot.rb_node;
+		while (node) {
+			object = rb_entry(node, typeof(*object), node);
+			if (handle < object->object)
+				node = node->rb_left;
+			else
+			if (handle > object->object)
+				node = node->rb_right;
+			else
+				goto done;
+		}
+		return ERR_PTR(-ENOENT);
+	} else {
+		object = &client->object;
+	}
+
+done:
+	if (unlikely(func && object->func != func))
+		return ERR_PTR(-EINVAL);
+	return object;
+}
+
+void
+nvkm_object_remove(struct nvkm_object *object)
+{
+	if (!RB_EMPTY_NODE(&object->node))
+		rb_erase(&object->node, &object->client->objroot);
+}
+
+bool
+nvkm_object_insert(struct nvkm_object *object)
+{
+	struct rb_node **ptr = &object->client->objroot.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*ptr) {
+		struct nvkm_object *this = rb_entry(*ptr, typeof(*this), node);
+		parent = *ptr;
+		if (object->object < this->object)
+			ptr = &parent->rb_left;
+		else
+		if (object->object > this->object)
+			ptr = &parent->rb_right;
+		else
+			return false;
+	}
+
+	rb_link_node(&object->node, parent, ptr);
+	rb_insert_color(&object->node, &object->client->objroot);
+	return true;
+}
+
 int
 nvkm_object_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
@@ -214,7 +273,7 @@ nvkm_object_del(struct nvkm_object **pobject)
 	struct nvkm_object *object = *pobject;
 	if (object && !WARN_ON(!object->func)) {
 		*pobject = nvkm_object_dtor(object);
-		nvkm_client_remove(object->client, object);
+		nvkm_object_remove(object);
 		list_del(&object->head);
 		kfree(*pobject);
 		*pobject = NULL;
@@ -230,6 +289,9 @@ nvkm_object_ctor(const struct nvkm_object_func *func,
 	object->engine = nvkm_engine_ref(oclass->engine);
 	object->oclass = oclass->base.oclass;
 	object->handle = oclass->handle;
+	object->route = oclass->route;
+	object->token = oclass->token;
+	object->object = oclass->object;
 	INIT_LIST_HEAD(&object->head);
 	INIT_LIST_HEAD(&object->tree);
 	RB_CLEAR_NODE(&object->node);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index cceda959b47c..273562dd6bbd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -993,7 +993,7 @@ nv92_chipset = {
 	.mc = g84_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = g84_pci_new,
+	.pci = g92_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -2138,6 +2138,7 @@ nv12b_chipset = {
 	.ltc = gm200_ltc_new,
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
+	.pmu = gm20b_pmu_new,
 	.secboot = gm20b_secboot_new,
 	.timer = gk20a_timer_new,
 	.top = gk104_top_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
index 0a1381a84552..070ec5e18fdb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
@@ -137,7 +137,6 @@ nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func,
 		    const struct nvkm_oclass *oclass,
 		    struct nvkm_object **pobject)
 {
-	struct nvkm_device *device = root->disp->base.engine.subdev.device;
 	struct nvkm_client *client = oclass->client;
 	struct nvkm_dmaobj *dmaobj;
 	struct nv50_disp_dmac *chan;
@@ -153,9 +152,9 @@ nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func,
 	if (ret)
 		return ret;
 
-	dmaobj = nvkm_dma_search(device->dma, client, push);
-	if (!dmaobj)
-		return -ENOENT;
+	dmaobj = nvkm_dmaobj_search(client, push);
+	if (IS_ERR(dmaobj))
+		return PTR_ERR(dmaobj);
 
 	if (dmaobj->limit - dmaobj->start != 0xfff)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
index 6f0436df0219..f8f2f16c22a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
@@ -59,7 +59,7 @@ gt215_hda_eld(NV50_DISP_MTHD_V1)
 		);
 	}
 	for (i = 0; i < size; i++)
-		nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[0]);
+		nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[i]);
 	for (; i < 0x60; i++)
 		nvkm_wr32(device, 0x61c440 + soff, (i << 8));
 	nvkm_mask(device, 0x61c448 + soff, 0x80000003, 0x80000003);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index 567466f93cd5..0db8efbf1c2e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -433,8 +433,6 @@ nv50_disp_dptmds_war(struct nvkm_device *device)
 	case 0x94:
 	case 0x96:
 	case 0x98:
-	case 0xaa:
-	case 0xac:
 		return true;
 	default:
 		break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
index 4510cb6e10a8..627b9ee1ddd2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
@@ -39,13 +39,6 @@ g94_sor_loff(struct nvkm_output_dp *outp)
 }
 
 /*******************************************************************************
- * TMDS/LVDS
- ******************************************************************************/
-static const struct nvkm_output_func
-g94_sor_output_func = {
-};
-
-/*******************************************************************************
  * DisplayPort
  ******************************************************************************/
 u32
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
index f11ebdd16c77..11b7b8fd5dda 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
@@ -28,24 +28,6 @@
 
 #include <nvif/class.h>
 
-struct nvkm_dmaobj *
-nvkm_dma_search(struct nvkm_dma *dma, struct nvkm_client *client, u64 object)
-{
-	struct rb_node *node = client->dmaroot.rb_node;
-	while (node) {
-		struct nvkm_dmaobj *dmaobj =
-			container_of(node, typeof(*dmaobj), rb);
-		if (object < dmaobj->handle)
-			node = node->rb_left;
-		else
-		if (object > dmaobj->handle)
-			node = node->rb_right;
-		else
-			return dmaobj;
-	}
-	return NULL;
-}
-
 static int
 nvkm_dma_oclass_new(struct nvkm_device *device,
 		    const struct nvkm_oclass *oclass, void *data, u32 size,
@@ -53,34 +35,12 @@ nvkm_dma_oclass_new(struct nvkm_device *device,
 {
 	struct nvkm_dma *dma = nvkm_dma(oclass->engine);
 	struct nvkm_dmaobj *dmaobj = NULL;
-	struct nvkm_client *client = oclass->client;
-	struct rb_node **ptr = &client->dmaroot.rb_node;
-	struct rb_node *parent = NULL;
 	int ret;
 
 	ret = dma->func->class_new(dma, oclass, data, size, &dmaobj);
 	if (dmaobj)
 		*pobject = &dmaobj->object;
-	if (ret)
-		return ret;
-
-	dmaobj->handle = oclass->object;
-
-	while (*ptr) {
-		struct nvkm_dmaobj *obj = container_of(*ptr, typeof(*obj), rb);
-		parent = *ptr;
-		if (dmaobj->handle < obj->handle)
-			ptr = &parent->rb_left;
-		else
-		if (dmaobj->handle > obj->handle)
-			ptr = &parent->rb_right;
-		else
-			return -EEXIST;
-	}
-
-	rb_link_node(&dmaobj->rb, parent, ptr);
-	rb_insert_color(&dmaobj->rb, &client->dmaroot);
-	return 0;
+	return ret;
 }
 
 static const struct nvkm_device_oclass
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
index 13c661b1ef14..d20cc0681a88 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
@@ -31,6 +31,19 @@
 #include <nvif/cl0002.h>
 #include <nvif/unpack.h>
 
+static const struct nvkm_object_func nvkm_dmaobj_func;
+struct nvkm_dmaobj *
+nvkm_dmaobj_search(struct nvkm_client *client, u64 handle)
+{
+	struct nvkm_object *object;
+
+	object = nvkm_object_search(client, handle, &nvkm_dmaobj_func);
+	if (IS_ERR(object))
+		return (void *)object;
+
+	return nvkm_dmaobj(object);
+}
+
 static int
 nvkm_dmaobj_bind(struct nvkm_object *base, struct nvkm_gpuobj *gpuobj,
 		 int align, struct nvkm_gpuobj **pgpuobj)
@@ -42,10 +55,7 @@ nvkm_dmaobj_bind(struct nvkm_object *base, struct nvkm_gpuobj *gpuobj,
 static void *
 nvkm_dmaobj_dtor(struct nvkm_object *base)
 {
-	struct nvkm_dmaobj *dmaobj = nvkm_dmaobj(base);
-	if (!RB_EMPTY_NODE(&dmaobj->rb))
-		rb_erase(&dmaobj->rb, &dmaobj->object.client->dmaroot);
-	return dmaobj;
+	return nvkm_dmaobj(base);
 }
 
 static const struct nvkm_object_func
@@ -74,7 +84,6 @@ nvkm_dmaobj_ctor(const struct nvkm_dmaobj_func *func, struct nvkm_dma *dma,
 	nvkm_object_ctor(&nvkm_dmaobj_func, oclass, &dmaobj->object);
 	dmaobj->func = func;
 	dmaobj->dma = dma;
-	RB_CLEAR_NODE(&dmaobj->rb);
 
 	nvif_ioctl(parent, "create dma size %d\n", *psize);
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
index 1c9682ae3a6b..660ca7aa95ea 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
@@ -32,6 +32,17 @@
 #include <nvif/unpack.h>
 
 void
+nvkm_fifo_recover_chan(struct nvkm_fifo *fifo, int chid)
+{
+	unsigned long flags;
+	if (WARN_ON(!fifo->func->recover_chan))
+		return;
+	spin_lock_irqsave(&fifo->lock, flags);
+	fifo->func->recover_chan(fifo, chid);
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+void
 nvkm_fifo_pause(struct nvkm_fifo *fifo, unsigned long *flags)
 {
 	return fifo->func->pause(fifo, flags);
@@ -55,19 +66,29 @@ nvkm_fifo_chan_put(struct nvkm_fifo *fifo, unsigned long flags,
 }
 
 struct nvkm_fifo_chan *
-nvkm_fifo_chan_inst(struct nvkm_fifo *fifo, u64 inst, unsigned long *rflags)
+nvkm_fifo_chan_inst_locked(struct nvkm_fifo *fifo, u64 inst)
 {
 	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	spin_lock_irqsave(&fifo->lock, flags);
 	list_for_each_entry(chan, &fifo->chan, head) {
 		if (chan->inst->addr == inst) {
 			list_del(&chan->head);
 			list_add(&chan->head, &fifo->chan);
-			*rflags = flags;
 			return chan;
 		}
 	}
+	return NULL;
+}
+
+struct nvkm_fifo_chan *
+nvkm_fifo_chan_inst(struct nvkm_fifo *fifo, u64 inst, unsigned long *rflags)
+{
+	struct nvkm_fifo_chan *chan;
+	unsigned long flags;
+	spin_lock_irqsave(&fifo->lock, flags);
+	if ((chan = nvkm_fifo_chan_inst_locked(fifo, inst))) {
+		*rflags = flags;
+		return chan;
+	}
 	spin_unlock_irqrestore(&fifo->lock, flags);
 	return NULL;
 }
@@ -90,9 +111,34 @@ nvkm_fifo_chan_chid(struct nvkm_fifo *fifo, int chid, unsigned long *rflags)
 	return NULL;
 }
 
+void
+nvkm_fifo_kevent(struct nvkm_fifo *fifo, int chid)
+{
+	nvkm_event_send(&fifo->kevent, 1, chid, NULL, 0);
+}
+
 static int
-nvkm_fifo_event_ctor(struct nvkm_object *object, void *data, u32 size,
-		     struct nvkm_notify *notify)
+nvkm_fifo_kevent_ctor(struct nvkm_object *object, void *data, u32 size,
+		      struct nvkm_notify *notify)
+{
+	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
+	if (size == 0) {
+		notify->size = 0;
+		notify->types = 1;
+		notify->index = chan->chid;
+		return 0;
+	}
+	return -ENOSYS;
+}
+
+static const struct nvkm_event_func
+nvkm_fifo_kevent_func = {
+	.ctor = nvkm_fifo_kevent_ctor,
+};
+
+static int
+nvkm_fifo_cevent_ctor(struct nvkm_object *object, void *data, u32 size,
+		      struct nvkm_notify *notify)
 {
 	if (size == 0) {
 		notify->size = 0;
@@ -104,10 +150,16 @@ nvkm_fifo_event_ctor(struct nvkm_object *object, void *data, u32 size,
 }
 
 static const struct nvkm_event_func
-nvkm_fifo_event_func = {
-	.ctor = nvkm_fifo_event_ctor,
+nvkm_fifo_cevent_func = {
+	.ctor = nvkm_fifo_cevent_ctor,
 };
 
+void
+nvkm_fifo_cevent(struct nvkm_fifo *fifo)
+{
+	nvkm_event_send(&fifo->cevent, 1, 0, NULL, 0);
+}
+
 static void
 nvkm_fifo_uevent_fini(struct nvkm_event *event, int type, int index)
 {
@@ -241,6 +293,7 @@ nvkm_fifo_dtor(struct nvkm_engine *engine)
 	void *data = fifo;
 	if (fifo->func->dtor)
 		data = fifo->func->dtor(fifo);
+	nvkm_event_fini(&fifo->kevent);
 	nvkm_event_fini(&fifo->cevent);
 	nvkm_event_fini(&fifo->uevent);
 	return data;
@@ -283,5 +336,9 @@ nvkm_fifo_ctor(const struct nvkm_fifo_func *func, struct nvkm_device *device,
 			return ret;
 	}
 
-	return nvkm_event_init(&nvkm_fifo_event_func, 1, 1, &fifo->cevent);
+	ret = nvkm_event_init(&nvkm_fifo_cevent_func, 1, 1, &fifo->cevent);
+	if (ret)
+		return ret;
+
+	return nvkm_event_init(&nvkm_fifo_kevent_func, 1, nr, &fifo->kevent);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
index dc6d4678f228..fab760ae922f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
@@ -371,9 +371,9 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
 
 	/* allocate push buffer ctxdma instance */
 	if (push) {
-		dmaobj = nvkm_dma_search(device->dma, oclass->client, push);
-		if (!dmaobj)
-			return -ENOENT;
+		dmaobj = nvkm_dmaobj_search(client, push);
+		if (IS_ERR(dmaobj))
+			return PTR_ERR(dmaobj);
 
 		ret = nvkm_object_bind(&dmaobj->object, chan->inst, -16,
 				       &chan->push);
@@ -410,6 +410,6 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
 			base + user * chan->chid;
 	chan->size = user;
-	nvkm_event_send(&fifo->cevent, 1, 0, NULL, 0);
+	nvkm_fifo_cevent(fifo);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
index 55dc415c5c08..d8019bdacd61 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
@@ -29,5 +29,5 @@ struct nvkm_fifo_chan_oclass {
 	struct nvkm_sclass base;
 };
 
-int g84_fifo_chan_ntfy(struct nvkm_fifo_chan *, u32, struct nvkm_event **);
+int gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *, u32, struct nvkm_event **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
index 15a992b3580a..61797c4dd07a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
@@ -30,12 +30,12 @@
 
 #include <nvif/cl826e.h>
 
-int
+static int
 g84_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type,
 		   struct nvkm_event **pevent)
 {
 	switch (type) {
-	case G82_CHANNEL_DMA_V0_NTFY_UEVENT:
+	case NV826E_V0_NTFY_NON_STALL_INTERRUPT:
 		*pevent = &chan->fifo->uevent;
 		return 0;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
index ec68ea9747d5..cd468ab1db12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
@@ -68,7 +68,14 @@ gf100_fifo_runlist_commit(struct gf100_fifo *fifo)
 	}
 	nvkm_done(cur);
 
-	target = (nvkm_memory_target(cur) == NVKM_MEM_TARGET_HOST) ? 0x3 : 0x0;
+	switch (nvkm_memory_target(cur)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		mutex_unlock(&subdev->mutex);
+		WARN_ON(1);
+		return;
+	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(cur) >> 12) |
 				    (target << 28));
@@ -183,6 +190,7 @@ gf100_fifo_recover(struct gf100_fifo *fifo, struct nvkm_engine *engine,
 	if (engine != &fifo->base.engine)
 		fifo->recover.mask |= 1ULL << engine->subdev.index;
 	schedule_work(&fifo->recover.work);
+	nvkm_fifo_kevent(&fifo->base, chid);
 }
 
 static const struct nvkm_enum
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 38c0910722c0..3a24788c3185 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -27,11 +27,71 @@
 #include <core/client.h>
 #include <core/gpuobj.h>
 #include <subdev/bar.h>
+#include <subdev/timer.h>
 #include <subdev/top.h>
 #include <engine/sw.h>
 
 #include <nvif/class.h>
 
+struct gk104_fifo_engine_status {
+	bool busy;
+	bool faulted;
+	bool chsw;
+	bool save;
+	bool load;
+	struct {
+		bool tsg;
+		u32 id;
+	} prev, next, *chan;
+};
+
+static void
+gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn,
+			 struct gk104_fifo_engine_status *status)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
+
+	status->busy = !!(stat & 0x80000000);
+	status->faulted = !!(stat & 0x40000000);
+	status->next.tsg = !!(stat & 0x10000000);
+	status->next.id = (stat & 0x0fff0000) >> 16;
+	status->chsw = !!(stat & 0x00008000);
+	status->save = !!(stat & 0x00004000);
+	status->load = !!(stat & 0x00002000);
+	status->prev.tsg = !!(stat & 0x00001000);
+	status->prev.id = (stat & 0x00000fff);
+	status->chan = NULL;
+
+	if (status->busy && status->chsw) {
+		if (status->load && status->save) {
+			if (engine && nvkm_engine_chsw_load(engine))
+				status->chan = &status->next;
+			else
+				status->chan = &status->prev;
+		} else
+		if (status->load) {
+			status->chan = &status->next;
+		} else {
+			status->chan = &status->prev;
+		}
+	} else
+	if (status->load) {
+		status->chan = &status->prev;
+	}
+
+	nvkm_debug(subdev, "engine %02d: busy %d faulted %d chsw %d "
+			   "save %d load %d %sid %d%s-> %sid %d%s\n",
+		   engn, status->busy, status->faulted,
+		   status->chsw, status->save, status->load,
+		   status->prev.tsg ? "tsg" : "ch", status->prev.id,
+		   status->chan == &status->prev ? "*" : " ",
+		   status->next.tsg ? "tsg" : "ch", status->next.id,
+		   status->chan == &status->next ? "*" : " ");
"*" : " "); +} + static int gk104_fifo_class_get(struct nvkm_fifo *base, int index, const struct nvkm_fifo_chan_oclass **psclass) @@ -83,10 +143,13 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl) } nvkm_done(mem); - if (nvkm_memory_target(mem) == NVKM_MEM_TARGET_VRAM) - target = 0; - else - target = 3; + switch (nvkm_memory_target(mem)) { + case NVKM_MEM_TARGET_VRAM: target = 0; break; + case NVKM_MEM_TARGET_NCOH: target = 3; break; + default: + WARN_ON(1); + return; + } nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) | (target << 28)); @@ -149,31 +212,137 @@ gk104_fifo_recover_work(struct work_struct *w) nvkm_mask(device, 0x002630, runm, 0x00000000); } +static void gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn); + static void -gk104_fifo_recover(struct gk104_fifo *fifo, struct nvkm_engine *engine, - struct gk104_fifo_chan *chan) +gk104_fifo_recover_runl(struct gk104_fifo *fifo, int runl) { struct nvkm_subdev *subdev = &fifo->base.engine.subdev; struct nvkm_device *device = subdev->device; - u32 chid = chan->base.chid; - int engn; + const u32 runm = BIT(runl); - nvkm_error(subdev, "%s engine fault on channel %d, recovering...\n", - nvkm_subdev_name[engine->subdev.index], chid); assert_spin_locked(&fifo->base.lock); + if (fifo->recover.runm & runm) + return; + fifo->recover.runm |= runm; - nvkm_mask(device, 0x800004 + (chid * 0x08), 0x00000800, 0x00000800); - list_del_init(&chan->head); - chan->killed = true; + /* Block runlist to prevent channel assignment(s) from changing. */ + nvkm_mask(device, 0x002630, runm, runm); - for (engn = 0; engn < fifo->engine_nr; engn++) { - if (fifo->engine[engn].engine == engine) { - fifo->recover.engm |= BIT(engn); + /* Schedule recovery. */ + nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl); + schedule_work(&fifo->recover.work); +} + +static void +gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid) +{ + struct gk104_fifo *fifo = gk104_fifo(base); + struct nvkm_subdev *subdev = &fifo->base.engine.subdev; + struct nvkm_device *device = subdev->device; + const u32 stat = nvkm_rd32(device, 0x800004 + (chid * 0x08)); + const u32 runl = (stat & 0x000f0000) >> 16; + const bool used = (stat & 0x00000001); + unsigned long engn, engm = fifo->runlist[runl].engm; + struct gk104_fifo_chan *chan; + + assert_spin_locked(&fifo->base.lock); + if (!used) + return; + + /* Lookup SW state for channel, and mark it as dead. */ + list_for_each_entry(chan, &fifo->runlist[runl].chan, head) { + if (chan->base.chid == chid) { + list_del_init(&chan->head); + chan->killed = true; + nvkm_fifo_kevent(&fifo->base, chid); break; } } - fifo->recover.runm |= BIT(chan->runl); + /* Disable channel. */ + nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800); + nvkm_warn(subdev, "channel %d: killed\n", chid); + + /* Block channel assignments from changing during recovery. */ + gk104_fifo_recover_runl(fifo, runl); + + /* Schedule recovery for any engines the channel is on. 
+	for_each_set_bit(engn, &engm, fifo->engine_nr) {
+		struct gk104_fifo_engine_status status;
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.chan || status.chan->id != chid)
+			continue;
+		gk104_fifo_recover_engn(fifo, engn);
+	}
+}
+
+static void
+gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 runl = fifo->engine[engn].runl;
+	const u32 engm = BIT(engn);
+	struct gk104_fifo_engine_status status;
+	int mmui = -1;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.engm & engm)
+		return;
+	fifo->recover.engm |= engm;
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Determine which channel (if any) is currently on the engine. */
+	gk104_fifo_engine_status(fifo, engn, &status);
+	if (status.chan) {
+		/* The channel is no longer viable, kill it. */
+		gk104_fifo_recover_chan(&fifo->base, status.chan->id);
+	}
+
+	/* Determine MMU fault ID for the engine, if we're not being
+	 * called from the fault handler already.
+	 */
+	if (!status.faulted && engine) {
+		mmui = nvkm_top_fault_id(device, engine->subdev.index);
+		if (mmui < 0) {
+			const struct nvkm_enum *en = fifo->func->fault.engine;
+			for (; en && en->name; en++) {
+				if (en->data2 == engine->subdev.index) {
+					mmui = en->value;
+					break;
+				}
+			}
+		}
+		WARN_ON(mmui < 0);
+	}
+
+	/* Trigger a MMU fault for the engine.
+	 *
+	 * It's not clear why this is needed, but nvgpu does something
+	 * similar, and it makes recovery from CTXSW_TIMEOUT a lot more
+	 * reliable.
+	 */
+	if (mmui >= 0) {
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui);
+
+		/* Wait for fault to trigger. */
+		nvkm_msec(device, 2000,
+			gk104_fifo_engine_status(fifo, engn, &status);
+			if (status.faulted)
+				break;
+		);
+
+		/* Release MMU fault trigger, and ACK the fault. */
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000);
+		nvkm_wr32(device, 0x00259c, BIT(mmui));
+		nvkm_wr32(device, 0x002100, 0x10000000);
+	}
+
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
 	schedule_work(&fifo->recover.work);
 }
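In brief, the recovery flow these hunks introduce (a summary, not part of the patch):

	/*
	 * CTXSW_TIMEOUT or MMU fault
	 *   -> gk104_fifo_recover_chan(): mark the channel killed, disable
	 *      it, then recover its runlist and any engine it sits on.
	 *   -> gk104_fifo_recover_runl(): block the runlist so channel
	 *      assignments cannot change, and schedule recover_work.
	 *   -> gk104_fifo_recover_engn(): kill the resident channel, and
	 *      trigger/ACK an MMU fault to force the engine off it.
	 * gk104_fifo_recover_work() then performs the actual engine resets
	 * and recommits the affected runlists.
	 */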
@@ -211,34 +380,30 @@ static void
 gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo)
 {
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct gk104_fifo_chan *chan;
-	unsigned long flags;
+	unsigned long flags, engm = 0;
 	u32 engn;
 
+	/* We need to ACK the SCHED_ERROR here, and prevent it reasserting,
+	 * as MMU_FAULT cannot be triggered while it's pending.
+	 */
 	spin_lock_irqsave(&fifo->base.lock, flags);
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000000);
+	nvkm_wr32(device, 0x002100, 0x00000100);
+
 	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		struct nvkm_engine *engine = fifo->engine[engn].engine;
-		int runl = fifo->engine[engn].runl;
-		u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
-		u32 busy = (stat & 0x80000000);
-		u32 next = (stat & 0x0fff0000) >> 16;
-		u32 chsw = (stat & 0x00008000);
-		u32 save = (stat & 0x00004000);
-		u32 load = (stat & 0x00002000);
-		u32 prev = (stat & 0x00000fff);
-		u32 chid = load ? next : prev;
-		(void)save;
-
-		if (!busy || !chsw)
+		struct gk104_fifo_engine_status status;
+
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.busy || !status.chsw)
 			continue;
 
-		list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-			if (chan->base.chid == chid && engine) {
-				gk104_fifo_recover(fifo, engine, chan);
-				break;
-			}
-		}
+		engm |= BIT(engn);
 	}
 
+	for_each_set_bit(engn, &engm, fifo->engine_nr)
+		gk104_fifo_recover_engn(fifo, engn);
+
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000100);
 	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
@@ -301,6 +466,7 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 	struct nvkm_fifo_chan *chan;
 	unsigned long flags;
 	char gpcid[8] = "", en[16] = "";
+	int engn;
 
 	er = nvkm_enum_find(fifo->func->fault.reason, reason);
 	eu = nvkm_enum_find(fifo->func->fault.engine, unit);
@@ -342,7 +508,8 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		snprintf(en, sizeof(en), "%s", eu->name);
 	}
 
-	chan = nvkm_fifo_chan_inst(&fifo->base, (u64)inst << 12, &flags);
+	spin_lock_irqsave(&fifo->base.lock, flags);
+	chan = nvkm_fifo_chan_inst_locked(&fifo->base, (u64)inst << 12);
 
 	nvkm_error(subdev,
 		   "%s fault at %010llx engine %02x [%s] client %02x [%s%s] "
@@ -353,9 +520,23 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		   (u64)inst << 12,
 		   chan ? chan->object.client->name : "unknown");
 
-	if (engine && chan)
-		gk104_fifo_recover(fifo, engine, (void *)chan);
-	nvkm_fifo_chan_put(&fifo->base, flags, &chan);
+
+	/* Kill the channel that caused the fault. */
+	if (chan)
+		gk104_fifo_recover_chan(&fifo->base, chan->chid);
+
+	/* Channel recovery will probably have already done this for the
+	 * correct engine(s), but just in case we can't find the channel
+	 * information...
+	 */
+	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
+		if (fifo->engine[engn].engine == engine) {
+			gk104_fifo_recover_engn(fifo, engn);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
 static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = {
@@ -716,6 +897,7 @@ gk104_fifo_ = {
 	.intr = gk104_fifo_intr,
 	.uevent_init = gk104_fifo_uevent_init,
 	.uevent_fini = gk104_fifo_uevent_fini,
+	.recover_chan = gk104_fifo_recover_chan,
 	.class_get = gk104_fifo_class_get,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
index 12d964260a29..f9e0377d3d24 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
@@ -32,6 +32,23 @@
 #include <nvif/cl906f.h>
 #include <nvif/unpack.h>
 
+int
+gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type,
+		     struct nvkm_event **pevent)
+{
+	switch (type) {
+	case NV906F_V0_NTFY_NON_STALL_INTERRUPT:
+		*pevent = &chan->fifo->uevent;
+		return 0;
+	case NV906F_V0_NTFY_KILLED:
+		*pevent = &chan->fifo->kevent;
+		return 0;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
 static u32
 gf100_fifo_gpfifo_engine_addr(struct nvkm_engine *engine)
 {
@@ -184,7 +201,7 @@ gf100_fifo_gpfifo_func = {
 	.dtor = gf100_fifo_gpfifo_dtor,
 	.init = gf100_fifo_gpfifo_init,
 	.fini = gf100_fifo_gpfifo_fini,
-	.ntfy = g84_fifo_chan_ntfy,
+	.ntfy = gf100_fifo_chan_ntfy,
 	.engine_ctor = gf100_fifo_gpfifo_engine_ctor,
 	.engine_dtor = gf100_fifo_gpfifo_engine_dtor,
 	.engine_init = gf100_fifo_gpfifo_engine_init,
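gf100_fifo_chan_ntfy() above exposes the new kevent as NV906F_V0_NTFY_KILLED. A sketch of how an nvif-side user might subscribe to it, assuming the usual nvif_notify_init() helper (handler and field names hypothetical):

	ret = nvif_notify_init(&chan->user, channel_killed_handler, true,
			       NV906F_V0_NTFY_KILLED, NULL, 0, 0, &chan->kill);
	if (ret == 0)
		nvif_notify_get(&chan->kill);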
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
index a2df4f3e7763..8abf6f8ef445 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
@@ -50,6 +50,7 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan)
 	    ) < 0) {
 		nvkm_error(subdev, "channel %d [%s] kick timeout\n",
 			   chan->base.chid, client->name);
+		nvkm_fifo_recover_chan(&fifo->base, chan->base.chid);
 		ret = -ETIMEDOUT;
 	}
 	mutex_unlock(&subdev->mutex);
@@ -213,7 +214,7 @@ gk104_fifo_gpfifo_func = {
 	.dtor = gk104_fifo_gpfifo_dtor,
 	.init = gk104_fifo_gpfifo_init,
 	.fini = gk104_fifo_gpfifo_fini,
-	.ntfy = g84_fifo_chan_ntfy,
+	.ntfy = gf100_fifo_chan_ntfy,
 	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
 	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
 	.engine_init = gk104_fifo_gpfifo_engine_init,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
index f6dfb37d9429..f889b13b5e41 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
@@ -6,6 +6,12 @@ int nvkm_fifo_ctor(const struct nvkm_fifo_func *, struct nvkm_device *,
 		   int index, int nr, struct nvkm_fifo *);
 void nvkm_fifo_uevent(struct nvkm_fifo *);
+void nvkm_fifo_cevent(struct nvkm_fifo *);
+void nvkm_fifo_kevent(struct nvkm_fifo *, int chid);
+void nvkm_fifo_recover_chan(struct nvkm_fifo *, int chid);
+
+struct nvkm_fifo_chan *
+nvkm_fifo_chan_inst_locked(struct nvkm_fifo *, u64 inst);
 
 struct nvkm_fifo_chan_oclass;
 struct nvkm_fifo_func {
@@ -18,6 +24,7 @@ struct nvkm_fifo_func {
 	void (*start)(struct nvkm_fifo *, unsigned long *);
 	void (*uevent_init)(struct nvkm_fifo *);
 	void (*uevent_fini)(struct nvkm_fifo *);
+	void (*recover_chan)(struct nvkm_fifo *, int chid);
 	int (*class_get)(struct nvkm_fifo *, int index,
 			 const struct nvkm_fifo_chan_oclass **);
 	const struct nvkm_fifo_chan_oclass *chan[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
index 467065d1b4e6..cd8cf6f7024c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
@@ -25,6 +25,15 @@
 
 #include <engine/fifo.h>
 
+static bool
+nvkm_gr_chsw_load(struct nvkm_engine *engine)
+{
+	struct nvkm_gr *gr = nvkm_gr(engine);
+	if (gr->func->chsw_load)
+		return gr->func->chsw_load(gr);
+	return false;
+}
+
 static void
 nvkm_gr_tile(struct nvkm_engine *engine, int region, struct nvkm_fb_tile *tile)
 {
@@ -106,6 +115,15 @@ nvkm_gr_init(struct nvkm_engine *engine)
 	return gr->func->init(gr);
 }
 
+static int
+nvkm_gr_fini(struct nvkm_engine *engine, bool suspend)
+{
+	struct nvkm_gr *gr = nvkm_gr(engine);
+	if (gr->func->fini)
+		return gr->func->fini(gr, suspend);
+	return 0;
+}
+
 static void *
 nvkm_gr_dtor(struct nvkm_engine *engine)
 {
@@ -120,8 +138,10 @@ nvkm_gr = {
 	.dtor = nvkm_gr_dtor,
 	.oneinit = nvkm_gr_oneinit,
 	.init = nvkm_gr_init,
+	.fini = nvkm_gr_fini,
 	.intr = nvkm_gr_intr,
 	.tile = nvkm_gr_tile,
+	.chsw_load = nvkm_gr_chsw_load,
 	.fifo.cclass = nvkm_gr_cclass_new,
 	.fifo.sclass = nvkm_gr_oclass_get,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
index ce913300539f..da1ba74682b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
@@ -25,6 +25,8 @@
 
 #include <subdev/timer.h>
 
+#include <nvif/class.h>
+
 static const struct nvkm_bitfield nv50_gr_status[] = {
 	{ 0x00000001, "BUSY" }, /* set when any bit is set */
 	{ 0x00000002, "DISPATCH" },
@@ -180,11 +182,11 @@ g84_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8297, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, G82_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index f65a5b0a1a4d..f9acb8a944d2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -702,6 +702,22 @@ gf100_gr_pack_mmio[] = {
  * PGRAPH engine/subdev functions
  ******************************************************************************/
 
+static bool
+gf100_gr_chsw_load(struct nvkm_gr *base)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+	if (!gr->firmware) {
+		u32 trace = nvkm_rd32(gr->base.engine.subdev.device, 0x40981c);
+		if (trace & 0x00000040)
+			return true;
+	} else {
+		u32 mthd = nvkm_rd32(gr->base.engine.subdev.device, 0x409808);
+		if (mthd & 0x00080000)
+			return true;
+	}
+	return false;
+}
+
 int
 gf100_gr_rops(struct gf100_gr *gr)
 {
@@ -1136,7 +1152,7 @@ gf100_gr_trap_intr(struct gf100_gr *gr)
 
 	if (trap & 0x00000008) {
 		u32 stat = nvkm_rd32(device, 0x408030);
-		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
+		nvkm_snprintbf(error, sizeof(error), gf100_ccache_error,
 			       stat & 0x3fffffff);
 		nvkm_error(subdev, "CCACHE %08x [%s]\n", stat, error);
 		nvkm_wr32(device, 0x408030, 0xc0000000);
@@ -1391,26 +1407,11 @@ gf100_gr_intr(struct nvkm_gr *base)
 }
 
 static void
-gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
+gf100_gr_init_fw(struct nvkm_falcon *falcon,
 		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
 {
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int i;
-
-	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
-	for (i = 0; i < data->size / 4; i++)
-		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
-
-	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
-	for (i = 0; i < code->size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
-		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
-	}
-
-	/* code must be padded to 0x40 words */
-	for (; i & 0x3f; i++)
-		nvkm_wr32(device, fuc_base + 0x0184, 0);
+	nvkm_falcon_load_dmem(falcon, data->data, 0x0, data->size, 0);
+	nvkm_falcon_load_imem(falcon, code->data, 0x0, code->size, 0, 0, false);
 }
 
 static void
@@ -1455,162 +1456,149 @@ gf100_gr_init_csdata(struct gf100_gr *gr,
 	nvkm_wr32(device, falcon + 0x01c4, star + 4);
 }
 
-int
-gf100_gr_init_ctxctl(struct gf100_gr *gr)
+/* Initialize context from an external (secure or not) firmware */
+static int
+gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 {
-	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_secboot *sb = device->secboot;
-	int i;
 	int ret = 0;
 
-	if (gr->firmware) {
-		/* load fuc microcode */
-		nvkm_mc_unk260(device, 0);
-
-		/* securely-managed falcons must be reset using secure boot */
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
-		else
-			gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
-					 &gr->fuc409d);
-		if (ret)
-			return ret;
+	/* load fuc microcode */
+	nvkm_mc_unk260(device, 0);
 
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
-		else
-			gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
-					 &gr->fuc41ad);
-		if (ret)
-			return ret;
+	/* securely-managed falcons must be reset using secure boot */
+	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
+		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
+	else
+		gf100_gr_init_fw(gr->fecs, &gr->fuc409c, &gr->fuc409d);
+	if (ret)
+		return ret;
 
-		nvkm_mc_unk260(device, 1);
-
-		/* start both of them running */
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x41a10c, 0x00000000);
-		nvkm_wr32(device, 0x40910c, 0x00000000);
-
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_GPCCS);
-		else
-			nvkm_wr32(device, 0x41a100, 0x00000002);
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_FECS);
-		else
-			nvkm_wr32(device, 0x409100, 0x00000002);
-		if (nvkm_msec(device, 2000,
-			if (nvkm_rd32(device, 0x409800) & 0x00000001)
-				break;
-		) < 0)
-			return -EBUSY;
+	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
+		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
+	else
+		gf100_gr_init_fw(gr->gpccs, &gr->fuc41ac, &gr->fuc41ad);
+	if (ret)
+		return ret;
+
+	nvkm_mc_unk260(device, 1);
+
+	/* start both of them running */
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x41a10c, 0x00000000);
+	nvkm_wr32(device, 0x40910c, 0x00000000);
+
+	nvkm_falcon_start(gr->gpccs);
+	nvkm_falcon_start(gr->fecs);
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x7fffffff);
-		nvkm_wr32(device, 0x409504, 0x00000021);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800) & 0x00000001)
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x7fffffff);
+	nvkm_wr32(device, 0x409504, 0x00000021);
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000010);
+	if (nvkm_msec(device, 2000,
+		if ((gr->size = nvkm_rd32(device, 0x409800)))
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000016);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800))
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000025);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800))
+			break;
+	) < 0)
+		return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000010);
+	if (device->chipset >= 0xe0) {
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000030);
 		if (nvkm_msec(device, 2000,
-			if ((gr->size = nvkm_rd32(device, 0x409800)))
+			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000016);
+		nvkm_wr32(device, 0x409810, 0xb00095c8);
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000031);
 		if (nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000025);
+		nvkm_wr32(device, 0x409810, 0x00080420);
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000032);
 		if (nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		if (device->chipset >= 0xe0) {
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000030);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
-
-			nvkm_wr32(device, 0x409810, 0xb00095c8);
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000031);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
-
-			nvkm_wr32(device, 0x409810, 0x00080420);
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000032);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
+		nvkm_wr32(device, 0x409614, 0x00000070);
+		nvkm_wr32(device, 0x409614, 0x00000770);
+		nvkm_wr32(device, 0x40802c, 0x00000001);
+	}
 
-			nvkm_wr32(device, 0x409614, 0x00000070);
-			nvkm_wr32(device, 0x409614, 0x00000770);
-			nvkm_wr32(device, 0x40802c, 0x00000001);
+	if (gr->data == NULL) {
+		int ret = gf100_grctx_generate(gr);
+		if (ret) {
+			nvkm_error(subdev, "failed to construct context\n");
+			return ret;
 		}
+	}
 
-		if (gr->data == NULL) {
-			int ret = gf100_grctx_generate(gr);
-			if (ret) {
-				nvkm_error(subdev, "failed to construct context\n");
-				return ret;
-			}
-		}
+	return 0;
+}
+
+static int
+gf100_gr_init_ctxctl_int(struct gf100_gr *gr)
+{
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
 
-		return 0;
-	} else
+	if (!gr->func->fecs.ucode) {
 		return -ENOSYS;
 	}
 
 	/* load HUB microcode */
 	nvkm_mc_unk260(device, 0);
-	nvkm_wr32(device, 0x4091c0, 0x01000000);
-	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
-		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
-
-	nvkm_wr32(device, 0x409180, 0x01000000);
-	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, 0x409188, i >> 6);
-		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
-	}
+	nvkm_falcon_load_dmem(gr->fecs, gr->func->fecs.ucode->data.data, 0x0,
+			      gr->func->fecs.ucode->data.size, 0);
+	nvkm_falcon_load_imem(gr->fecs, gr->func->fecs.ucode->code.data, 0x0,
+			      gr->func->fecs.ucode->code.size, 0, 0, false);
 
 	/* load GPC microcode */
-	nvkm_wr32(device, 0x41a1c0, 0x01000000);
-	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
-		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);
-
-	nvkm_wr32(device, 0x41a180, 0x01000000);
-	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, 0x41a188, i >> 6);
-		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
-	}
+	nvkm_falcon_load_dmem(gr->gpccs, gr->func->gpccs.ucode->data.data, 0x0,
+			      gr->func->gpccs.ucode->data.size, 0);
+	nvkm_falcon_load_imem(gr->gpccs, gr->func->gpccs.ucode->code.data, 0x0,
			      gr->func->gpccs.ucode->code.size, 0, 0, false);
 	nvkm_mc_unk260(device, 1);
 
 	/* load register lists */
@@ -1642,6 +1630,19 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	return 0;
 }
 
+int
+gf100_gr_init_ctxctl(struct gf100_gr *gr)
+{
+	int ret;
+
+	if (gr->firmware)
+		ret = gf100_gr_init_ctxctl_ext(gr);
+	else
+		ret = gf100_gr_init_ctxctl_int(gr);
+
+	return ret;
+}
+
 static int
 gf100_gr_oneinit(struct nvkm_gr *base)
 {
@@ -1711,10 +1712,32 @@ static int
 gf100_gr_init_(struct nvkm_gr *base)
 {
 	struct gf100_gr *gr = gf100_gr(base);
+	struct nvkm_subdev *subdev = &base->engine.subdev;
+	u32 ret;
+
 	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
+
+	ret = nvkm_falcon_get(gr->fecs, subdev);
+	if (ret)
+		return ret;
+
+	ret = nvkm_falcon_get(gr->gpccs, subdev);
+	if (ret)
+		return ret;
+
 	return gr->func->init(gr);
 }
 
+static int
+gf100_gr_fini_(struct nvkm_gr *base, bool suspend)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	nvkm_falcon_put(gr->gpccs, subdev);
+	nvkm_falcon_put(gr->fecs, subdev);
+	return 0;
+}
+
 void
 gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
 {
@@ -1737,6 +1760,9 @@ gf100_gr_dtor(struct nvkm_gr *base)
 		gr->func->dtor(gr);
 	kfree(gr->data);
 
+	nvkm_falcon_del(&gr->gpccs);
+	nvkm_falcon_del(&gr->fecs);
+
 	gf100_gr_dtor_fw(&gr->fuc409c);
 	gf100_gr_dtor_fw(&gr->fuc409d);
 	gf100_gr_dtor_fw(&gr->fuc41ac);
@@ -1755,10 +1781,12 @@ gf100_gr_ = {
 	.dtor = gf100_gr_dtor,
 	.oneinit = gf100_gr_oneinit,
 	.init = gf100_gr_init_,
+	.fini = gf100_gr_fini_,
 	.intr = gf100_gr_intr,
 	.units = gf100_gr_units,
 	.chan_new = gf100_gr_chan_new,
 	.object_get = gf100_gr_object_get,
+	.chsw_load = gf100_gr_chsw_load,
 };
 
 int
@@ -1828,6 +1856,7 @@ int
 gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
 	      int index, struct gf100_gr *gr)
 {
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	int ret;
 
 	gr->func = func;
@@ -1840,7 +1869,11 @@ gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
 	if (ret)
 		return ret;
 
-	return 0;
+	ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs);
+	if (ret)
+		return ret;
+
+	return nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 268b8d60ff73..db6ee3b06841 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -29,6 +29,7 @@
 #include <core/gpuobj.h>
 #include <subdev/ltc.h>
 #include <subdev/mmu.h>
+#include <engine/falcon.h>
 
 #define GPC_MAX 32
 #define TPC_MAX_PER_GPC 8
@@ -75,6 +76,8 @@ struct gf100_gr {
 	const struct gf100_gr_func *func;
 	struct nvkm_gr base;
 
+	struct nvkm_falcon *fecs;
+	struct nvkm_falcon *gpccs;
 	struct gf100_gr_fuc fuc409c;
 	struct gf100_gr_fuc fuc409d;
 	struct gf100_gr_fuc fuc41ac;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
index 2e68919f00b2..c711a55ce392 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 gt200_gr = {
 	.init = nv50_gr_init,
@@ -31,11 +33,11 @@ gt200_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8397, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT200_TESLA, &nv50_gr_object },
 		{}
 	}
 };
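The gf100.c changes above wrap falcon usage in an acquire/release pair around init/fini. The discipline, sketched for a hypothetical falcon user (error handling elided):

	/* init path: take a reference before touching the falcon */
	ret = nvkm_falcon_get(falcon, subdev);
	if (ret)
		return ret; /* another subdev (or secboot) currently owns it */

	/* ... nvkm_falcon_load_imem()/nvkm_falcon_load_dmem(), then
	 * nvkm_falcon_start(), as in gf100_gr_init_ctxctl_int() above ... */

	/* fini path: release so other users may claim the falcon */
	nvkm_falcon_put(falcon, subdev);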
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c index 2bf7aac360cc..fa103df32ec7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c @@ -23,6 +23,8 @@ */ #include "nv50.h" +#include <nvif/class.h> + static const struct nvkm_gr_func gt215_gr = { .init = nv50_gr_init, @@ -31,12 +33,12 @@ gt215_gr = { .tlb_flush = g84_gr_tlb_flush, .units = nv50_gr_units, .sclass = { - { -1, -1, 0x0030, &nv50_gr_object }, - { -1, -1, 0x502d, &nv50_gr_object }, - { -1, -1, 0x5039, &nv50_gr_object }, - { -1, -1, 0x50c0, &nv50_gr_object }, - { -1, -1, 0x8597, &nv50_gr_object }, - { -1, -1, 0x85c0, &nv50_gr_object }, + { -1, -1, NV_NULL_CLASS, &nv50_gr_object }, + { -1, -1, NV50_TWOD, &nv50_gr_object }, + { -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object }, + { -1, -1, NV50_COMPUTE, &nv50_gr_object }, + { -1, -1, GT214_TESLA, &nv50_gr_object }, + { -1, -1, GT214_COMPUTE, &nv50_gr_object }, {} } }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c index 95d5219faf93..eb1a90644752 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c @@ -23,6 +23,8 @@ */ #include "nv50.h" +#include <nvif/class.h> + static const struct nvkm_gr_func mcp79_gr = { .init = nv50_gr_init, @@ -30,11 +32,11 @@ mcp79_gr = { .chan_new = nv50_gr_chan_new, .units = nv50_gr_units, .sclass = { - { -1, -1, 0x0030, &nv50_gr_object }, - { -1, -1, 0x502d, &nv50_gr_object }, - { -1, -1, 0x5039, &nv50_gr_object }, - { -1, -1, 0x50c0, &nv50_gr_object }, - { -1, -1, 0x8397, &nv50_gr_object }, + { -1, -1, NV_NULL_CLASS, &nv50_gr_object }, + { -1, -1, NV50_TWOD, &nv50_gr_object }, + { -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object }, + { -1, -1, NV50_COMPUTE, &nv50_gr_object }, + { -1, -1, GT200_TESLA, &nv50_gr_object }, {} } }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c index 027b58e5976b..c91eb56e9327 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c @@ -23,6 +23,8 @@ */ #include "nv50.h" +#include <nvif/class.h> + static const struct nvkm_gr_func mcp89_gr = { .init = nv50_gr_init, @@ -31,12 +33,12 @@ mcp89_gr = { .tlb_flush = g84_gr_tlb_flush, .units = nv50_gr_units, .sclass = { - { -1, -1, 0x0030, &nv50_gr_object }, - { -1, -1, 0x502d, &nv50_gr_object }, - { -1, -1, 0x5039, &nv50_gr_object }, - { -1, -1, 0x50c0, &nv50_gr_object }, - { -1, -1, 0x85c0, &nv50_gr_object }, - { -1, -1, 0x8697, &nv50_gr_object }, + { -1, -1, NV_NULL_CLASS, &nv50_gr_object }, + { -1, -1, NV50_TWOD, &nv50_gr_object }, + { -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object }, + { -1, -1, NV50_COMPUTE, &nv50_gr_object }, + { -1, -1, GT214_COMPUTE, &nv50_gr_object }, + { -1, -1, GT21A_TESLA, &nv50_gr_object }, {} } }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c index fca67de43f2b..df16ffda1749 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c @@ -27,6 +27,8 @@ #include <core/gpuobj.h> #include <engine/fifo.h> +#include <nvif/class.h> + u64 nv50_gr_units(struct nvkm_gr *gr) { @@ -778,11 +780,11 @@ nv50_gr = { .chan_new = nv50_gr_chan_new, .units = nv50_gr_units, .sclass = { - { -1, -1, 0x0030, &nv50_gr_object }, - { -1, -1, 0x502d, &nv50_gr_object }, - { -1, -1, 0x5039, 
&nv50_gr_object }, - { -1, -1, 0x5097, &nv50_gr_object }, - { -1, -1, 0x50c0, &nv50_gr_object }, + { -1, -1, NV_NULL_CLASS, &nv50_gr_object }, + { -1, -1, NV50_TWOD, &nv50_gr_object }, + { -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object }, + { -1, -1, NV50_TESLA, &nv50_gr_object }, + { -1, -1, NV50_COMPUTE, &nv50_gr_object }, {} } }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h index d8adcdf6985a..2a52d9f026ec 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h @@ -15,6 +15,7 @@ struct nvkm_gr_func { void *(*dtor)(struct nvkm_gr *); int (*oneinit)(struct nvkm_gr *); int (*init)(struct nvkm_gr *); + int (*fini)(struct nvkm_gr *, bool); void (*intr)(struct nvkm_gr *); void (*tile)(struct nvkm_gr *, int region, struct nvkm_fb_tile *); int (*tlb_flush)(struct nvkm_gr *); @@ -24,6 +25,7 @@ struct nvkm_gr_func { /* Returns chipset-specific counts of units packed into an u64. */ u64 (*units)(struct nvkm_gr *); + bool (*chsw_load)(struct nvkm_gr *); struct nvkm_sclass sclass[]; }; diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild new file mode 100644 index 000000000000..584863db9bfc --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild @@ -0,0 +1,2 @@ +nvkm-y += nvkm/falcon/base.o +nvkm-y += nvkm/falcon/v1.o diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c new file mode 100644 index 000000000000..4852f313762f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include "priv.h" + +#include <subdev/mc.h> + +void +nvkm_falcon_load_imem(struct nvkm_falcon *falcon, void *data, u32 start, + u32 size, u16 tag, u8 port, bool secure) +{ + if (secure && !falcon->secret) { + nvkm_warn(falcon->user, + "writing with secure tag on a non-secure falcon!\n"); + return; + } + + falcon->func->load_imem(falcon, data, start, size, tag, port, + secure); +} + +void +nvkm_falcon_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start, + u32 size, u8 port) +{ + falcon->func->load_dmem(falcon, data, start, size, port); +} + +void +nvkm_falcon_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, u8 port, + void *data) +{ + falcon->func->read_dmem(falcon, start, size, port, data); +} + +void +nvkm_falcon_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *inst) +{ + if (!falcon->func->bind_context) { + nvkm_error(falcon->user, + "Context binding not supported on this falcon!\n"); + return; + } + + falcon->func->bind_context(falcon, inst); +} + +void +nvkm_falcon_set_start_addr(struct nvkm_falcon *falcon, u32 start_addr) +{ + falcon->func->set_start_addr(falcon, start_addr); +} + +void +nvkm_falcon_start(struct nvkm_falcon *falcon) +{ + falcon->func->start(falcon); +} + +int +nvkm_falcon_enable(struct nvkm_falcon *falcon) +{ + struct nvkm_device *device = falcon->owner->device; + enum nvkm_devidx id = falcon->owner->index; + int ret; + + nvkm_mc_enable(device, id); + ret = falcon->func->enable(falcon); + if (ret) { + nvkm_mc_disable(device, id); + return ret; + } + + return 0; +} + +void +nvkm_falcon_disable(struct nvkm_falcon *falcon) +{ + struct nvkm_device *device = falcon->owner->device; + enum nvkm_devidx id = falcon->owner->index; + + /* already disabled, return or wait_idle will timeout */ + if (!nvkm_mc_enabled(device, id)) + return; + + falcon->func->disable(falcon); + + nvkm_mc_disable(device, id); +} + +int +nvkm_falcon_reset(struct nvkm_falcon *falcon) +{ + nvkm_falcon_disable(falcon); + return nvkm_falcon_enable(falcon); +} + +int +nvkm_falcon_wait_for_halt(struct nvkm_falcon *falcon, u32 ms) +{ + return falcon->func->wait_for_halt(falcon, ms); +} + +int +nvkm_falcon_clear_interrupt(struct nvkm_falcon *falcon, u32 mask) +{ + return falcon->func->clear_interrupt(falcon, mask); +} + +void +nvkm_falcon_put(struct nvkm_falcon *falcon, const struct nvkm_subdev *user) +{ + mutex_lock(&falcon->mutex); + if (falcon->user == user) { + nvkm_debug(falcon->user, "released %s falcon\n", falcon->name); + falcon->user = NULL; + } + mutex_unlock(&falcon->mutex); +} + +int +nvkm_falcon_get(struct nvkm_falcon *falcon, const struct nvkm_subdev *user) +{ + mutex_lock(&falcon->mutex); + if (falcon->user) { + nvkm_error(user, "%s falcon already acquired by %s!\n", + falcon->name, nvkm_subdev_name[falcon->user->index]); + mutex_unlock(&falcon->mutex); + return -EBUSY; + } + + nvkm_debug(user, "acquired %s falcon\n", falcon->name); + falcon->user = user; + mutex_unlock(&falcon->mutex); + return 0; +} + +void +nvkm_falcon_ctor(const struct nvkm_falcon_func *func, + struct nvkm_subdev *subdev, const char *name, u32 addr, + struct nvkm_falcon *falcon) +{ + u32 reg; + + falcon->func = func; + falcon->owner = subdev; + falcon->name = name; + falcon->addr = addr; + mutex_init(&falcon->mutex); + + reg = nvkm_falcon_rd32(falcon, 0x12c); + falcon->version = reg & 0xf; + falcon->secret = (reg >> 4) & 0x3; + falcon->code.ports = (reg >> 8) & 0xf; + falcon->data.ports = (reg >> 12) & 0xf; + + reg = nvkm_falcon_rd32(falcon, 0x108); + falcon->code.limit = (reg & 0x1ff) << 
8; + falcon->data.limit = (reg & 0x3fe00) >> 1; + + reg = nvkm_falcon_rd32(falcon, 0xc08); + falcon->debug = (reg >> 20) & 0x1; +} + +void +nvkm_falcon_del(struct nvkm_falcon **pfalcon) +{ + if (*pfalcon) { + kfree(*pfalcon); + *pfalcon = NULL; + } +} diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h new file mode 100644 index 000000000000..97b56f759d0b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h @@ -0,0 +1,8 @@ +#ifndef __NVKM_FALCON_PRIV_H__ +#define __NVKM_FALCON_PRIV_H__ +#include <engine/falcon.h> + +void +nvkm_falcon_ctor(const struct nvkm_falcon_func *, struct nvkm_subdev *, + const char *, u32, struct nvkm_falcon *); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c new file mode 100644 index 000000000000..b537f111f39c --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c @@ -0,0 +1,266 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include "priv.h" + +#include <core/gpuobj.h> +#include <core/memory.h> +#include <subdev/timer.h> + +static void +nvkm_falcon_v1_load_imem(struct nvkm_falcon *falcon, void *data, u32 start, + u32 size, u16 tag, u8 port, bool secure) +{ + u8 rem = size % 4; + u32 reg; + int i; + + size -= rem; + + reg = start | BIT(24) | (secure ? 
BIT(28) : 0); + nvkm_falcon_wr32(falcon, 0x180 + (port * 16), reg); + for (i = 0; i < size / 4; i++) { + /* write new tag every 256B */ + if ((i & 0x3f) == 0) + nvkm_falcon_wr32(falcon, 0x188, tag++); + nvkm_falcon_wr32(falcon, 0x184, ((u32 *)data)[i]); + } + + /* + * If size is not a multiple of 4, mask the last word to ensure garbage + * does not get written + */ + if (rem) { + u32 extra = ((u32 *)data)[i]; + + /* write new tag every 256B */ + if ((i & 0x3f) == 0) + nvkm_falcon_wr32(falcon, 0x188, tag++); + nvkm_falcon_wr32(falcon, 0x184, extra & (BIT(rem * 8) - 1)); + ++i; + } + + /* code must be padded to 0x40 words */ + for (; i & 0x3f; i++) + nvkm_falcon_wr32(falcon, 0x184, 0); +} + +static void +nvkm_falcon_v1_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start, + u32 size, u8 port) +{ + u8 rem = size % 4; + int i; + + size -= rem; + + nvkm_falcon_wr32(falcon, 0x1c0 + (port * 16), start | (0x1 << 24)); + for (i = 0; i < size / 4; i++) + nvkm_falcon_wr32(falcon, 0x1c4, ((u32 *)data)[i]); + + /* + * If size is not a multiple of 4, mask the last word to ensure garbage + * does not get written + */ + if (rem) { + u32 extra = ((u32 *)data)[i]; + + nvkm_falcon_wr32(falcon, 0x1c4, extra & (BIT(rem * 8) - 1)); + } +} + +static void +nvkm_falcon_v1_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, + u8 port, void *data) +{ + u8 rem = size % 4; + int i; + + size -= rem; + + nvkm_falcon_wr32(falcon, 0x1c0 + (port * 16), start | (0x1 << 25)); + for (i = 0; i < size / 4; i++) + ((u32 *)data)[i] = nvkm_falcon_rd32(falcon, 0x1c4); + + /* + * If size is not a multiple of 4, mask the last word to ensure garbage + * does not get read + */ + if (rem) { + u32 extra = nvkm_falcon_rd32(falcon, 0x1c4); + + for (i = size; i < size + rem; i++) { + ((u8 *)data)[i] = (u8)(extra & 0xff); + extra >>= 8; + } + } +} + +static void +nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx) +{ + u32 inst_loc; + + /* disable instance block binding */ + if (ctx == NULL) { + nvkm_falcon_wr32(falcon, 0x10c, 0x0); + return; + } + + nvkm_falcon_wr32(falcon, 0x10c, 0x1); + + /* setup apertures - virtual */ + nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_UCODE, 0x4); + nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_VIRT, 0x0); + /* setup apertures - physical */ + nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_VID, 0x4); + nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_SYS_COH, 0x5); + nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_SYS_NCOH, 0x6); + + /* Set context */ + switch (nvkm_memory_target(ctx->memory)) { + case NVKM_MEM_TARGET_VRAM: inst_loc = 0; break; + case NVKM_MEM_TARGET_NCOH: inst_loc = 3; break; + default: + WARN_ON(1); + return; + } + + /* Enable context */ + nvkm_falcon_mask(falcon, 0x048, 0x1, 0x1); + nvkm_falcon_wr32(falcon, 0x480, + ((ctx->addr >> 12) & 0xfffffff) | + (inst_loc << 28) | (1 << 30)); +} + +static void +nvkm_falcon_v1_set_start_addr(struct nvkm_falcon *falcon, u32 start_addr) +{ + nvkm_falcon_wr32(falcon, 0x104, start_addr); +} + +static void +nvkm_falcon_v1_start(struct nvkm_falcon *falcon) +{ + u32 reg = nvkm_falcon_rd32(falcon, 0x100); + + if (reg & BIT(6)) + nvkm_falcon_wr32(falcon, 0x130, 0x2); + else + nvkm_falcon_wr32(falcon, 0x100, 0x2); +} + +static int +nvkm_falcon_v1_wait_for_halt(struct nvkm_falcon *falcon, u32 ms) +{ + struct nvkm_device *device = falcon->owner->device; + int ret; + + ret = nvkm_wait_msec(device, ms, falcon->addr + 0x100, 0x10, 0x10); + if (ret < 0) + return ret; + + return 0; +} + 
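As an aside (illustrative, not part of the patch): the v1 helpers above are meant to compose into a load/execute cycle. A hedged sketch using only APIs defined in this series; the function name, ucode buffer, and 100ms timeout are assumptions for the example.

/* One hypothetical load/execute cycle built from the helpers above. */
static int
example_falcon_run(struct nvkm_falcon *falcon, void *ucode, u32 size, u16 tag)
{
	int ret;

	/* disable + enable, leaving the falcon in a known state */
	ret = nvkm_falcon_reset(falcon);
	if (ret)
		return ret;

	/* non-secure upload through IMEM port 0, at IMEM offset 0 */
	nvkm_falcon_load_imem(falcon, ucode, 0, size, tag, 0, false);
	nvkm_falcon_set_start_addr(falcon, 0);
	nvkm_falcon_start(falcon);

	/* wait_for_halt() above polls the halt bit (+0x100, bit 4) */
	return nvkm_falcon_wait_for_halt(falcon, 100);
}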
+static int +nvkm_falcon_v1_clear_interrupt(struct nvkm_falcon *falcon, u32 mask) +{ + struct nvkm_device *device = falcon->owner->device; + int ret; + + /* clear interrupt(s) */ + nvkm_falcon_mask(falcon, 0x004, mask, mask); + /* wait until interrupts are cleared */ + ret = nvkm_wait_msec(device, 10, falcon->addr + 0x008, mask, 0x0); + if (ret < 0) + return ret; + + return 0; +} + +static int +falcon_v1_wait_idle(struct nvkm_falcon *falcon) +{ + struct nvkm_device *device = falcon->owner->device; + int ret; + + ret = nvkm_wait_msec(device, 10, falcon->addr + 0x04c, 0xffff, 0x0); + if (ret < 0) + return ret; + + return 0; +} + +static int +nvkm_falcon_v1_enable(struct nvkm_falcon *falcon) +{ + struct nvkm_device *device = falcon->owner->device; + int ret; + + ret = nvkm_wait_msec(device, 10, falcon->addr + 0x10c, 0x6, 0x0); + if (ret < 0) { + nvkm_error(falcon->user, "Falcon mem scrubbing timeout\n"); + return ret; + } + + ret = falcon_v1_wait_idle(falcon); + if (ret) + return ret; + + /* enable IRQs */ + nvkm_falcon_wr32(falcon, 0x010, 0xff); + + return 0; +} + +static void +nvkm_falcon_v1_disable(struct nvkm_falcon *falcon) +{ + /* disable IRQs and wait for any previous code to complete */ + nvkm_falcon_wr32(falcon, 0x014, 0xff); + falcon_v1_wait_idle(falcon); +} + +static const struct nvkm_falcon_func +nvkm_falcon_v1 = { + .load_imem = nvkm_falcon_v1_load_imem, + .load_dmem = nvkm_falcon_v1_load_dmem, + .read_dmem = nvkm_falcon_v1_read_dmem, + .bind_context = nvkm_falcon_v1_bind_context, + .start = nvkm_falcon_v1_start, + .wait_for_halt = nvkm_falcon_v1_wait_for_halt, + .clear_interrupt = nvkm_falcon_v1_clear_interrupt, + .enable = nvkm_falcon_v1_enable, + .disable = nvkm_falcon_v1_disable, + .set_start_addr = nvkm_falcon_v1_set_start_addr, +}; + +int +nvkm_falcon_v1_new(struct nvkm_subdev *owner, const char *name, u32 addr, + struct nvkm_falcon **pfalcon) +{ + struct nvkm_falcon *falcon; + if (!(falcon = *pfalcon = kzalloc(sizeof(*falcon), GFP_KERNEL))) + return -ENOMEM; + nvkm_falcon_ctor(&nvkm_falcon_v1, owner, name, addr, falcon); + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild index be57220a2e01..6b4f1e06a38f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild @@ -19,6 +19,7 @@ nvkm-y += nvkm/subdev/bios/pcir.o nvkm-y += nvkm/subdev/bios/perf.o nvkm-y += nvkm/subdev/bios/pll.o nvkm-y += nvkm/subdev/bios/pmu.o +nvkm-y += nvkm/subdev/bios/power_budget.o nvkm-y += nvkm/subdev/bios/ramcfg.o nvkm-y += nvkm/subdev/bios/rammap.o nvkm-y += nvkm/subdev/bios/shadow.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c new file mode 100644 index 000000000000..617bfffce4ad --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c @@ -0,0 +1,126 @@ +/* + * Copyright 2016 Karol Herbst + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Karol Herbst + */ +#include <subdev/bios.h> +#include <subdev/bios/bit.h> +#include <subdev/bios/power_budget.h> + +static u32 +nvbios_power_budget_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, + u8 *len) +{ + struct bit_entry bit_P; + u32 power_budget; + + if (bit_entry(bios, 'P', &bit_P) || bit_P.version != 2 || + bit_P.length < 0x2c) + return 0; + + power_budget = nvbios_rd32(bios, bit_P.offset + 0x2c); + if (!power_budget) + return 0; + + *ver = nvbios_rd08(bios, power_budget); + switch (*ver) { + case 0x20: + case 0x30: + *hdr = nvbios_rd08(bios, power_budget + 0x1); + *len = nvbios_rd08(bios, power_budget + 0x2); + *cnt = nvbios_rd08(bios, power_budget + 0x3); + return power_budget; + default: + break; + } + + return 0; +} + +int +nvbios_power_budget_header(struct nvkm_bios *bios, + struct nvbios_power_budget *budget) +{ + struct nvkm_subdev *subdev = &bios->subdev; + u8 ver, hdr, cnt, len, cap_entry; + u32 header; + + if (!bios || !budget) + return -EINVAL; + + header = nvbios_power_budget_table(bios, &ver, &hdr, &cnt, &len); + if (!header || !cnt) + return -ENODEV; + + switch (ver) { + case 0x20: + cap_entry = nvbios_rd08(bios, header + 0x9); + break; + case 0x30: + cap_entry = nvbios_rd08(bios, header + 0xa); + break; + default: + cap_entry = 0xff; + } + + if (cap_entry >= cnt && cap_entry != 0xff) { + nvkm_warn(subdev, + "invalid cap_entry found in power budget table\n"); + budget->cap_entry = 0xff; + return -EINVAL; + } + + budget->offset = header; + budget->ver = ver; + budget->hlen = hdr; + budget->elen = len; + budget->ecount = cnt; + + budget->cap_entry = cap_entry; + + return 0; +} + +int +nvbios_power_budget_entry(struct nvkm_bios *bios, + struct nvbios_power_budget *budget, + u8 idx, struct nvbios_power_budget_entry *entry) +{ + u32 entry_offset; + + if (!bios || !budget || !budget->offset || idx >= budget->ecount + || !entry) + return -EINVAL; + + entry_offset = budget->offset + budget->hlen + idx * budget->elen; + + if (budget->ver >= 0x20) { + entry->min_w = nvbios_rd32(bios, entry_offset + 0x2); + entry->avg_w = nvbios_rd32(bios, entry_offset + 0x6); + entry->max_w = nvbios_rd32(bios, entry_offset + 0xa); + } else { + entry->min_w = 0; + entry->max_w = nvbios_rd32(bios, entry_offset + 0x2); + entry->avg_w = entry->max_w; + } + + return 0; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c index 5841f297973c..da1770e47490 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c @@ -112,7 +112,7 @@ read_pll_src(struct nv50_clk *clk, u32 base) M = (coef & 0x000000ff) >> 0; break; default: - BUG_ON(1); + BUG(); } if (M) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c index c714b097719c..59362f8dee22 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c @@ -50,7 +50,7 @@ nv50_devinit_pll_set(struct 
nvkm_devinit *init, u32 type, u32 freq) ret = nv04_pll_calc(subdev, &info, freq, &N1, &M1, &N2, &M2, &P); if (!ret) { nvkm_error(subdev, "failed pll calculation\n"); - return ret; + return -EINVAL; } switch (info.type) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c index 093223d1df4f..6758da93a3a1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c @@ -445,7 +445,7 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, { struct nvkm_ltc *ltc = ram->fb->subdev.device->ltc; struct nvkm_mm *mm = &ram->vram; - struct nvkm_mm_node *r; + struct nvkm_mm_node **node, *r; struct nvkm_mem *mem; int type = (memtype & 0x0ff); int back = (memtype & 0x800); @@ -462,7 +462,6 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, if (!mem) return -ENOMEM; - INIT_LIST_HEAD(&mem->regions); mem->size = size; mutex_lock(&ram->fb->subdev.mutex); @@ -478,6 +477,7 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, } mem->memtype = type; + node = &mem->mem; do { if (back) ret = nvkm_mm_tail(mm, 0, 1, size, ncmin, align, &r); @@ -489,13 +489,13 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, return ret; } - list_add_tail(&r->rl_entry, &mem->regions); + *node = r; + node = &r->next; size -= r->length; } while (size); mutex_unlock(&ram->fb->subdev.mutex); - r = list_first_entry(&mem->regions, struct nvkm_mm_node, rl_entry); - mem->offset = (u64)r->offset << NVKM_RAM_MM_SHIFT; + mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT; *pmem = mem; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c index 7904fa41acef..fb8a1239743d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c @@ -989,7 +989,7 @@ gk104_pll_calc_hiclk(int target_khz, int crystal, int *N1, int *fN1, int *M1, int *P1, int *N2, int *M2, int *P2) { - int best_clk = 0, best_err = target_khz, p_ref, n_ref; + int best_err = target_khz, p_ref, n_ref; bool upper = false; *M1 = 1; @@ -1010,7 +1010,6 @@ gk104_pll_calc_hiclk(int target_khz, int crystal, /* we found a better combination */ if (cur_err < best_err) { best_err = cur_err; - best_clk = cur_clk; *N2 = cur_N; *N1 = n_ref; *P1 = p_ref; @@ -1022,7 +1021,6 @@ gk104_pll_calc_hiclk(int target_khz, int crystal, - target_khz; if (cur_err < best_err) { best_err = cur_err; - best_clk = cur_clk; *N2 = cur_N; *N1 = n_ref; *P1 = p_ref; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c index 0a0e44b75577..017a91de74a0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c @@ -39,7 +39,7 @@ mcp77_ram_init(struct nvkm_ram *base) u32 flush = ((ram->base.size - (ram->poller_base + 0x40)) >> 5) - 1; /* Enable NISO poller for various clients and set their associated - * read address, only for MCP77/78 and MCP79/7A. (fd#25701) + * read address, only for MCP77/78 and MCP79/7A. 
(fd#27501) */ nvkm_wr32(device, 0x100c18, dniso); nvkm_mask(device, 0x100c14, 0x00000000, 0x00000001); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c index 87bde8ff2d6b..6549b0588309 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c @@ -496,15 +496,12 @@ nv50_ram_tidy(struct nvkm_ram *base) void __nv50_ram_put(struct nvkm_ram *ram, struct nvkm_mem *mem) { - struct nvkm_mm_node *this; - - while (!list_empty(&mem->regions)) { - this = list_first_entry(&mem->regions, typeof(*this), rl_entry); - - list_del(&this->rl_entry); - nvkm_mm_free(&ram->vram, &this); + struct nvkm_mm_node *next = mem->mem; + struct nvkm_mm_node *node; + while ((node = next)) { + next = node->next; + nvkm_mm_free(&ram->vram, &node); } - nvkm_mm_free(&ram->tags, &mem->tag); } @@ -530,7 +527,7 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, { struct nvkm_mm *heap = &ram->vram; struct nvkm_mm *tags = &ram->tags; - struct nvkm_mm_node *r; + struct nvkm_mm_node **node, *r; struct nvkm_mem *mem; int comp = (memtype & 0x300) >> 8; int type = (memtype & 0x07f); @@ -559,11 +556,11 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, comp = 0; } - INIT_LIST_HEAD(&mem->regions); mem->memtype = (comp << 7) | type; mem->size = max; type = nv50_fb_memtype[type]; + node = &mem->mem; do { if (back) ret = nvkm_mm_tail(heap, 0, type, max, min, align, &r); @@ -575,13 +572,13 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin, return ret; } - list_add_tail(&r->rl_entry, &mem->regions); + *node = r; + node = &r->next; max -= r->length; } while (max); mutex_unlock(&ram->fb->subdev.mutex); - r = list_first_entry(&mem->regions, struct nvkm_mm_node, rl_entry); - mem->offset = (u64)r->offset << NVKM_RAM_MM_SHIFT; + mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT; *pmem = mem; return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c index f0af2a381eea..fecfa6afcf54 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c @@ -26,6 +26,7 @@ #include <subdev/bios.h> #include <subdev/bios/extdev.h> #include <subdev/bios/iccsense.h> +#include <subdev/bios/power_budget.h> #include <subdev/i2c.h> static bool @@ -216,10 +217,25 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev) { struct nvkm_iccsense *iccsense = nvkm_iccsense(subdev); struct nvkm_bios *bios = subdev->device->bios; + struct nvbios_power_budget budget; struct nvbios_iccsense stbl; - int i; + int i, ret; - if (!bios || nvbios_iccsense_parse(bios, &stbl) || !stbl.nr_entry) + if (!bios) + return 0; + + ret = nvbios_power_budget_header(bios, &budget); + if (!ret && budget.cap_entry != 0xff) { + struct nvbios_power_budget_entry entry; + ret = nvbios_power_budget_entry(bios, &budget, + budget.cap_entry, &entry); + if (!ret) { + iccsense->power_w_max = entry.avg_w; + iccsense->power_w_crit = entry.max_w; + } + } + + if (nvbios_iccsense_parse(bios, &stbl) || !stbl.nr_entry) return 0; iccsense->data_valid = true; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index a6a7fa0d7679..9dec58ec3d9f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -116,7 +116,7 @@ struct gk20a_instmem { static enum nvkm_memory_target 
gk20a_instobj_target(struct nvkm_memory *memory) { - return NVKM_MEM_TARGET_HOST; + return NVKM_MEM_TARGET_NCOH; } static u64 @@ -305,11 +305,11 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory); struct gk20a_instmem *imem = node->base.imem; struct device *dev = imem->base.subdev.device->dev; - struct nvkm_mm_node *r; + struct nvkm_mm_node *r = node->base.mem.mem; unsigned long flags; int i; - if (unlikely(list_empty(&node->base.mem.regions))) + if (unlikely(!r)) goto out; spin_lock_irqsave(&imem->lock, flags); @@ -320,9 +320,6 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) spin_unlock_irqrestore(&imem->lock, flags); - r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node, - rl_entry); - /* clear IOMMU bit to unmap pages */ r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift); @@ -404,10 +401,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, node->r.length = (npages << PAGE_SHIFT) >> 12; node->base.mem.offset = node->handle; - - INIT_LIST_HEAD(&node->base.mem.regions); - list_add_tail(&node->r.rl_entry, &node->base.mem.regions); - + node->base.mem.mem = &node->r; return 0; } @@ -484,10 +478,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift); node->base.mem.offset = ((u64)r->offset) << imem->iommu_pgshift; - - INIT_LIST_HEAD(&node->base.mem.regions); - list_add_tail(&r->rl_entry, &node->base.mem.regions); - + node->base.mem.mem = r; return 0; release_area: diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c index 6b25e25f9eba..09f669ac6630 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c @@ -161,6 +161,16 @@ nvkm_mc_enable(struct nvkm_device *device, enum nvkm_devidx devidx) } } +bool +nvkm_mc_enabled(struct nvkm_device *device, enum nvkm_devidx devidx) +{ + u64 pmc_enable = nvkm_mc_reset_mask(device, false, devidx); + + return (pmc_enable != 0) && + ((nvkm_rd32(device, 0x000200) & pmc_enable) == pmc_enable); +} + + static int nvkm_mc_fini(struct nvkm_subdev *subdev, bool suspend) { diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c index 5df9669ea39c..d06ad2c372bf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c @@ -31,7 +31,7 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node) { struct nvkm_vm *vm = vma->vm; struct nvkm_mmu *mmu = vm->mmu; - struct nvkm_mm_node *r; + struct nvkm_mm_node *r = node->mem; int big = vma->node->type != mmu->func->spg_shift; u32 offset = vma->node->offset + (delta >> 12); u32 bits = vma->node->type - 12; @@ -41,7 +41,7 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node) u32 end, len; delta = 0; - list_for_each_entry(r, &node->regions, rl_entry) { + while (r) { u64 phys = (u64)r->offset << 12; u32 num = r->length >> bits; @@ -65,7 +65,8 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node) delta += (u64)len << vma->node->type; } - } + r = r->next; + } mmu->func->flush(vm); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild index 2a31b7d66a6d..87bf41cef0c6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild @@ -6,6 +6,7 @@ nvkm-y += 
nvkm/subdev/pci/nv40.o nvkm-y += nvkm/subdev/pci/nv46.o nvkm-y += nvkm/subdev/pci/nv4c.o nvkm-y += nvkm/subdev/pci/g84.o +nvkm-y += nvkm/subdev/pci/g92.o nvkm-y += nvkm/subdev/pci/g94.o nvkm-y += nvkm/subdev/pci/gf100.o nvkm-y += nvkm/subdev/pci/gf106.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c new file mode 100644 index 000000000000..48874359d5f6 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c @@ -0,0 +1,57 @@ +/* + * Copyright 2015 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs <bskeggs@redhat.com> + */ +#include "priv.h" + +int +g92_pcie_version_supported(struct nvkm_pci *pci) +{ + if ((nvkm_pci_rd32(pci, 0x460) & 0x200) == 0x200) + return 2; + return 1; +} + +static const struct nvkm_pci_func +g92_pci_func = { + .init = g84_pci_init, + .rd32 = nv40_pci_rd32, + .wr08 = nv40_pci_wr08, + .wr32 = nv40_pci_wr32, + .msi_rearm = nv46_pci_msi_rearm, + + .pcie.init = g84_pcie_init, + .pcie.set_link = g84_pcie_set_link, + + .pcie.max_speed = g84_pcie_max_speed, + .pcie.cur_speed = g84_pcie_cur_speed, + + .pcie.set_version = g84_pcie_set_version, + .pcie.version = g84_pcie_version, + .pcie.version_supported = g92_pcie_version_supported, +}; + +int +g92_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci) +{ + return nvkm_pci_new_(&g92_pci_func, device, index, ppci); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c index 43444123bc04..09adb37a5664 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c @@ -23,14 +23,6 @@ */ #include "priv.h" -int -g94_pcie_version_supported(struct nvkm_pci *pci) -{ - if ((nvkm_pci_rd32(pci, 0x460) & 0x200) == 0x200) - return 2; - return 1; -} - static const struct nvkm_pci_func g94_pci_func = { .init = g84_pci_init, @@ -47,7 +39,7 @@ g94_pci_func = { .pcie.set_version = g84_pcie_set_version, .pcie.version = g84_pcie_version, - .pcie.version_supported = g94_pcie_version_supported, + .pcie.version_supported = g92_pcie_version_supported, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c index e30ea676baf6..00a5e7d3ee9d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c @@ -92,7 +92,7 @@ gf100_pci_func = { .pcie.set_version = gf100_pcie_set_version, 
.pcie.version = gf100_pcie_version, - .pcie.version_supported = g94_pcie_version_supported, + .pcie.version_supported = g92_pcie_version_supported, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c index c3b798c5c6dd..11bf419afe3f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c @@ -39,7 +39,7 @@ gf106_pci_func = { .pcie.set_version = gf100_pcie_set_version, .pcie.version = gf100_pcie_version, - .pcie.version_supported = g94_pcie_version_supported, + .pcie.version_supported = g92_pcie_version_supported, }; int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h index 23de3180aae5..86921ec962d6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h @@ -44,7 +44,7 @@ enum nvkm_pcie_speed g84_pcie_max_speed(struct nvkm_pci *); int g84_pcie_init(struct nvkm_pci *); int g84_pcie_set_link(struct nvkm_pci *, enum nvkm_pcie_speed, u8); -int g94_pcie_version_supported(struct nvkm_pci *); +int g92_pcie_version_supported(struct nvkm_pci *); void gf100_pcie_set_version(struct nvkm_pci *, u8); int gf100_pcie_version(struct nvkm_pci *); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild index 51fb4bf94a44..ca57c1e491b0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild @@ -8,5 +8,6 @@ nvkm-y += nvkm/subdev/pmu/gk110.o nvkm-y += nvkm/subdev/pmu/gk208.o nvkm-y += nvkm/subdev/pmu/gk20a.o nvkm-y += nvkm/subdev/pmu/gm107.o +nvkm-y += nvkm/subdev/pmu/gm20b.o nvkm-y += nvkm/subdev/pmu/gp100.o nvkm-y += nvkm/subdev/pmu/gp102.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c index e611ce80f8ef..a73f690eb4b5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c @@ -116,6 +116,8 @@ nvkm_pmu_init(struct nvkm_subdev *subdev) static void * nvkm_pmu_dtor(struct nvkm_subdev *subdev) { + struct nvkm_pmu *pmu = nvkm_pmu(subdev); + nvkm_falcon_del(&pmu->falcon); return nvkm_pmu(subdev); } @@ -129,15 +131,22 @@ nvkm_pmu = { }; int +nvkm_pmu_ctor(const struct nvkm_pmu_func *func, struct nvkm_device *device, + int index, struct nvkm_pmu *pmu) +{ + nvkm_subdev_ctor(&nvkm_pmu, device, index, &pmu->subdev); + pmu->func = func; + INIT_WORK(&pmu->recv.work, nvkm_pmu_recv); + init_waitqueue_head(&pmu->recv.wait); + return nvkm_falcon_v1_new(&pmu->subdev, "PMU", 0x10a000, &pmu->falcon); +} + +int nvkm_pmu_new_(const struct nvkm_pmu_func *func, struct nvkm_device *device, int index, struct nvkm_pmu **ppmu) { struct nvkm_pmu *pmu; if (!(pmu = *ppmu = kzalloc(sizeof(*pmu), GFP_KERNEL))) return -ENOMEM; - nvkm_subdev_ctor(&nvkm_pmu, device, index, &pmu->subdev); - pmu->func = func; - INIT_WORK(&pmu->recv.work, nvkm_pmu_recv); - init_waitqueue_head(&pmu->recv.wait); - return 0; + return nvkm_pmu_ctor(func, device, index, *ppmu); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c index f996d90c9f0d..9ca0db796cbe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c @@ -19,7 +19,7 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ -#define gk20a_pmu(p) container_of((p), struct gk20a_pmu, base.subdev) +#define gk20a_pmu(p) container_of((p), struct gk20a_pmu, base) #include "priv.h" #include <subdev/clk.h> @@ -43,9 +43,8 @@ struct gk20a_pmu { }; struct gk20a_pmu_dvfs_dev_status { - unsigned long total; - unsigned long busy; - int cur_state; + u32 total; + u32 busy; }; static int @@ -56,13 +55,12 @@ gk20a_pmu_dvfs_target(struct gk20a_pmu *pmu, int *state) return nvkm_clk_astate(clk, *state, 0, false); } -static int +static void gk20a_pmu_dvfs_get_cur_state(struct gk20a_pmu *pmu, int *state) { struct nvkm_clk *clk = pmu->base.subdev.device->clk; *state = clk->pstate; - return 0; } static int @@ -90,28 +88,26 @@ gk20a_pmu_dvfs_get_target_state(struct gk20a_pmu *pmu, *state = level; - if (level == cur_level) - return 0; - else - return 1; + return (level != cur_level); } -static int +static void gk20a_pmu_dvfs_get_dev_status(struct gk20a_pmu *pmu, struct gk20a_pmu_dvfs_dev_status *status) { - struct nvkm_device *device = pmu->base.subdev.device; - status->busy = nvkm_rd32(device, 0x10a508 + (BUSY_SLOT * 0x10)); - status->total= nvkm_rd32(device, 0x10a508 + (CLK_SLOT * 0x10)); - return 0; + struct nvkm_falcon *falcon = pmu->base.falcon; + + status->busy = nvkm_falcon_rd32(falcon, 0x508 + (BUSY_SLOT * 0x10)); + status->total= nvkm_falcon_rd32(falcon, 0x508 + (CLK_SLOT * 0x10)); } static void gk20a_pmu_dvfs_reset_dev_status(struct gk20a_pmu *pmu) { - struct nvkm_device *device = pmu->base.subdev.device; - nvkm_wr32(device, 0x10a508 + (BUSY_SLOT * 0x10), 0x80000000); - nvkm_wr32(device, 0x10a508 + (CLK_SLOT * 0x10), 0x80000000); + struct nvkm_falcon *falcon = pmu->base.falcon; + + nvkm_falcon_wr32(falcon, 0x508 + (BUSY_SLOT * 0x10), 0x80000000); + nvkm_falcon_wr32(falcon, 0x508 + (CLK_SLOT * 0x10), 0x80000000); } static void @@ -127,7 +123,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm) struct nvkm_timer *tmr = device->timer; struct nvkm_volt *volt = device->volt; u32 utilization = 0; - int state, ret; + int state; /* * The PMU is initialized before CLK and VOLT, so we have to make sure the @@ -136,11 +132,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm) if (!clk || !volt) goto resched; - ret = gk20a_pmu_dvfs_get_dev_status(pmu, &status); - if (ret) { - nvkm_warn(subdev, "failed to get device status\n"); - goto resched; - } + gk20a_pmu_dvfs_get_dev_status(pmu, &status); if (status.total) utilization = div_u64((u64)status.busy * 100, status.total); @@ -150,11 +142,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm) nvkm_trace(subdev, "utilization = %d %%, avg_load = %d %%\n", utilization, data->avg_load); - ret = gk20a_pmu_dvfs_get_cur_state(pmu, &state); - if (ret) { - nvkm_warn(subdev, "failed to get current state\n"); - goto resched; - } + gk20a_pmu_dvfs_get_cur_state(pmu, &state); if (gk20a_pmu_dvfs_get_target_state(pmu, &state, data->avg_load)) { nvkm_trace(subdev, "set new state to %d\n", state); @@ -166,32 +154,36 @@ resched: nvkm_timer_alarm(tmr, 100000000, alarm); } -static int -gk20a_pmu_fini(struct nvkm_subdev *subdev, bool suspend) +static void +gk20a_pmu_fini(struct nvkm_pmu *pmu) { - struct gk20a_pmu *pmu = gk20a_pmu(subdev); - nvkm_timer_alarm_cancel(subdev->device->timer, &pmu->alarm); - return 0; -} + struct gk20a_pmu *gpmu = gk20a_pmu(pmu); + nvkm_timer_alarm_cancel(pmu->subdev.device->timer, &gpmu->alarm); -static void * -gk20a_pmu_dtor(struct nvkm_subdev *subdev) -{ - return gk20a_pmu(subdev); + nvkm_falcon_put(pmu->falcon, &pmu->subdev); } static int -gk20a_pmu_init(struct nvkm_subdev *subdev) 
+gk20a_pmu_init(struct nvkm_pmu *pmu) { - struct gk20a_pmu *pmu = gk20a_pmu(subdev); - struct nvkm_device *device = pmu->base.subdev.device; + struct gk20a_pmu *gpmu = gk20a_pmu(pmu); + struct nvkm_subdev *subdev = &pmu->subdev; + struct nvkm_device *device = pmu->subdev.device; + struct nvkm_falcon *falcon = pmu->falcon; + int ret; + + ret = nvkm_falcon_get(falcon, subdev); + if (ret) { + nvkm_error(subdev, "cannot acquire %s falcon!\n", falcon->name); + return ret; + } /* init pwr perf counter */ - nvkm_wr32(device, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001); - nvkm_wr32(device, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002); - nvkm_wr32(device, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003); + nvkm_falcon_wr32(falcon, 0x504 + (BUSY_SLOT * 0x10), 0x00200001); + nvkm_falcon_wr32(falcon, 0x50c + (BUSY_SLOT * 0x10), 0x00000002); + nvkm_falcon_wr32(falcon, 0x50c + (CLK_SLOT * 0x10), 0x00000003); - nvkm_timer_alarm(device->timer, 2000000000, &pmu->alarm); + nvkm_timer_alarm(device->timer, 2000000000, &gpmu->alarm); return 0; } @@ -202,26 +194,26 @@ gk20a_dvfs_data= { .p_smooth = 1, }; -static const struct nvkm_subdev_func +static const struct nvkm_pmu_func gk20a_pmu = { .init = gk20a_pmu_init, .fini = gk20a_pmu_fini, - .dtor = gk20a_pmu_dtor, + .reset = gt215_pmu_reset, }; int gk20a_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu) { - static const struct nvkm_pmu_func func = {}; struct gk20a_pmu *pmu; + int ret; if (!(pmu = kzalloc(sizeof(*pmu), GFP_KERNEL))) return -ENOMEM; - pmu->base.func = &func; *ppmu = &pmu->base; - nvkm_subdev_ctor(&gk20a_pmu, device, index, &pmu->base.subdev); + ret = nvkm_pmu_ctor(&gk20a_pmu, device, index, &pmu->base); + if (ret) + return ret; + pmu->data = &gk20a_dvfs_data; nvkm_alarm_init(&pmu->alarm, gk20a_pmu_dvfs_work); + return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c new file mode 100644 index 000000000000..0b8a1cc4a0ee --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "priv.h" + +static const struct nvkm_pmu_func +gm20b_pmu = { + .reset = gt215_pmu_reset, +}; + +int +gm20b_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu) +{ + return nvkm_pmu_new_(&gm20b_pmu, device, index, ppmu); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h index 2e2179a4ad17..096cba069f72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h @@ -4,6 +4,8 @@ #include <subdev/pmu.h> #include <subdev/pmu/fuc/os.h> +int nvkm_pmu_ctor(const struct nvkm_pmu_func *, struct nvkm_device *, + int index, struct nvkm_pmu *); int nvkm_pmu_new_(const struct nvkm_pmu_func *, struct nvkm_device *, int index, struct nvkm_pmu **); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild index b02b868a6589..5076d1500f47 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild @@ -1,3 +1,7 @@ nvkm-y += nvkm/subdev/secboot/base.o +nvkm-y += nvkm/subdev/secboot/ls_ucode_gr.o +nvkm-y += nvkm/subdev/secboot/acr.o +nvkm-y += nvkm/subdev/secboot/acr_r352.o +nvkm-y += nvkm/subdev/secboot/acr_r361.o nvkm-y += nvkm/subdev/secboot/gm200.o nvkm-y += nvkm/subdev/secboot/gm20b.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c new file mode 100644 index 000000000000..75dc06557877 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "acr.h" + +#include <core/firmware.h> + +/** + * Convenience function to duplicate a firmware file in memory and check that + * it has the required minimum size. 
+ */ +void * +nvkm_acr_load_firmware(const struct nvkm_subdev *subdev, const char *name, + size_t min_size) +{ + const struct firmware *fw; + void *blob; + int ret; + + ret = nvkm_firmware_get(subdev->device, name, &fw); + if (ret) + return ERR_PTR(ret); + if (fw->size < min_size) { + nvkm_error(subdev, "%s is smaller than expected size %zu\n", + name, min_size); + nvkm_firmware_put(fw); + return ERR_PTR(-EINVAL); + } + blob = kmemdup(fw->data, fw->size, GFP_KERNEL); + nvkm_firmware_put(fw); + if (!blob) + return ERR_PTR(-ENOMEM); + + return blob; +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h new file mode 100644 index 000000000000..97795b342b6f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef __NVKM_SECBOOT_ACR_H__ +#define __NVKM_SECBOOT_ACR_H__ + +#include "priv.h" + +struct nvkm_acr; + +/** + * struct nvkm_acr_func - properties and functions specific to an ACR + * + * @load: make the ACR ready to run on the given secboot device + * @reset: reset the specified falcon + * @start: start the specified falcon (assumed to have been reset) + */ +struct nvkm_acr_func { + void (*dtor)(struct nvkm_acr *); + int (*oneinit)(struct nvkm_acr *, struct nvkm_secboot *); + int (*fini)(struct nvkm_acr *, struct nvkm_secboot *, bool); + int (*load)(struct nvkm_acr *, struct nvkm_secboot *, + struct nvkm_gpuobj *, u64); + int (*reset)(struct nvkm_acr *, struct nvkm_secboot *, + enum nvkm_secboot_falcon); + int (*start)(struct nvkm_acr *, struct nvkm_secboot *, + enum nvkm_secboot_falcon); +}; + +/** + * struct nvkm_acr - instance of an ACR + * + * @boot_falcon: ID of the falcon that will perform secure boot + * @managed_falcons: bitfield of falcons managed by this ACR + * @start_address: virtual start address of the HS bootloader + */ +struct nvkm_acr { + const struct nvkm_acr_func *func; + const struct nvkm_subdev *subdev; + + enum nvkm_secboot_falcon boot_falcon; + unsigned long managed_falcons; + u32 start_address; +}; + +void *nvkm_acr_load_firmware(const struct nvkm_subdev *, const char *, size_t); + +struct nvkm_acr *acr_r352_new(unsigned long); +struct nvkm_acr *acr_r361_new(unsigned long); + +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c new file mode 100644 index 000000000000..1aa37ea18580 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c @@ -0,0 +1,936 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "acr_r352.h" + +#include <core/gpuobj.h> +#include <core/firmware.h> +#include <engine/falcon.h> + +/** + * struct hsf_fw_header - HS firmware descriptor + * @sig_dbg_offset: offset of the debug signature + * @sig_dbg_size: size of the debug signature + * @sig_prod_offset: offset of the production signature + * @sig_prod_size: size of the production signature + * @patch_loc: offset of the offset (sic) of where the signature is + * @patch_sig: offset of the offset (sic) to add to sig_*_offset + * @hdr_offset: offset of the load header (see struct hs_load_header) + * @hdr_size: size of above header + * + * This structure is embedded in the HS firmware image at + * hs_bin_hdr.header_offset. + */ +struct hsf_fw_header { + u32 sig_dbg_offset; + u32 sig_dbg_size; + u32 sig_prod_offset; + u32 sig_prod_size; + u32 patch_loc; + u32 patch_sig; + u32 hdr_offset; + u32 hdr_size; +}; + +/** + * struct acr_r352_flcn_bl_desc - DMEM bootloader descriptor + * @signature: 16B signature for secure code. 0s if no secure code + * @ctx_dma: DMA context to be used by BL while loading code/data + * @code_dma_base: 256B-aligned Physical FB Address where code is located + * (falcon's $xcbase register) + * @non_sec_code_off: offset from code_dma_base where the non-secure code is + * located. The offset must be multiple of 256 to help perf + * @non_sec_code_size: the size of the nonSecure code part. + * @sec_code_off: offset from code_dma_base where the secure code is + * located. The offset must be multiple of 256 to help perf + * @sec_code_size: offset from code_dma_base where the secure code is + * located. The offset must be multiple of 256 to help perf + * @code_entry_point: code entry point which will be invoked by BL after + * code is loaded. + * @data_dma_base: 256B aligned Physical FB Address where data is located. + * (falcon's $xdbase register) + * @data_size: size of data block. Should be multiple of 256B + * + * Structure used by the bootloader to load the rest of the code. This has + * to be filled by host and copied into DMEM at offset provided in the + * hsflcn_bl_desc.bl_desc_dmem_load_off. 
+ */
+struct acr_r352_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	u32 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	u32 data_dma_base;
+	u32 data_size;
+	u32 code_dma_base1;
+	u32 data_dma_base1;
+};
+
+/**
+ * acr_r352_generate_flcn_bl_desc - generate generic BL descriptor for LS image
+ */
+static void
+acr_r352_generate_flcn_bl_desc(const struct nvkm_acr *acr,
+			       const struct ls_ucode_img *_img, u64 wpr_addr,
+			       void *_desc)
+{
+	struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img);
+	struct acr_r352_flcn_bl_desc *desc = _desc;
+	const struct ls_ucode_img_desc *pdesc = &_img->ucode_desc;
+	u64 base, addr_code, addr_data;
+
+	base = wpr_addr + img->lsb_header.ucode_off + pdesc->app_start_offset;
+	addr_code = (base + pdesc->app_resident_code_offset) >> 8;
+	addr_data = (base + pdesc->app_resident_data_offset) >> 8;
+
+	desc->ctx_dma = FALCON_DMAIDX_UCODE;
+	desc->code_dma_base = lower_32_bits(addr_code);
+	desc->code_dma_base1 = upper_32_bits(addr_code);
+	desc->non_sec_code_off = pdesc->app_resident_code_offset;
+	desc->non_sec_code_size = pdesc->app_resident_code_size;
+	desc->code_entry_point = pdesc->app_imem_entry;
+	desc->data_dma_base = lower_32_bits(addr_data);
+	desc->data_dma_base1 = upper_32_bits(addr_data);
+	desc->data_size = pdesc->app_resident_data_size;
+}
+
+/**
+ * struct hsflcn_acr_desc - data section of the HS firmware
+ *
+ * This header is to be copied at the beginning of DMEM by the HS bootloader.
+ *
+ * @signature: signature of ACR ucode
+ * @wpr_region_id: region ID holding the WPR header and its details
+ * @wpr_offset: offset from the WPR region holding the wpr header
+ * @regions: region descriptors
+ * @ucode_blob_size: size of the LS blob
+ * @ucode_blob_base: FB location of the LS blob
+ */
+struct hsflcn_acr_desc {
+	union {
+		u8 reserved_dmem[0x200];
+		u32 signatures[4];
+	} ucode_reserved_space;
+	u32 wpr_region_id;
+	u32 wpr_offset;
+	u32 mmu_mem_range;
+#define FLCN_ACR_MAX_REGIONS 2
+	struct {
+		u32 no_regions;
+		struct {
+			u32 start_addr;
+			u32 end_addr;
+			u32 region_id;
+			u32 read_mask;
+			u32 write_mask;
+			u32 client_mask;
+		} region_props[FLCN_ACR_MAX_REGIONS];
+	} regions;
+	u32 ucode_blob_size;
+	u64 ucode_blob_base __aligned(8);
+	struct {
+		u32 vpr_enabled;
+		u32 vpr_start;
+		u32 vpr_end;
+		u32 hdcp_policies;
+	} vpr_desc;
+};
+
+
+/*
+ * Low-secure blob creation
+ */
+
+/**
+ * acr_r352_ls_ucode_img_load() - create an ls_ucode_img and load it
+ */
+struct ls_ucode_img *
+acr_r352_ls_ucode_img_load(const struct acr_r352 *acr,
+			   enum nvkm_secboot_falcon falcon_id)
+{
+	const struct nvkm_subdev *subdev = acr->base.subdev;
+	struct ls_ucode_img_r352 *img;
+	int ret;
+
+	img = kzalloc(sizeof(*img), GFP_KERNEL);
+	if (!img)
+		return ERR_PTR(-ENOMEM);
+
+	img->base.falcon_id = falcon_id;
+
+	ret = acr->func->ls_func[falcon_id]->load(subdev, &img->base);
+	if (ret) {
+		kfree(img->base.ucode_data);
+		kfree(img->base.sig);
+		kfree(img);
+		return ERR_PTR(ret);
+	}
+
+	/* Check that the signature size matches our expectations... */
+	if (img->base.sig_size != sizeof(img->lsb_header.signature)) {
+		nvkm_error(subdev, "invalid signature size for %s falcon!\n",
+			   nvkm_secboot_falcon_name[falcon_id]);
+		kfree(img->base.ucode_data);
+		kfree(img->base.sig);
+		kfree(img);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Copy signature to the right place */
+	memcpy(&img->lsb_header.signature, img->base.sig, img->base.sig_size);
+
+	/* not needed?
the signature should already have the right value */ + img->lsb_header.signature.falcon_id = falcon_id; + + return &img->base; +} + +#define LSF_LSB_HEADER_ALIGN 256 +#define LSF_BL_DATA_ALIGN 256 +#define LSF_BL_DATA_SIZE_ALIGN 256 +#define LSF_BL_CODE_SIZE_ALIGN 256 +#define LSF_UCODE_DATA_ALIGN 4096 + +/** + * acr_r352_ls_img_fill_headers - fill the WPR and LSB headers of an image + * @acr: ACR to use + * @img: image to generate for + * @offset: offset in the WPR region where this image starts + * + * Allocate space in the WPR area from offset and write the WPR and LSB headers + * accordingly. + * + * Return: offset at the end of this image. + */ +static u32 +acr_r352_ls_img_fill_headers(struct acr_r352 *acr, + struct ls_ucode_img_r352 *img, u32 offset) +{ + struct ls_ucode_img *_img = &img->base; + struct acr_r352_lsf_wpr_header *whdr = &img->wpr_header; + struct acr_r352_lsf_lsb_header *lhdr = &img->lsb_header; + struct ls_ucode_img_desc *desc = &_img->ucode_desc; + const struct acr_r352_ls_func *func = + acr->func->ls_func[_img->falcon_id]; + + /* Fill WPR header */ + whdr->falcon_id = _img->falcon_id; + whdr->bootstrap_owner = acr->base.boot_falcon; + whdr->status = LSF_IMAGE_STATUS_COPY; + + /* Skip bootstrapping falcons started by someone else than ACR */ + if (acr->lazy_bootstrap & BIT(_img->falcon_id)) + whdr->lazy_bootstrap = 1; + + /* Align, save off, and include an LSB header size */ + offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN); + whdr->lsb_offset = offset; + offset += sizeof(*lhdr); + + /* + * Align, save off, and include the original (static) ucode + * image size + */ + offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN); + lhdr->ucode_off = offset; + offset += _img->ucode_size; + + /* + * For falcons that use a boot loader (BL), we append a loader + * desc structure on the end of the ucode image and consider + * this the boot loader data. The host will then copy the loader + * desc args to this space within the WPR region (before locking + * down) and the HS bin will then copy them to DMEM 0 for the + * loader. 
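+	 *
+	 * A worked example of the size computation below, with hypothetical
+	 * values that are already 256B-aligned: for a 0x100-byte bootloader,
+	 * app_resident_data_offset = 0x5000 and app_size = 0x6000, we get
+	 *
+	 *	bl_code_size = 0x100
+	 *	ucode_size   = 0x5000 + 0x100 = 0x5100
+	 *	data_size    = 0x6000 + 0x100 - 0x5100 = 0x1000
+	 *
+	 * i.e. ucode_size covers the bootloader plus the resident code, and
+	 * data_size the resident data appended after it.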
+	 */
+	lhdr->bl_code_size = ALIGN(desc->bootloader_size,
+				   LSF_BL_CODE_SIZE_ALIGN);
+	lhdr->ucode_size = ALIGN(desc->app_resident_data_offset,
+				 LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size;
+	lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) +
+			  lhdr->bl_code_size - lhdr->ucode_size;
+	/*
+	 * Though the BL is located at offset 0 of the image, the VA
+	 * is different to make sure that it doesn't collide with the
+	 * actual OS VA range
+	 */
+	lhdr->bl_imem_off = desc->bootloader_imem_offset;
+	lhdr->app_code_off = desc->app_start_offset +
+			     desc->app_resident_code_offset;
+	lhdr->app_code_size = desc->app_resident_code_size;
+	lhdr->app_data_off = desc->app_start_offset +
+			     desc->app_resident_data_offset;
+	lhdr->app_data_size = desc->app_resident_data_size;
+
+	lhdr->flags = func->lhdr_flags;
+	if (_img->falcon_id == acr->base.boot_falcon)
+		lhdr->flags |= LSF_FLAG_DMACTL_REQ_CTX;
+
+	/* Align and save off BL descriptor size */
+	lhdr->bl_data_size = ALIGN(func->bl_desc_size, LSF_BL_DATA_SIZE_ALIGN);
+
+	/*
+	 * Align, save off, and include the additional BL data
+	 */
+	offset = ALIGN(offset, LSF_BL_DATA_ALIGN);
+	lhdr->bl_data_off = offset;
+	offset += lhdr->bl_data_size;
+
+	return offset;
+}
+
+/**
+ * acr_r352_ls_fill_headers - fill WPR and LSB headers of all managed images
+ */
+int
+acr_r352_ls_fill_headers(struct acr_r352 *acr, struct list_head *imgs)
+{
+	struct ls_ucode_img_r352 *img;
+	struct list_head *l;
+	u32 count = 0;
+	u32 offset;
+
+	/* Count the number of images to manage */
+	list_for_each(l, imgs)
+		count++;
+
+	/*
+	 * Start with an array of WPR headers at the base of the WPR.
+	 * The expectation here is that the secure falcon will do a single DMA
+	 * read of this array and cache it internally so it's ok to pack these.
+	 * Also, we add 1 to the falcon count to indicate the end of the array.
+	 */
+	offset = sizeof(img->wpr_header) * (count + 1);
+
+	/*
+	 * Walk the managed falcons, accounting for the LSB structs
+	 * as well as the ucode images.
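+	 *
+	 * For two managed falcons (say FECS and GPCCS), the layout computed
+	 * here is, schematically (alignment padding omitted):
+	 *
+	 *	wpr_header[0]				FECS
+	 *	wpr_header[1]				GPCCS
+	 *	wpr_header[2]				terminator entry
+	 *	lsb_header + ucode image + BL data	FECS
+	 *	lsb_header + ucode image + BL data	GPCCS
+	 *
+	 * with the terminator written as NVKM_SECBOOT_FALCON_INVALID by
+	 * acr_r352_ls_write_wpr().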
+ */ + list_for_each_entry(img, imgs, base.node) { + offset = acr_r352_ls_img_fill_headers(acr, img, offset); + } + + return offset; +} + +/** + * acr_r352_ls_write_wpr - write the WPR blob contents + */ +int +acr_r352_ls_write_wpr(struct acr_r352 *acr, struct list_head *imgs, + struct nvkm_gpuobj *wpr_blob, u32 wpr_addr) +{ + struct ls_ucode_img *_img; + u32 pos = 0; + + nvkm_kmap(wpr_blob); + + list_for_each_entry(_img, imgs, node) { + struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img); + const struct acr_r352_ls_func *ls_func = + acr->func->ls_func[_img->falcon_id]; + u8 gdesc[ls_func->bl_desc_size]; + + nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header, + sizeof(img->wpr_header)); + + nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset, + &img->lsb_header, sizeof(img->lsb_header)); + + /* Generate and write BL descriptor */ + memset(gdesc, 0, ls_func->bl_desc_size); + ls_func->generate_bl_desc(&acr->base, _img, wpr_addr, gdesc); + + nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.bl_data_off, + gdesc, ls_func->bl_desc_size); + + /* Copy ucode */ + nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off, + _img->ucode_data, _img->ucode_size); + + pos += sizeof(img->wpr_header); + } + + nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID); + + nvkm_done(wpr_blob); + + return 0; +} + +/* Both size and address of WPR need to be 128K-aligned */ +#define WPR_ALIGNMENT 0x20000 +/** + * acr_r352_prepare_ls_blob() - prepare the LS blob + * + * For each securely managed falcon, load the FW, signatures and bootloaders and + * prepare a ucode blob. Then, compute the offsets in the WPR region for each + * blob, and finally write the headers and ucode blobs into a GPU object that + * will be copied into the WPR region by the HS firmware. + */ +static int +acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size) +{ + const struct nvkm_subdev *subdev = acr->base.subdev; + struct list_head imgs; + struct ls_ucode_img *img, *t; + unsigned long managed_falcons = acr->base.managed_falcons; + int managed_count = 0; + u32 image_wpr_size; + int falcon_id; + int ret; + + INIT_LIST_HEAD(&imgs); + + /* Load all LS blobs */ + for_each_set_bit(falcon_id, &managed_falcons, NVKM_SECBOOT_FALCON_END) { + struct ls_ucode_img *img; + + img = acr->func->ls_ucode_img_load(acr, falcon_id); + if (IS_ERR(img)) { + ret = PTR_ERR(img); + goto cleanup; + } + + list_add_tail(&img->node, &imgs); + managed_count++; + } + + /* + * Fill the WPR and LSF headers with the right offsets and compute + * required WPR size + */ + image_wpr_size = acr->func->ls_fill_headers(acr, &imgs); + image_wpr_size = ALIGN(image_wpr_size, WPR_ALIGNMENT); + + /* Allocate GPU object that will contain the WPR region */ + ret = nvkm_gpuobj_new(subdev->device, image_wpr_size, WPR_ALIGNMENT, + false, NULL, &acr->ls_blob); + if (ret) + goto cleanup; + + nvkm_debug(subdev, "%d managed LS falcons, WPR size is %d bytes\n", + managed_count, image_wpr_size); + + /* If WPR address and size are not fixed, set them to fit the LS blob */ + if (wpr_size == 0) { + wpr_addr = acr->ls_blob->addr; + wpr_size = image_wpr_size; + /* + * But if the WPR region is set by the bootloader, it is illegal for + * the HS blob to be larger than this region. 
+	 */
+	} else if (image_wpr_size > wpr_size) {
+		nvkm_error(subdev, "WPR region too small for FW blob!\n");
+		nvkm_error(subdev, "required: %dB\n", image_wpr_size);
+		nvkm_error(subdev, "available: %dB\n", wpr_size);
+		ret = -ENOSPC;
+		goto cleanup;
+	}
+
+	/* Write LS blob */
+	ret = acr->func->ls_write_wpr(acr, &imgs, acr->ls_blob, wpr_addr);
+	if (ret)
+		nvkm_gpuobj_del(&acr->ls_blob);
+
+cleanup:
+	list_for_each_entry_safe(img, t, &imgs, node) {
+		kfree(img->ucode_data);
+		kfree(img->sig);
+		kfree(img);
+	}
+
+	return ret;
+}
+
+
+/**
+ * acr_r352_hsf_patch_signature() - patch HS blob with correct signature
+ */
+static void
+acr_r352_hsf_patch_signature(struct nvkm_secboot *sb, void *acr_image)
+{
+	struct fw_bin_header *hsbin_hdr = acr_image;
+	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
+	void *hs_data = acr_image + hsbin_hdr->data_offset;
+	void *sig;
+	u32 sig_size;
+
+	/* Falcon in debug or production mode? */
+	if (sb->boot_falcon->debug) {
+		sig = acr_image + fw_hdr->sig_dbg_offset;
+		sig_size = fw_hdr->sig_dbg_size;
+	} else {
+		sig = acr_image + fw_hdr->sig_prod_offset;
+		sig_size = fw_hdr->sig_prod_size;
+	}
+
+	/* Patch signature */
+	memcpy(hs_data + fw_hdr->patch_loc, sig + fw_hdr->patch_sig, sig_size);
+}
+
+static void
+acr_r352_fixup_hs_desc(struct acr_r352 *acr, struct nvkm_secboot *sb,
+		       struct hsflcn_acr_desc *desc)
+{
+	struct nvkm_gpuobj *ls_blob = acr->ls_blob;
+
+	/* WPR region information if WPR is not fixed */
+	if (sb->wpr_size == 0) {
+		u32 wpr_start = ls_blob->addr;
+		u32 wpr_end = wpr_start + ls_blob->size;
+
+		desc->wpr_region_id = 1;
+		desc->regions.no_regions = 2;
+		desc->regions.region_props[0].start_addr = wpr_start >> 8;
+		desc->regions.region_props[0].end_addr = wpr_end >> 8;
+		desc->regions.region_props[0].region_id = 1;
+		desc->regions.region_props[0].read_mask = 0xf;
+		desc->regions.region_props[0].write_mask = 0xc;
+		desc->regions.region_props[0].client_mask = 0x2;
+	} else {
+		desc->ucode_blob_base = ls_blob->addr;
+		desc->ucode_blob_size = ls_blob->size;
+	}
+}
+
+static void
+acr_r352_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc,
+			     u64 offset)
+{
+	struct acr_r352_flcn_bl_desc *bl_desc = _bl_desc;
+	u64 addr_code, addr_data;
+
+	addr_code = offset >> 8;
+	addr_data = (offset + hdr->data_dma_base) >> 8;
+
+	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
+	bl_desc->code_dma_base = lower_32_bits(addr_code);
+	bl_desc->non_sec_code_off = hdr->non_sec_code_off;
+	bl_desc->non_sec_code_size = hdr->non_sec_code_size;
+	bl_desc->sec_code_off = hdr->app[0].sec_code_off;
+	bl_desc->sec_code_size = hdr->app[0].sec_code_size;
+	bl_desc->code_entry_point = 0;
+	bl_desc->data_dma_base = lower_32_bits(addr_data);
+	bl_desc->data_size = hdr->data_size;
+}
+
+/**
+ * acr_r352_prepare_hs_blob - load and prepare an HS blob and BL descriptor
+ *
+ * @acr: ACR instance being used
+ * @sb: secure boot instance to prepare for
+ * @fw: name of the HS firmware to load
+ * @blob: pointer to gpuobj that will be allocated to receive the HS FW payload
+ * @load_header: the HS firmware's load header is copied here
+ * @patch: whether we should patch the HS descriptor (only for HS loaders)
+ */
+static int
+acr_r352_prepare_hs_blob(struct acr_r352 *acr, struct nvkm_secboot *sb,
+			 const char *fw, struct nvkm_gpuobj **blob,
+			 struct hsf_load_header *load_header, bool patch)
+{
+	struct nvkm_subdev *subdev = &sb->subdev;
+	void *acr_image;
+	struct fw_bin_header *hsbin_hdr;
+	struct hsf_fw_header *fw_hdr;
+	struct hsf_load_header *load_hdr;
+	void *acr_data;
+
int ret; + + acr_image = nvkm_acr_load_firmware(subdev, fw, 0); + if (IS_ERR(acr_image)) + return PTR_ERR(acr_image); + + hsbin_hdr = acr_image; + fw_hdr = acr_image + hsbin_hdr->header_offset; + load_hdr = acr_image + fw_hdr->hdr_offset; + acr_data = acr_image + hsbin_hdr->data_offset; + + /* Patch signature */ + acr_r352_hsf_patch_signature(sb, acr_image); + + /* Patch descriptor with WPR information? */ + if (patch) { + struct hsflcn_acr_desc *desc; + + desc = acr_data + load_hdr->data_dma_base; + acr_r352_fixup_hs_desc(acr, sb, desc); + } + + if (load_hdr->num_apps > ACR_R352_MAX_APPS) { + nvkm_error(subdev, "more apps (%d) than supported (%d)!", + load_hdr->num_apps, ACR_R352_MAX_APPS); + ret = -EINVAL; + goto cleanup; + } + memcpy(load_header, load_hdr, sizeof(*load_header) + + (sizeof(load_hdr->app[0]) * load_hdr->num_apps)); + + /* Create ACR blob and copy HS data to it */ + ret = nvkm_gpuobj_new(subdev->device, ALIGN(hsbin_hdr->data_size, 256), + 0x1000, false, NULL, blob); + if (ret) + goto cleanup; + + nvkm_kmap(*blob); + nvkm_gpuobj_memcpy_to(*blob, 0, acr_data, hsbin_hdr->data_size); + nvkm_done(*blob); + +cleanup: + kfree(acr_image); + + return ret; +} + +static int +acr_r352_prepare_hsbl_blob(struct acr_r352 *acr) +{ + const struct nvkm_subdev *subdev = acr->base.subdev; + struct fw_bin_header *hdr; + struct fw_bl_desc *hsbl_desc; + + acr->hsbl_blob = nvkm_acr_load_firmware(subdev, "acr/bl", 0); + if (IS_ERR(acr->hsbl_blob)) { + int ret = PTR_ERR(acr->hsbl_blob); + + acr->hsbl_blob = NULL; + return ret; + } + + hdr = acr->hsbl_blob; + hsbl_desc = acr->hsbl_blob + hdr->header_offset; + + /* virtual start address for boot vector */ + acr->base.start_address = hsbl_desc->start_tag << 8; + + return 0; +} + +/** + * acr_r352_load_blobs - load blobs common to all ACR V1 versions. + * + * This includes the LS blob, HS ucode loading blob, and HS bootloader. + * + * The HS ucode unload blob is only used on dGPU if the WPR region is variable. + */ +int +acr_r352_load_blobs(struct acr_r352 *acr, struct nvkm_secboot *sb) +{ + int ret; + + /* Firmware already loaded? */ + if (acr->firmware_ok) + return 0; + + /* Load and prepare the managed falcon's firmwares */ + ret = acr_r352_prepare_ls_blob(acr, sb->wpr_addr, sb->wpr_size); + if (ret) + return ret; + + /* Load the HS firmware that will load the LS firmwares */ + if (!acr->load_blob) { + ret = acr_r352_prepare_hs_blob(acr, sb, "acr/ucode_load", + &acr->load_blob, + &acr->load_bl_header, true); + if (ret) + return ret; + } + + /* If the ACR region is dynamically programmed, we need an unload FW */ + if (sb->wpr_size == 0) { + ret = acr_r352_prepare_hs_blob(acr, sb, "acr/ucode_unload", + &acr->unload_blob, + &acr->unload_bl_header, false); + if (ret) + return ret; + } + + /* Load the HS firmware bootloader */ + if (!acr->hsbl_blob) { + ret = acr_r352_prepare_hsbl_blob(acr); + if (ret) + return ret; + } + + acr->firmware_ok = true; + nvkm_debug(&sb->subdev, "LS blob successfully created\n"); + + return 0; +} + +/** + * acr_r352_load() - prepare HS falcon to run the specified blob, mapped + * at GPU address offset. 
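+ *
+ * The resulting falcon layout is, schematically (addresses illustrative):
+ *
+ *	DMEM 0x0			HS bootloader data
+ *	DMEM hsbl_desc->dmem_load_off	generated HS BL descriptor
+ *	IMEM code.limit - code_size	HS bootloader code, tagged with the
+ *					bootloader's start_tag
+ *
+ * where the descriptor points the bootloader at the HS blob mapped at the
+ * given offset.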
+ */ +static int +acr_r352_load(struct nvkm_acr *_acr, struct nvkm_secboot *sb, + struct nvkm_gpuobj *blob, u64 offset) +{ + struct acr_r352 *acr = acr_r352(_acr); + struct nvkm_falcon *falcon = sb->boot_falcon; + struct fw_bin_header *hdr = acr->hsbl_blob; + struct fw_bl_desc *hsbl_desc = acr->hsbl_blob + hdr->header_offset; + void *blob_data = acr->hsbl_blob + hdr->data_offset; + void *hsbl_code = blob_data + hsbl_desc->code_off; + void *hsbl_data = blob_data + hsbl_desc->data_off; + u32 code_size = ALIGN(hsbl_desc->code_size, 256); + const struct hsf_load_header *load_hdr; + const u32 bl_desc_size = acr->func->hs_bl_desc_size; + u8 bl_desc[bl_desc_size]; + + /* Find the bootloader descriptor for our blob and copy it */ + if (blob == acr->load_blob) { + load_hdr = &acr->load_bl_header; + } else if (blob == acr->unload_blob) { + load_hdr = &acr->unload_bl_header; + } else { + nvkm_error(_acr->subdev, "invalid secure boot blob!\n"); + return -EINVAL; + } + + /* + * Copy HS bootloader data + */ + nvkm_falcon_load_dmem(falcon, hsbl_data, 0x0, hsbl_desc->data_size, 0); + + /* Copy HS bootloader code to end of IMEM */ + nvkm_falcon_load_imem(falcon, hsbl_code, falcon->code.limit - code_size, + code_size, hsbl_desc->start_tag, 0, false); + + /* Generate the BL header */ + memset(bl_desc, 0, bl_desc_size); + acr->func->generate_hs_bl_desc(load_hdr, bl_desc, offset); + + /* + * Copy HS BL header where the HS descriptor expects it to be + */ + nvkm_falcon_load_dmem(falcon, bl_desc, hsbl_desc->dmem_load_off, + bl_desc_size, 0); + + return 0; +} + +static int +acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb) +{ + int i; + + /* Run the unload blob to unprotect the WPR region */ + if (acr->unload_blob && sb->wpr_set) { + int ret; + + nvkm_debug(&sb->subdev, "running HS unload blob\n"); + ret = sb->func->run_blob(sb, acr->unload_blob); + if (ret) + return ret; + nvkm_debug(&sb->subdev, "HS unload blob completed\n"); + } + + for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++) + acr->falcon_state[i] = NON_SECURE; + + sb->wpr_set = false; + + return 0; +} + +static int +acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb) +{ + int ret; + + if (sb->wpr_set) + return 0; + + /* Make sure all blobs are ready */ + ret = acr_r352_load_blobs(acr, sb); + if (ret) + return ret; + + nvkm_debug(&sb->subdev, "running HS load blob\n"); + ret = sb->func->run_blob(sb, acr->load_blob); + /* clear halt interrupt */ + nvkm_falcon_clear_interrupt(sb->boot_falcon, 0x10); + if (ret) + return ret; + nvkm_debug(&sb->subdev, "HS load blob completed\n"); + + sb->wpr_set = true; + + return 0; +} + +/* + * acr_r352_reset() - execute secure boot from the prepared state + * + * Load the HS bootloader and ask the falcon to run it. This will in turn + * load the HS firmware and run it, so once the falcon stops all the managed + * falcons should have their LS firmware loaded and be ready to run. + */ +static int +acr_r352_reset(struct nvkm_acr *_acr, struct nvkm_secboot *sb, + enum nvkm_secboot_falcon falcon) +{ + struct acr_r352 *acr = acr_r352(_acr); + int ret; + + /* + * Dummy GM200 implementation: perform secure boot each time we are + * called on FECS. Since only FECS and GPCCS are managed and started + * together, this ought to be safe. + * + * Once we have proper PMU firmware and support, this will be changed + * to a proper call to the PMU method. 
+	 */
+	if (falcon != NVKM_SECBOOT_FALCON_FECS)
+		goto end;
+
+	ret = acr_r352_shutdown(acr, sb);
+	if (ret)
+		return ret;
+
+	ret = acr_r352_bootstrap(acr, sb);
+	if (ret)
+		return ret;
+
+end:
+	acr->falcon_state[falcon] = RESET;
+	return 0;
+}
+
+static int
+acr_r352_start(struct nvkm_acr *_acr, struct nvkm_secboot *sb,
+	       enum nvkm_secboot_falcon falcon)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+	const struct nvkm_subdev *subdev = &sb->subdev;
+	int base;
+
+	switch (falcon) {
+	case NVKM_SECBOOT_FALCON_FECS:
+		base = 0x409000;
+		break;
+	case NVKM_SECBOOT_FALCON_GPCCS:
+		base = 0x41a000;
+		break;
+	default:
+		nvkm_error(subdev, "cannot start unhandled falcon!\n");
+		return -EINVAL;
+	}
+
+	nvkm_wr32(subdev->device, base + 0x130, 0x00000002);
+	acr->falcon_state[falcon] = RUNNING;
+
+	return 0;
+}
+
+static int
+acr_r352_fini(struct nvkm_acr *_acr, struct nvkm_secboot *sb, bool suspend)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+
+	return acr_r352_shutdown(acr, sb);
+}
+
+static void
+acr_r352_dtor(struct nvkm_acr *_acr)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+
+	nvkm_gpuobj_del(&acr->unload_blob);
+
+	kfree(acr->hsbl_blob);
+	nvkm_gpuobj_del(&acr->load_blob);
+	nvkm_gpuobj_del(&acr->ls_blob);
+
+	kfree(acr);
+}
+
+const struct acr_r352_ls_func
+acr_r352_ls_fecs_func = {
+	.load = acr_ls_ucode_load_fecs,
+	.generate_bl_desc = acr_r352_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+};
+
+const struct acr_r352_ls_func
+acr_r352_ls_gpccs_func = {
+	.load = acr_ls_ucode_load_gpccs,
+	.generate_bl_desc = acr_r352_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+	/* GPCCS will be loaded using PRI */
+	.lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD,
+};
+
+const struct acr_r352_func
+acr_r352_func = {
+	.generate_hs_bl_desc = acr_r352_generate_hs_bl_desc,
+	.hs_bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+	.ls_ucode_img_load = acr_r352_ls_ucode_img_load,
+	.ls_fill_headers = acr_r352_ls_fill_headers,
+	.ls_write_wpr = acr_r352_ls_write_wpr,
+	.ls_func = {
+		[NVKM_SECBOOT_FALCON_FECS] = &acr_r352_ls_fecs_func,
+		[NVKM_SECBOOT_FALCON_GPCCS] = &acr_r352_ls_gpccs_func,
+	},
+};
+
+static const struct nvkm_acr_func
+acr_r352_base_func = {
+	.dtor = acr_r352_dtor,
+	.fini = acr_r352_fini,
+	.load = acr_r352_load,
+	.reset = acr_r352_reset,
+	.start = acr_r352_start,
+};
+
+struct nvkm_acr *
+acr_r352_new_(const struct acr_r352_func *func,
+	      enum nvkm_secboot_falcon boot_falcon,
+	      unsigned long managed_falcons)
+{
+	struct acr_r352 *acr;
+
+	acr = kzalloc(sizeof(*acr), GFP_KERNEL);
+	if (!acr)
+		return ERR_PTR(-ENOMEM);
+
+	acr->base.boot_falcon = boot_falcon;
+	acr->base.managed_falcons = managed_falcons;
+	acr->base.func = &acr_r352_base_func;
+	acr->func = func;
+
+	return &acr->base;
+}
+
+struct nvkm_acr *
+acr_r352_new(unsigned long managed_falcons)
+{
+	return acr_r352_new_(&acr_r352_func, NVKM_SECBOOT_FALCON_PMU,
+			     managed_falcons);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h
new file mode 100644
index 000000000000..ad5923b0fd3c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NVKM_SECBOOT_ACR_R352_H__
+#define __NVKM_SECBOOT_ACR_R352_H__
+
+#include "acr.h"
+#include "ls_ucode.h"
+
+struct ls_ucode_img;
+
+#define ACR_R352_MAX_APPS 8
+
+/*
+ *
+ * LS blob structures
+ *
+ */
+
+/**
+ * struct acr_r352_lsf_lsb_header - LS firmware header
+ * @signature: signature to verify the firmware against
+ * @ucode_off: offset of the ucode blob in the WPR region. The ucode
+ *	blob contains the bootloader, code and data of the
+ *	LS falcon
+ * @ucode_size: size of the ucode blob, including bootloader
+ * @data_size: size of the ucode blob data
+ * @bl_code_size: size of the bootloader code
+ * @bl_imem_off: offset in imem of the bootloader
+ * @bl_data_off: offset of the bootloader data in WPR region
+ * @bl_data_size: size of the bootloader data
+ * @app_code_off: offset of the app code relative to ucode_off
+ * @app_code_size: size of the app code
+ * @app_data_off: offset of the app data relative to ucode_off
+ * @app_data_size: size of the app data
+ * @flags: flags for the secure bootloader
+ *
+ * This structure is written into the WPR region for each managed falcon. Each
+ * instance is referenced by the lsb_offset member of the corresponding
+ * lsf_wpr_header.
+ */
+struct acr_r352_lsf_lsb_header {
+	/**
+	 * LS falcon signatures
+	 * @prd_keys: signature to use in production mode
+	 * @dbg_keys: signature to use in debug mode
+	 * @b_prd_present: whether the production key is present
+	 * @b_dbg_present: whether the debug key is present
+	 * @falcon_id: ID of the falcon the ucode applies to
+	 */
+	struct {
+		u8 prd_keys[2][16];
+		u8 dbg_keys[2][16];
+		u32 b_prd_present;
+		u32 b_dbg_present;
+		u32 falcon_id;
+	} signature;
+	u32 ucode_off;
+	u32 ucode_size;
+	u32 data_size;
+	u32 bl_code_size;
+	u32 bl_imem_off;
+	u32 bl_data_off;
+	u32 bl_data_size;
+	u32 app_code_off;
+	u32 app_code_size;
+	u32 app_data_off;
+	u32 app_data_size;
+	u32 flags;
+#define LSF_FLAG_LOAD_CODE_AT_0		1
+#define LSF_FLAG_DMACTL_REQ_CTX		4
+#define LSF_FLAG_FORCE_PRIV_LOAD	8
+};
+
+/**
+ * struct acr_r352_lsf_wpr_header - LS blob WPR Header
+ * @falcon_id: LS falcon ID
+ * @lsb_offset: offset of the lsf_lsb_header in the WPR region
+ * @bootstrap_owner: secure falcon responsible for bootstrapping the LS falcon
+ * @lazy_bootstrap: skip bootstrapping by ACR
+ * @status: bootstrapping status
+ *
+ * An array of these is written at the beginning of the WPR region, one for
+ * each managed falcon.
The array is terminated by an instance which falcon_id + * is LSF_FALCON_ID_INVALID. + */ +struct acr_r352_lsf_wpr_header { + u32 falcon_id; + u32 lsb_offset; + u32 bootstrap_owner; + u32 lazy_bootstrap; + u32 status; +#define LSF_IMAGE_STATUS_NONE 0 +#define LSF_IMAGE_STATUS_COPY 1 +#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED 2 +#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED 3 +#define LSF_IMAGE_STATUS_VALIDATION_DONE 4 +#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED 5 +#define LSF_IMAGE_STATUS_BOOTSTRAP_READY 6 +}; + +/** + * struct ls_ucode_img_r352 - ucode image augmented with r352 headers + */ +struct ls_ucode_img_r352 { + struct ls_ucode_img base; + + struct acr_r352_lsf_wpr_header wpr_header; + struct acr_r352_lsf_lsb_header lsb_header; +}; +#define ls_ucode_img_r352(i) container_of(i, struct ls_ucode_img_r352, base) + + +/* + * HS blob structures + */ + +struct hsf_load_header_app { + u32 sec_code_off; + u32 sec_code_size; +}; + +/** + * struct hsf_load_header - HS firmware load header + */ +struct hsf_load_header { + u32 non_sec_code_off; + u32 non_sec_code_size; + u32 data_dma_base; + u32 data_size; + u32 num_apps; + struct hsf_load_header_app app[0]; +}; + +/** + * struct acr_r352_ls_func - manages a single LS firmware + * + * @load: load the external firmware into a ls_ucode_img + * @generate_bl_desc: function called on a block of bl_desc_size to generate the + * proper bootloader descriptor for this LS firmware + * @bl_desc_size: size of the bootloader descriptor + * @lhdr_flags: LS flags + */ +struct acr_r352_ls_func { + int (*load)(const struct nvkm_subdev *, struct ls_ucode_img *); + void (*generate_bl_desc)(const struct nvkm_acr *, + const struct ls_ucode_img *, u64, void *); + u32 bl_desc_size; + u32 lhdr_flags; +}; + +struct acr_r352; + +/** + * struct acr_r352_func - manages nuances between ACR versions + * + * @generate_hs_bl_desc: function called on a block of bl_desc_size to generate + * the proper HS bootloader descriptor + * @hs_bl_desc_size: size of the HS bootloader descriptor + */ +struct acr_r352_func { + void (*generate_hs_bl_desc)(const struct hsf_load_header *, void *, + u64); + u32 hs_bl_desc_size; + + struct ls_ucode_img *(*ls_ucode_img_load)(const struct acr_r352 *, + enum nvkm_secboot_falcon); + int (*ls_fill_headers)(struct acr_r352 *, struct list_head *); + int (*ls_write_wpr)(struct acr_r352 *, struct list_head *, + struct nvkm_gpuobj *, u32); + + const struct acr_r352_ls_func *ls_func[NVKM_SECBOOT_FALCON_END]; +}; + +/** + * struct acr_r352 - ACR data for driver release 352 (and beyond) + */ +struct acr_r352 { + struct nvkm_acr base; + const struct acr_r352_func *func; + + /* + * HS FW - lock WPR region (dGPU only) and load LS FWs + * on Tegra the HS FW copies the LS blob into the fixed WPR instead + */ + struct nvkm_gpuobj *load_blob; + struct { + struct hsf_load_header load_bl_header; + struct hsf_load_header_app __load_apps[ACR_R352_MAX_APPS]; + }; + + /* HS FW - unlock WPR region (dGPU only) */ + struct nvkm_gpuobj *unload_blob; + struct { + struct hsf_load_header unload_bl_header; + struct hsf_load_header_app __unload_apps[ACR_R352_MAX_APPS]; + }; + + /* HS bootloader */ + void *hsbl_blob; + + /* LS FWs, to be loaded by the HS ACR */ + struct nvkm_gpuobj *ls_blob; + + /* Firmware already loaded? 
 */
+	bool firmware_ok;
+
+	/* Falcons to lazy-bootstrap */
+	u32 lazy_bootstrap;
+
+	/* To keep track of the state of all managed falcons */
+	enum {
+		/* In non-secure state, no firmware loaded, no privileges */
+		NON_SECURE = 0,
+		/* In low-secure mode and ready to be started */
+		RESET,
+		/* In low-secure mode and running */
+		RUNNING,
+	} falcon_state[NVKM_SECBOOT_FALCON_END];
+};
+#define acr_r352(acr) container_of(acr, struct acr_r352, base)
+
+struct nvkm_acr *acr_r352_new_(const struct acr_r352_func *,
+			       enum nvkm_secboot_falcon, unsigned long);
+
+struct ls_ucode_img *acr_r352_ls_ucode_img_load(const struct acr_r352 *,
+						enum nvkm_secboot_falcon);
+int acr_r352_ls_fill_headers(struct acr_r352 *, struct list_head *);
+int acr_r352_ls_write_wpr(struct acr_r352 *, struct list_head *,
+			  struct nvkm_gpuobj *, u32);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c
new file mode 100644
index 000000000000..f0aff1d98474
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "acr_r352.h"
+
+#include <engine/falcon.h>
+
+/**
+ * struct acr_r361_flcn_bl_desc - DMEM bootloader descriptor
+ * @signature: 16B signature for secure code. 0s if no secure code
+ * @ctx_dma: DMA context to be used by BL while loading code/data
+ * @code_dma_base: 256B-aligned Physical FB Address where code is located
+ *	(falcon's $xcbase register)
+ * @non_sec_code_off: offset from code_dma_base where the non-secure code is
+ *	located. The offset must be a multiple of 256 to help perf
+ * @non_sec_code_size: the size of the nonSecure code part.
+ * @sec_code_off: offset from code_dma_base where the secure code is
+ *	located. The offset must be a multiple of 256 to help perf
+ * @sec_code_size: the size of the secure code part.
+ * @code_entry_point: code entry point which will be invoked by BL after
+ *	code is loaded.
+ * @data_dma_base: 256B aligned Physical FB Address where data is located.
+ *	(falcon's $xdbase register)
+ * @data_size: size of data block. Should be multiple of 256B
+ *
+ * Structure used by the bootloader to load the rest of the code.
This has + * to be filled by host and copied into DMEM at offset provided in the + * hsflcn_bl_desc.bl_desc_dmem_load_off. + */ +struct acr_r361_flcn_bl_desc { + u32 reserved[4]; + u32 signature[4]; + u32 ctx_dma; + struct flcn_u64 code_dma_base; + u32 non_sec_code_off; + u32 non_sec_code_size; + u32 sec_code_off; + u32 sec_code_size; + u32 code_entry_point; + struct flcn_u64 data_dma_base; + u32 data_size; +}; + +static void +acr_r361_generate_flcn_bl_desc(const struct nvkm_acr *acr, + const struct ls_ucode_img *_img, u64 wpr_addr, + void *_desc) +{ + struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img); + struct acr_r361_flcn_bl_desc *desc = _desc; + const struct ls_ucode_img_desc *pdesc = &img->base.ucode_desc; + u64 base, addr_code, addr_data; + + base = wpr_addr + img->lsb_header.ucode_off + pdesc->app_start_offset; + addr_code = base + pdesc->app_resident_code_offset; + addr_data = base + pdesc->app_resident_data_offset; + + desc->ctx_dma = FALCON_DMAIDX_UCODE; + desc->code_dma_base = u64_to_flcn64(addr_code); + desc->non_sec_code_off = pdesc->app_resident_code_offset; + desc->non_sec_code_size = pdesc->app_resident_code_size; + desc->code_entry_point = pdesc->app_imem_entry; + desc->data_dma_base = u64_to_flcn64(addr_data); + desc->data_size = pdesc->app_resident_data_size; +} + +static void +acr_r361_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc, + u64 offset) +{ + struct acr_r361_flcn_bl_desc *bl_desc = _bl_desc; + + bl_desc->ctx_dma = FALCON_DMAIDX_VIRT; + bl_desc->code_dma_base = u64_to_flcn64(offset); + bl_desc->non_sec_code_off = hdr->non_sec_code_off; + bl_desc->non_sec_code_size = hdr->non_sec_code_size; + bl_desc->sec_code_off = hdr->app[0].sec_code_off; + bl_desc->sec_code_size = hdr->app[0].sec_code_size; + bl_desc->code_entry_point = 0; + bl_desc->data_dma_base = u64_to_flcn64(offset + hdr->data_dma_base); + bl_desc->data_size = hdr->data_size; +} + +const struct acr_r352_ls_func +acr_r361_ls_fecs_func = { + .load = acr_ls_ucode_load_fecs, + .generate_bl_desc = acr_r361_generate_flcn_bl_desc, + .bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc), +}; + +const struct acr_r352_ls_func +acr_r361_ls_gpccs_func = { + .load = acr_ls_ucode_load_gpccs, + .generate_bl_desc = acr_r361_generate_flcn_bl_desc, + .bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc), + /* GPCCS will be loaded using PRI */ + .lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD, +}; + +const struct acr_r352_func +acr_r361_func = { + .generate_hs_bl_desc = acr_r361_generate_hs_bl_desc, + .hs_bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc), + .ls_ucode_img_load = acr_r352_ls_ucode_img_load, + .ls_fill_headers = acr_r352_ls_fill_headers, + .ls_write_wpr = acr_r352_ls_write_wpr, + .ls_func = { + [NVKM_SECBOOT_FALCON_FECS] = &acr_r361_ls_fecs_func, + [NVKM_SECBOOT_FALCON_GPCCS] = &acr_r361_ls_gpccs_func, + }, +}; + +struct nvkm_acr * +acr_r361_new(unsigned long managed_falcons) +{ + return acr_r352_new_(&acr_r361_func, NVKM_SECBOOT_FALCON_PMU, + managed_falcons); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c index 314be2192b7d..27c9dfffb9a6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c @@ -19,184 +19,108 @@ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ + +/* + * Secure boot is the process by which NVIDIA-signed firmware is loaded into + * some of the falcons of a GPU. 
For production devices this is the only way
+ * for the firmware to access useful (but sensitive) registers.
+ *
+ * A Falcon microprocessor supporting advanced security modes can run in one of
+ * three modes:
+ *
+ * - Non-secure (NS). In this mode, functionality is similar to Falcon
+ * architectures before security modes were introduced (pre-Maxwell), but
+ * capability is restricted. In particular, certain registers may be
+ * inaccessible for reads and/or writes, and physical memory access may be
+ * disabled (on certain Falcon instances). This is the only possible mode that
+ * can be used if you don't have microcode cryptographically signed by NVIDIA.
+ *
+ * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's
+ * not possible to read or write any Falcon internal state or Falcon registers
+ * from outside the Falcon (for example, from the host system). The only way
+ * to enable this mode is by loading microcode that has been signed by NVIDIA.
+ * (The loading process involves tagging the IMEM block as secure, writing the
+ * signature into a Falcon register, and starting execution. The hardware will
+ * validate the signature, and if valid, grant HS privileges.)
+ *
+ * - Light Secure (LS). In this mode, the microprocessor has more privileges
+ * than NS but fewer than HS. Some of the microprocessor state is visible to
+ * host software to ease debugging. The only way to enable this mode is by HS
+ * microcode enabling LS mode. Some privileges available to HS mode are not
+ * available here. LS mode is introduced in GM20x.
+ *
+ * Secure boot consists in temporarily switching an HS-capable falcon
+ * (typically PMU) into HS mode in order to validate the LS firmwares of
+ * managed falcons, load them, and switch managed falcons into LS mode. Once
+ * secure boot completes, no falcon remains in HS mode.
+ *
+ * Secure boot requires a write-protected memory region (WPR) which can only be
+ * written by the secure falcon. On dGPU, the driver sets up the WPR region in
+ * video memory. On Tegra, it is set up by the bootloader and its location and
+ * size written into memory controller registers.
+ *
+ * The secure boot process takes place as follows:
+ *
+ * 1) An LS blob is constructed that contains all the LS firmwares we want to
+ * load, along with their signatures and bootloaders.
+ *
+ * 2) An HS blob (also called ACR) is created that contains the signed HS
+ * firmware in charge of loading the LS firmwares into their respective
+ * falcons.
+ *
+ * 3) The HS blob is loaded (via its own bootloader) and executed on the
+ * HS-capable falcon. It authenticates itself, switches the secure falcon to
+ * HS mode and sets up the WPR region around the LS blob (dGPU) or copies the
+ * LS blob into the WPR region (Tegra).
+ *
+ * 4) The LS blob is now secure from all external tampering. The HS falcon
+ * checks the signatures of the LS firmwares and, if valid, switches the
+ * managed falcons to LS mode and makes them ready to run the LS firmware.
+ *
+ * 5) The managed falcons remain in LS mode and can be started.
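+ *
+ * From the rest of the driver's perspective this reduces to a pair of calls;
+ * for instance (illustrative), bringing up FECS from the GR engine would look
+ * like:
+ *
+ *	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
+ *		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
+ *
+ * after which the falcon holds its verified LS firmware and can be started.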
+ * + */ + #include "priv.h" +#include "acr.h" #include <subdev/mc.h> #include <subdev/timer.h> +#include <subdev/pmu.h> -static const char * -managed_falcons_names[] = { +const char * +nvkm_secboot_falcon_name[] = { [NVKM_SECBOOT_FALCON_PMU] = "PMU", [NVKM_SECBOOT_FALCON_RESERVED] = "<reserved>", [NVKM_SECBOOT_FALCON_FECS] = "FECS", [NVKM_SECBOOT_FALCON_GPCCS] = "GPCCS", [NVKM_SECBOOT_FALCON_END] = "<invalid>", }; - -/* - * Helper falcon functions - */ - -static int -falcon_clear_halt_interrupt(struct nvkm_device *device, u32 base) -{ - int ret; - - /* clear halt interrupt */ - nvkm_mask(device, base + 0x004, 0x10, 0x10); - /* wait until halt interrupt is cleared */ - ret = nvkm_wait_msec(device, 10, base + 0x008, 0x10, 0x0); - if (ret < 0) - return ret; - - return 0; -} - -static int -falcon_wait_idle(struct nvkm_device *device, u32 base) -{ - int ret; - - ret = nvkm_wait_msec(device, 10, base + 0x04c, 0xffff, 0x0); - if (ret < 0) - return ret; - - return 0; -} - -static int -nvkm_secboot_falcon_enable(struct nvkm_secboot *sb) -{ - struct nvkm_device *device = sb->subdev.device; - int ret; - - /* enable engine */ - nvkm_mc_enable(device, sb->devidx); - ret = nvkm_wait_msec(device, 10, sb->base + 0x10c, 0x6, 0x0); - if (ret < 0) { - nvkm_error(&sb->subdev, "Falcon mem scrubbing timeout\n"); - nvkm_mc_disable(device, sb->devidx); - return ret; - } - - ret = falcon_wait_idle(device, sb->base); - if (ret) - return ret; - - /* enable IRQs */ - nvkm_wr32(device, sb->base + 0x010, 0xff); - nvkm_mc_intr_mask(device, sb->devidx, true); - - return 0; -} - -static int -nvkm_secboot_falcon_disable(struct nvkm_secboot *sb) -{ - struct nvkm_device *device = sb->subdev.device; - - /* disable IRQs and wait for any previous code to complete */ - nvkm_mc_intr_mask(device, sb->devidx, false); - nvkm_wr32(device, sb->base + 0x014, 0xff); - - falcon_wait_idle(device, sb->base); - - /* disable engine */ - nvkm_mc_disable(device, sb->devidx); - - return 0; -} - -int -nvkm_secboot_falcon_reset(struct nvkm_secboot *sb) -{ - int ret; - - ret = nvkm_secboot_falcon_disable(sb); - if (ret) - return ret; - - ret = nvkm_secboot_falcon_enable(sb); - if (ret) - return ret; - - return 0; -} - -/** - * nvkm_secboot_falcon_run - run the falcon that will perform secure boot - * - * This function is to be called after all chip-specific preparations have - * been completed. It will start the falcon to perform secure boot, wait for - * it to halt, and report if an error occurred. - */ -int -nvkm_secboot_falcon_run(struct nvkm_secboot *sb) -{ - struct nvkm_device *device = sb->subdev.device; - int ret; - - /* Start falcon */ - nvkm_wr32(device, sb->base + 0x100, 0x2); - - /* Wait for falcon halt */ - ret = nvkm_wait_msec(device, 100, sb->base + 0x100, 0x10, 0x10); - if (ret < 0) - return ret; - - /* If mailbox register contains an error code, then ACR has failed */ - ret = nvkm_rd32(device, sb->base + 0x040); - if (ret) { - nvkm_error(&sb->subdev, "ACR boot failed, ret 0x%08x", ret); - falcon_clear_halt_interrupt(device, sb->base); - return -EINVAL; - } - - return 0; -} - - /** * nvkm_secboot_reset() - reset specified falcon */ int -nvkm_secboot_reset(struct nvkm_secboot *sb, u32 falcon) +nvkm_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon) { /* Unmanaged falcon? 
*/ - if (!(BIT(falcon) & sb->func->managed_falcons)) { + if (!(BIT(falcon) & sb->acr->managed_falcons)) { nvkm_error(&sb->subdev, "cannot reset unmanaged falcon!\n"); return -EINVAL; } - return sb->func->reset(sb, falcon); -} - -/** - * nvkm_secboot_start() - start specified falcon - */ -int -nvkm_secboot_start(struct nvkm_secboot *sb, u32 falcon) -{ - /* Unmanaged falcon? */ - if (!(BIT(falcon) & sb->func->managed_falcons)) { - nvkm_error(&sb->subdev, "cannot start unmanaged falcon!\n"); - return -EINVAL; - } - - return sb->func->start(sb, falcon); + return sb->acr->func->reset(sb->acr, sb, falcon); } /** * nvkm_secboot_is_managed() - check whether a given falcon is securely-managed */ bool -nvkm_secboot_is_managed(struct nvkm_secboot *secboot, - enum nvkm_secboot_falcon fid) +nvkm_secboot_is_managed(struct nvkm_secboot *sb, enum nvkm_secboot_falcon fid) { - if (!secboot) + if (!sb) return false; - return secboot->func->managed_falcons & BIT(fid); + return sb->acr->managed_falcons & BIT(fid); } static int @@ -205,9 +129,19 @@ nvkm_secboot_oneinit(struct nvkm_subdev *subdev) struct nvkm_secboot *sb = nvkm_secboot(subdev); int ret = 0; + switch (sb->acr->boot_falcon) { + case NVKM_SECBOOT_FALCON_PMU: + sb->boot_falcon = subdev->device->pmu->falcon; + break; + default: + nvkm_error(subdev, "Unmanaged boot falcon %s!\n", + nvkm_secboot_falcon_name[sb->acr->boot_falcon]); + return -EINVAL; + } + /* Call chip-specific init function */ - if (sb->func->init) - ret = sb->func->init(sb); + if (sb->func->oneinit) + ret = sb->func->oneinit(sb); if (ret) { nvkm_error(subdev, "Secure Boot initialization failed: %d\n", ret); @@ -249,7 +183,7 @@ nvkm_secboot = { }; int -nvkm_secboot_ctor(const struct nvkm_secboot_func *func, +nvkm_secboot_ctor(const struct nvkm_secboot_func *func, struct nvkm_acr *acr, struct nvkm_device *device, int index, struct nvkm_secboot *sb) { @@ -257,22 +191,14 @@ nvkm_secboot_ctor(const struct nvkm_secboot_func *func, nvkm_subdev_ctor(&nvkm_secboot, device, index, &sb->subdev); sb->func = func; - - /* setup the performing falcon's base address and masks */ - switch (func->boot_falcon) { - case NVKM_SECBOOT_FALCON_PMU: - sb->devidx = NVKM_SUBDEV_PMU; - sb->base = 0x10a000; - break; - default: - nvkm_error(&sb->subdev, "invalid secure boot falcon\n"); - return -EINVAL; - }; + sb->acr = acr; + acr->subdev = &sb->subdev; nvkm_debug(&sb->subdev, "securely managed falcons:\n"); - for_each_set_bit(fid, &sb->func->managed_falcons, + for_each_set_bit(fid, &sb->acr->managed_falcons, NVKM_SECBOOT_FALCON_END) - nvkm_debug(&sb->subdev, "- %s\n", managed_falcons_names[fid]); + nvkm_debug(&sb->subdev, "- %s\n", + nvkm_secboot_falcon_name[fid]); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c index ec48e4ace37a..813c4eb0b25f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c @@ -20,1313 +20,84 @@ * DEALINGS IN THE SOFTWARE. */ -/* - * Secure boot is the process by which NVIDIA-signed firmware is loaded into - * some of the falcons of a GPU. For production devices this is the only way - * for the firmware to access useful (but sensitive) registers. - * - * A Falcon microprocessor supporting advanced security modes can run in one of - * three modes: - * - * - Non-secure (NS). In this mode, functionality is similar to Falcon - * architectures before security modes were introduced (pre-Maxwell), but - * capability is restricted. 
In particular, certain registers may be - * inaccessible for reads and/or writes, and physical memory access may be - * disabled (on certain Falcon instances). This is the only possible mode that - * can be used if you don't have microcode cryptographically signed by NVIDIA. - * - * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's - * not possible to read or write any Falcon internal state or Falcon registers - * from outside the Falcon (for example, from the host system). The only way - * to enable this mode is by loading microcode that has been signed by NVIDIA. - * (The loading process involves tagging the IMEM block as secure, writing the - * signature into a Falcon register, and starting execution. The hardware will - * validate the signature, and if valid, grant HS privileges.) - * - * - Light Secure (LS). In this mode, the microprocessor has more privileges - * than NS but fewer than HS. Some of the microprocessor state is visible to - * host software to ease debugging. The only way to enable this mode is by HS - * microcode enabling LS mode. Some privileges available to HS mode are not - * available here. LS mode is introduced in GM20x. - * - * Secure boot consists in temporarily switching a HS-capable falcon (typically - * PMU) into HS mode in order to validate the LS firmwares of managed falcons, - * load them, and switch managed falcons into LS mode. Once secure boot - * completes, no falcon remains in HS mode. - * - * Secure boot requires a write-protected memory region (WPR) which can only be - * written by the secure falcon. On dGPU, the driver sets up the WPR region in - * video memory. On Tegra, it is set up by the bootloader and its location and - * size written into memory controller registers. - * - * The secure boot process takes place as follows: - * - * 1) A LS blob is constructed that contains all the LS firmwares we want to - * load, along with their signatures and bootloaders. - * - * 2) A HS blob (also called ACR) is created that contains the signed HS - * firmware in charge of loading the LS firmwares into their respective - * falcons. - * - * 3) The HS blob is loaded (via its own bootloader) and executed on the - * HS-capable falcon. It authenticates itself, switches the secure falcon to - * HS mode and setup the WPR region around the LS blob (dGPU) or copies the - * LS blob into the WPR region (Tegra). - * - * 4) The LS blob is now secure from all external tampering. The HS falcon - * checks the signatures of the LS firmwares and, if valid, switches the - * managed falcons to LS mode and makes them ready to run the LS firmware. - * - * 5) The managed falcons remain in LS mode and can be started. - * - */ -#include "priv.h" +#include "acr.h" +#include "gm200.h" #include <core/gpuobj.h> -#include <core/firmware.h> #include <subdev/fb.h> - -enum { - FALCON_DMAIDX_UCODE = 0, - FALCON_DMAIDX_VIRT = 1, - FALCON_DMAIDX_PHYS_VID = 2, - FALCON_DMAIDX_PHYS_SYS_COH = 3, - FALCON_DMAIDX_PHYS_SYS_NCOH = 4, -}; - -/** - * struct fw_bin_header - header of firmware files - * @bin_magic: always 0x3b1d14f0 - * @bin_ver: version of the bin format - * @bin_size: entire image size including this header - * @header_offset: offset of the firmware/bootloader header in the file - * @data_offset: offset of the firmware/bootloader payload in the file - * @data_size: size of the payload - * - * This header is located at the beginning of the HS firmware and HS bootloader - * files, to describe where the headers and data can be found. 
- */ -struct fw_bin_header { - u32 bin_magic; - u32 bin_ver; - u32 bin_size; - u32 header_offset; - u32 data_offset; - u32 data_size; -}; - -/** - * struct fw_bl_desc - firmware bootloader descriptor - * @start_tag: starting tag of bootloader - * @desc_dmem_load_off: DMEM offset of flcn_bl_dmem_desc - * @code_off: offset of code section - * @code_size: size of code section - * @data_off: offset of data section - * @data_size: size of data section - * - * This structure is embedded in bootloader firmware files at to describe the - * IMEM and DMEM layout expected by the bootloader. - */ -struct fw_bl_desc { - u32 start_tag; - u32 dmem_load_off; - u32 code_off; - u32 code_size; - u32 data_off; - u32 data_size; -}; - - -/* - * - * LS blob structures - * - */ - -/** - * struct lsf_ucode_desc - LS falcon signatures - * @prd_keys: signature to use when the GPU is in production mode - * @dgb_keys: signature to use when the GPU is in debug mode - * @b_prd_present: whether the production key is present - * @b_dgb_present: whether the debug key is present - * @falcon_id: ID of the falcon the ucode applies to - * - * Directly loaded from a signature file. - */ -struct lsf_ucode_desc { - u8 prd_keys[2][16]; - u8 dbg_keys[2][16]; - u32 b_prd_present; - u32 b_dbg_present; - u32 falcon_id; -}; - -/** - * struct lsf_lsb_header - LS firmware header - * @signature: signature to verify the firmware against - * @ucode_off: offset of the ucode blob in the WPR region. The ucode - * blob contains the bootloader, code and data of the - * LS falcon - * @ucode_size: size of the ucode blob, including bootloader - * @data_size: size of the ucode blob data - * @bl_code_size: size of the bootloader code - * @bl_imem_off: offset in imem of the bootloader - * @bl_data_off: offset of the bootloader data in WPR region - * @bl_data_size: size of the bootloader data - * @app_code_off: offset of the app code relative to ucode_off - * @app_code_size: size of the app code - * @app_data_off: offset of the app data relative to ucode_off - * @app_data_size: size of the app data - * @flags: flags for the secure bootloader - * - * This structure is written into the WPR region for each managed falcon. Each - * instance is referenced by the lsb_offset member of the corresponding - * lsf_wpr_header. - */ -struct lsf_lsb_header { - struct lsf_ucode_desc signature; - u32 ucode_off; - u32 ucode_size; - u32 data_size; - u32 bl_code_size; - u32 bl_imem_off; - u32 bl_data_off; - u32 bl_data_size; - u32 app_code_off; - u32 app_code_size; - u32 app_data_off; - u32 app_data_size; - u32 flags; -#define LSF_FLAG_LOAD_CODE_AT_0 1 -#define LSF_FLAG_DMACTL_REQ_CTX 4 -#define LSF_FLAG_FORCE_PRIV_LOAD 8 -}; - -/** - * struct lsf_wpr_header - LS blob WPR Header - * @falcon_id: LS falcon ID - * @lsb_offset: offset of the lsb_lsf_header in the WPR region - * @bootstrap_owner: secure falcon reponsible for bootstrapping the LS falcon - * @lazy_bootstrap: skip bootstrapping by ACR - * @status: bootstrapping status - * - * An array of these is written at the beginning of the WPR region, one for - * each managed falcon. The array is terminated by an instance which falcon_id - * is LSF_FALCON_ID_INVALID. 
- */ -struct lsf_wpr_header { - u32 falcon_id; - u32 lsb_offset; - u32 bootstrap_owner; - u32 lazy_bootstrap; - u32 status; -#define LSF_IMAGE_STATUS_NONE 0 -#define LSF_IMAGE_STATUS_COPY 1 -#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED 2 -#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED 3 -#define LSF_IMAGE_STATUS_VALIDATION_DONE 4 -#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED 5 -#define LSF_IMAGE_STATUS_BOOTSTRAP_READY 6 -}; - - -/** - * struct ls_ucode_img_desc - descriptor of firmware image - * @descriptor_size: size of this descriptor - * @image_size: size of the whole image - * @bootloader_start_offset: start offset of the bootloader in ucode image - * @bootloader_size: size of the bootloader - * @bootloader_imem_offset: start off set of the bootloader in IMEM - * @bootloader_entry_point: entry point of the bootloader in IMEM - * @app_start_offset: start offset of the LS firmware - * @app_size: size of the LS firmware's code and data - * @app_imem_offset: offset of the app in IMEM - * @app_imem_entry: entry point of the app in IMEM - * @app_dmem_offset: offset of the data in DMEM - * @app_resident_code_offset: offset of app code from app_start_offset - * @app_resident_code_size: size of the code - * @app_resident_data_offset: offset of data from app_start_offset - * @app_resident_data_size: size of data - * - * A firmware image contains the code, data, and bootloader of a given LS - * falcon in a single blob. This structure describes where everything is. - * - * This can be generated from a (bootloader, code, data) set if they have - * been loaded separately, or come directly from a file. - */ -struct ls_ucode_img_desc { - u32 descriptor_size; - u32 image_size; - u32 tools_version; - u32 app_version; - char date[64]; - u32 bootloader_start_offset; - u32 bootloader_size; - u32 bootloader_imem_offset; - u32 bootloader_entry_point; - u32 app_start_offset; - u32 app_size; - u32 app_imem_offset; - u32 app_imem_entry; - u32 app_dmem_offset; - u32 app_resident_code_offset; - u32 app_resident_code_size; - u32 app_resident_data_offset; - u32 app_resident_data_size; - u32 nb_overlays; - struct {u32 start; u32 size; } load_ovl[64]; - u32 compressed; -}; - -/** - * struct ls_ucode_img - temporary storage for loaded LS firmwares - * @node: to link within lsf_ucode_mgr - * @falcon_id: ID of the falcon this LS firmware is for - * @ucode_desc: loaded or generated map of ucode_data - * @ucode_header: header of the firmware - * @ucode_data: firmware payload (code and data) - * @ucode_size: size in bytes of data in ucode_data - * @wpr_header: WPR header to be written to the LS blob - * @lsb_header: LSB header to be written to the LS blob - * - * Preparing the WPR LS blob requires information about all the LS firmwares - * (size, etc) to be known. This structure contains all the data of one LS - * firmware. 
- */ -struct ls_ucode_img { - struct list_head node; - enum nvkm_secboot_falcon falcon_id; - - struct ls_ucode_img_desc ucode_desc; - u32 *ucode_header; - u8 *ucode_data; - u32 ucode_size; - - struct lsf_wpr_header wpr_header; - struct lsf_lsb_header lsb_header; -}; - -/** - * struct ls_ucode_mgr - manager for all LS falcon firmwares - * @count: number of managed LS falcons - * @wpr_size: size of the required WPR region in bytes - * @img_list: linked list of lsf_ucode_img - */ -struct ls_ucode_mgr { - u16 count; - u32 wpr_size; - struct list_head img_list; -}; - - -/* - * - * HS blob structures - * - */ - -/** - * struct hsf_fw_header - HS firmware descriptor - * @sig_dbg_offset: offset of the debug signature - * @sig_dbg_size: size of the debug signature - * @sig_prod_offset: offset of the production signature - * @sig_prod_size: size of the production signature - * @patch_loc: offset of the offset (sic) of where the signature is - * @patch_sig: offset of the offset (sic) to add to sig_*_offset - * @hdr_offset: offset of the load header (see struct hsf_load_header) - * @hdr_size: size of above header - * - * This structure is embedded in the HS firmware image at - * hs_bin_hdr.header_offset. - */ -struct hsf_fw_header { - u32 sig_dbg_offset; - u32 sig_dbg_size; - u32 sig_prod_offset; - u32 sig_prod_size; - u32 patch_loc; - u32 patch_sig; - u32 hdr_offset; - u32 hdr_size; -}; - -/** - * struct hsf_load_header - HS firmware load header - */ -struct hsf_load_header { - u32 non_sec_code_off; - u32 non_sec_code_size; - u32 data_dma_base; - u32 data_size; - u32 num_apps; - struct { - u32 sec_code_off; - u32 sec_code_size; - } app[0]; -}; - -/** - * Convenience function to duplicate a firmware file in memory and check that - * it has the required minimum size. - */ -static void * -gm200_secboot_load_firmware(struct nvkm_subdev *subdev, const char *name, - size_t min_size) -{ - const struct firmware *fw; - void *blob; - int ret; - - ret = nvkm_firmware_get(subdev->device, name, &fw); - if (ret) - return ERR_PTR(ret); - if (fw->size < min_size) { - nvkm_error(subdev, "%s is smaller than expected size %zu\n", - name, min_size); - nvkm_firmware_put(fw); - return ERR_PTR(-EINVAL); - } - blob = kmemdup(fw->data, fw->size, GFP_KERNEL); - nvkm_firmware_put(fw); - if (!blob) - return ERR_PTR(-ENOMEM); - - return blob; -} - - -/* - * Low-secure blob creation - */ - -#define BL_DESC_BLK_SIZE 256 -/** - * Build a ucode image and descriptor from provided bootloader, code and data. - * - * @bl: bootloader image, including its 16-byte descriptor - * @code: LS firmware code segment - * @data: LS firmware data segment - * @desc: ucode descriptor to be written - * - * Return: allocated ucode image with corresponding descriptor information. @desc - * is also updated to contain the right offsets within the returned image.
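- * - * (Editor's worked example under the 256-byte BL_DESC_BLK_SIZE above: for a - * bootloader with code_size 0x340, a 0x1100-byte code segment and a - * 0x400-byte data segment, app_start_offset = ALIGN(0x340, 256) = 0x400, - * app_resident_code_size = 0x1200, app_resident_data_offset = 0x1200 and - * image_size = 0x400 + 0x1200 + 0x400 = 0x1a00.)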
- */ -static void * -ls_ucode_img_build(const struct firmware *bl, const struct firmware *code, - const struct firmware *data, struct ls_ucode_img_desc *desc) -{ - struct fw_bin_header *bin_hdr = (void *)bl->data; - struct fw_bl_desc *bl_desc = (void *)bl->data + bin_hdr->header_offset; - void *bl_data = (void *)bl->data + bin_hdr->data_offset; - u32 pos = 0; - void *image; - - desc->bootloader_start_offset = pos; - desc->bootloader_size = ALIGN(bl_desc->code_size, sizeof(u32)); - desc->bootloader_imem_offset = bl_desc->start_tag * 256; - desc->bootloader_entry_point = bl_desc->start_tag * 256; - - pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE); - desc->app_start_offset = pos; - desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) + - ALIGN(data->size, BL_DESC_BLK_SIZE); - desc->app_imem_offset = 0; - desc->app_imem_entry = 0; - desc->app_dmem_offset = 0; - desc->app_resident_code_offset = 0; - desc->app_resident_code_size = ALIGN(code->size, BL_DESC_BLK_SIZE); - - pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE); - desc->app_resident_data_offset = pos - desc->app_start_offset; - desc->app_resident_data_size = ALIGN(data->size, BL_DESC_BLK_SIZE); - - desc->image_size = ALIGN(bl_desc->code_size, BL_DESC_BLK_SIZE) + - desc->app_size; - - image = kzalloc(desc->image_size, GFP_KERNEL); - if (!image) - return ERR_PTR(-ENOMEM); - - memcpy(image + desc->bootloader_start_offset, bl_data, - bl_desc->code_size); - memcpy(image + desc->app_start_offset, code->data, code->size); - memcpy(image + desc->app_start_offset + desc->app_resident_data_offset, - data->data, data->size); - - return image; -} - -/** - * ls_ucode_img_load_generic() - load and prepare a LS ucode image - * - * Load the LS microcode, bootloader and signature and pack them into a single - * blob. Also generate the corresponding ucode descriptor. - */ -static int -ls_ucode_img_load_generic(struct nvkm_subdev *subdev, - struct ls_ucode_img *img, const char *falcon_name, - const u32 falcon_id) -{ - const struct firmware *bl, *code, *data; - struct lsf_ucode_desc *lsf_desc; - char f[64]; - int ret; - - img->ucode_header = NULL; - - snprintf(f, sizeof(f), "gr/%s_bl", falcon_name); - ret = nvkm_firmware_get(subdev->device, f, &bl); - if (ret) - goto error; - - snprintf(f, sizeof(f), "gr/%s_inst", falcon_name); - ret = nvkm_firmware_get(subdev->device, f, &code); - if (ret) - goto free_bl; - - snprintf(f, sizeof(f), "gr/%s_data", falcon_name); - ret = nvkm_firmware_get(subdev->device, f, &data); - if (ret) - goto free_inst; - - img->ucode_data = ls_ucode_img_build(bl, code, data, - &img->ucode_desc); - if (IS_ERR(img->ucode_data)) { - ret = PTR_ERR(img->ucode_data); - goto free_data; - } - img->ucode_size = img->ucode_desc.image_size; - - snprintf(f, sizeof(f), "gr/%s_sig", falcon_name); - lsf_desc = gm200_secboot_load_firmware(subdev, f, sizeof(*lsf_desc)); - if (IS_ERR(lsf_desc)) { - ret = PTR_ERR(lsf_desc); - goto free_image; - } - /* not needed? 
the signature should already have the right value */ - lsf_desc->falcon_id = falcon_id; - memcpy(&img->lsb_header.signature, lsf_desc, sizeof(*lsf_desc)); - img->falcon_id = lsf_desc->falcon_id; - kfree(lsf_desc); - - /* success path - only free requested firmware files */ - goto free_data; - -free_image: - kfree(img->ucode_data); -free_data: - nvkm_firmware_put(data); -free_inst: - nvkm_firmware_put(code); -free_bl: - nvkm_firmware_put(bl); -error: - return ret; -} - -typedef int (*lsf_load_func)(struct nvkm_subdev *, struct ls_ucode_img *); - -static int -ls_ucode_img_load_fecs(struct nvkm_subdev *subdev, struct ls_ucode_img *img) -{ - return ls_ucode_img_load_generic(subdev, img, "fecs", - NVKM_SECBOOT_FALCON_FECS); -} - -static int -ls_ucode_img_load_gpccs(struct nvkm_subdev *subdev, struct ls_ucode_img *img) -{ - return ls_ucode_img_load_generic(subdev, img, "gpccs", - NVKM_SECBOOT_FALCON_GPCCS); -} - -/** - * ls_ucode_img_load() - create an ls_ucode_img and load it - */ -static struct ls_ucode_img * -ls_ucode_img_load(struct nvkm_subdev *subdev, lsf_load_func load_func) -{ - struct ls_ucode_img *img; - int ret; - - img = kzalloc(sizeof(*img), GFP_KERNEL); - if (!img) - return ERR_PTR(-ENOMEM); - - ret = load_func(subdev, img); - if (ret) { - kfree(img); - return ERR_PTR(ret); - } - - return img; -} - -static const lsf_load_func lsf_load_funcs[] = { - [NVKM_SECBOOT_FALCON_END] = NULL, /* reserve enough space */ - [NVKM_SECBOOT_FALCON_FECS] = ls_ucode_img_load_fecs, - [NVKM_SECBOOT_FALCON_GPCCS] = ls_ucode_img_load_gpccs, -}; - -/** - * ls_ucode_img_populate_bl_desc() - populate a DMEM BL descriptor for LS image - * @img: ucode image to generate against - * @wpr_addr: physical base address of the WPR region - * @desc: descriptor to populate - * - * Populate the DMEM BL descriptor with the information contained in an - * ls_ucode_img_desc. - * - */ -static void -ls_ucode_img_populate_bl_desc(struct ls_ucode_img *img, u64 wpr_addr, - struct gm200_flcn_bl_desc *desc) -{ - struct ls_ucode_img_desc *pdesc = &img->ucode_desc; - u64 addr_base; - - addr_base = wpr_addr + img->lsb_header.ucode_off + - pdesc->app_start_offset; - - memset(desc, 0, sizeof(*desc)); - desc->ctx_dma = FALCON_DMAIDX_UCODE; - desc->code_dma_base.lo = lower_32_bits( - (addr_base + pdesc->app_resident_code_offset)); - desc->code_dma_base.hi = upper_32_bits( - (addr_base + pdesc->app_resident_code_offset)); - desc->non_sec_code_size = pdesc->app_resident_code_size; - desc->data_dma_base.lo = lower_32_bits( - (addr_base + pdesc->app_resident_data_offset)); - desc->data_dma_base.hi = upper_32_bits( - (addr_base + pdesc->app_resident_data_offset)); - desc->data_size = pdesc->app_resident_data_size; - desc->code_entry_point = pdesc->app_imem_entry; -} - -#define LSF_LSB_HEADER_ALIGN 256 -#define LSF_BL_DATA_ALIGN 256 -#define LSF_BL_DATA_SIZE_ALIGN 256 -#define LSF_BL_CODE_SIZE_ALIGN 256 -#define LSF_UCODE_DATA_ALIGN 4096 - -/** - * ls_ucode_img_fill_headers - fill the WPR and LSB headers of an image - * @gsb: secure boot device used - * @img: image to generate for - * @offset: offset in the WPR region where this image starts - * - * Allocate space in the WPR area from offset and write the WPR and LSB headers - * accordingly. - * - * Return: offset at the end of this image.
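- * - * (Editor's example, using the alignment constants defined above: starting - * from offset 0x200 with a 0x5000-byte ucode image, lsb_offset = - * ALIGN(0x200, 256) = 0x200; the ucode is then placed at ucode_off = - * ALIGN(0x200 + sizeof(struct lsf_lsb_header), 4096) = 0x1000; bl_data_off = - * ALIGN(0x1000 + 0x5000, 256) = 0x6000; and the returned offset is - * 0x6000 + bl_data_size.)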
- */ -static u32 -ls_ucode_img_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_img *img, - u32 offset) -{ - struct lsf_wpr_header *whdr = &img->wpr_header; - struct lsf_lsb_header *lhdr = &img->lsb_header; - struct ls_ucode_img_desc *desc = &img->ucode_desc; - - if (img->ucode_header) { - nvkm_fatal(&gsb->base.subdev, - "images without a loader are not supported yet!\n"); - return offset; - } - - /* Fill WPR header */ - whdr->falcon_id = img->falcon_id; - whdr->bootstrap_owner = gsb->base.func->boot_falcon; - whdr->status = LSF_IMAGE_STATUS_COPY; - - /* Align, save off, and include an LSB header size */ - offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN); - whdr->lsb_offset = offset; - offset += sizeof(struct lsf_lsb_header); - - /* - * Align, save off, and include the original (static) ucode - * image size - */ - offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN); - lhdr->ucode_off = offset; - offset += img->ucode_size; - - /* - * For falcons that use a boot loader (BL), we append a loader - * desc structure on the end of the ucode image and consider - * this the boot loader data. The host will then copy the loader - * desc args to this space within the WPR region (before locking - * down) and the HS bin will then copy them to DMEM 0 for the - * loader. - */ - lhdr->bl_code_size = ALIGN(desc->bootloader_size, - LSF_BL_CODE_SIZE_ALIGN); - lhdr->ucode_size = ALIGN(desc->app_resident_data_offset, - LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size; - lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) + - lhdr->bl_code_size - lhdr->ucode_size; - /* - * Though the BL is located at 0th offset of the image, the VA - * is different to make sure that it doesn't collide with the - * actual OS VA range - */ - lhdr->bl_imem_off = desc->bootloader_imem_offset; - lhdr->app_code_off = desc->app_start_offset + - desc->app_resident_code_offset; - lhdr->app_code_size = desc->app_resident_code_size; - lhdr->app_data_off = desc->app_start_offset + - desc->app_resident_data_offset; - lhdr->app_data_size = desc->app_resident_data_size; - - lhdr->flags = 0; - if (img->falcon_id == gsb->base.func->boot_falcon) - lhdr->flags = LSF_FLAG_DMACTL_REQ_CTX; - - /* GPCCS will be loaded using PRI */ - if (img->falcon_id == NVKM_SECBOOT_FALCON_GPCCS) - lhdr->flags |= LSF_FLAG_FORCE_PRIV_LOAD; - - /* Align (size bloat) and save off BL descriptor size */ - lhdr->bl_data_size = ALIGN(sizeof(struct gm200_flcn_bl_desc), - LSF_BL_DATA_SIZE_ALIGN); - /* - * Align, save off, and include the additional BL data - */ - offset = ALIGN(offset, LSF_BL_DATA_ALIGN); - lhdr->bl_data_off = offset; - offset += lhdr->bl_data_size; - - return offset; -} - -static void -ls_ucode_mgr_init(struct ls_ucode_mgr *mgr) -{ - memset(mgr, 0, sizeof(*mgr)); - INIT_LIST_HEAD(&mgr->img_list); -} - -static void -ls_ucode_mgr_cleanup(struct ls_ucode_mgr *mgr) -{ - struct ls_ucode_img *img, *t; - - list_for_each_entry_safe(img, t, &mgr->img_list, node) { - kfree(img->ucode_data); - kfree(img->ucode_header); - kfree(img); - } -} - -static void -ls_ucode_mgr_add_img(struct ls_ucode_mgr *mgr, struct ls_ucode_img *img) -{ - mgr->count++; - list_add_tail(&img->node, &mgr->img_list); -} - -/** - * ls_ucode_mgr_fill_headers - fill WPR and LSB headers of all managed images - */ -static void -ls_ucode_mgr_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr) -{ - struct ls_ucode_img *img; - u32 offset; - - /* - * Start with an array of WPR headers at the base of the WPR.
- * The expectation here is that the secure falcon will do a single DMA - * read of this array and cache it internally so it's ok to pack these. - * Also, we add 1 to the falcon count to indicate the end of the array. - */ - offset = sizeof(struct lsf_wpr_header) * (mgr->count + 1); - - /* - * Walk the managed falcons, accounting for the LSB structs - * as well as the ucode images. - */ - list_for_each_entry(img, &mgr->img_list, node) { - offset = ls_ucode_img_fill_headers(gsb, img, offset); - } - - mgr->wpr_size = offset; -} - -/** - * ls_ucode_mgr_write_wpr - write the WPR blob contents - */ -static int -ls_ucode_mgr_write_wpr(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr, - struct nvkm_gpuobj *wpr_blob) -{ - struct ls_ucode_img *img; - u32 pos = 0; - - nvkm_kmap(wpr_blob); - - list_for_each_entry(img, &mgr->img_list, node) { - nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header, - sizeof(img->wpr_header)); - - nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset, - &img->lsb_header, sizeof(img->lsb_header)); - - /* Generate and write BL descriptor */ - if (!img->ucode_header) { - u8 desc[gsb->func->bl_desc_size]; - struct gm200_flcn_bl_desc gdesc; - - ls_ucode_img_populate_bl_desc(img, gsb->wpr_addr, - &gdesc); - gsb->func->fixup_bl_desc(&gdesc, &desc); - nvkm_gpuobj_memcpy_to(wpr_blob, - img->lsb_header.bl_data_off, - &desc, gsb->func->bl_desc_size); - } - - /* Copy ucode */ - nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off, - img->ucode_data, img->ucode_size); - - pos += sizeof(img->wpr_header); - } - - nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID); - - nvkm_done(wpr_blob); - - return 0; -} - -/* Both size and address of WPR need to be 128K-aligned */ -#define WPR_ALIGNMENT 0x20000 -/** - * gm200_secboot_prepare_ls_blob() - prepare the LS blob - * - * For each securely managed falcon, load the FW, signatures and bootloaders and - * prepare a ucode blob. Then, compute the offsets in the WPR region for each - * blob, and finally write the headers and ucode blobs into a GPU object that - * will be copied into the WPR region by the HS firmware. 
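- * - * (Editor's summary of the resulting layout: the lsf_wpr_header array sits - * at offset 0, terminated by LSF_FALCON_ID_INVALID; each managed falcon - * then gets a 256B-aligned lsf_lsb_header, a 4kB-aligned ucode image and a - * 256B-aligned BL descriptor, as computed by ls_ucode_img_fill_headers() - * above.)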
- */ -static int -gm200_secboot_prepare_ls_blob(struct gm200_secboot *gsb) -{ - struct nvkm_secboot *sb = &gsb->base; - struct nvkm_device *device = sb->subdev.device; - struct ls_ucode_mgr mgr; - int falcon_id; - int ret; - - ls_ucode_mgr_init(&mgr); - - /* Load all LS blobs */ - for_each_set_bit(falcon_id, &gsb->base.func->managed_falcons, - NVKM_SECBOOT_FALCON_END) { - struct ls_ucode_img *img; - - img = ls_ucode_img_load(&sb->subdev, lsf_load_funcs[falcon_id]); - - if (IS_ERR(img)) { - ret = PTR_ERR(img); - goto cleanup; - } - ls_ucode_mgr_add_img(&mgr, img); - } - - /* - * Fill the WPR and LSF headers with the right offsets and compute - * required WPR size - */ - ls_ucode_mgr_fill_headers(gsb, &mgr); - mgr.wpr_size = ALIGN(mgr.wpr_size, WPR_ALIGNMENT); - - /* Allocate GPU object that will contain the WPR region */ - ret = nvkm_gpuobj_new(device, mgr.wpr_size, WPR_ALIGNMENT, false, NULL, - &gsb->ls_blob); - if (ret) - goto cleanup; - - nvkm_debug(&sb->subdev, "%d managed LS falcons, WPR size is %d bytes\n", - mgr.count, mgr.wpr_size); - - /* If WPR address and size are not fixed, set them to fit the LS blob */ - if (!gsb->wpr_size) { - gsb->wpr_addr = gsb->ls_blob->addr; - gsb->wpr_size = gsb->ls_blob->size; - } - - /* Write LS blob */ - ret = ls_ucode_mgr_write_wpr(gsb, &mgr, gsb->ls_blob); - if (ret) - nvkm_gpuobj_del(&gsb->ls_blob); - -cleanup: - ls_ucode_mgr_cleanup(&mgr); - - return ret; -} - -/* - * High-secure blob creation - */ - -/** - * gm200_secboot_hsf_patch_signature() - patch HS blob with correct signature - */ -static void -gm200_secboot_hsf_patch_signature(struct gm200_secboot *gsb, void *acr_image) -{ - struct nvkm_secboot *sb = &gsb->base; - struct fw_bin_header *hsbin_hdr = acr_image; - struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset; - void *hs_data = acr_image + hsbin_hdr->data_offset; - void *sig; - u32 sig_size; - - /* Falcon in debug or production mode? */ - if ((nvkm_rd32(sb->subdev.device, sb->base + 0xc08) >> 20) & 0x1) { - sig = acr_image + fw_hdr->sig_dbg_offset; - sig_size = fw_hdr->sig_dbg_size; - } else { - sig = acr_image + fw_hdr->sig_prod_offset; - sig_size = fw_hdr->sig_prod_size; - } - - /* Patch signature */ - memcpy(hs_data + fw_hdr->patch_loc, sig + fw_hdr->patch_sig, sig_size); -} - -/** - * gm200_secboot_populate_hsf_bl_desc() - populate BL descriptor for HS image - */ -static void -gm200_secboot_populate_hsf_bl_desc(void *acr_image, - struct gm200_flcn_bl_desc *bl_desc) -{ - struct fw_bin_header *hsbin_hdr = acr_image; - struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset; - struct hsf_load_header *load_hdr = acr_image + fw_hdr->hdr_offset; - - /* - * Descriptor for the bootloader that will load the ACR image into - * IMEM/DMEM memory. 
- */ - fw_hdr = acr_image + hsbin_hdr->header_offset; - load_hdr = acr_image + fw_hdr->hdr_offset; - memset(bl_desc, 0, sizeof(*bl_desc)); - bl_desc->ctx_dma = FALCON_DMAIDX_VIRT; - bl_desc->non_sec_code_off = load_hdr->non_sec_code_off; - bl_desc->non_sec_code_size = load_hdr->non_sec_code_size; - bl_desc->sec_code_off = load_hdr->app[0].sec_code_off; - bl_desc->sec_code_size = load_hdr->app[0].sec_code_size; - bl_desc->code_entry_point = 0; - /* - * We need to set code_dma_base to the virtual address of the acr_blob, - * and add this address to data_dma_base before writing it into DMEM - */ - bl_desc->code_dma_base.lo = 0; - bl_desc->data_dma_base.lo = load_hdr->data_dma_base; - bl_desc->data_size = load_hdr->data_size; -} - -/** - * gm200_secboot_prepare_hs_blob - load and prepare an HS blob and BL descriptor - * - * @gsb: secure boot instance to prepare for - * @fw: name of the HS firmware to load - * @blob: pointer to gpuobj that will be allocated to receive the HS FW payload - * @bl_desc: pointer to the BL descriptor to write for this firmware - * @patch: whether we should patch the HS descriptor (only for HS loaders) - */ -static int -gm200_secboot_prepare_hs_blob(struct gm200_secboot *gsb, const char *fw, - struct nvkm_gpuobj **blob, - struct gm200_flcn_bl_desc *bl_desc, bool patch) -{ - struct nvkm_subdev *subdev = &gsb->base.subdev; - void *acr_image; - struct fw_bin_header *hsbin_hdr; - struct hsf_fw_header *fw_hdr; - void *acr_data; - struct hsf_load_header *load_hdr; - struct hsflcn_acr_desc *desc; - int ret; - - acr_image = gm200_secboot_load_firmware(subdev, fw, 0); - if (IS_ERR(acr_image)) - return PTR_ERR(acr_image); - hsbin_hdr = acr_image; - - /* Patch signature */ - gm200_secboot_hsf_patch_signature(gsb, acr_image); - - acr_data = acr_image + hsbin_hdr->data_offset; - - /* Patch descriptor? */ - if (patch) { - fw_hdr = acr_image + hsbin_hdr->header_offset; - load_hdr = acr_image + fw_hdr->hdr_offset; - desc = acr_data + load_hdr->data_dma_base; - gsb->func->fixup_hs_desc(gsb, desc); - } - - /* Generate HS BL descriptor */ - gm200_secboot_populate_hsf_bl_desc(acr_image, bl_desc); - - /* Create ACR blob and copy HS data to it */ - ret = nvkm_gpuobj_new(subdev->device, ALIGN(hsbin_hdr->data_size, 256), - 0x1000, false, NULL, blob); - if (ret) - goto cleanup; - - nvkm_kmap(*blob); - nvkm_gpuobj_memcpy_to(*blob, 0, acr_data, hsbin_hdr->data_size); - nvkm_done(*blob); - -cleanup: - kfree(acr_image); - - return ret; -} - -/* - * High-secure bootloader blob creation - */ - -static int -gm200_secboot_prepare_hsbl_blob(struct gm200_secboot *gsb) -{ - struct nvkm_subdev *subdev = &gsb->base.subdev; - - gsb->hsbl_blob = gm200_secboot_load_firmware(subdev, "acr/bl", 0); - if (IS_ERR(gsb->hsbl_blob)) { - int ret = PTR_ERR(gsb->hsbl_blob); - - gsb->hsbl_blob = NULL; - return ret; - } - - return 0; -} +#include <engine/falcon.h> +#include <subdev/mc.h> /** - * gm20x_secboot_prepare_blobs - load blobs common to all GM20X GPUs. + * gm200_secboot_run_blob() - run the given high-secure blob * - * This includes the LS blob, HS ucode loading blob, and HS bootloader. - * - * The HS ucode unload blob is only used on dGPU.
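+ * + * (Editor's outline of the body below: acquire the boot falcon, map the + * blob into its VM, reset and bind the falcon, hand the blob to + * sb->acr->func->load(), start execution at sb->acr->start_address, poll + * for HALT, then read falcon mailbox 0x040 for the ACR status code.)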
*/ int -gm20x_secboot_prepare_blobs(struct gm200_secboot *gsb) -{ - int ret; - - /* Load and prepare the managed falcon's firmwares */ - if (!gsb->ls_blob) { - ret = gm200_secboot_prepare_ls_blob(gsb); - if (ret) - return ret; - } - - /* Load the HS firmware that will load the LS firmwares */ - if (!gsb->acr_load_blob) { - ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load", - &gsb->acr_load_blob, - &gsb->acr_load_bl_desc, true); - if (ret) - return ret; - } - - /* Load the HS firmware bootloader */ - if (!gsb->hsbl_blob) { - ret = gm200_secboot_prepare_hsbl_blob(gsb); - if (ret) - return ret; - } - - return 0; -} - -static int -gm200_secboot_prepare_blobs(struct gm200_secboot *gsb) -{ - int ret; - - ret = gm20x_secboot_prepare_blobs(gsb); - if (ret) - return ret; - - /* dGPU only: load the HS firmware that unprotects the WPR region */ - if (!gsb->acr_unload_blob) { - ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload", - &gsb->acr_unload_blob, - &gsb->acr_unload_bl_desc, false); - if (ret) - return ret; - } - - return 0; -} - -static int -gm200_secboot_blobs_ready(struct gm200_secboot *gsb) +gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob) { + struct gm200_secboot *gsb = gm200_secboot(sb); struct nvkm_subdev *subdev = &gsb->base.subdev; + struct nvkm_falcon *falcon = gsb->base.boot_falcon; + struct nvkm_vma vma; int ret; - /* firmware already loaded, nothing to do... */ - if (gsb->firmware_ok) - return 0; - - ret = gsb->func->prepare_blobs(gsb); - if (ret) { - nvkm_error(subdev, "failed to load secure firmware\n"); - return ret; - } - - gsb->firmware_ok = true; - - return 0; -} - - -/* - * Secure Boot Execution - */ - -/** - * gm200_secboot_load_hs_bl() - load HS bootloader into DMEM and IMEM - */ -static void -gm200_secboot_load_hs_bl(struct gm200_secboot *gsb, void *data, u32 data_size) -{ - struct nvkm_device *device = gsb->base.subdev.device; - struct fw_bin_header *hdr = gsb->hsbl_blob; - struct fw_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset; - void *blob_data = gsb->hsbl_blob + hdr->data_offset; - void *hsbl_code = blob_data + hsbl_desc->code_off; - void *hsbl_data = blob_data + hsbl_desc->data_off; - u32 code_size = ALIGN(hsbl_desc->code_size, 256); - const u32 base = gsb->base.base; - u32 blk; - u32 tag; - int i; - - /* - * Copy HS bootloader data - */ - nvkm_wr32(device, base + 0x1c0, (0x00000000 | (0x1 << 24))); - for (i = 0; i < hsbl_desc->data_size / 4; i++) - nvkm_wr32(device, base + 0x1c4, ((u32 *)hsbl_data)[i]); - - /* - * Copy HS bootloader interface structure where the HS descriptor - * expects it to be - */ - nvkm_wr32(device, base + 0x1c0, - (hsbl_desc->dmem_load_off | (0x1 << 24))); - for (i = 0; i < data_size / 4; i++) - nvkm_wr32(device, base + 0x1c4, ((u32 *)data)[i]); - - /* Copy HS bootloader code to end of IMEM */ - blk = (nvkm_rd32(device, base + 0x108) & 0x1ff) - (code_size >> 8); - tag = hsbl_desc->start_tag; - nvkm_wr32(device, base + 0x180, ((blk & 0xff) << 8) | (0x1 << 24)); - for (i = 0; i < code_size / 4; i++) { - /* write new tag every 256B */ - if ((i & 0x3f) == 0) { - nvkm_wr32(device, base + 0x188, tag & 0xffff); - tag++; - } - nvkm_wr32(device, base + 0x184, ((u32 *)hsbl_code)[i]); - } - nvkm_wr32(device, base + 0x188, 0); -} - -/** - * gm200_secboot_setup_falcon() - set up the secure falcon for secure boot - */ -static int -gm200_secboot_setup_falcon(struct gm200_secboot *gsb) -{ - struct nvkm_device *device = gsb->base.subdev.device; - struct fw_bin_header *hdr = gsb->hsbl_blob; - struct 
fw_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset; - /* virtual start address for boot vector */ - u32 virt_addr = hsbl_desc->start_tag << 8; - const u32 base = gsb->base.base; - const u32 reg_base = base + 0xe00; - u32 inst_loc; - int ret; - - ret = nvkm_secboot_falcon_reset(&gsb->base); + ret = nvkm_falcon_get(falcon, subdev); if (ret) return ret; - /* setup apertures - virtual */ - nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_UCODE), 0x4); - nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_VIRT), 0x0); - /* setup apertures - physical */ - nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_VID), 0x4); - nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_COH), - 0x4 | 0x1); - nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_NCOH), - 0x4 | 0x2); - - /* Set context */ - if (nvkm_memory_target(gsb->inst->memory) == NVKM_MEM_TARGET_VRAM) - inst_loc = 0x0; /* FB */ - else - inst_loc = 0x3; /* Non-coherent sysmem */ - - nvkm_mask(device, base + 0x048, 0x1, 0x1); - nvkm_wr32(device, base + 0x480, - ((gsb->inst->addr >> 12) & 0xfffffff) | - (inst_loc << 28) | (1 << 30)); - - /* Set boot vector to code's starting virtual address */ - nvkm_wr32(device, base + 0x104, virt_addr); - - return 0; -} - -/** - * gm200_secboot_run_hs_blob() - run the given high-secure blob - */ -static int -gm200_secboot_run_hs_blob(struct gm200_secboot *gsb, struct nvkm_gpuobj *blob, - struct gm200_flcn_bl_desc *desc) -{ - struct nvkm_vma vma; - u64 vma_addr; - const u32 bl_desc_size = gsb->func->bl_desc_size; - u8 bl_desc[bl_desc_size]; - int ret; - /* Map the HS firmware so the HS bootloader can see it */ ret = nvkm_gpuobj_map(blob, gsb->vm, NV_MEM_ACCESS_RW, &vma); - if (ret) + if (ret) { + nvkm_falcon_put(falcon, subdev); return ret; + } - /* Add the mapping address to the DMA bases */ - vma_addr = flcn64_to_u64(desc->code_dma_base) + vma.offset; - desc->code_dma_base.lo = lower_32_bits(vma_addr); - desc->code_dma_base.hi = upper_32_bits(vma_addr); - vma_addr = flcn64_to_u64(desc->data_dma_base) + vma.offset; - desc->data_dma_base.lo = lower_32_bits(vma_addr); - desc->data_dma_base.hi = upper_32_bits(vma_addr); - - /* Fixup the BL header */ - gsb->func->fixup_bl_desc(desc, &bl_desc); - - /* Reset the falcon and make it ready to run the HS bootloader */ - ret = gm200_secboot_setup_falcon(gsb); + /* Reset and set the falcon up */ + ret = nvkm_falcon_reset(falcon); if (ret) - goto done; + goto end; + nvkm_falcon_bind_context(falcon, gsb->inst); /* Load the HS bootloader into the falcon's IMEM/DMEM */ - gm200_secboot_load_hs_bl(gsb, &bl_desc, bl_desc_size); - - /* Start the HS bootloader */ - ret = nvkm_secboot_falcon_run(&gsb->base); + ret = sb->acr->func->load(sb->acr, &gsb->base, blob, vma.offset); if (ret) - goto done; - -done: - /* Restore the original DMA addresses */ - vma_addr = flcn64_to_u64(desc->code_dma_base) - vma.offset; - desc->code_dma_base.lo = lower_32_bits(vma_addr); - desc->code_dma_base.hi = upper_32_bits(vma_addr); - vma_addr = flcn64_to_u64(desc->data_dma_base) - vma.offset; - desc->data_dma_base.lo = lower_32_bits(vma_addr); - desc->data_dma_base.hi = upper_32_bits(vma_addr); - - /* We don't need the ACR firmware anymore */ - nvkm_gpuobj_unmap(&vma); + goto end; - return ret; -} + /* Disable interrupts as we will poll for the HALT bit */ + nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, false); -/* - * gm200_secboot_reset() - execute secure boot from the prepared state - * - * Load the HS bootloader and ask the falcon to run it. 
This will in turn - * load the HS firmware and run it, so once the falcon stops all the managed - * falcons should have their LS firmware loaded and be ready to run. - */ -int -gm200_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon) -{ - struct gm200_secboot *gsb = gm200_secboot(sb); - int ret; + /* Set default error value in mailbox register */ + nvkm_falcon_wr32(falcon, 0x040, 0xdeada5a5); - /* Make sure all blobs are ready */ - ret = gm200_secboot_blobs_ready(gsb); + /* Start the HS bootloader */ + nvkm_falcon_set_start_addr(falcon, sb->acr->start_address); + nvkm_falcon_start(falcon); + ret = nvkm_falcon_wait_for_halt(falcon, 100); if (ret) - return ret; - - /* - * Dummy GM200 implementation: perform secure boot each time we are - * called on FECS. Since only FECS and GPCCS are managed and started - * together, this ought to be safe. - * - * Once we have proper PMU firmware and support, this will be changed - * to a proper call to the PMU method. - */ - if (falcon != NVKM_SECBOOT_FALCON_FECS) goto end; - /* If WPR is set and we have an unload blob, run it to unlock WPR */ - if (gsb->acr_unload_blob && - gsb->falcon_state[NVKM_SECBOOT_FALCON_FECS] != NON_SECURE) { - ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_unload_blob, - &gsb->acr_unload_bl_desc); - if (ret) - return ret; + /* If mailbox register contains an error code, then ACR has failed */ + ret = nvkm_falcon_rd32(falcon, 0x040); + if (ret) { + nvkm_error(subdev, "ACR boot failed, ret 0x%08x", ret); + ret = -EINVAL; + goto end; } - /* Reload all managed falcons */ - ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_load_blob, - &gsb->acr_load_bl_desc); - if (ret) - return ret; - end: - gsb->falcon_state[falcon] = RESET; - return 0; -} + /* Reenable interrupts */ + nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, true); -int -gm200_secboot_start(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon) -{ - struct gm200_secboot *gsb = gm200_secboot(sb); - int base; - - switch (falcon) { - case NVKM_SECBOOT_FALCON_FECS: - base = 0x409000; - break; - case NVKM_SECBOOT_FALCON_GPCCS: - base = 0x41a000; - break; - default: - nvkm_error(&sb->subdev, "cannot start unhandled falcon!\n"); - return -EINVAL; - } - - nvkm_wr32(sb->subdev.device, base + 0x130, 0x00000002); - gsb->falcon_state[falcon] = RUNNING; + /* We don't need the ACR firmware anymore */ + nvkm_gpuobj_unmap(&vma); + nvkm_falcon_put(falcon, subdev); - return 0; + return ret; } - - int -gm200_secboot_init(struct nvkm_secboot *sb) +gm200_secboot_oneinit(struct nvkm_secboot *sb) { struct gm200_secboot *gsb = gm200_secboot(sb); struct nvkm_device *device = sb->subdev.device; @@ -1361,24 +132,22 @@ gm200_secboot_init(struct nvkm_secboot *sb) nvkm_wo32(gsb->inst, 0x20c, upper_32_bits(vm_area_len - 1)); nvkm_done(gsb->inst); + if (sb->acr->func->oneinit) { + ret = sb->acr->func->oneinit(sb->acr, sb); + if (ret) + return ret; + } + return 0; } -static int +int gm200_secboot_fini(struct nvkm_secboot *sb, bool suspend) { - struct gm200_secboot *gsb = gm200_secboot(sb); int ret = 0; - int i; - /* Run the unload blob to unprotect the WPR region */ - if (gsb->acr_unload_blob && - gsb->falcon_state[NVKM_SECBOOT_FALCON_FECS] != NON_SECURE) - ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_unload_blob, - &gsb->acr_unload_bl_desc); - - for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++) - gsb->falcon_state[i] = NON_SECURE; + if (sb->acr->func->fini) + ret = sb->acr->func->fini(sb->acr, sb, suspend); return ret; } @@ -1388,11 +157,7 @@ gm200_secboot_dtor(struct 
nvkm_secboot *sb) { struct gm200_secboot *gsb = gm200_secboot(sb); - nvkm_gpuobj_del(&gsb->acr_unload_blob); - - kfree(gsb->hsbl_blob); - nvkm_gpuobj_del(&gsb->acr_load_blob); - nvkm_gpuobj_del(&gsb->ls_blob); + sb->acr->func->dtor(sb->acr); nvkm_vm_ref(NULL, &gsb->vm, gsb->pgd); nvkm_gpuobj_del(&gsb->pgd); @@ -1405,50 +170,9 @@ gm200_secboot_dtor(struct nvkm_secboot *sb) static const struct nvkm_secboot_func gm200_secboot = { .dtor = gm200_secboot_dtor, - .init = gm200_secboot_init, + .oneinit = gm200_secboot_oneinit, .fini = gm200_secboot_fini, - .reset = gm200_secboot_reset, - .start = gm200_secboot_start, - .managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS) | - BIT(NVKM_SECBOOT_FALCON_GPCCS), - .boot_falcon = NVKM_SECBOOT_FALCON_PMU, -}; - -/** - * gm200_fixup_bl_desc - just copy the BL descriptor - * - * Use the GM200 descriptor format by default. - */ -static void -gm200_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret) -{ - memcpy(ret, desc, sizeof(*desc)); -} - -static void -gm200_secboot_fixup_hs_desc(struct gm200_secboot *gsb, - struct hsflcn_acr_desc *desc) -{ - desc->ucode_blob_base = gsb->ls_blob->addr; - desc->ucode_blob_size = gsb->ls_blob->size; - - desc->wpr_offset = 0; - - /* WPR region information for the HS binary to set up */ - desc->wpr_region_id = 1; - desc->regions.no_regions = 1; - desc->regions.region_props[0].region_id = 1; - desc->regions.region_props[0].start_addr = gsb->wpr_addr >> 8; - desc->regions.region_props[0].end_addr = - (gsb->wpr_addr + gsb->wpr_size) >> 8; -} - -static const struct gm200_secboot_func -gm200_secboot_func = { - .bl_desc_size = sizeof(struct gm200_flcn_bl_desc), - .fixup_bl_desc = gm200_secboot_fixup_bl_desc, - .fixup_hs_desc = gm200_secboot_fixup_hs_desc, - .prepare_blobs = gm200_secboot_prepare_blobs, + .run_blob = gm200_secboot_run_blob, }; int @@ -1457,6 +181,12 @@ gm200_secboot_new(struct nvkm_device *device, int index, { int ret; struct gm200_secboot *gsb; + struct nvkm_acr *acr; + + acr = acr_r361_new(BIT(NVKM_SECBOOT_FALCON_FECS) | + BIT(NVKM_SECBOOT_FALCON_GPCCS)); + if (IS_ERR(acr)) + return PTR_ERR(acr); gsb = kzalloc(sizeof(*gsb), GFP_KERNEL); if (!gsb) { @@ -1465,15 +195,14 @@ gm200_secboot_new(struct nvkm_device *device, int index, } *psb = &gsb->base; - ret = nvkm_secboot_ctor(&gm200_secboot, device, index, &gsb->base); + ret = nvkm_secboot_ctor(&gm200_secboot, acr, device, index, &gsb->base); if (ret) return ret; - gsb->func = &gm200_secboot_func; - return 0; } + MODULE_FIRMWARE("nvidia/gm200/acr/bl.bin"); MODULE_FIRMWARE("nvidia/gm200/acr/ucode_load.bin"); MODULE_FIRMWARE("nvidia/gm200/acr/ucode_unload.bin"); diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h new file mode 100644 index 000000000000..45adf1a3bc20 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVKM_SECBOOT_GM200_H__ +#define __NVKM_SECBOOT_GM200_H__ + +#include "priv.h" + +struct gm200_secboot { + struct nvkm_secboot base; + + /* Instance block & address space used for HS FW execution */ + struct nvkm_gpuobj *inst; + struct nvkm_gpuobj *pgd; + struct nvkm_vm *vm; +}; +#define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base) + +int gm200_secboot_oneinit(struct nvkm_secboot *); +int gm200_secboot_fini(struct nvkm_secboot *, bool); +void *gm200_secboot_dtor(struct nvkm_secboot *); +int gm200_secboot_run_blob(struct nvkm_secboot *, struct nvkm_gpuobj *); + +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c index d5395ebfe8d3..6707b8edc086 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c @@ -20,103 +20,8 @@ * DEALINGS IN THE SOFTWARE. */ -#include "priv.h" - -#include <core/gpuobj.h> - -/* - * The BL header format used by GM20B's firmware is slightly different - * from the one of GM200. Fix the differences here. - */ -struct gm20b_flcn_bl_desc { - u32 reserved[4]; - u32 signature[4]; - u32 ctx_dma; - u32 code_dma_base; - u32 non_sec_code_off; - u32 non_sec_code_size; - u32 sec_code_off; - u32 sec_code_size; - u32 code_entry_point; - u32 data_dma_base; - u32 data_size; -}; - -static int -gm20b_secboot_prepare_blobs(struct gm200_secboot *gsb) -{ - struct nvkm_subdev *subdev = &gsb->base.subdev; - int acr_size; - int ret; - - ret = gm20x_secboot_prepare_blobs(gsb); - if (ret) - return ret; - - acr_size = gsb->acr_load_blob->size; - /* - * On Tegra the WPR region is set by the bootloader. It is illegal for - * the HS blob to be larger than this region. - */ - if (acr_size > gsb->wpr_size) { - nvkm_error(subdev, "WPR region too small for FW blob!\n"); - nvkm_error(subdev, "required: %dB\n", acr_size); - nvkm_error(subdev, "WPR size: %dB\n", gsb->wpr_size); - return -ENOSPC; - } - - return 0; -} - -/** - * gm20b_secboot_fixup_bl_desc - adapt BL descriptor to format used by GM20B FW - * - * There is only a slight format difference (DMA addresses being 32-bits and - * 256B-aligned) to address. 
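- * - * (Editor's example: a 40-bit address such as 0x123456700 is stored as - * lower_32_bits(addr >> 8) = 0x01234567, which is exactly the conversion - * performed below for code_dma_base and data_dma_base.)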
- */ -static void -gm20b_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret) -{ - struct gm20b_flcn_bl_desc *gdesc = ret; - u64 addr; - - memcpy(gdesc->reserved, desc->reserved, sizeof(gdesc->reserved)); - memcpy(gdesc->signature, desc->signature, sizeof(gdesc->signature)); - gdesc->ctx_dma = desc->ctx_dma; - addr = desc->code_dma_base.hi; - addr <<= 32; - addr |= desc->code_dma_base.lo; - gdesc->code_dma_base = lower_32_bits(addr >> 8); - gdesc->non_sec_code_off = desc->non_sec_code_off; - gdesc->non_sec_code_size = desc->non_sec_code_size; - gdesc->sec_code_off = desc->sec_code_off; - gdesc->sec_code_size = desc->sec_code_size; - gdesc->code_entry_point = desc->code_entry_point; - addr = desc->data_dma_base.hi; - addr <<= 32; - addr |= desc->data_dma_base.lo; - gdesc->data_dma_base = lower_32_bits(addr >> 8); - gdesc->data_size = desc->data_size; -} - -static void -gm20b_secboot_fixup_hs_desc(struct gm200_secboot *gsb, - struct hsflcn_acr_desc *desc) -{ - desc->ucode_blob_base = gsb->ls_blob->addr; - desc->ucode_blob_size = gsb->ls_blob->size; - - desc->wpr_offset = 0; -} - -static const struct gm200_secboot_func -gm20b_secboot_func = { - .bl_desc_size = sizeof(struct gm20b_flcn_bl_desc), - .fixup_bl_desc = gm20b_secboot_fixup_bl_desc, - .fixup_hs_desc = gm20b_secboot_fixup_hs_desc, - .prepare_blobs = gm20b_secboot_prepare_blobs, -}; - +#include "acr.h" +#include "gm200.h" #ifdef CONFIG_ARCH_TEGRA #define TEGRA_MC_BASE 0x70019000 @@ -144,15 +49,15 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb) nvkm_error(&sb->subdev, "Cannot map Tegra MC registers\n"); return PTR_ERR(mc); } - gsb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) | + sb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) | ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32); - gsb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K) + sb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K) << 17; cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0); iounmap(mc); /* Check that WPR settings are valid */ - if (gsb->wpr_size == 0) { + if (sb->wpr_size == 0) { nvkm_error(&sb->subdev, "WPR region is empty\n"); return -EINVAL; } @@ -174,7 +79,7 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb) #endif static int -gm20b_secboot_init(struct nvkm_secboot *sb) +gm20b_secboot_oneinit(struct nvkm_secboot *sb) { struct gm200_secboot *gsb = gm200_secboot(sb); int ret; @@ -183,17 +88,15 @@ gm20b_secboot_init(struct nvkm_secboot *sb) if (ret) return ret; - return gm200_secboot_init(sb); + return gm200_secboot_oneinit(sb); } static const struct nvkm_secboot_func gm20b_secboot = { .dtor = gm200_secboot_dtor, - .init = gm20b_secboot_init, - .reset = gm200_secboot_reset, - .start = gm200_secboot_start, - .managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS), - .boot_falcon = NVKM_SECBOOT_FALCON_PMU, + .oneinit = gm20b_secboot_oneinit, + .fini = gm200_secboot_fini, + .run_blob = gm200_secboot_run_blob, }; int @@ -202,6 +105,11 @@ gm20b_secboot_new(struct nvkm_device *device, int index, { int ret; struct gm200_secboot *gsb; + struct nvkm_acr *acr; + + acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS)); + if (IS_ERR(acr)) + return PTR_ERR(acr); gsb = kzalloc(sizeof(*gsb), GFP_KERNEL); if (!gsb) { @@ -210,12 +118,10 @@ gm20b_secboot_new(struct nvkm_device *device, int index, } *psb = &gsb->base; - ret = nvkm_secboot_ctor(&gm20b_secboot, device, index, &gsb->base); + ret = nvkm_secboot_ctor(&gm20b_secboot, acr, device, index, &gsb->base); if (ret) return ret; - 
gsb->func = &gm20b_secboot_func; - return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h new file mode 100644 index 000000000000..00886cee57eb --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h @@ -0,0 +1,151 @@ +/* + * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __NVKM_SECBOOT_LS_UCODE_H__ +#define __NVKM_SECBOOT_LS_UCODE_H__ + +#include <core/os.h> +#include <core/subdev.h> +#include <subdev/secboot.h> + + +/** + * struct ls_ucode_img_desc - descriptor of firmware image + * @descriptor_size: size of this descriptor + * @image_size: size of the whole image + * @bootloader_start_offset: start offset of the bootloader in ucode image + * @bootloader_size: size of the bootloader + * @bootloader_imem_offset: start offset of the bootloader in IMEM + * @bootloader_entry_point: entry point of the bootloader in IMEM + * @app_start_offset: start offset of the LS firmware + * @app_size: size of the LS firmware's code and data + * @app_imem_offset: offset of the app in IMEM + * @app_imem_entry: entry point of the app in IMEM + * @app_dmem_offset: offset of the data in DMEM + * @app_resident_code_offset: offset of app code from app_start_offset + * @app_resident_code_size: size of the code + * @app_resident_data_offset: offset of data from app_start_offset + * @app_resident_data_size: size of data + * + * A firmware image contains the code, data, and bootloader of a given LS + * falcon in a single blob. This structure describes where everything is. + * + * This can be generated from a (bootloader, code, data) set if they have + * been loaded separately, or come directly from a file.
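+ * + * (Editor's note on the invariants implied for a generated descriptor, + * assuming the 256-byte block size used by ls_ucode_img_build() in + * ls_ucode_gr.c: + * + *	app_start_offset = ALIGN(bootloader_size, 256) + *	app_size = app_resident_code_size + app_resident_data_size + *	image_size = app_start_offset + app_size + * + * Descriptors read directly from a firmware file may differ.)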
+ */ +struct ls_ucode_img_desc { + u32 descriptor_size; + u32 image_size; + u32 tools_version; + u32 app_version; + char date[64]; + u32 bootloader_start_offset; + u32 bootloader_size; + u32 bootloader_imem_offset; + u32 bootloader_entry_point; + u32 app_start_offset; + u32 app_size; + u32 app_imem_offset; + u32 app_imem_entry; + u32 app_dmem_offset; + u32 app_resident_code_offset; + u32 app_resident_code_size; + u32 app_resident_data_offset; + u32 app_resident_data_size; + u32 nb_overlays; + struct { u32 start; u32 size; } load_ovl[64]; + u32 compressed; +}; + +/** + * struct ls_ucode_img - temporary storage for loaded LS firmwares + * @node: to link within lsf_ucode_mgr + * @falcon_id: ID of the falcon this LS firmware is for + * @ucode_desc: loaded or generated map of ucode_data + * @ucode_data: firmware payload (code and data) + * @ucode_size: size in bytes of data in ucode_data + * @sig: signature for this firmware + * @sig_size: size of the signature in bytes + * + * Preparing the WPR LS blob requires information about all the LS firmwares + * (size, etc) to be known. This structure contains all the data of one LS + * firmware. + */ +struct ls_ucode_img { + struct list_head node; + enum nvkm_secboot_falcon falcon_id; + + struct ls_ucode_img_desc ucode_desc; + u8 *ucode_data; + u32 ucode_size; + + u8 *sig; + u32 sig_size; +}; + +/** + * struct fw_bin_header - header of firmware files + * @bin_magic: always 0x3b1d14f0 + * @bin_ver: version of the bin format + * @bin_size: entire image size including this header + * @header_offset: offset of the firmware/bootloader header in the file + * @data_offset: offset of the firmware/bootloader payload in the file + * @data_size: size of the payload + * + * This header is located at the beginning of the HS firmware and HS bootloader + * files, to describe where the headers and data can be found. + */ +struct fw_bin_header { + u32 bin_magic; + u32 bin_ver; + u32 bin_size; + u32 header_offset; + u32 data_offset; + u32 data_size; +}; + +/** + * struct fw_bl_desc - firmware bootloader descriptor + * @start_tag: starting tag of bootloader + * @dmem_load_off: DMEM offset of flcn_bl_dmem_desc + * @code_off: offset of code section + * @code_size: size of code section + * @data_off: offset of data section + * @data_size: size of data section + * + * This structure is embedded in bootloader firmware files to describe the + * IMEM and DMEM layout expected by the bootloader. + */ +struct fw_bl_desc { + u32 start_tag; + u32 dmem_load_off; + u32 code_off; + u32 code_size; + u32 data_off; + u32 data_size; +}; + +int acr_ls_ucode_load_fecs(const struct nvkm_subdev *, struct ls_ucode_img *); +int acr_ls_ucode_load_gpccs(const struct nvkm_subdev *, struct ls_ucode_img *); + + +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c new file mode 100644 index 000000000000..40a6df77bb8a --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#include "ls_ucode.h" +#include "acr.h" + +#include <core/firmware.h> + +#define BL_DESC_BLK_SIZE 256 +/** + * Build a ucode image and descriptor from provided bootloader, code and data. + * + * @bl: bootloader image, including its 16-byte descriptor + * @code: LS firmware code segment + * @data: LS firmware data segment + * @desc: ucode descriptor to be written + * + * Return: allocated ucode image with corresponding descriptor information. @desc + * is also updated to contain the right offsets within the returned image. + */ +static void * +ls_ucode_img_build(const struct firmware *bl, const struct firmware *code, + const struct firmware *data, struct ls_ucode_img_desc *desc) +{ + struct fw_bin_header *bin_hdr = (void *)bl->data; + struct fw_bl_desc *bl_desc = (void *)bl->data + bin_hdr->header_offset; + void *bl_data = (void *)bl->data + bin_hdr->data_offset; + u32 pos = 0; + void *image; + + desc->bootloader_start_offset = pos; + desc->bootloader_size = ALIGN(bl_desc->code_size, sizeof(u32)); + desc->bootloader_imem_offset = bl_desc->start_tag * 256; + desc->bootloader_entry_point = bl_desc->start_tag * 256; + + pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE); + desc->app_start_offset = pos; + desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) + + ALIGN(data->size, BL_DESC_BLK_SIZE); + desc->app_imem_offset = 0; + desc->app_imem_entry = 0; + desc->app_dmem_offset = 0; + desc->app_resident_code_offset = 0; + desc->app_resident_code_size = ALIGN(code->size, BL_DESC_BLK_SIZE); + + pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE); + desc->app_resident_data_offset = pos - desc->app_start_offset; + desc->app_resident_data_size = ALIGN(data->size, BL_DESC_BLK_SIZE); + + desc->image_size = ALIGN(bl_desc->code_size, BL_DESC_BLK_SIZE) + + desc->app_size; + + image = kzalloc(desc->image_size, GFP_KERNEL); + if (!image) + return ERR_PTR(-ENOMEM); + + memcpy(image + desc->bootloader_start_offset, bl_data, + bl_desc->code_size); + memcpy(image + desc->app_start_offset, code->data, code->size); + memcpy(image + desc->app_start_offset + desc->app_resident_data_offset, + data->data, data->size); + + return image; +} + +/** + * ls_ucode_img_load_gr() - load and prepare an LS GR ucode image + * + * Load the LS microcode, bootloader and signature and pack them into a single + * blob. Also generate the corresponding ucode descriptor.
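+ * + * (Editor's note: for falcon_name "fecs" the loader below fetches + * gr/fecs_bl, gr/fecs_inst, gr/fecs_data and gr/fecs_sig via + * nvkm_firmware_get(), i.e. bootloader, code, data and signature.)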
+ */ +static int +ls_ucode_img_load_gr(const struct nvkm_subdev *subdev, struct ls_ucode_img *img, + const char *falcon_name) +{ + const struct firmware *bl, *code, *data, *sig; + char f[64]; + int ret; + + snprintf(f, sizeof(f), "gr/%s_bl", falcon_name); + ret = nvkm_firmware_get(subdev->device, f, &bl); + if (ret) + goto error; + + snprintf(f, sizeof(f), "gr/%s_inst", falcon_name); + ret = nvkm_firmware_get(subdev->device, f, &code); + if (ret) + goto free_bl; + + snprintf(f, sizeof(f), "gr/%s_data", falcon_name); + ret = nvkm_firmware_get(subdev->device, f, &data); + if (ret) + goto free_inst; + + snprintf(f, sizeof(f), "gr/%s_sig", falcon_name); + ret = nvkm_firmware_get(subdev->device, f, &sig); + if (ret) + goto free_data; + img->sig = kmemdup(sig->data, sig->size, GFP_KERNEL); + if (!img->sig) { + ret = -ENOMEM; + goto free_sig; + } + img->sig_size = sig->size; + + img->ucode_data = ls_ucode_img_build(bl, code, data, + &img->ucode_desc); + if (IS_ERR(img->ucode_data)) { + /* also drop the signature duplicated above */ + kfree(img->sig); + ret = PTR_ERR(img->ucode_data); + goto free_sig; + } + img->ucode_size = img->ucode_desc.image_size; + +free_sig: + nvkm_firmware_put(sig); +free_data: + nvkm_firmware_put(data); +free_inst: + nvkm_firmware_put(code); +free_bl: + nvkm_firmware_put(bl); +error: + return ret; +} + +int +acr_ls_ucode_load_fecs(const struct nvkm_subdev *subdev, + struct ls_ucode_img *img) +{ + return ls_ucode_img_load_gr(subdev, img, "fecs"); +} + +int +acr_ls_ucode_load_gpccs(const struct nvkm_subdev *subdev, + struct ls_ucode_img *img) +{ + return ls_ucode_img_load_gr(subdev, img, "gpccs"); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h index a9a8a0e1017e..936a65f5658c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h @@ -27,20 +27,16 @@ #include <subdev/mmu.h> struct nvkm_secboot_func { - int (*init)(struct nvkm_secboot *); + int (*oneinit)(struct nvkm_secboot *); int (*fini)(struct nvkm_secboot *, bool suspend); void *(*dtor)(struct nvkm_secboot *); - int (*reset)(struct nvkm_secboot *, enum nvkm_secboot_falcon); - int (*start)(struct nvkm_secboot *, enum nvkm_secboot_falcon); - - /* ID of the falcon that will perform secure boot */ - enum nvkm_secboot_falcon boot_falcon; - /* Bit-mask of IDs of managed falcons */ - unsigned long managed_falcons; + int (*run_blob)(struct nvkm_secboot *, struct nvkm_gpuobj *); }; -int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_device *, - int index, struct nvkm_secboot *); +extern const char *nvkm_secboot_falcon_name[]; + +int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_acr *, + struct nvkm_device *, int, struct nvkm_secboot *); int nvkm_secboot_falcon_reset(struct nvkm_secboot *); int nvkm_secboot_falcon_run(struct nvkm_secboot *); @@ -48,187 +44,20 @@ struct flcn_u64 { u32 lo; u32 hi; }; + static inline u64 flcn64_to_u64(const struct flcn_u64 f) { return ((u64)f.hi) << 32 | f.lo; } -/** - * struct gm200_flcn_bl_desc - DMEM bootloader descriptor - * @signature: 16B signature for secure code. 0s if no secure code - * @ctx_dma: DMA context to be used by BL while loading code/data - * @code_dma_base: 256B-aligned Physical FB Address where code is located - * (falcon's $xcbase register) - * @non_sec_code_off: offset from code_dma_base where the non-secure code is - * located. The offset must be a multiple of 256 to help perf - * @non_sec_code_size: the size of the non-secure code part.
- * @sec_code_off: offset from code_dma_base where the secure code is - * located. The offset must be a multiple of 256 to help perf - * @sec_code_size: the size of the secure code part - * @code_entry_point: code entry point which will be invoked by BL after - * code is loaded. - * @data_dma_base: 256B aligned Physical FB Address where data is located. - * (falcon's $xdbase register) - * @data_size: size of data block. Should be a multiple of 256B - * - * Structure used by the bootloader to load the rest of the code. This has - * to be filled by host and copied into DMEM at offset provided in the - * hsflcn_bl_desc.bl_desc_dmem_load_off. - */ -struct gm200_flcn_bl_desc { - u32 reserved[4]; - u32 signature[4]; - u32 ctx_dma; - struct flcn_u64 code_dma_base; - u32 non_sec_code_off; - u32 non_sec_code_size; - u32 sec_code_off; - u32 sec_code_size; - u32 code_entry_point; - struct flcn_u64 data_dma_base; - u32 data_size; -}; - -/** - * struct hsflcn_acr_desc - data section of the HS firmware - * - * This header is to be copied at the beginning of DMEM by the HS bootloader. - * - * @signature: signature of ACR ucode - * @wpr_region_id: region ID holding the WPR header and its details - * @wpr_offset: offset from the WPR region holding the wpr header - * @regions: region descriptors - * @ucode_blob_size: size of the LS blob - * @ucode_blob_base: FB location of the LS blob - */ -struct hsflcn_acr_desc { - union { - u8 reserved_dmem[0x200]; - u32 signatures[4]; - } ucode_reserved_space; - u32 wpr_region_id; - u32 wpr_offset; - u32 mmu_mem_range; -#define FLCN_ACR_MAX_REGIONS 2 - struct { - u32 no_regions; - struct { - u32 start_addr; - u32 end_addr; - u32 region_id; - u32 read_mask; - u32 write_mask; - u32 client_mask; - } region_props[FLCN_ACR_MAX_REGIONS]; - } regions; - u32 ucode_blob_size; - u64 ucode_blob_base __aligned(8); - struct { - u32 vpr_enabled; - u32 vpr_start; - u32 vpr_end; - u32 hdcp_policies; - } vpr_desc; -}; - -/** - * Contains the whole secure boot state, allowing secure boot to be performed - * as needed - * @wpr_addr: physical address of the WPR region - * @wpr_size: size in bytes of the WPR region - * @ls_blob: LS blob of all the LS firmwares, signatures, bootloaders - * @ls_blob_size: size of the LS blob - * @ls_blob_nb_regions: number of LS firmwares that will be loaded - * @acr_blob: HS blob - * @acr_blob_vma: mapping of the HS blob into the secure falcon's VM - * @acr_bl_desc: bootloader descriptor of the HS blob - * @hsbl_blob: HS blob bootloader - * @inst: instance block for HS falcon - * @pgd: page directory for the HS falcon - * @vm: address space used by the HS falcon - * @falcon_state: current state of the managed falcons - * @firmware_ok: whether the firmware blobs have been created - */ -struct gm200_secboot { - struct nvkm_secboot base; - const struct gm200_secboot_func *func; - - /* - * Address and size of the WPR region. On dGPU this will be the - * address of the LS blob.
-	 * On Tegra this is a fixed region set by the bootloader.
-	 */
-	u64 wpr_addr;
-	u32 wpr_size;
-
-	/*
-	 * HS FW - lock WPR region (dGPU only) and load LS FWs.
-	 * On Tegra the HS FW copies the LS blob into the fixed WPR instead.
-	 */
-	struct nvkm_gpuobj *acr_load_blob;
-	struct gm200_flcn_bl_desc acr_load_bl_desc;
-
-	/* HS FW - unlock WPR region (dGPU only) */
-	struct nvkm_gpuobj *acr_unload_blob;
-	struct gm200_flcn_bl_desc acr_unload_bl_desc;
-
-	/* HS bootloader */
-	void *hsbl_blob;
-
-	/* LS FWs, to be loaded by the HS ACR */
-	struct nvkm_gpuobj *ls_blob;
-
-	/* Instance block & address space used for HS FW execution */
-	struct nvkm_gpuobj *inst;
-	struct nvkm_gpuobj *pgd;
-	struct nvkm_vm *vm;
-
-	/* To keep track of the state of all managed falcons */
-	enum {
-		/* In non-secure state, no firmware loaded, no privileges */
-		NON_SECURE = 0,
-		/* In low-secure mode and ready to be started */
-		RESET,
-		/* In low-secure mode and running */
-		RUNNING,
-	} falcon_state[NVKM_SECBOOT_FALCON_END];
-
-	bool firmware_ok;
-};
-#define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
-
-/**
- * Contains functions we wish to abstract between GM200-like implementations.
- * @bl_desc_size: size of the BL descriptor used by this chip
- * @fixup_bl_desc: hook that converts the generic GM200 BL descriptor into the
- *                 chip's own format, written into a data array of
- *                 bl_desc_size bytes
- * @fixup_hs_desc: hook that adjusts the HS descriptor before it is used
- * @prepare_blobs: prepares the various blobs needed for secure booting
- */
-struct gm200_secboot_func {
-	/*
-	 * Size of the bootloader descriptor for this chip. A block of this
-	 * size is allocated before booting a falcon and the fixup_bl_desc
-	 * callback is called on it.
-	 */
-	u32 bl_desc_size;
-	void (*fixup_bl_desc)(const struct gm200_flcn_bl_desc *, void *);
-
-	/*
-	 * Chip-specific modifications of the HS descriptor can be done here.
-	 * On dGPU this is used to fill the information about the WPR region
-	 * we want the HS FW to set up.
-	 */
-	void (*fixup_hs_desc)(struct gm200_secboot *, struct hsflcn_acr_desc *);
-	int (*prepare_blobs)(struct gm200_secboot *);
-};
+static inline struct flcn_u64 u64_to_flcn64(u64 u)
+{
+	struct flcn_u64 ret;
 
-int gm200_secboot_init(struct nvkm_secboot *);
-void *gm200_secboot_dtor(struct nvkm_secboot *);
-int gm200_secboot_reset(struct nvkm_secboot *, u32);
-int gm200_secboot_start(struct nvkm_secboot *, u32);
+	ret.hi = upper_32_bits(u);
+	ret.lo = lower_32_bits(u);
 
-int gm20x_secboot_prepare_blobs(struct gm200_secboot *);
+	return ret;
+}
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
index 8894fee30cbc..df949fa7d05d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
@@ -64,10 +64,9 @@ nvkm_therm_update_trip(struct nvkm_therm *therm)
 }
 
 static int
-nvkm_therm_update_linear(struct nvkm_therm *therm)
+nvkm_therm_compute_linear_duty(struct nvkm_therm *therm, u8 linear_min_temp,
+			       u8 linear_max_temp)
 {
-	u8 linear_min_temp = therm->fan->bios.linear_min_temp;
-	u8 linear_max_temp = therm->fan->bios.linear_max_temp;
 	u8 temp = therm->func->temp_get(therm);
 	u16 duty;
 
@@ -85,6 +84,21 @@ nvkm_therm_update_linear(struct nvkm_therm *therm)
 	return duty;
 }
 
+static int
+nvkm_therm_update_linear(struct nvkm_therm *therm)
+{
+	u8 min = therm->fan->bios.linear_min_temp;
+	u8 max = therm->fan->bios.linear_max_temp;
+	return nvkm_therm_compute_linear_duty(therm, min, max);
+}
+
+static int
+nvkm_therm_update_linear_fallback(struct nvkm_therm *therm)
+{
+	u8 max = therm->bios_sensor.thrs_fan_boost.temp;
+	return nvkm_therm_compute_linear_duty(therm, 30, max);
+}
+
 static void
 nvkm_therm_update(struct nvkm_therm *therm, int mode)
 {
@@ -119,6 +133,8 @@ nvkm_therm_update(struct nvkm_therm *therm, int mode)
 	case NVBIOS_THERM_FAN_OTHER:
 		if (therm->cstate)
 			duty = therm->cstate;
+		else
+			duty = nvkm_therm_update_linear_fallback(therm);
 		poll = false;
 		break;
 	}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
index fe063d5728e2..67ada1d9a28c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
@@ -95,6 +95,20 @@ nvkm_top_intr(struct nvkm_device *device, u32 intr, u64 *psubdevs)
 	return intr & ~handled;
 }
 
+int
+nvkm_top_fault_id(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	struct nvkm_top *top = device->top;
+	struct nvkm_top_device *info;
+
+	list_for_each_entry(info, &top->device, head) {
+		if (info->index == devidx && info->fault >= 0)
+			return info->fault;
+	}
+
+	return -ENOENT;
+}
+
 enum nvkm_devidx
 nvkm_top_fault(struct nvkm_device *device, int fault)
 {
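A note on the error handling in ls_ucode_img_load_gr() at the top of this section: each nvkm_firmware_get() failure jumps to the label that releases everything fetched before it, and the success path falls through the tail of the same label chain to drop the firmware handles it no longer needs. Below is a minimal standalone sketch of that cascading-goto idiom; the res type and res_get()/res_put() helpers are hypothetical stand-ins for struct firmware and the nvkm firmware calls, not nvkm API.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-in for struct firmware / nvkm_firmware_get(). */
struct res {
	char name[16];
};

static int res_get(const char *name, struct res **r)
{
	*r = malloc(sizeof(**r));
	if (!*r)
		return -ENOMEM;
	snprintf((*r)->name, sizeof((*r)->name), "%s", name);
	return 0;
}

static void res_put(struct res *r)
{
	free(r);
}

/*
 * Cascading-goto unwind: each acquisition's failure jumps to the label
 * that releases everything acquired before it; the success path falls
 * through the same labels to drop sources it no longer needs.
 */
static int load_pair(char **out)
{
	struct res *bl, *code;
	int ret;

	ret = res_get("bl", &bl);
	if (ret)
		goto error;

	ret = res_get("code", &code);
	if (ret)
		goto free_bl;

	*out = strdup(code->name);	/* keep a copy; drop both sources */
	if (!*out)
		ret = -ENOMEM;

	res_put(code);
free_bl:
	res_put(bl);
error:
	return ret;
}

int main(void)
{
	char *name = NULL;

	if (!load_pair(&name))
		printf("loaded %s\n", name);
	free(name);
	return 0;
}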
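The flcn_u64 helpers in the priv.h hunk above exist because falcon descriptors store 64-bit DMA addresses as two 32-bit halves. A minimal userspace sketch of the round trip follows, with stand-in macros for the kernel's upper_32_bits()/lower_32_bits(); everything here is illustrative only.

#include <assert.h>
#include <stdint.h>

/* Stand-ins for the kernel's upper_32_bits()/lower_32_bits() macros. */
#define upper_32_bits(n) ((uint32_t)((n) >> 32))
#define lower_32_bits(n) ((uint32_t)(n))

struct flcn_u64 {
	uint32_t lo;
	uint32_t hi;
};

static inline uint64_t flcn64_to_u64(const struct flcn_u64 f)
{
	return ((uint64_t)f.hi) << 32 | f.lo;
}

static inline struct flcn_u64 u64_to_flcn64(uint64_t u)
{
	struct flcn_u64 ret;

	ret.hi = upper_32_bits(u);
	ret.lo = lower_32_bits(u);
	return ret;
}

int main(void)
{
	/* Round-trip: any 64-bit DMA base survives the split. */
	uint64_t base = 0x00000001fe000000ULL;

	assert(flcn64_to_u64(u64_to_flcn64(base)) == base);
	return 0;
}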
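The therm/base.c refactor above splits the clamped linear fan curve out of nvkm_therm_update_linear() so the new NVBIOS_THERM_FAN_OTHER fallback can reuse it with a fixed 30°C floor and the fan-boost threshold as the ceiling. The duty computation itself is elided from the hunk; the sketch below shows the general shape of such a clamped linear interpolation, with hypothetical min/max duty percentages (the real helper derives its duty range from the fan's BIOS table, so treat this as an illustration, not the kernel's exact math).

#include <stdint.h>
#include <stdio.h>

/*
 * Clamped linear fan curve: below min_temp run at min_duty, above
 * max_temp run at max_duty, and interpolate linearly in between.
 */
static int compute_linear_duty(uint8_t temp, uint8_t min_temp,
			       uint8_t max_temp, uint8_t min_duty,
			       uint8_t max_duty)
{
	if (temp <= min_temp)
		return min_duty;
	if (temp >= max_temp)
		return max_duty;
	return min_duty + (temp - min_temp) * (max_duty - min_duty) /
			  (max_temp - min_temp);
}

int main(void)
{
	/* BIOS linear mode: e.g. 40..85°C mapped onto 30..100% duty. */
	printf("62C -> %d%%\n", compute_linear_duty(62, 40, 85, 30, 100));
	/* Fallback shape: fixed 30°C floor up to a fan-boost threshold. */
	printf("55C -> %d%%\n", compute_linear_duty(55, 30, 90, 30, 100));
	return 0;
}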