From 04b8a4bd8e01e25b9fa9fa7b1c957a7346ae83c1 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 20 Oct 2015 01:15:39 -0400
Subject: [PATCH 01/45] drm/nouveau/gem: return only valid domain when there's
 only one

On nv50+, we restrict the valid domains to just the one where the buffer
was originally created. However after the buffer is evicted to system
memory, we might move it back to a different domain that was not
originally valid. When sharing the buffer and retrieving its GEM_INFO
data, we still want the domain that will be valid for this buffer in a
pushbuf, not the one where it currently happens to be.

This resolves fdo#92504 and several others. These are due to suspend
evicting all buffers, making it more likely that they temporarily end up
in the wrong place.

Cc: stable@vger.kernel.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=92504
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_gem.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 2c9981512d27b..41be584147b93 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -227,11 +227,12 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem,
 	struct nouveau_bo *nvbo = nouveau_gem_object(gem);
 	struct nvkm_vma *vma;
 
-	if (nvbo->bo.mem.mem_type == TTM_PL_TT)
+	if (is_power_of_2(nvbo->valid_domains))
+		rep->domain = nvbo->valid_domains;
+	else if (nvbo->bo.mem.mem_type == TTM_PL_TT)
 		rep->domain = NOUVEAU_GEM_DOMAIN_GART;
 	else
 		rep->domain = NOUVEAU_GEM_DOMAIN_VRAM;
-
 	rep->offset = nvbo->bo.offset;
 	if (cli->vm) {
 		vma = nouveau_bo_vma_find(nvbo, cli->vm);

From 579b7c58215329803ce184704463de09f0f310ac Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 3 Sep 2015 17:39:52 +0900
Subject: [PATCH 02/45] drm/nouveau/pmu: do not assume a PMU is present

Some devices may not have a PMU. Avoid a NULL pointer dereference in
such cases by checking whether the pointer given to nvkm_pmu_pgob() is
valid.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
index 27a79c0c38886..d95eb8659d1bd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
@@ -28,7 +28,7 @@
 void
 nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable)
 {
-	if (pmu->func->pgob)
+	if (pmu && pmu->func->pgob)
 		pmu->func->pgob(pmu, enable);
 }
 

From 542f60dc84c6f382a56a0b42dc09aa183198a55f Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 3 Sep 2015 17:48:13 +0900
Subject: [PATCH 03/45] drm/nouveau/timer: re-introduce nvkm_wait_xsec macros

Reintroduce macros allowing us to test a register against a certain
mask, since this is the most common usage pattern for the more generic
nvkm_xsec macros and makes the code more concise and readable.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
index 62ed0880b0e1f..82d3e28918fd1 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
@@ -59,6 +59,16 @@ void nvkm_timer_alarm_cancel(struct nvkm_timer *, struct nvkm_alarm *);
 #define nvkm_usec(d,u,cond...) nvkm_nsec((d), (u) * 1000, ##cond)
 #define nvkm_msec(d,m,cond...) nvkm_usec((d), (m) * 1000, ##cond)
 
+#define nvkm_wait_nsec(d,n,addr,mask,data)                                     \
+	nvkm_nsec(d, n,                                                        \
+		if ((nvkm_rd32(d, (addr)) & (mask)) == (data))                 \
+			break;                                                 \
+		)
+#define nvkm_wait_usec(d,u,addr,mask,data)                                     \
+	nvkm_wait_nsec((d), (u) * 1000, (addr), (mask), (data))
+#define nvkm_wait_msec(d,m,addr,mask,data)                                     \
+	nvkm_wait_usec((d), (m) * 1000, (addr), (mask), (data))
+
 int nv04_timer_new(struct nvkm_device *, int, struct nvkm_timer **);
 int nv40_timer_new(struct nvkm_device *, int, struct nvkm_timer **);
 int nv41_timer_new(struct nvkm_device *, int, struct nvkm_timer **);

From 38a8fc78d00749328888bb0e37ad93535e7e9992 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 3 Sep 2015 17:48:14 +0900
Subject: [PATCH 04/45] drm/nouveau/ltc: add hooks for invalidate and flush

These are useful for systems without a coherent CPU/GPU bus. For such
systems we may need to maintain the L2 ourselves.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h |  3 +++
 drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c    | 14 ++++++++++++++
 drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h    |  3 +++
 3 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
index c773b5e958b4d..5464fcf482f1c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
@@ -30,6 +30,9 @@ void nvkm_ltc_tags_clear(struct nvkm_ltc *, u32 first, u32 count);
 int nvkm_ltc_zbc_color_get(struct nvkm_ltc *, int index, const u32[4]);
 int nvkm_ltc_zbc_depth_get(struct nvkm_ltc *, int index, const u32);
 
+void nvkm_ltc_invalidate(struct nvkm_ltc *);
+void nvkm_ltc_flush(struct nvkm_ltc *);
+
 int gf100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gk104_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
index 930d25b6e63cb..85b1464c01942 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
@@ -67,6 +67,20 @@ nvkm_ltc_zbc_depth_get(struct nvkm_ltc *ltc, int index, const u32 depth)
 	return index;
 }
 
+void
+nvkm_ltc_invalidate(struct nvkm_ltc *ltc)
+{
+	if (ltc->func->invalidate)
+		ltc->func->invalidate(ltc);
+}
+
+void
+nvkm_ltc_flush(struct nvkm_ltc *ltc)
+{
+	if (ltc->func->flush)
+		ltc->func->flush(ltc);
+}
+
 static void
 nvkm_ltc_intr(struct nvkm_subdev *subdev)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
index 4e05037cc99f9..6f66bd03f8299 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
@@ -17,6 +17,9 @@ struct nvkm_ltc_func {
 	int zbc;
 	void (*zbc_clear_color)(struct nvkm_ltc *, int, const u32[4]);
 	void (*zbc_clear_depth)(struct nvkm_ltc *, int, const u32);
+
+	void (*invalidate)(struct nvkm_ltc *);
+	void (*flush)(struct nvkm_ltc *);
 };
 
 int gf100_ltc_oneinit(struct nvkm_ltc *);

From a0a49bac2fe18375b7ccb9de4671960531294470 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Thu, 3 Sep 2015 17:48:15 +0900
Subject: [PATCH 05/45] drm/nouveau/ltc/gf100: add flush/invalidate functions

Allow clients to manually flush and invalidate L2. This will be useful
for Tegra systems for which we want to write instmem using the CPU.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/include/nvkm/subdev/ltc.h |  1 +
 .../gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c   | 32 +++++++++++++++++++
 .../gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c   |  2 ++
 .../gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c   |  2 ++
 .../gpu/drm/nouveau/nvkm/subdev/ltc/priv.h    |  2 ++
 5 files changed, 39 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
index 5464fcf482f1c..3d4dbbf9aab3e 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
@@ -35,5 +35,6 @@ void nvkm_ltc_flush(struct nvkm_ltc *);
 
 int gf100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gk104_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
+int gk20a_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
index 45ac765b753ed..fb0de83da13c0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
@@ -122,6 +122,36 @@ gf100_ltc_intr(struct nvkm_ltc *ltc)
 	}
 }
 
+void
+gf100_ltc_invalidate(struct nvkm_ltc *ltc)
+{
+	struct nvkm_device *device = ltc->subdev.device;
+	s64 taken;
+
+	nvkm_wr32(device, 0x70004, 0x00000001);
+	taken = nvkm_wait_msec(device, 2, 0x70004, 0x00000003, 0x00000000);
+	if (taken < 0)
+		nvkm_warn(&ltc->subdev, "LTC invalidate timeout\n");
+
+	if (taken > 0)
+		nvkm_debug(&ltc->subdev, "LTC invalidate took %lld ns\n", taken);
+}
+
+void
+gf100_ltc_flush(struct nvkm_ltc *ltc)
+{
+	struct nvkm_device *device = ltc->subdev.device;
+	s64 taken;
+
+	nvkm_wr32(device, 0x70010, 0x00000001);
+	taken = nvkm_wait_msec(device, 2, 0x70010, 0x00000003, 0x00000000);
+	if (taken < 0)
+		nvkm_warn(&ltc->subdev, "LTC flush timeout\n");
+
+	if (taken > 0)
+		nvkm_debug(&ltc->subdev, "LTC flush took %lld ns\n", taken);
+}
+
 /* TODO: Figure out tag memory details and drop the over-cautious allocation.
  */
 int
@@ -215,6 +245,8 @@ gf100_ltc = {
 	.zbc = 16,
 	.zbc_clear_color = gf100_ltc_zbc_clear_color,
 	.zbc_clear_depth = gf100_ltc_zbc_clear_depth,
+	.invalidate = gf100_ltc_invalidate,
+	.flush = gf100_ltc_flush,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c
index 839e6b4c597b2..b4f6e0034d588 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c
@@ -45,6 +45,8 @@ gk104_ltc = {
 	.zbc = 16,
 	.zbc_clear_color = gf100_ltc_zbc_clear_color,
 	.zbc_clear_depth = gf100_ltc_zbc_clear_depth,
+	.invalidate = gf100_ltc_invalidate,
+	.flush = gf100_ltc_flush,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
index 389331bb63bae..3043bbfd73844 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
@@ -138,6 +138,8 @@ gm107_ltc = {
 	.zbc = 16,
 	.zbc_clear_color = gm107_ltc_zbc_clear_color,
 	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
+	.invalidate = gf100_ltc_invalidate,
+	.flush = gf100_ltc_flush,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
index 6f66bd03f8299..4e3755b82769e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
@@ -29,4 +29,6 @@ void gf100_ltc_cbc_clear(struct nvkm_ltc *, u32, u32);
 void gf100_ltc_cbc_wait(struct nvkm_ltc *);
 void gf100_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]);
 void gf100_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32);
+void gf100_ltc_invalidate(struct nvkm_ltc *);
+void gf100_ltc_flush(struct nvkm_ltc *);
 #endif

From fcf3f91c34105c3551741febbfc1066aaa7f1db7 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Fri, 4 Sep 2015 14:40:32 +1000
Subject: [PATCH 06/45] drm/nouveau: remove unnecessary usage of object handles

No longer required in a lot of cases, as objects are identified over NVIF
via an alternate mechanism since the rework.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_abi16.c   | 17 ++++---------
 drivers/gpu/drm/nouveau/nouveau_chan.c    | 30 ++++++++++-------------
 drivers/gpu/drm/nouveau/nouveau_chan.h    |  3 +--
 drivers/gpu/drm/nouveau/nouveau_display.c |  5 ++--
 drivers/gpu/drm/nouveau/nouveau_drm.c     | 10 +++-----
 drivers/gpu/drm/nouveau/nouveau_drm.h     |  5 ----
 drivers/gpu/drm/nouveau/nouveau_gem.c     |  2 +-
 drivers/gpu/drm/nouveau/nouveau_sysfs.c   |  5 ++--
 drivers/gpu/drm/nouveau/nv50_display.c    |  7 +++---
 include/uapi/drm/nouveau_drm.h            |  8 ------
 10 files changed, 31 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index d336c2247d6af..0b3c8abd68431 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -51,8 +51,7 @@ nouveau_abi16_get(struct drm_file *file_priv, struct drm_device *dev)
 			 * device (ie. the one that belongs to the fd it
 			 * opened)
 			 */
-			if (nvif_device_init(&cli->base.object,
-					     NOUVEAU_ABI16_DEVICE, NV_DEVICE,
+			if (nvif_device_init(&cli->base.object, 0, NV_DEVICE,
 					     &args, sizeof(args),
 					     &abi16->device) == 0)
 				return cli->abi16;
@@ -133,7 +132,6 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
 
 	/* destroy channel object, all children will be killed too */
 	if (chan->chan) {
-		abi16->handles &= ~(1ULL << (chan->chan->user.handle & 0xffff));
 		nouveau_channel_idle(chan->chan);
 		nouveau_channel_del(&chan->chan);
 	}
@@ -268,26 +266,21 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 		return nouveau_abi16_put(abi16, -EINVAL);
 
 	/* allocate "abi16 channel" data and make up a handle for it */
-	init->channel = __ffs64(~abi16->handles);
-	if (~abi16->handles == 0)
-		return nouveau_abi16_put(abi16, -ENOSPC);
-
 	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
 	if (!chan)
 		return nouveau_abi16_put(abi16, -ENOMEM);
 
 	INIT_LIST_HEAD(&chan->notifiers);
 	list_add(&chan->head, &abi16->channels);
-	abi16->handles |= (1ULL << init->channel);
 
 	/* create channel object and initialise dma and fence management */
-	ret = nouveau_channel_new(drm, device,
-				  NOUVEAU_ABI16_CHAN(init->channel),
-				  init->fb_ctxdma_handle,
+	ret = nouveau_channel_new(drm, device, init->fb_ctxdma_handle,
 				  init->tt_ctxdma_handle, &chan->chan);
 	if (ret)
 		goto done;
 
+	init->channel = chan->chan->chid;
+
 	if (device->info.family >= NV_DEVICE_INFO_V0_TESLA)
 		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM |
 					NOUVEAU_GEM_DOMAIN_GART;
@@ -338,7 +331,7 @@ nouveau_abi16_chan(struct nouveau_abi16 *abi16, int channel)
 	struct nouveau_abi16_chan *chan;
 
 	list_for_each_entry(chan, &abi16->channels, head) {
-		if (chan->chan->user.handle == NOUVEAU_ABI16_CHAN(channel))
+		if (chan->chan->chid == channel)
 			return chan;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index ff5e59db49db0..1860f389f21f1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -55,10 +55,8 @@ nouveau_channel_idle(struct nouveau_channel *chan)
 		}
 
 		if (ret) {
-			NV_PRINTK(err, cli, "failed to idle channel "
-					    "0x%08x [%s]\n",
-				  chan->user.handle,
-				  nvxx_client(&cli->base)->name);
+			NV_PRINTK(err, cli, "failed to idle channel %d [%s]\n",
+				  chan->chid, nvxx_client(&cli->base)->name);
 			return ret;
 		}
 	}
@@ -89,7 +87,7 @@ nouveau_channel_del(struct nouveau_channel **pchan)
 
 static int
 nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device,
-		     u32 handle, u32 size, struct nouveau_channel **pchan)
+		     u32 size, struct nouveau_channel **pchan)
 {
 	struct nouveau_cli *cli = (void *)device->object.client;
 	struct nvkm_mmu *mmu = nvxx_mmu(device);
@@ -174,8 +172,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device,
 		}
 	}
 
-	ret = nvif_object_init(&device->object, NVDRM_PUSH |
-			       (handle & 0xffff), NV_DMA_FROM_MEMORY,
+	ret = nvif_object_init(&device->object, 0, NV_DMA_FROM_MEMORY,
 			       &args, sizeof(args), &chan->push.ctxdma);
 	if (ret) {
 		nouveau_channel_del(pchan);
@@ -187,7 +184,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device,
 
 static int
 nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
-		    u32 handle, u32 engine, struct nouveau_channel **pchan)
+		    u32 engine, struct nouveau_channel **pchan)
 {
 	static const u16 oclasses[] = { MAXWELL_CHANNEL_GPFIFO_A,
 					KEPLER_CHANNEL_GPFIFO_A,
@@ -206,7 +203,7 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 	int ret;
 
 	/* allocate dma push buffer */
-	ret = nouveau_channel_prep(drm, device, handle, 0x12000, &chan);
+	ret = nouveau_channel_prep(drm, device, 0x12000, &chan);
 	*pchan = chan;
 	if (ret)
 		return ret;
@@ -236,7 +233,7 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 			size = sizeof(args.nv50);
 		}
 
-		ret = nvif_object_init(&device->object, handle, *oclass++,
+		ret = nvif_object_init(&device->object, 0, *oclass++,
 				       &args, size, &chan->user);
 		if (ret == 0) {
 			if (chan->user.oclass >= KEPLER_CHANNEL_GPFIFO_A)
@@ -256,7 +253,7 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 
 static int
 nouveau_channel_dma(struct nouveau_drm *drm, struct nvif_device *device,
-		    u32 handle, struct nouveau_channel **pchan)
+		    struct nouveau_channel **pchan)
 {
 	static const u16 oclasses[] = { NV40_CHANNEL_DMA,
 					NV17_CHANNEL_DMA,
@@ -269,7 +266,7 @@ nouveau_channel_dma(struct nouveau_drm *drm, struct nvif_device *device,
 	int ret;
 
 	/* allocate dma push buffer */
-	ret = nouveau_channel_prep(drm, device, handle, 0x10000, &chan);
+	ret = nouveau_channel_prep(drm, device, 0x10000, &chan);
 	*pchan = chan;
 	if (ret)
 		return ret;
@@ -280,7 +277,7 @@ nouveau_channel_dma(struct nouveau_drm *drm, struct nvif_device *device,
 	args.offset = chan->push.vma.offset;
 
 	do {
-		ret = nvif_object_init(&device->object, handle, *oclass++,
+		ret = nvif_object_init(&device->object, 0, *oclass++,
 				       &args, sizeof(args), &chan->user);
 		if (ret == 0) {
 			chan->chid = args.chid;
@@ -401,8 +398,7 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart)
 
 int
 nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
-		    u32 handle, u32 arg0, u32 arg1,
-		    struct nouveau_channel **pchan)
+		    u32 arg0, u32 arg1, struct nouveau_channel **pchan)
 {
 	struct nouveau_cli *cli = (void *)device->object.client;
 	bool super;
@@ -412,10 +408,10 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
 	super = cli->base.super;
 	cli->base.super = true;
 
-	ret = nouveau_channel_ind(drm, device, handle, arg0, pchan);
+	ret = nouveau_channel_ind(drm, device, arg0, pchan);
 	if (ret) {
 		NV_PRINTK(dbg, cli, "ib channel create, %d\n", ret);
-		ret = nouveau_channel_dma(drm, device, handle, pchan);
+		ret = nouveau_channel_dma(drm, device, pchan);
 		if (ret) {
 			NV_PRINTK(dbg, cli, "dma channel create, %d\n", ret);
 			goto done;
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h
index 2ed32414cb697..48062c94f36d7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -42,8 +42,7 @@ struct nouveau_channel {
 
 
 int  nouveau_channel_new(struct nouveau_drm *, struct nvif_device *,
-			 u32 handle, u32 arg0, u32 arg1,
-			 struct nouveau_channel **);
+			 u32 arg0, u32 arg1, struct nouveau_channel **);
 void nouveau_channel_del(struct nouveau_channel **);
 int  nouveau_channel_idle(struct nouveau_channel *);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 614b32e6381cb..db6bc67605451 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -509,9 +509,8 @@ nouveau_display_create(struct drm_device *dev)
 		int i;
 
 		for (i = 0, ret = -ENODEV; ret && i < ARRAY_SIZE(oclass); i++) {
-			ret = nvif_object_init(&drm->device.object,
-					       NVDRM_DISPLAY, oclass[i],
-					       NULL, 0, &disp->disp);
+			ret = nvif_object_init(&drm->device.object, 0,
+					       oclass[i], NULL, 0, &disp->disp);
 		}
 
 		if (ret == 0) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 45ba678191990..f93266582f278 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -208,7 +208,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	}
 
 	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
-		ret = nouveau_channel_new(drm, &drm->device, NVDRM_CHAN + 1,
+		ret = nouveau_channel_new(drm, &drm->device,
 					  KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_CE0|
 					  KEPLER_CHANNEL_GPFIFO_A_V0_ENGINE_CE1,
 					  0, &drm->cechan);
@@ -221,7 +221,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	if (device->info.chipset >= 0xa3 &&
 	    device->info.chipset != 0xaa &&
 	    device->info.chipset != 0xac) {
-		ret = nouveau_channel_new(drm, &drm->device, NVDRM_CHAN + 1,
+		ret = nouveau_channel_new(drm, &drm->device,
 					  NvDmaFB, NvDmaTT, &drm->cechan);
 		if (ret)
 			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
@@ -233,8 +233,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 		arg1 = NvDmaTT;
 	}
 
-	ret = nouveau_channel_new(drm, &drm->device, NVDRM_CHAN, arg0, arg1,
-				 &drm->channel);
+	ret = nouveau_channel_new(drm, &drm->device, arg0, arg1, &drm->channel);
 	if (ret) {
 		NV_ERROR(drm, "failed to create kernel channel, %d\n", ret);
 		nouveau_accel_fini(drm);
@@ -403,8 +402,7 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 
 	nouveau_get_hdmi_dev(drm);
 
-	ret = nvif_device_init(&drm->client.base.object,
-			       NVDRM_DEVICE, NV_DEVICE,
+	ret = nvif_device_init(&drm->client.base.object, 0, NV_DEVICE,
 			       &(struct nv_device_v0) {
 					.device = ~0,
 			       }, sizeof(struct nv_device_v0),
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h
index 3c902c24a8dde..acfa03bd50de9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.h
@@ -74,11 +74,6 @@ enum nouveau_drm_notify_route {
 };
 
 enum nouveau_drm_handle {
-	NVDRM_CLIENT  = 0xffffffff,
-	NVDRM_DEVICE  = 0xdddddddd,
-	NVDRM_CONTROL = 0xdddddddc,
-	NVDRM_DISPLAY = 0xd1500000,
-	NVDRM_PUSH    = 0xbbbb0000, /* |= client chid */
 	NVDRM_CHAN    = 0xcccc0000, /* |= client chid */
 	NVDRM_NVSW    = 0x55550000,
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 41be584147b93..6bf1a7895708e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -682,7 +682,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 		return -ENOMEM;
 
 	list_for_each_entry(temp, &abi16->channels, head) {
-		if (temp->chan->user.handle == (NVDRM_CHAN | req->channel)) {
+		if (temp->chan->chid == req->channel) {
 			chan = temp->chan;
 			break;
 		}
diff --git a/drivers/gpu/drm/nouveau/nouveau_sysfs.c b/drivers/gpu/drm/nouveau/nouveau_sysfs.c
index d12a5faee0478..5dac3546c1b89 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sysfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sysfs.c
@@ -188,9 +188,8 @@ nouveau_sysfs_init(struct drm_device *dev)
 	if (!sysfs)
 		return -ENOMEM;
 
-	ret = nvif_object_init(&device->object, NVDRM_CONTROL,
-			       NVIF_IOCTL_NEW_V0_CONTROL, NULL, 0,
-			       &sysfs->ctrl);
+	ret = nvif_object_init(&device->object, 0, NVIF_IOCTL_NEW_V0_CONTROL,
+			       NULL, 0, &sysfs->ctrl);
 	if (ret == 0)
 		device_create_file(nvxx_device(device)->dev, &dev_attr_pstate);
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 4ae87aed45054..c053c50b346a2 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -68,7 +68,6 @@ nv50_chan_create(struct nvif_device *device, struct nvif_object *disp,
 		 const s32 *oclass, u8 head, void *data, u32 size,
 		 struct nv50_chan *chan)
 {
-	const u32 handle = (oclass[0] << 16) | head;
 	struct nvif_sclass *sclass;
 	int ret, i, n;
 
@@ -81,7 +80,7 @@ nv50_chan_create(struct nvif_device *device, struct nvif_object *disp,
 	while (oclass[0]) {
 		for (i = 0; i < n; i++) {
 			if (sclass[i].oclass == oclass[0]) {
-				ret = nvif_object_init(disp, handle, oclass[0],
+				ret = nvif_object_init(disp, 0, oclass[0],
 						       data, size, &chan->user);
 				if (ret == 0)
 					nvif_object_map(&chan->user);
@@ -231,8 +230,8 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
 	if (!dmac->ptr)
 		return -ENOMEM;
 
-	ret = nvif_object_init(&device->object, 0xd0000000,
-			       NV_DMA_FROM_MEMORY, &(struct nv_dma_v0) {
+	ret = nvif_object_init(&device->object, 0, NV_DMA_FROM_MEMORY,
+			       &(struct nv_dma_v0) {
 					.target = NV_DMA_V0_TARGET_PCI_US,
 					.access = NV_DMA_V0_ACCESS_RD,
 					.start = dmac->handle + 0x0000,
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index 5507eead58634..fd594cc73cc05 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -27,14 +27,6 @@
 
 #define DRM_NOUVEAU_EVENT_NVIF                                       0x80000000
 
-/* reserved object handles when using deprecated object APIs - these
- * are here so that libdrm can allow interoperability with the new
- * object APIs
- */
-#define NOUVEAU_ABI16_CLIENT   0xffffffff
-#define NOUVEAU_ABI16_DEVICE   0xdddddddd
-#define NOUVEAU_ABI16_CHAN(n) (0xcccc0000 | (n))
-
 #define NOUVEAU_GEM_DOMAIN_CPU       (1 << 0)
 #define NOUVEAU_GEM_DOMAIN_VRAM      (1 << 1)
 #define NOUVEAU_GEM_DOMAIN_GART      (1 << 2)

From 69c4938249fb48aeed32fd76c67972e71f471cd2 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Fri, 4 Sep 2015 19:52:11 +0900
Subject: [PATCH 07/45] drm/nouveau/instmem/gk20a: use direct CPU access

The Great Nouveau Refactoring Take II brought us a lot of goodness,
including acquire/release methods that are called before and after an
instobj is modified. These functions can be used as synchronization
points to manage CPU/GPU coherency if we modify an instobj using the
CPU.

This patch replaces the legacy and slow PRAMIN access for gk20a instmem
with CPU mappings and writes. A LRU list is used to unmap unused
mappings after a certain threshold (currently 1MB) of mapped instobjs is
reached. This allows mappings to be reused most of the time.

Accessing instobjs using the CPU requires to maintain the GPU L2 cache,
which we do in the acquire/release functions. This triggers a lot of L2
flushes/invalidates, but most of them are performed on an empty cache
(and thus return immediately), and overall context setup performance
greatly benefits from this (from 250ms to 160ms on Jetson TK1 for a
simple libdrm program).

Making L2 management more explicit should allow us to grab some more
performance in the future.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../drm/nouveau/nvkm/subdev/instmem/gk20a.c   | 361 +++++++++++++-----
 1 file changed, 264 insertions(+), 97 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index cd7feb1b25f69..a2921ace40458 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -23,35 +23,42 @@
 /*
  * GK20A does not have dedicated video memory, and to accurately represent this
  * fact Nouveau will not create a RAM device for it. Therefore its instmem
- * implementation must be done directly on top of system memory, while providing
- * coherent read and write operations.
+ * implementation must be done directly on top of system memory, while
+ * preserving coherency for read and write operations.
  *
  * Instmem can be allocated through two means:
- * 1) If an IOMMU mapping has been probed, the IOMMU API is used to make memory
+ * 1) If an IOMMU unit has been probed, the IOMMU API is used to make memory
  *    pages contiguous to the GPU. This is the preferred way.
- * 2) If no IOMMU mapping is probed, the DMA API is used to allocate physically
+ * 2) If no IOMMU unit is probed, the DMA API is used to allocate physically
  *    contiguous memory.
  *
- * In both cases CPU read and writes are performed using PRAMIN (i.e. using the
- * GPU path) to ensure these operations are coherent for the GPU. This allows us
- * to use more "relaxed" allocation parameters when using the DMA API, since we
- * never need a kernel mapping.
+ * In both cases CPU read and writes are performed by creating a write-combined
+ * mapping. The GPU L2 cache must thus be flushed/invalidated when required. To
+ * be conservative we do this every time we acquire or release an instobj, but
+ * ideally L2 management should be handled at a higher level.
+ *
+ * To improve performance, CPU mappings are not removed upon instobj release.
+ * Instead they are placed into a LRU list to be recycled when the mapped space
+ * goes beyond a certain threshold. At the moment this limit is 1MB.
  */
-#define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base)
 #include "priv.h"
 
 #include <core/memory.h>
 #include <core/mm.h>
 #include <core/tegra.h>
 #include <subdev/fb.h>
-
-#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)
+#include <subdev/ltc.h>
 
 struct gk20a_instobj {
 	struct nvkm_memory memory;
-	struct gk20a_instmem *imem;
 	struct nvkm_mem mem;
+	struct gk20a_instmem *imem;
+
+	/* CPU mapping */
+	u32 *vaddr;
+	struct list_head vaddr_node;
 };
+#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)
 
 /*
  * Used for objects allocated using the DMA API
@@ -59,10 +66,12 @@ struct gk20a_instobj {
 struct gk20a_instobj_dma {
 	struct gk20a_instobj base;
 
-	void *cpuaddr;
+	u32 *cpuaddr;
 	dma_addr_t handle;
 	struct nvkm_mm_node r;
 };
+#define gk20a_instobj_dma(p) \
+	container_of(gk20a_instobj(p), struct gk20a_instobj_dma, base)
 
 /*
  * Used for objects flattened using the IOMMU API
@@ -70,15 +79,24 @@ struct gk20a_instobj_dma {
 struct gk20a_instobj_iommu {
 	struct gk20a_instobj base;
 
-	/* array of base.mem->size pages */
+	/* will point to the higher half of pages */
+	dma_addr_t *dma_addrs;
+	/* array of base.mem->size pages (+ dma_addr_ts) */
 	struct page *pages[];
 };
+#define gk20a_instobj_iommu(p) \
+	container_of(gk20a_instobj(p), struct gk20a_instobj_iommu, base)
 
 struct gk20a_instmem {
 	struct nvkm_instmem base;
-	unsigned long lock_flags;
+
+	/* protects vaddr_* and gk20a_instobj::vaddr* */
 	spinlock_t lock;
-	u64 addr;
+
+	/* CPU mappings LRU */
+	unsigned int vaddr_use;
+	unsigned int vaddr_max;
+	struct list_head vaddr_lru;
 
 	/* Only used if IOMMU if present */
 	struct mutex *mm_mutex;
@@ -88,7 +106,10 @@ struct gk20a_instmem {
 
 	/* Only used by DMA API */
 	struct dma_attrs attrs;
+
+	void __iomem * (*cpu_map)(struct nvkm_memory *);
 };
+#define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base)
 
 static enum nvkm_memory_target
 gk20a_instobj_target(struct nvkm_memory *memory)
@@ -100,7 +121,6 @@ static u64
 gk20a_instobj_addr(struct nvkm_memory *memory)
 {
 	return gk20a_instobj(memory)->mem.offset;
-
 }
 
 static u64
@@ -109,108 +129,218 @@ gk20a_instobj_size(struct nvkm_memory *memory)
 	return (u64)gk20a_instobj(memory)->mem.size << 12;
 }
 
+static void __iomem *
+gk20a_instobj_cpu_map_dma(struct nvkm_memory *memory)
+{
+	struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
+	struct device *dev = node->base.imem->base.subdev.device->dev;
+	int npages = nvkm_memory_size(memory) >> 12;
+	struct page *pages[npages];
+	int i;
+
+	/* phys_to_page does not exist on all platforms... */
+	pages[0] = pfn_to_page(dma_to_phys(dev, node->handle) >> PAGE_SHIFT);
+	for (i = 1; i < npages; i++)
+		pages[i] = pages[0] + i;
+
+	return vmap(pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+}
+
+static void __iomem *
+gk20a_instobj_cpu_map_iommu(struct nvkm_memory *memory)
+{
+	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
+	int npages = nvkm_memory_size(memory) >> 12;
+
+	return vmap(node->pages, npages, VM_MAP,
+		    pgprot_writecombine(PAGE_KERNEL));
+}
+
+/*
+ * Must be called while holding gk20a_instmem_lock
+ */
+static void
+gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size)
+{
+	while (imem->vaddr_use + size > imem->vaddr_max) {
+		struct gk20a_instobj *obj;
+
+		/* no candidate that can be unmapped, abort... */
+		if (list_empty(&imem->vaddr_lru))
+			break;
+
+		obj = list_first_entry(&imem->vaddr_lru, struct gk20a_instobj,
+				       vaddr_node);
+		list_del(&obj->vaddr_node);
+		vunmap(obj->vaddr);
+		obj->vaddr = NULL;
+		imem->vaddr_use -= nvkm_memory_size(&obj->memory);
+		nvkm_debug(&imem->base.subdev, "(GC) vaddr used: %x/%x\n",
+			   imem->vaddr_use, imem->vaddr_max);
+
+	}
+}
+
 static void __iomem *
 gk20a_instobj_acquire(struct nvkm_memory *memory)
 {
-	struct gk20a_instmem *imem = gk20a_instobj(memory)->imem;
+	struct gk20a_instobj *node = gk20a_instobj(memory);
+	struct gk20a_instmem *imem = node->imem;
+	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
+	const u64 size = nvkm_memory_size(memory);
 	unsigned long flags;
+
+	nvkm_ltc_flush(ltc);
+
 	spin_lock_irqsave(&imem->lock, flags);
-	imem->lock_flags = flags;
-	return NULL;
+
+	if (node->vaddr) {
+		/* remove us from the LRU list since we cannot be unmapped */
+		list_del(&node->vaddr_node);
+
+		goto out;
+	}
+
+	/* try to free some address space if we reached the limit */
+	gk20a_instmem_vaddr_gc(imem, size);
+
+	node->vaddr = imem->cpu_map(memory);
+
+	if (!node->vaddr) {
+		nvkm_error(&imem->base.subdev, "cannot map instobj - "
+			   "this is not going to end well...\n");
+		goto out;
+	}
+
+	imem->vaddr_use += size;
+	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n",
+		   imem->vaddr_use, imem->vaddr_max);
+
+out:
+	spin_unlock_irqrestore(&imem->lock, flags);
+
+	return node->vaddr;
 }
 
 static void
 gk20a_instobj_release(struct nvkm_memory *memory)
 {
-	struct gk20a_instmem *imem = gk20a_instobj(memory)->imem;
-	spin_unlock_irqrestore(&imem->lock, imem->lock_flags);
-}
+	struct gk20a_instobj *node = gk20a_instobj(memory);
+	struct gk20a_instmem *imem = node->imem;
+	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
+	unsigned long flags;
 
-/*
- * Use PRAMIN to read/write data and avoid coherency issues.
- * PRAMIN uses the GPU path and ensures data will always be coherent.
- *
- * A dynamic mapping based solution would be desirable in the future, but
- * the issue remains of how to maintain coherency efficiently. On ARM it is
- * not easy (if possible at all?) to create uncached temporary mappings.
- */
+	spin_lock_irqsave(&imem->lock, flags);
+
+	/* add ourselves to the LRU list so our CPU mapping can be freed */
+	list_add_tail(&node->vaddr_node, &imem->vaddr_lru);
+
+	spin_unlock_irqrestore(&imem->lock, flags);
+
+	wmb();
+	nvkm_ltc_invalidate(ltc);
+}
 
 static u32
 gk20a_instobj_rd32(struct nvkm_memory *memory, u64 offset)
 {
 	struct gk20a_instobj *node = gk20a_instobj(memory);
-	struct gk20a_instmem *imem = node->imem;
-	struct nvkm_device *device = imem->base.subdev.device;
-	u64 base = (node->mem.offset + offset) & 0xffffff00000ULL;
-	u64 addr = (node->mem.offset + offset) & 0x000000fffffULL;
-	u32 data;
-
-	if (unlikely(imem->addr != base)) {
-		nvkm_wr32(device, 0x001700, base >> 16);
-		imem->addr = base;
-	}
-	data = nvkm_rd32(device, 0x700000 + addr);
-	return data;
+
+	return node->vaddr[offset / 4];
 }
 
 static void
 gk20a_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data)
 {
 	struct gk20a_instobj *node = gk20a_instobj(memory);
-	struct gk20a_instmem *imem = node->imem;
-	struct nvkm_device *device = imem->base.subdev.device;
-	u64 base = (node->mem.offset + offset) & 0xffffff00000ULL;
-	u64 addr = (node->mem.offset + offset) & 0x000000fffffULL;
 
-	if (unlikely(imem->addr != base)) {
-		nvkm_wr32(device, 0x001700, base >> 16);
-		imem->addr = base;
-	}
-	nvkm_wr32(device, 0x700000 + addr, data);
+	node->vaddr[offset / 4] = data;
 }
 
 static void
 gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset)
 {
 	struct gk20a_instobj *node = gk20a_instobj(memory);
+
 	nvkm_vm_map_at(vma, offset, &node->mem);
 }
 
+/*
+ * Clear the CPU mapping of an instobj if it exists
+ */
 static void
-gk20a_instobj_dtor_dma(struct gk20a_instobj *_node)
+gk20a_instobj_dtor(struct gk20a_instobj *node)
+{
+	struct gk20a_instmem *imem = node->imem;
+	struct gk20a_instobj *obj;
+	unsigned long flags;
+
+	spin_lock_irqsave(&imem->lock, flags);
+
+	if (!node->vaddr)
+		goto out;
+
+	list_for_each_entry(obj, &imem->vaddr_lru, vaddr_node) {
+		if (obj == node) {
+			list_del(&obj->vaddr_node);
+			break;
+		}
+	}
+	vunmap(node->vaddr);
+	node->vaddr = NULL;
+	imem->vaddr_use -= nvkm_memory_size(&node->memory);
+	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n",
+		   imem->vaddr_use, imem->vaddr_max);
+
+out:
+	spin_unlock_irqrestore(&imem->lock, flags);
+}
+
+static void *
+gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
 {
-	struct gk20a_instobj_dma *node = (void *)_node;
-	struct gk20a_instmem *imem = _node->imem;
+	struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
+	struct gk20a_instmem *imem = node->base.imem;
 	struct device *dev = imem->base.subdev.device->dev;
 
+	gk20a_instobj_dtor(&node->base);
+
 	if (unlikely(!node->cpuaddr))
-		return;
+		goto out;
 
-	dma_free_attrs(dev, _node->mem.size << PAGE_SHIFT, node->cpuaddr,
+	dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->cpuaddr,
 		       node->handle, &imem->attrs);
+
+out:
+	return node;
 }
 
-static void
-gk20a_instobj_dtor_iommu(struct gk20a_instobj *_node)
+static void *
+gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
 {
-	struct gk20a_instobj_iommu *node = (void *)_node;
-	struct gk20a_instmem *imem = _node->imem;
+	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
+	struct gk20a_instmem *imem = node->base.imem;
+	struct device *dev = imem->base.subdev.device->dev;
 	struct nvkm_mm_node *r;
 	int i;
 
-	if (unlikely(list_empty(&_node->mem.regions)))
-		return;
+	gk20a_instobj_dtor(&node->base);
 
-	r = list_first_entry(&_node->mem.regions, struct nvkm_mm_node,
+	if (unlikely(list_empty(&node->base.mem.regions)))
+		goto out;
+
+	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
 			     rl_entry);
 
 	/* clear bit 34 to unmap pages */
 	r->offset &= ~BIT(34 - imem->iommu_pgshift);
 
 	/* Unmap pages from GPU address space and free them */
-	for (i = 0; i < _node->mem.size; i++) {
+	for (i = 0; i < node->base.mem.size; i++) {
 		iommu_unmap(imem->domain,
 			    (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE);
+		dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE,
+			       DMA_BIDIRECTIONAL);
 		__free_page(node->pages[i]);
 	}
 
@@ -218,25 +348,27 @@ gk20a_instobj_dtor_iommu(struct gk20a_instobj *_node)
 	mutex_lock(imem->mm_mutex);
 	nvkm_mm_free(imem->mm, &r);
 	mutex_unlock(imem->mm_mutex);
-}
-
-static void *
-gk20a_instobj_dtor(struct nvkm_memory *memory)
-{
-	struct gk20a_instobj *node = gk20a_instobj(memory);
-	struct gk20a_instmem *imem = node->imem;
-
-	if (imem->domain)
-		gk20a_instobj_dtor_iommu(node);
-	else
-		gk20a_instobj_dtor_dma(node);
 
+out:
 	return node;
 }
 
 static const struct nvkm_memory_func
-gk20a_instobj_func = {
-	.dtor = gk20a_instobj_dtor,
+gk20a_instobj_func_dma = {
+	.dtor = gk20a_instobj_dtor_dma,
+	.target = gk20a_instobj_target,
+	.addr = gk20a_instobj_addr,
+	.size = gk20a_instobj_size,
+	.acquire = gk20a_instobj_acquire,
+	.release = gk20a_instobj_release,
+	.rd32 = gk20a_instobj_rd32,
+	.wr32 = gk20a_instobj_wr32,
+	.map = gk20a_instobj_map,
+};
+
+static const struct nvkm_memory_func
+gk20a_instobj_func_iommu = {
+	.dtor = gk20a_instobj_dtor_iommu,
 	.target = gk20a_instobj_target,
 	.addr = gk20a_instobj_addr,
 	.size = gk20a_instobj_size,
@@ -259,6 +391,8 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
 		return -ENOMEM;
 	*_node = &node->base;
 
+	nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory);
+
 	node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
 					&node->handle, GFP_KERNEL,
 					&imem->attrs);
@@ -292,24 +426,40 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
 {
 	struct gk20a_instobj_iommu *node;
 	struct nvkm_subdev *subdev = &imem->base.subdev;
+	struct device *dev = subdev->device->dev;
 	struct nvkm_mm_node *r;
 	int ret;
 	int i;
 
-	if (!(node = kzalloc(sizeof(*node) +
-			     sizeof( node->pages[0]) * npages, GFP_KERNEL)))
+	/*
+	 * despite their variable size, instmem allocations are small enough
+	 * (< 1 page) to be handled by kzalloc
+	 */
+	if (!(node = kzalloc(sizeof(*node) + ((sizeof(node->pages[0]) +
+			     sizeof(*node->dma_addrs)) * npages), GFP_KERNEL)))
 		return -ENOMEM;
 	*_node = &node->base;
+	node->dma_addrs = (void *)(node->pages + npages);
+
+	nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory);
 
 	/* Allocate backing memory */
 	for (i = 0; i < npages; i++) {
 		struct page *p = alloc_page(GFP_KERNEL);
+		dma_addr_t dma_adr;
 
 		if (p == NULL) {
 			ret = -ENOMEM;
 			goto free_pages;
 		}
 		node->pages[i] = p;
+		dma_adr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(dev, dma_adr)) {
+			nvkm_error(subdev, "DMA mapping error!\n");
+			ret = -ENOMEM;
+			goto free_pages;
+		}
+		node->dma_addrs[i] = dma_adr;
 	}
 
 	mutex_lock(imem->mm_mutex);
@@ -318,16 +468,15 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
 			   align >> imem->iommu_pgshift, &r);
 	mutex_unlock(imem->mm_mutex);
 	if (ret) {
-		nvkm_error(subdev, "virtual space is full!\n");
+		nvkm_error(subdev, "IOMMU space is full!\n");
 		goto free_pages;
 	}
 
 	/* Map into GPU address space */
 	for (i = 0; i < npages; i++) {
-		struct page *p = node->pages[i];
 		u32 offset = (r->offset + i) << imem->iommu_pgshift;
 
-		ret = iommu_map(imem->domain, offset, page_to_phys(p),
+		ret = iommu_map(imem->domain, offset, node->dma_addrs[i],
 				PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
 		if (ret < 0) {
 			nvkm_error(subdev, "IOMMU mapping failure: %d\n", ret);
@@ -356,8 +505,13 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
 	mutex_unlock(imem->mm_mutex);
 
 free_pages:
-	for (i = 0; i < npages && node->pages[i] != NULL; i++)
+	for (i = 0; i < npages && node->pages[i] != NULL; i++) {
+		dma_addr_t dma_addr = node->dma_addrs[i];
+		if (dma_addr)
+			dma_unmap_page(dev, dma_addr, PAGE_SIZE,
+				       DMA_BIDIRECTIONAL);
 		__free_page(node->pages[i]);
+	}
 
 	return ret;
 }
@@ -367,8 +521,8 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
 		  struct nvkm_memory **pmemory)
 {
 	struct gk20a_instmem *imem = gk20a_instmem(base);
-	struct gk20a_instobj *node = NULL;
 	struct nvkm_subdev *subdev = &imem->base.subdev;
+	struct gk20a_instobj *node = NULL;
 	int ret;
 
 	nvkm_debug(subdev, "%s (%s): size: %x align: %x\n", __func__,
@@ -388,7 +542,6 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
 	if (ret)
 		return ret;
 
-	nvkm_memory_ctor(&gk20a_instobj_func, &node->memory);
 	node->imem = imem;
 
 	/* present memory for being mapped using small pages */
@@ -402,15 +555,25 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
 	return 0;
 }
 
-static void
-gk20a_instmem_fini(struct nvkm_instmem *base)
+static void *
+gk20a_instmem_dtor(struct nvkm_instmem *base)
 {
-	gk20a_instmem(base)->addr = ~0ULL;
+	struct gk20a_instmem *imem = gk20a_instmem(base);
+
+	/* perform some sanity checks... */
+	if (!list_empty(&imem->vaddr_lru))
+		nvkm_warn(&base->subdev, "instobj LRU not empty!\n");
+
+	if (imem->vaddr_use != 0)
+		nvkm_warn(&base->subdev, "instobj vmap area not empty! "
+			  "0x%x bytes still mapped\n", imem->vaddr_use);
+
+	return imem;
 }
 
 static const struct nvkm_instmem_func
 gk20a_instmem = {
-	.fini = gk20a_instmem_fini,
+	.dtor = gk20a_instmem_dtor,
 	.memory_new = gk20a_instobj_new,
 	.persistent = true,
 	.zero = false,
@@ -429,23 +592,27 @@ gk20a_instmem_new(struct nvkm_device *device, int index,
 	spin_lock_init(&imem->lock);
 	*pimem = &imem->base;
 
+	/* do not allow more than 1MB of CPU-mapped instmem */
+	imem->vaddr_use = 0;
+	imem->vaddr_max = 0x100000;
+	INIT_LIST_HEAD(&imem->vaddr_lru);
+
 	if (tdev->iommu.domain) {
-		imem->domain = tdev->iommu.domain;
+		imem->mm_mutex = &tdev->iommu.mutex;
 		imem->mm = &tdev->iommu.mm;
+		imem->domain = tdev->iommu.domain;
 		imem->iommu_pgshift = tdev->iommu.pgshift;
-		imem->mm_mutex = &tdev->iommu.mutex;
+		imem->cpu_map = gk20a_instobj_cpu_map_iommu;
 
 		nvkm_info(&imem->base.subdev, "using IOMMU\n");
 	} else {
 		init_dma_attrs(&imem->attrs);
-		/*
-		 * We will access instmem through PRAMIN and thus do not need a
-		 * consistent CPU pointer or kernel mapping
-		 */
+		/* We will access the memory through our own mapping */
 		dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs);
 		dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs);
 		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs);
 		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &imem->attrs);
+		imem->cpu_map = gk20a_instobj_cpu_map_dma;
 
 		nvkm_info(&imem->base.subdev, "using DMA API\n");
 	}

From e396ecd178c6c8373e765d78b5b1a29e38837d3a Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Fri, 4 Sep 2015 19:59:31 +0900
Subject: [PATCH 08/45] drm/nouveau/platform: allow to specify the IOMMU bit

Current Tegra code taking advantage of the IOMMU assumes a hardcoded
value for the IOMMU bit. Make it a platform property instead for
flexibility.

v2 (Ben Skeggs): remove nvkm dependence on drm structures

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/include/nvif/os.h     |  1 +
 .../gpu/drm/nouveau/include/nvkm/core/tegra.h | 13 ++++++++++++-
 drivers/gpu/drm/nouveau/nouveau_drm.c         |  5 +++--
 drivers/gpu/drm/nouveau/nouveau_drm.h         |  5 ++++-
 drivers/gpu/drm/nouveau/nouveau_platform.c    | 19 ++++++++++++++++---
 .../drm/nouveau/nvkm/engine/device/tegra.c    | 13 ++++++++++---
 6 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvif/os.h b/drivers/gpu/drm/nouveau/include/nvif/os.h
index 3accc99d8e0b9..9fcab67c85577 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/os.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/os.h
@@ -27,6 +27,7 @@
 #include <linux/agp_backend.h>
 #include <linux/reset.h>
 #include <linux/iommu.h>
+#include <linux/of_device.h>
 
 #include <asm/unaligned.h>
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
index 5aa2480da25f5..16641cec18a28 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
@@ -4,6 +4,7 @@
 #include <core/mm.h>
 
 struct nvkm_device_tegra {
+	const struct nvkm_device_tegra_func *func;
 	struct nvkm_device device;
 	struct platform_device *pdev;
 	int irq;
@@ -28,7 +29,17 @@ struct nvkm_device_tegra {
 	int gpu_speedo;
 };
 
-int nvkm_device_tegra_new(struct platform_device *,
+struct nvkm_device_tegra_func {
+	/*
+	 * If an IOMMU is used, indicates which address bit will trigger a
+	 * IOMMU translation when set (when this bit is not set, IOMMU is
+	 * bypassed). A value of 0 means an IOMMU is never used.
+	 */
+	u8 iommu_bit;
+};
+
+int nvkm_device_tegra_new(const struct nvkm_device_tegra_func *,
+			  struct platform_device *,
 			  const char *cfg, const char *dbg,
 			  bool detect, bool mmio, u64 subdev_mask,
 			  struct nvkm_device **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index f93266582f278..1d3ee5179ab85 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -1028,13 +1028,14 @@ nouveau_drm_pci_driver = {
 };
 
 struct drm_device *
-nouveau_platform_device_create(struct platform_device *pdev,
+nouveau_platform_device_create(const struct nvkm_device_tegra_func *func,
+			       struct platform_device *pdev,
 			       struct nvkm_device **pdevice)
 {
 	struct drm_device *drm;
 	int err;
 
-	err = nvkm_device_tegra_new(pdev, nouveau_config, nouveau_debug,
+	err = nvkm_device_tegra_new(func, pdev, nouveau_config, nouveau_debug,
 				    true, true, ~0ULL, pdevice);
 	if (err)
 		goto err_free;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h
index acfa03bd50de9..51027a9030f62 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.h
@@ -178,8 +178,11 @@ nouveau_drm(struct drm_device *dev)
 int nouveau_pmops_suspend(struct device *);
 int nouveau_pmops_resume(struct device *);
 
+#include <nvkm/core/tegra.h>
+
 struct drm_device *
-nouveau_platform_device_create(struct platform_device *, struct nvkm_device **);
+nouveau_platform_device_create(const struct nvkm_device_tegra_func *,
+			       struct platform_device *, struct nvkm_device **);
 void nouveau_drm_device_remove(struct drm_device *dev);
 
 #define NV_PRINTK(l,c,f,a...) do {                                             \
diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 3eb665453165b..60e32c4e4e496 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c
@@ -23,11 +23,14 @@
 
 static int nouveau_platform_probe(struct platform_device *pdev)
 {
+	const struct nvkm_device_tegra_func *func;
 	struct nvkm_device *device;
 	struct drm_device *drm;
 	int ret;
 
-	drm = nouveau_platform_device_create(pdev, &device);
+	func = of_device_get_match_data(&pdev->dev);
+
+	drm = nouveau_platform_device_create(func, pdev, &device);
 	if (IS_ERR(drm))
 		return PTR_ERR(drm);
 
@@ -48,9 +51,19 @@ static int nouveau_platform_remove(struct platform_device *pdev)
 }
 
 #if IS_ENABLED(CONFIG_OF)
+static const struct nvkm_device_tegra_func gk20a_platform_data = {
+	.iommu_bit = 34,
+};
+
 static const struct of_device_id nouveau_platform_match[] = {
-	{ .compatible = "nvidia,gk20a" },
-	{ .compatible = "nvidia,gm20b" },
+	{
+		.compatible = "nvidia,gk20a",
+		.data = &gk20a_platform_data,
+	},
+	{
+		.compatible = "nvidia,gm20b",
+		.data = &gk20a_platform_data,
+	},
 	{ }
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index da57c8a606085..7f8a42721eb20 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -85,6 +85,9 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
 	unsigned long pgsize_bitmap;
 	int ret;
 
+	if (!tdev->func->iommu_bit)
+		return;
+
 	mutex_init(&tdev->iommu.mutex);
 
 	if (iommu_present(&platform_bus_type)) {
@@ -114,7 +117,8 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
 			goto free_domain;
 
 		ret = nvkm_mm_init(&tdev->iommu.mm, 0,
-				   (1ULL << 40) >> tdev->iommu.pgshift, 1);
+				   (1ULL << tdev->func->iommu_bit) >>
+				   tdev->iommu.pgshift, 1);
 		if (ret)
 			goto detach_device;
 	}
@@ -237,7 +241,8 @@ nvkm_device_tegra_func = {
 };
 
 int
-nvkm_device_tegra_new(struct platform_device *pdev,
+nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
+		      struct platform_device *pdev,
 		      const char *cfg, const char *dbg,
 		      bool detect, bool mmio, u64 subdev_mask,
 		      struct nvkm_device **pdevice)
@@ -248,6 +253,7 @@ nvkm_device_tegra_new(struct platform_device *pdev,
 	if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL)))
 		return -ENOMEM;
 	*pdevice = &tdev->device;
+	tdev->func = func;
 	tdev->pdev = pdev;
 	tdev->irq = -1;
 
@@ -285,7 +291,8 @@ nvkm_device_tegra_new(struct platform_device *pdev,
 }
 #else
 int
-nvkm_device_tegra_new(struct platform_device *pdev,
+nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
+		      struct platform_device *pdev,
 		      const char *cfg, const char *dbg,
 		      bool detect, bool mmio, u64 subdev_mask,
 		      struct nvkm_device **pdevice)

From 68b566534c1275facf64969cfd643fa6a724b351 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Fri, 4 Sep 2015 19:59:32 +0900
Subject: [PATCH 09/45] drm/nouveau/instmem/gk20a: make use of the IOMMU bit

Use the IOMMU bit specified in platform data instead of hardcoding it to
the bit used by current Tegra GPUs.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index a2921ace40458..fc419bb8eab74 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -103,6 +103,7 @@ struct gk20a_instmem {
 	struct nvkm_mm *mm;
 	struct iommu_domain *domain;
 	unsigned long iommu_pgshift;
+	u16 iommu_bit;
 
 	/* Only used by DMA API */
 	struct dma_attrs attrs;
@@ -332,8 +333,8 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
 	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
 			     rl_entry);
 
-	/* clear bit 34 to unmap pages */
-	r->offset &= ~BIT(34 - imem->iommu_pgshift);
+	/* clear IOMMU bit to unmap pages */
+	r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift);
 
 	/* Unmap pages from GPU address space and free them */
 	for (i = 0; i < node->base.mem.size; i++) {
@@ -489,8 +490,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
 		}
 	}
 
-	/* Bit 34 tells that an address is to be resolved through the IOMMU */
-	r->offset |= BIT(34 - imem->iommu_pgshift);
+	/* IOMMU bit tells that an address is to be resolved through the IOMMU */
+	r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift);
 
 	node->base.mem.offset = ((u64)r->offset) << imem->iommu_pgshift;
 
@@ -603,6 +604,7 @@ gk20a_instmem_new(struct nvkm_device *device, int index,
 		imem->domain = tdev->iommu.domain;
 		imem->iommu_pgshift = tdev->iommu.pgshift;
 		imem->cpu_map = gk20a_instobj_cpu_map_iommu;
+		imem->iommu_bit = tdev->func->iommu_bit;
 
 		nvkm_info(&imem->base.subdev, "using IOMMU\n");
 	} else {

From 524883bb48464ed76bd635819989284a249bf917 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Fri, 4 Sep 2015 19:59:33 +0900
Subject: [PATCH 10/45] drm/nouveau/ttm: convert to DMA API

The pci_dma_* functions are now superseeded in the kernel by the DMA
API. Make the conversion to this more generic API.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_ttm.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 3f0fb55cb4733..bd287c2728c89 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -338,7 +338,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	struct nvkm_device *device = nvxx_device(&drm->device);
 	struct nvkm_pci *pci = device->pci;
 	struct drm_device *dev = drm->dev;
-	u32 bits;
+	u8 bits;
 	int ret;
 
 	if (pci && pci->agp.bridge) {
@@ -351,18 +351,16 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	bits = nvxx_mmu(&drm->device)->dma_bits;
 	if (nvxx_device(&drm->device)->func->pci) {
 		if (drm->agp.bridge ||
-		     !pci_dma_supported(dev->pdev, DMA_BIT_MASK(bits)))
+		     !dma_supported(dev->dev, DMA_BIT_MASK(bits)))
 			bits = 32;
 
-		ret = pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(bits));
+		ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits));
 		if (ret)
 			return ret;
 
-		ret = pci_set_consistent_dma_mask(dev->pdev,
-						  DMA_BIT_MASK(bits));
+		ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits));
 		if (ret)
-			pci_set_consistent_dma_mask(dev->pdev,
-						    DMA_BIT_MASK(32));
+			dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32));
 	}
 
 	ret = nouveau_ttm_global_init(drm);

From b31cf78b93243f8ff64297c1f77a4d030c32ca56 Mon Sep 17 00:00:00 2001
From: Alexandre Courbot <acourbot@nvidia.com>
Date: Fri, 4 Sep 2015 19:59:34 +0900
Subject: [PATCH 11/45] drm/nouveau/ttm: set the DMA mask for platform devices

So far the DMA mask was not set for platform devices, which limited them
to a 32-bit physical space. Allow dma_set_mask() to be called for
non-PCI devices, and also take the IOMMU bit into account since it could
restrict the physically addressable space.

Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_ttm.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index bd287c2728c89..3f713c1b5dc11 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -29,6 +29,9 @@
 #include "nouveau_gem.h"
 
 #include "drm_legacy.h"
+
+#include <core/tegra.h>
+
 static int
 nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
 {
@@ -353,16 +356,26 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 		if (drm->agp.bridge ||
 		     !dma_supported(dev->dev, DMA_BIT_MASK(bits)))
 			bits = 32;
+	} else if (device->func->tegra) {
+		struct nvkm_device_tegra *tegra = device->func->tegra(device);
 
-		ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits));
-		if (ret)
-			return ret;
+		/*
+		 * If the platform can use a IOMMU, then the addressable DMA
+		 * space is constrained by the IOMMU bit
+		 */
+		if (tegra->func->iommu_bit)
+			bits = min(bits, tegra->func->iommu_bit);
 
-		ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits));
-		if (ret)
-			dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32));
 	}
 
+	ret = dma_set_mask(dev->dev, DMA_BIT_MASK(bits));
+	if (ret)
+		return ret;
+
+	ret = dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(bits));
+	if (ret)
+		dma_set_coherent_mask(dev->dev, DMA_BIT_MASK(32));
+
 	ret = nouveau_ttm_global_init(drm);
 	if (ret)
 		return ret;

From 7bddeba9663962a5bd507e636c84361852314f04 Mon Sep 17 00:00:00 2001
From: Martin Peres <martin.peres@free.fr>
Date: Wed, 9 Sep 2015 04:05:50 +0300
Subject: [PATCH 12/45] drm/nouveau/bios/volt: add support for pwm-based volt
 management

Signed-off-by: Martin Peres <martin.peres@free.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../drm/nouveau/include/nvkm/subdev/bios/volt.h | 15 ++++++++++++++-
 drivers/gpu/drm/nouveau/nvkm/subdev/bios/volt.c | 17 +++++++++++++++--
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h
index eb2de4b85bbdd..b0df610cec2b5 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h
@@ -1,11 +1,24 @@
 #ifndef __NVBIOS_VOLT_H__
 #define __NVBIOS_VOLT_H__
+
+enum nvbios_volt_type {
+	NVBIOS_VOLT_GPIO = 0,
+	NVBIOS_VOLT_PWM,
+};
+
 struct nvbios_volt {
-	u8  vidmask;
+	enum nvbios_volt_type type;
 	u32 min;
 	u32 max;
 	u32 base;
+
+	/* GPIO mode */
+	u8  vidmask;
 	s16 step;
+
+	/* PWM mode */
+	u32 pwm_freq;
+	u32 pwm_range;
 };
 
 u16 nvbios_volt_table(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/volt.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/volt.c
index 615804c3887be..6e0a33648be92 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/volt.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/volt.c
@@ -73,15 +73,19 @@ nvbios_volt_parse(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 	memset(info, 0x00, sizeof(*info));
 	switch (!!volt * *ver) {
 	case 0x12:
+		info->type    = NVBIOS_VOLT_GPIO;
 		info->vidmask = nvbios_rd08(bios, volt + 0x04);
 		break;
 	case 0x20:
+		info->type    = NVBIOS_VOLT_GPIO;
 		info->vidmask = nvbios_rd08(bios, volt + 0x05);
 		break;
 	case 0x30:
+		info->type    = NVBIOS_VOLT_GPIO;
 		info->vidmask = nvbios_rd08(bios, volt + 0x04);
 		break;
 	case 0x40:
+		info->type    = NVBIOS_VOLT_GPIO;
 		info->base    = nvbios_rd32(bios, volt + 0x04);
 		info->step    = nvbios_rd16(bios, volt + 0x08);
 		info->vidmask = nvbios_rd08(bios, volt + 0x0b);
@@ -90,11 +94,20 @@ nvbios_volt_parse(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		info->max     = info->base;
 		break;
 	case 0x50:
-		info->vidmask = nvbios_rd08(bios, volt + 0x06);
 		info->min     = nvbios_rd32(bios, volt + 0x0a);
 		info->max     = nvbios_rd32(bios, volt + 0x0e);
 		info->base    = nvbios_rd32(bios, volt + 0x12) & 0x00ffffff;
-		info->step    = nvbios_rd16(bios, volt + 0x16);
+
+		/* offset 4 seems to be a flag byte */
+		if (nvbios_rd32(bios, volt + 0x4) & 1) {
+			info->type      = NVBIOS_VOLT_PWM;
+			info->pwm_freq  = nvbios_rd32(bios, volt + 0x5) / 1000;
+			info->pwm_range = nvbios_rd32(bios, volt + 0x16);
+		} else {
+			info->type      = NVBIOS_VOLT_GPIO;
+			info->vidmask   = nvbios_rd08(bios, volt + 0x06);
+			info->step      = nvbios_rd16(bios, volt + 0x16);
+		}
 		break;
 	}
 	return volt;

From 4c58a05b4a4709db7a1667d740f3ac2725fa3ce4 Mon Sep 17 00:00:00 2001
From: Martin Peres <martin.peres@free.fr>
Date: Wed, 9 Sep 2015 04:05:51 +0300
Subject: [PATCH 13/45] drm/nouveau/volt: add support for non-vid-based voltage
 controllers

This patch is not ideal but it definitely beats a rewrite of the current
interface and is very self-contained.

Signed-off-by: Martin Peres <martin.peres@free.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c | 11 ++++++++++-
 drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h |  2 ++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
index 4752dbd339230..50b5649ad1a47 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
@@ -30,7 +30,12 @@
 int
 nvkm_volt_get(struct nvkm_volt *volt)
 {
-	int ret = volt->func->vid_get(volt), i;
+	int ret, i;
+
+	if (volt->func->volt_get)
+		return volt->func->volt_get(volt);
+
+	ret = volt->func->vid_get(volt);
 	if (ret >= 0) {
 		for (i = 0; i < volt->vid_nr; i++) {
 			if (volt->vid[i].vid == ret)
@@ -46,6 +51,10 @@ nvkm_volt_set(struct nvkm_volt *volt, u32 uv)
 {
 	struct nvkm_subdev *subdev = &volt->subdev;
 	int i, ret = -EINVAL;
+
+	if (volt->func->volt_set)
+		return volt->func->volt_set(volt, uv);
+
 	for (i = 0; i < volt->vid_nr; i++) {
 		if (volt->vid[i].uv == uv) {
 			ret = volt->func->vid_set(volt, volt->vid[i].vid);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h
index 394f37c723afa..cdb3d9f80aa60 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h
@@ -9,6 +9,8 @@ int nvkm_volt_new_(const struct nvkm_volt_func *, struct nvkm_device *,
 		   int index, struct nvkm_volt **);
 
 struct nvkm_volt_func {
+	int (*volt_get)(struct nvkm_volt *);
+	int (*volt_set)(struct nvkm_volt *, u32 uv);
 	int (*vid_get)(struct nvkm_volt *);
 	int (*vid_set)(struct nvkm_volt *, u8 vid);
 	int (*set_id)(struct nvkm_volt *, u8 id, int condition);

From 1531dbbb56820d4e0fee4f9baa9fc84485bb6623 Mon Sep 17 00:00:00 2001
From: Martin Peres <martin.peres@free.fr>
Date: Wed, 9 Sep 2015 00:34:33 +0200
Subject: [PATCH 14/45] drm/nouveau/volt/gk104: add support for pwm and gpio
 modes

Most Keplers actually use the GPIO-based voltage management instead of the new
PWM-based one. Use the GPIO mode as a fallback as it already gracefully handles
the case where no GPIOs exist.

All the Maxwells seem to use the PWM method though.

v2:
 - Do not forget to commit the PWM configuration change!

Signed-off-by: Martin Peres <martin.peres@free.fr>
---
 .../nouveau/include/nvkm/subdev/bios/gpio.h   |   1 +
 .../drm/nouveau/include/nvkm/subdev/volt.h    |   1 +
 .../gpu/drm/nouveau/nvkm/engine/device/base.c |  14 +--
 .../gpu/drm/nouveau/nvkm/subdev/volt/Kbuild   |   1 +
 .../gpu/drm/nouveau/nvkm/subdev/volt/gk104.c  | 119 ++++++++++++++++++
 .../gpu/drm/nouveau/nvkm/subdev/volt/priv.h   |   4 +
 6 files changed, 133 insertions(+), 7 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h
index 33be260ddd383..a47d46dda7047 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h
@@ -15,6 +15,7 @@ enum dcb_gpio_func_name {
 	DCB_GPIO_VID5 = 0x74,
 	DCB_GPIO_VID6 = 0x75,
 	DCB_GPIO_VID7 = 0x76,
+	DCB_GPIO_VID_PWM = 0x81,
 };
 
 #define DCB_GPIO_LOG_DIR     0x02
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
index 5c8a3f1196de5..b458d046dba70 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
@@ -18,5 +18,6 @@ int nvkm_volt_get(struct nvkm_volt *);
 int nvkm_volt_set_id(struct nvkm_volt *, u8 id, int condition);
 
 int nv40_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
+int gk104_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
 int gk20a_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 94a906b8cb88b..7c35f1ff8a645 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1673,7 +1673,7 @@ nve4_chipset = {
 	.pmu = gk104_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gk104_volt_new,
 	.ce[0] = gk104_ce_new,
 	.ce[1] = gk104_ce_new,
 	.ce[2] = gk104_ce_new,
@@ -1710,7 +1710,7 @@ nve6_chipset = {
 	.pmu = gk104_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gk104_volt_new,
 	.ce[0] = gk104_ce_new,
 	.ce[1] = gk104_ce_new,
 	.ce[2] = gk104_ce_new,
@@ -1747,7 +1747,7 @@ nve7_chipset = {
 	.pmu = gf119_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gk104_volt_new,
 	.ce[0] = gk104_ce_new,
 	.ce[1] = gk104_ce_new,
 	.ce[2] = gk104_ce_new,
@@ -1808,7 +1808,7 @@ nvf0_chipset = {
 	.pmu = gk110_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gk104_volt_new,
 	.ce[0] = gk104_ce_new,
 	.ce[1] = gk104_ce_new,
 	.ce[2] = gk104_ce_new,
@@ -1844,7 +1844,7 @@ nvf1_chipset = {
 	.pmu = gk110_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gk104_volt_new,
 	.ce[0] = gk104_ce_new,
 	.ce[1] = gk104_ce_new,
 	.ce[2] = gk104_ce_new,
@@ -1880,7 +1880,7 @@ nv106_chipset = {
 	.pmu = gk208_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gk104_volt_new,
 	.ce[0] = gk104_ce_new,
 	.ce[1] = gk104_ce_new,
 	.ce[2] = gk104_ce_new,
@@ -1916,7 +1916,7 @@ nv108_chipset = {
 	.pmu = gk208_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gk104_volt_new,
 	.ce[0] = gk104_ce_new,
 	.ce[1] = gk104_ce_new,
 	.ce[2] = gk104_ce_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild
index 6b46ff4213a31..b035c6e28be8a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild
@@ -1,4 +1,5 @@
 nvkm-y += nvkm/subdev/volt/base.o
 nvkm-y += nvkm/subdev/volt/gpio.o
 nvkm-y += nvkm/subdev/volt/nv40.o
+nvkm-y += nvkm/subdev/volt/gk104.o
 nvkm-y += nvkm/subdev/volt/gk20a.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
new file mode 100644
index 0000000000000..b61509e26ec9f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2015 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres
+ */
+#include "priv.h"
+
+#include <subdev/volt.h>
+#include <subdev/gpio.h>
+#include <subdev/bios.h>
+#include <subdev/bios/volt.h>
+
+#define gk104_volt(p) container_of((p), struct gk104_volt, base)
+struct gk104_volt {
+	struct nvkm_volt base;
+	struct nvbios_volt bios;
+};
+
+int
+gk104_volt_get(struct nvkm_volt *base)
+{
+	struct nvbios_volt *bios = &gk104_volt(base)->bios;
+	struct nvkm_device *device = base->subdev.device;
+	u32 div, duty;
+
+	div  = nvkm_rd32(device, 0x20340);
+	duty = nvkm_rd32(device, 0x20344);
+
+	return bios->base + bios->pwm_range * duty / div;
+}
+
+int
+gk104_volt_set(struct nvkm_volt *base, u32 uv)
+{
+	struct nvbios_volt *bios = &gk104_volt(base)->bios;
+	struct nvkm_device *device = base->subdev.device;
+	u32 div, duty;
+
+	/* the blob uses this crystal frequency, let's use it too. */
+	div = 27648000 / bios->pwm_freq;
+	duty = (uv - bios->base) * div / bios->pwm_range;
+
+	nvkm_wr32(device, 0x20340, div);
+	nvkm_wr32(device, 0x20344, 0x8000000 | duty);
+
+	return 0;
+}
+
+static const struct nvkm_volt_func
+gk104_volt_pwm = {
+	.volt_get = gk104_volt_get,
+	.volt_set = gk104_volt_set,
+}, gk104_volt_gpio = {
+	.vid_get = nvkm_voltgpio_get,
+	.vid_set = nvkm_voltgpio_set,
+};
+
+int
+gk104_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
+{
+	const struct nvkm_volt_func *volt_func = &gk104_volt_gpio;
+	struct dcb_gpio_func gpio;
+	struct nvbios_volt bios;
+	struct gk104_volt *volt;
+	u8 ver, hdr, cnt, len;
+	const char *mode;
+
+	if (!nvbios_volt_parse(device->bios, &ver, &hdr, &cnt, &len, &bios))
+		return 0;
+
+	if (!nvkm_gpio_find(device->gpio, 0, DCB_GPIO_VID_PWM, 0xff, &gpio) &&
+	    bios.type == NVBIOS_VOLT_PWM) {
+		volt_func = &gk104_volt_pwm;
+	}
+
+	if (!(volt = kzalloc(sizeof(*volt), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_volt_ctor(volt_func, device, index, &volt->base);
+	*pvolt = &volt->base;
+	volt->bios = bios;
+
+	/* now that we have a subdev, we can show an error if we found through
+	 * the voltage table that we were supposed to use the PWN mode but we
+	 * did not find the right GPIO for it.
+	 */
+	if (bios.type == NVBIOS_VOLT_PWM && volt_func != &gk104_volt_pwm) {
+		nvkm_error(&volt->base.subdev,
+			   "Type mismatch between the voltage table type and "
+			   "the GPIO table. Fallback to GPIO mode.\n");
+	}
+
+	if (volt_func == &gk104_volt_gpio) {
+		nvkm_voltgpio_init(&volt->base);
+		mode = "GPIO";
+	} else
+		mode = "PWM";
+
+	nvkm_debug(&volt->base.subdev, "Using %s mode\n", mode);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h
index cdb3d9f80aa60..d5140d9911613 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h
@@ -19,4 +19,8 @@ struct nvkm_volt_func {
 int nvkm_voltgpio_init(struct nvkm_volt *);
 int nvkm_voltgpio_get(struct nvkm_volt *);
 int nvkm_voltgpio_set(struct nvkm_volt *, u8);
+
+int nvkm_voltpwm_init(struct nvkm_volt *volt);
+int nvkm_voltpwm_get(struct nvkm_volt *volt);
+int nvkm_voltpwm_set(struct nvkm_volt *volt, u32 uv);
 #endif

From dc47700f7d965ca0c6abeccc8cf467de8a1ce768 Mon Sep 17 00:00:00 2001
From: Martin Peres <martin.peres@free.fr>
Date: Wed, 9 Sep 2015 02:13:30 +0200
Subject: [PATCH 15/45] drm/nouveau/gm107: add voltage control using the new
 gk104 volt class

Let's ignore the other desktop Maxwells until I get my hands on one and confirm
that we still can change the voltage.

Signed-off-by: Martin Peres <martin.peres@free.fr>
---
 drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 7c35f1ff8a645..b324fd193402a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1952,6 +1952,7 @@ nv117_chipset = {
 	.pmu = gm107_pmu_new,
 	.therm = gm107_therm_new,
 	.timer = gk20a_timer_new,
+	.volt = gk104_volt_new,
 	.ce[0] = gk104_ce_new,
 	.ce[2] = gk104_ce_new,
 	.disp = gm107_disp_new,

From 24580d1c310d5987f72adbb49bf3e976a5e5313b Mon Sep 17 00:00:00 2001
From: Martin Peres <martin.peres@free.fr>
Date: Wed, 16 Sep 2015 22:45:33 +0300
Subject: [PATCH 16/45] drm/nouveau/gm204/6: add voltage control using the new
 gk104 volt class

I got confirmation that we can read and change the voltage with the same code.
The divider is also computed correctly on the gm204 we got our hands on.

Thanks to Yoshimo on IRC for executing the tests on his gm204!

Signed-off-by: Martin Peres <martin.peres@free.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index b324fd193402a..8e9b5e8ca7d8d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1982,6 +1982,7 @@ nv124_chipset = {
 	.pci = nv40_pci_new,
 	.pmu = gm107_pmu_new,
 	.timer = gk20a_timer_new,
+	.volt = gk104_volt_new,
 	.ce[0] = gm204_ce_new,
 	.ce[1] = gm204_ce_new,
 	.ce[2] = gm204_ce_new,
@@ -2012,6 +2013,7 @@ nv126_chipset = {
 	.pci = nv40_pci_new,
 	.pmu = gm107_pmu_new,
 	.timer = gk20a_timer_new,
+	.volt = gk104_volt_new,
 	.ce[0] = gm204_ce_new,
 	.ce[1] = gm204_ce_new,
 	.ce[2] = gm204_ce_new,

From b6afa2650cb3ed4ec8ba0c2ccb29c1dc99e039d5 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Thu, 24 Sep 2015 20:26:15 +0200
Subject: [PATCH 17/45] drm/nouveau/ibus/gf100: increase wait timeout to avoid
 read faults

Increase clock timeout of some unknown engines in order to avoid failure
at high gpcclk rate.

This fixes IBUS read faults on my GF119 when reclocking is manually
enabled. Note that memory reclocking is completely broken and NvMemExec
has to be disabled to allow core clock reclocking only.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../drm/nouveau/include/nvkm/subdev/ibus.h    |  1 +
 .../gpu/drm/nouveau/nvkm/engine/device/base.c |  4 +-
 .../gpu/drm/nouveau/nvkm/subdev/ibus/Kbuild   |  1 +
 .../gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c  | 17 ++++++-
 .../gpu/drm/nouveau/nvkm/subdev/ibus/gf117.c  | 51 +++++++++++++++++++
 .../gpu/drm/nouveau/nvkm/subdev/ibus/priv.h   |  7 +++
 6 files changed, 77 insertions(+), 4 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf117.c
 create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/ibus/priv.h

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h
index 9d512cd5a0a7f..c4dcd2680fe16 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ibus.h
@@ -3,6 +3,7 @@
 #include <core/subdev.h>
 
 int gf100_ibus_new(struct nvkm_device *, int, struct nvkm_subdev **);
+int gf117_ibus_new(struct nvkm_device *, int, struct nvkm_subdev **);
 int gk104_ibus_new(struct nvkm_device *, int, struct nvkm_subdev **);
 int gk20a_ibus_new(struct nvkm_device *, int, struct nvkm_subdev **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 8e9b5e8ca7d8d..acc2fe9fb526e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1595,7 +1595,7 @@ nvd7_chipset = {
 	.fuse = gf100_fuse_new,
 	.gpio = gf119_gpio_new,
 	.i2c = gf117_i2c_new,
-	.ibus = gf100_ibus_new,
+	.ibus = gf117_ibus_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
@@ -1628,7 +1628,7 @@ nvd9_chipset = {
 	.fuse = gf100_fuse_new,
 	.gpio = gf119_gpio_new,
 	.i2c = gf119_i2c_new,
-	.ibus = gf100_ibus_new,
+	.ibus = gf117_ibus_new,
 	.imem = nv50_instmem_new,
 	.ltc = gf100_ltc_new,
 	.mc = gf100_mc_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/Kbuild
index a0b12d27284aa..de888fa62b3ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/Kbuild
@@ -1,3 +1,4 @@
 nvkm-y += nvkm/subdev/ibus/gf100.o
+nvkm-y += nvkm/subdev/ibus/gf117.o
 nvkm-y += nvkm/subdev/ibus/gk104.o
 nvkm-y += nvkm/subdev/ibus/gk20a.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c
index 37a0496f7ed18..72d6330d243d2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf100.c
@@ -21,7 +21,7 @@
  *
  * Authors: Ben Skeggs
  */
-#include <subdev/ibus.h>
+#include "priv.h"
 
 static void
 gf100_ibus_intr_hub(struct nvkm_subdev *ibus, int i)
@@ -56,7 +56,7 @@ gf100_ibus_intr_gpc(struct nvkm_subdev *ibus, int i)
 	nvkm_mask(device, 0x128128 + (i * 0x0400), 0x00000200, 0x00000000);
 }
 
-static void
+void
 gf100_ibus_intr(struct nvkm_subdev *ibus)
 {
 	struct nvkm_device *device = ibus->device;
@@ -92,8 +92,21 @@ gf100_ibus_intr(struct nvkm_subdev *ibus)
 	}
 }
 
+static int
+gf100_ibus_init(struct nvkm_subdev *ibus)
+{
+	struct nvkm_device *device = ibus->device;
+	nvkm_mask(device, 0x122310, 0x0003ffff, 0x00000800);
+	nvkm_wr32(device, 0x12232c, 0x00100064);
+	nvkm_wr32(device, 0x122330, 0x00100064);
+	nvkm_wr32(device, 0x122334, 0x00100064);
+	nvkm_mask(device, 0x122348, 0x0003ffff, 0x00000100);
+	return 0;
+}
+
 static const struct nvkm_subdev_func
 gf100_ibus = {
+	.init = gf100_ibus_init,
 	.intr = gf100_ibus_intr,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf117.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf117.c
new file mode 100644
index 0000000000000..f69f263c5906f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gf117.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2015 Samuel Pitosiet
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Samuel Pitoiset
+ */
+#include "priv.h"
+
+static int
+gf117_ibus_init(struct nvkm_subdev *ibus)
+{
+	struct nvkm_device *device = ibus->device;
+	nvkm_mask(device, 0x122310, 0x0003ffff, 0x00000800);
+	nvkm_mask(device, 0x122348, 0x0003ffff, 0x00000100);
+	nvkm_mask(device, 0x1223b0, 0x0003ffff, 0x00000fff);
+	return 0;
+}
+
+static const struct nvkm_subdev_func
+gf117_ibus = {
+	.init = gf117_ibus_init,
+	.intr = gf100_ibus_intr,
+};
+
+int
+gf117_ibus_new(struct nvkm_device *device, int index,
+	       struct nvkm_subdev **pibus)
+{
+	struct nvkm_subdev *ibus;
+	if (!(ibus = *pibus = kzalloc(sizeof(*ibus), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_subdev_ctor(&gf117_ibus, device, index, 0, ibus);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/priv.h
new file mode 100644
index 0000000000000..48e1b6365ce69
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/priv.h
@@ -0,0 +1,7 @@
+#ifndef __NVKM_IBUS_PRIV_H__
+#define __NVKM_IBUS_PRIV_H__
+
+#include <subdev/ibus.h>
+
+void gf100_ibus_intr(struct nvkm_subdev *);
+#endif

From 3e55b53bc7039f1a6b051aa1a5ba15eec6dce2e7 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 1 Oct 2015 09:29:58 +1000
Subject: [PATCH 18/45] drm/nouveau/pci/g84: split implementation from nv50

An upcoming patch will implement functionality that we don't use on the
original NV50.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/include/nvkm/subdev/pci.h |  1 +
 .../gpu/drm/nouveau/nvkm/engine/device/base.c |  6 +--
 .../gpu/drm/nouveau/nvkm/subdev/pci/Kbuild    |  1 +
 drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c | 40 +++++++++++++++++++
 .../gpu/drm/nouveau/nvkm/subdev/pci/nv50.c    |  4 +-
 .../gpu/drm/nouveau/nvkm/subdev/pci/priv.h    |  2 +
 6 files changed, 49 insertions(+), 5 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index 5b3c054f3b551..b5b004bdda097 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -30,5 +30,6 @@ int nv04_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv40_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv4c_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv50_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
+int g84_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index acc2fe9fb526e..13774c18d263c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -929,7 +929,7 @@ nv84_chipset = {
 	.mc = nv50_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv50_pci_new,
+	.pci = g84_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -961,7 +961,7 @@ nv86_chipset = {
 	.mc = nv50_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv50_pci_new,
+	.pci = g84_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -993,7 +993,7 @@ nv92_chipset = {
 	.mc = nv50_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv50_pci_new,
+	.pci = g84_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
index 99672c3d0bad7..57652f5f9bbc0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
@@ -4,4 +4,5 @@ nvkm-y += nvkm/subdev/pci/nv04.o
 nvkm-y += nvkm/subdev/pci/nv40.o
 nvkm-y += nvkm/subdev/pci/nv4c.o
 nvkm-y += nvkm/subdev/pci/nv50.o
+nvkm-y += nvkm/subdev/pci/g84.o
 nvkm-y += nvkm/subdev/pci/gf100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c
new file mode 100644
index 0000000000000..30be277edf2a5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+#include <core/pci.h>
+
+static const struct nvkm_pci_func
+g84_pci_func = {
+	.rd32 = nv40_pci_rd32,
+	.wr08 = nv40_pci_wr08,
+	.wr32 = nv40_pci_wr32,
+	.msi_rearm = nv50_pci_msi_rearm,
+};
+
+int
+g84_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci)
+{
+	return nvkm_pci_new_(&g84_pci_func, device, index, ppci);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv50.c
index 3e167d4a381f4..027481519ff9c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv50.c
@@ -25,10 +25,10 @@
 
 #include <core/pci.h>
 
-/* MSI re-arm through the PRI appears to be broken on the original G80,
+/* MSI re-arm through the PRI appears to be broken on NV50/G84/G86/G92,
  * so we access it via alternate PCI config space mechanisms.
  */
-static void
+void
 nv50_pci_msi_rearm(struct nvkm_pci *pci)
 {
 	struct nvkm_device *device = pci->subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
index d22c2c117106e..d7622aaeb86e8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
@@ -16,4 +16,6 @@ struct nvkm_pci_func {
 u32 nv40_pci_rd32(struct nvkm_pci *, u16);
 void nv40_pci_wr08(struct nvkm_pci *, u16, u8);
 void nv40_pci_wr32(struct nvkm_pci *, u16, u32);
+
+void nv50_pci_msi_rearm(struct nvkm_pci *);
 #endif

From b31505c472f2451c6143bf4727e79ea177a35ca5 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 1 Oct 2015 09:34:45 +1000
Subject: [PATCH 19/45] drm/nouveau/pci/g94: split implementation from nv40

An upcoming patch will implement functionality that we don't use on any
NV40 chipset.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/include/nvkm/subdev/pci.h |  1 +
 .../gpu/drm/nouveau/nvkm/engine/device/base.c | 50 +++++++++----------
 .../gpu/drm/nouveau/nvkm/subdev/pci/Kbuild    |  1 +
 drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c | 38 ++++++++++++++
 .../gpu/drm/nouveau/nvkm/subdev/pci/nv40.c    |  2 +-
 .../gpu/drm/nouveau/nvkm/subdev/pci/priv.h    |  1 +
 6 files changed, 67 insertions(+), 26 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index b5b004bdda097..9c0b5e1605dce 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -31,5 +31,6 @@ int nv40_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv4c_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv50_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int g84_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
+int g94_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 13774c18d263c..727e18e0acd0e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1025,7 +1025,7 @@ nv94_chipset = {
 	.mc = nv50_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -1057,7 +1057,7 @@ nv96_chipset = {
 	.mc = nv50_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -1089,7 +1089,7 @@ nv98_chipset = {
 	.mc = g98_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -1121,7 +1121,7 @@ nva0_chipset = {
 	.mc = g98_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -1153,7 +1153,7 @@ nva3_chipset = {
 	.mc = g98_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gt215_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
@@ -1187,7 +1187,7 @@ nva5_chipset = {
 	.mc = g98_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gt215_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
@@ -1220,7 +1220,7 @@ nva8_chipset = {
 	.mc = g98_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gt215_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
@@ -1253,7 +1253,7 @@ nvaa_chipset = {
 	.mc = g98_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -1285,7 +1285,7 @@ nvac_chipset = {
 	.mc = g98_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -1317,7 +1317,7 @@ nvaf_chipset = {
 	.mc = g98_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gt215_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
@@ -1388,7 +1388,7 @@ nvc1_chipset = {
 	.mc = gf100_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gf100_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
@@ -1423,7 +1423,7 @@ nvc3_chipset = {
 	.mc = gf100_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gf100_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
@@ -1566,7 +1566,7 @@ nvcf_chipset = {
 	.mc = gf100_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gf100_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
@@ -1601,7 +1601,7 @@ nvd7_chipset = {
 	.mc = gf100_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
 	.ce[0] = gf100_ce_new,
@@ -1634,7 +1634,7 @@ nvd9_chipset = {
 	.mc = gf100_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gf119_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
@@ -1669,7 +1669,7 @@ nve4_chipset = {
 	.mc = gf100_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gk104_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
@@ -1706,7 +1706,7 @@ nve6_chipset = {
 	.mc = gf100_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gk104_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
@@ -1743,7 +1743,7 @@ nve7_chipset = {
 	.mc = gf100_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gf119_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
@@ -1804,7 +1804,7 @@ nvf0_chipset = {
 	.mc = gf100_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gk110_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
@@ -1840,7 +1840,7 @@ nvf1_chipset = {
 	.mc = gf100_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gk110_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
@@ -1876,7 +1876,7 @@ nv106_chipset = {
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gk208_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
@@ -1912,7 +1912,7 @@ nv108_chipset = {
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gk208_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
@@ -1948,7 +1948,7 @@ nv117_chipset = {
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gm107_pmu_new,
 	.therm = gm107_therm_new,
 	.timer = gk20a_timer_new,
@@ -1979,7 +1979,7 @@ nv124_chipset = {
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gm107_pmu_new,
 	.timer = gk20a_timer_new,
 	.volt = gk104_volt_new,
@@ -2010,7 +2010,7 @@ nv126_chipset = {
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv40_pci_new,
+	.pci = g94_pci_new,
 	.pmu = gm107_pmu_new,
 	.timer = gk20a_timer_new,
 	.volt = gk104_volt_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
index 57652f5f9bbc0..85cb52501ada0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
@@ -5,4 +5,5 @@ nvkm-y += nvkm/subdev/pci/nv40.o
 nvkm-y += nvkm/subdev/pci/nv4c.o
 nvkm-y += nvkm/subdev/pci/nv50.o
 nvkm-y += nvkm/subdev/pci/g84.o
+nvkm-y += nvkm/subdev/pci/g94.o
 nvkm-y += nvkm/subdev/pci/gf100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
new file mode 100644
index 0000000000000..1714421c25340
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+static const struct nvkm_pci_func
+g94_pci_func = {
+	.rd32 = nv40_pci_rd32,
+	.wr08 = nv40_pci_wr08,
+	.wr32 = nv40_pci_wr32,
+	.msi_rearm = nv40_pci_msi_rearm,
+};
+
+int
+g94_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci)
+{
+	return nvkm_pci_new_(&g94_pci_func, device, index, ppci);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv40.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv40.c
index 090a187f165f3..6eb417765802d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv40.c
@@ -44,7 +44,7 @@ nv40_pci_wr32(struct nvkm_pci *pci, u16 addr, u32 data)
 	nvkm_wr32(device, 0x088000 + addr, data);
 }
 
-static void
+void
 nv40_pci_msi_rearm(struct nvkm_pci *pci)
 {
 	nvkm_pci_wr08(pci, 0x0068, 0xff);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
index d7622aaeb86e8..68ce8e554ae90 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
@@ -16,6 +16,7 @@ struct nvkm_pci_func {
 u32 nv40_pci_rd32(struct nvkm_pci *, u16);
 void nv40_pci_wr08(struct nvkm_pci *, u16, u8);
 void nv40_pci_wr32(struct nvkm_pci *, u16, u32);
+void nv40_pci_msi_rearm(struct nvkm_pci *);
 
 void nv50_pci_msi_rearm(struct nvkm_pci *);
 #endif

From c4266a9c7b0ee66b3d1ca22745d8eb9472e21b4b Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 1 Oct 2015 11:36:58 +1000
Subject: [PATCH 20/45] drm/nouveau/pci/nv46: attempt to fix msi, and re-enable
 by default

Was not able to obtain a trace of NVRM due to kernel version annoyances,
however, experimentally confirmed that the WAR we use on NV50/G8x boards
works here too.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h    |  2 +-
 drivers/gpu/drm/nouveau/nvkm/engine/device/base.c    |  4 ++--
 drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild       |  2 +-
 drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c        |  2 +-
 .../drm/nouveau/nvkm/subdev/pci/{nv50.c => nv46.c}   | 12 ++++++------
 drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h       |  2 +-
 6 files changed, 12 insertions(+), 12 deletions(-)
 rename drivers/gpu/drm/nouveau/nvkm/subdev/pci/{nv50.c => nv46.c} (84%)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index 9c0b5e1605dce..39ca88f36df9e 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -28,8 +28,8 @@ void nvkm_pci_rom_shadow(struct nvkm_pci *, bool shadow);
 
 int nv04_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv40_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
+int nv46_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv4c_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
-int nv50_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int g84_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int g94_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 727e18e0acd0e..2cff79cf683d9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -637,7 +637,7 @@ nv46_chipset = {
 	.imem = nv40_instmem_new,
 	.mc = nv44_mc_new,
 	.mmu = nv44_mmu_new,
-	.pci = nv4c_pci_new,
+	.pci = nv46_pci_new,
 	.therm = nv40_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -822,7 +822,7 @@ nv50_chipset = {
 	.mc = nv50_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = nv50_pci_new,
+	.pci = nv46_pci_new,
 	.therm = nv50_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
index 85cb52501ada0..4476ef75acd64 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
@@ -2,8 +2,8 @@ nvkm-y += nvkm/subdev/pci/agp.o
 nvkm-y += nvkm/subdev/pci/base.o
 nvkm-y += nvkm/subdev/pci/nv04.o
 nvkm-y += nvkm/subdev/pci/nv40.o
+nvkm-y += nvkm/subdev/pci/nv46.o
 nvkm-y += nvkm/subdev/pci/nv4c.o
-nvkm-y += nvkm/subdev/pci/nv50.o
 nvkm-y += nvkm/subdev/pci/g84.o
 nvkm-y += nvkm/subdev/pci/g94.o
 nvkm-y += nvkm/subdev/pci/gf100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c
index 30be277edf2a5..8f3b00199d143 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c
@@ -30,7 +30,7 @@ g84_pci_func = {
 	.rd32 = nv40_pci_rd32,
 	.wr08 = nv40_pci_wr08,
 	.wr32 = nv40_pci_wr32,
-	.msi_rearm = nv50_pci_msi_rearm,
+	.msi_rearm = nv46_pci_msi_rearm,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv46.c
similarity index 84%
rename from drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv50.c
rename to drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv46.c
index 027481519ff9c..fc617e4c0ab6f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/nv46.c
@@ -25,11 +25,11 @@
 
 #include <core/pci.h>
 
-/* MSI re-arm through the PRI appears to be broken on NV50/G84/G86/G92,
+/* MSI re-arm through the PRI appears to be broken on NV46/NV50/G84/G86/G92,
  * so we access it via alternate PCI config space mechanisms.
  */
 void
-nv50_pci_msi_rearm(struct nvkm_pci *pci)
+nv46_pci_msi_rearm(struct nvkm_pci *pci)
 {
 	struct nvkm_device *device = pci->subdev.device;
 	struct pci_dev *pdev = device->func->pci(device)->pdev;
@@ -37,15 +37,15 @@ nv50_pci_msi_rearm(struct nvkm_pci *pci)
 }
 
 static const struct nvkm_pci_func
-nv50_pci_func = {
+nv46_pci_func = {
 	.rd32 = nv40_pci_rd32,
 	.wr08 = nv40_pci_wr08,
 	.wr32 = nv40_pci_wr32,
-	.msi_rearm = nv50_pci_msi_rearm,
+	.msi_rearm = nv46_pci_msi_rearm,
 };
 
 int
-nv50_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci)
+nv46_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci)
 {
-	return nvkm_pci_new_(&nv50_pci_func, device, index, ppci);
+	return nvkm_pci_new_(&nv46_pci_func, device, index, ppci);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
index 68ce8e554ae90..473b8ab253513 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
@@ -18,5 +18,5 @@ void nv40_pci_wr08(struct nvkm_pci *, u16, u8);
 void nv40_pci_wr32(struct nvkm_pci *, u16, u32);
 void nv40_pci_msi_rearm(struct nvkm_pci *);
 
-void nv50_pci_msi_rearm(struct nvkm_pci *);
+void nv46_pci_msi_rearm(struct nvkm_pci *);
 #endif

From 779d16aacc192bc70232304ffc6b86dcedad1b45 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 1 Oct 2015 09:42:54 +1000
Subject: [PATCH 21/45] drm/nouveau/pci: prepare for chipset-specific
 initialisation tasks

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c | 3 +++
 drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
index d1c148e519228..21106223c2c37 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
@@ -111,6 +111,9 @@ nvkm_pci_init(struct nvkm_subdev *subdev)
 			return ret;
 	}
 
+	if (pci->func->init)
+		pci->func->init(pci);
+
 	ret = request_irq(pdev->irq, nvkm_pci_intr, IRQF_SHARED, "nvkm", pci);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
index 473b8ab253513..1acd4bc64d7e0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
@@ -7,6 +7,7 @@ int nvkm_pci_new_(const struct nvkm_pci_func *, struct nvkm_device *,
 		  int index, struct nvkm_pci **);
 
 struct nvkm_pci_func {
+	void (*init)(struct nvkm_pci *);
 	u32 (*rd32)(struct nvkm_pci *, u16 addr);
 	void (*wr08)(struct nvkm_pci *, u16 addr, u8 data);
 	void (*wr32)(struct nvkm_pci *, u16 addr, u32 data);

From 560f989fe4260f0c729d3fc8a8691a3a02815f25 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 1 Oct 2015 14:58:04 +1000
Subject: [PATCH 22/45] drm/nouveau/pmu/gk104: check fuse to determine presence
 of PGOB

Not 100% confirmed, but seems to match from the few boards I've looked
at so far.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
index e33f5c03b9ace..d942fa7b9f187 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
@@ -27,6 +27,7 @@
 #include "fuc/gf119.fuc4.h"
 
 #include <core/option.h>
+#include <subdev/fuse.h>
 #include <subdev/timer.h>
 
 static void
@@ -57,6 +58,9 @@ gk104_pmu_pgob(struct nvkm_pmu *pmu, bool enable)
 {
 	struct nvkm_device *device = pmu->subdev.device;
 
+	if (!(nvkm_fuse_read(device->fuse, 0x31c) & 0x00000001))
+		return;
+
 	nvkm_mask(device, 0x000200, 0x00001000, 0x00000000);
 	nvkm_rd32(device, 0x000200);
 	nvkm_mask(device, 0x000200, 0x08000000, 0x08000000);

From 3c9aca318150ba1152e957a37473ff67d8ebba30 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 1 Oct 2015 15:00:23 +1000
Subject: [PATCH 23/45] drm/nouveau/pmu/gk107: enable PGOB codepaths

Reported to be needed as per fdo#70354 comment #61.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 2cff79cf683d9..bbc9824af6e03 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1744,7 +1744,7 @@ nve7_chipset = {
 	.mmu = gf100_mmu_new,
 	.mxm = nv50_mxm_new,
 	.pci = g94_pci_new,
-	.pmu = gf119_pmu_new,
+	.pmu = gk104_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
 	.volt = gk104_volt_new,

From 4458c5639ba827d494ab06a81d1bb129ca5aea91 Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Wed, 2 Sep 2015 12:08:08 +0530
Subject: [PATCH 24/45] drm/nouveau: remove unused function

coverity.com reported that memset was using a buffer of size 0, on
checking the code it turned out that the function was not being used. So
remove it.

Signed-off-by: Sudip Mukherjee <sudip@vectorindia.org>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/include/nvkm/subdev/bios/pmu.h  |  2 --
 drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c      | 13 -------------
 2 files changed, 15 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pmu.h
index d606875c125ae..3a643df6de049 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/pmu.h
@@ -4,8 +4,6 @@ struct nvbios_pmuT {
 };
 
 u32 nvbios_pmuTe(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
-u32 nvbios_pmuTp(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
-		 struct nvbios_pmuT *);
 
 struct nvbios_pmuE {
 	u8  type;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
index 441ec451b7886..c268e5afe852f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
@@ -61,19 +61,6 @@ nvbios_pmuTe(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 	return data;
 }
 
-u32
-nvbios_pmuTp(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
-	     struct nvbios_pmuT *info)
-{
-	u32 data = nvbios_pmuTe(bios, ver, hdr, cnt, len);
-	memset(info, 0x00, sizeof(*info));
-	switch (!!data * *ver) {
-	default:
-		break;
-	}
-	return data;
-}
-
 u32
 nvbios_pmuEe(struct nvkm_bios *bios, int idx, u8 *ver, u8 *hdr)
 {

From bad4274a69328256eb3eaad4baaefd1b33491f7e Mon Sep 17 00:00:00 2001
From: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Date: Fri, 11 Sep 2015 15:00:56 +0530
Subject: [PATCH 25/45] drm/nouveau: fix memory leak

If pm_runtime_get_sync() we were going to "out" but we missed freeing
vma.

Signed-off-by: Sudip Mukherjee <sudip@vectorindia.org>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_gem.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 6bf1a7895708e..a108cc37512ab 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -84,8 +84,10 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv)
 		}
 
 		ret = pm_runtime_get_sync(dev);
-		if (ret < 0 && ret != -EACCES)
+		if (ret < 0 && ret != -EACCES) {
+			kfree(vma);
 			goto out;
+		}
 
 		ret = nouveau_bo_vma_add(nvbo, cli->vm, vma);
 		if (ret)

From 3988f645f053a6889d00324dac3e57bd62cb8900 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 7 Oct 2015 18:39:32 -0400
Subject: [PATCH 26/45] drm/nouveau/gr: document mp error 0x10

NVIDIA provided the documentation for mp error 0x10, INVALID_ADDR_SPACE,
which apparently happens when trying to use an atomic operation on
local or shared memory (instead of global memory).

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index f1358a564e3e8..dda7a7d224c9b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -882,6 +882,7 @@ static const struct nvkm_enum gf100_mp_warp_error[] = {
 	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
 	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
 	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
+	{ 0x10, "INVALID_ADDR_SPACE" },
 	{ 0x11, "INVALID_PARAM" },
 	{}
 };

From 2e69a5cd25a131cf283d3b655c01fb6db15dd60c Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Wed, 7 Oct 2015 18:39:33 -0400
Subject: [PATCH 27/45] drm/nouveau/gr: add FERMI_COMPUTE_B class to GF110+

GF110+ supports both the A and B compute classes, make sure to accept
both.

Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c | 1 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c | 1 +
 drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
index d13187409d685..d081ee41fc14e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
@@ -98,6 +98,7 @@ gf110_gr = {
 		{ -1, -1, FERMI_B, &gf100_fermi },
 		{ -1, -1, FERMI_C, &gf100_fermi },
 		{ -1, -1, FERMI_COMPUTE_A },
+		{ -1, -1, FERMI_COMPUTE_B },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
index 28483d8bf3d2e..d8e8af4d3b300 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
@@ -135,6 +135,7 @@ gf117_gr = {
 		{ -1, -1, FERMI_B, &gf100_fermi },
 		{ -1, -1, FERMI_C, &gf100_fermi },
 		{ -1, -1, FERMI_COMPUTE_A },
+		{ -1, -1, FERMI_COMPUTE_B },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
index 9811a72e03133..01faf9a737749 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
@@ -189,6 +189,7 @@ gf119_gr = {
 		{ -1, -1, FERMI_B, &gf100_fermi },
 		{ -1, -1, FERMI_C, &gf100_fermi },
 		{ -1, -1, FERMI_COMPUTE_A },
+		{ -1, -1, FERMI_COMPUTE_B },
 		{}
 	}
 };

From 354a22496613366833edfe29300a6bfe6482255c Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sun, 11 Oct 2015 14:18:09 +0200
Subject: [PATCH 28/45] drm/nouveau/disp,pm: constify nvkm_object_func
 structures

These nvkm_object_func structures are never modified.  All other
nvkm_object_func structures are declared as const.

Done with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c | 2 +-
 drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c
index 62d3fb66d0ec2..2be846374d39a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c
@@ -109,7 +109,7 @@ nv04_disp_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 	return -EINVAL;
 }
 
-static struct nvkm_object_func
+static const struct nvkm_object_func
 nv04_disp_root = {
 	.mthd = nv04_disp_mthd,
 	.ntfy = nvkm_disp_ntfy,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c
index 0db9be202c42f..2721592d3031c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c
@@ -633,7 +633,7 @@ nvkm_perfmon_dtor(struct nvkm_object *object)
 	return perfmon;
 }
 
-static struct nvkm_object_func
+static const struct nvkm_object_func
 nvkm_perfmon = {
 	.dtor = nvkm_perfmon_dtor,
 	.mthd = nvkm_perfmon_mthd,

From 5d5b43f59bd214506265ea05a013229ff5291101 Mon Sep 17 00:00:00 2001
From: Pierre Moreau <pierre.morrow@free.fr>
Date: Sat, 3 Oct 2015 21:35:16 +0200
Subject: [PATCH 29/45] drm/nouveau/pci: Handle 5-bit and 8-bit tag field

If the hardware supports extended tag field (8-bit ones), then enable it.

This is usually done by the VBIOS, but not on some MBPs (see fdo#86537).

In case extended tag field is not supported, 5-bit tag field is used which
limits the possible number of requests to 32. Apparently bits 7:0 of
0x08841c stores some number of outstanding requests, so cap it to 32 if
extended tag is unsupported.

Fixes: fdo#86537

v2: Restrict changes to chipsets >= 0x84
v3:
  * Add nvkm_pci_mask to pci.h
  * Mask bit 8 before setting it
v4:
  * Rename `add` argument of nvkm_pci_mask to `value`
  * Move code from nvkm_pci_init to g84_pci_init and remove PCIe and chipset
    checks
v5:
  * Rebase code on latest PCI structure
  * Restore PCIe check
  * Fix namings in nvkm_pci_mask
  * Rephrase part of the commit message

Signed-off-by: Pierre Moreau <pierre.morrow@free.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/include/nvkm/subdev/pci.h |  1 +
 .../gpu/drm/nouveau/nvkm/subdev/pci/base.c    |  8 +++++++
 drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c | 24 +++++++++++++++++++
 drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c |  1 +
 .../gpu/drm/nouveau/nvkm/subdev/pci/gf100.c   |  1 +
 .../gpu/drm/nouveau/nvkm/subdev/pci/priv.h    |  2 ++
 6 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index 39ca88f36df9e..fee0a97c44c5b 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -24,6 +24,7 @@ struct nvkm_pci {
 u32 nvkm_pci_rd32(struct nvkm_pci *, u16 addr);
 void nvkm_pci_wr08(struct nvkm_pci *, u16 addr, u8 data);
 void nvkm_pci_wr32(struct nvkm_pci *, u16 addr, u32 data);
+u32 nvkm_pci_mask(struct nvkm_pci *, u16 addr, u32 mask, u32 value);
 void nvkm_pci_rom_shadow(struct nvkm_pci *, bool shadow);
 
 int nv04_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
index 21106223c2c37..d671dcfaff3ce 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
@@ -46,6 +46,14 @@ nvkm_pci_wr32(struct nvkm_pci *pci, u16 addr, u32 data)
 	pci->func->wr32(pci, addr, data);
 }
 
+u32
+nvkm_pci_mask(struct nvkm_pci *pci, u16 addr, u32 mask, u32 value)
+{
+	u32 data = pci->func->rd32(pci, addr);
+	pci->func->wr32(pci, addr, (data & ~mask) | value);
+	return data;
+}
+
 void
 nvkm_pci_rom_shadow(struct nvkm_pci *pci, bool shadow)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c
index 8f3b00199d143..3faa6bfb895be 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g84.c
@@ -25,8 +25,32 @@
 
 #include <core/pci.h>
 
+void
+g84_pci_init(struct nvkm_pci *pci)
+{
+	/* The following only concerns PCIe cards. */
+	if (!pci_is_pcie(pci->pdev))
+		return;
+
+	/* Tag field is 8-bit long, regardless of EXT_TAG.
+	 * However, if EXT_TAG is disabled, only the lower 5 bits of the tag
+	 * field should be used, limiting the number of request to 32.
+	 *
+	 * Apparently, 0x041c stores some limit on the number of requests
+	 * possible, so if EXT_TAG is disabled, limit that requests number to
+	 * 32
+	 *
+	 * Fixes fdo#86537
+	 */
+	if (nvkm_pci_rd32(pci, 0x007c) & 0x00000020)
+		nvkm_pci_mask(pci, 0x0080, 0x00000100, 0x00000100);
+	else
+		nvkm_pci_mask(pci, 0x041c, 0x00000060, 0x00000000);
+}
+
 static const struct nvkm_pci_func
 g84_pci_func = {
+	.init = g84_pci_init,
 	.rd32 = nv40_pci_rd32,
 	.wr08 = nv40_pci_wr08,
 	.wr32 = nv40_pci_wr32,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
index 1714421c25340..cd311ee311cc4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
@@ -25,6 +25,7 @@
 
 static const struct nvkm_pci_func
 g94_pci_func = {
+	.init = g84_pci_init,
 	.rd32 = nv40_pci_rd32,
 	.wr08 = nv40_pci_wr08,
 	.wr32 = nv40_pci_wr32,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
index 86f8226532d3a..25e1ae70867f3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
@@ -31,6 +31,7 @@ gf100_pci_msi_rearm(struct nvkm_pci *pci)
 
 static const struct nvkm_pci_func
 gf100_pci_func = {
+	.init = g84_pci_init,
 	.rd32 = nv40_pci_rd32,
 	.wr08 = nv40_pci_wr08,
 	.wr32 = nv40_pci_wr32,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
index 1acd4bc64d7e0..cf46d38d0b0a3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
@@ -20,4 +20,6 @@ void nv40_pci_wr32(struct nvkm_pci *, u16, u32);
 void nv40_pci_msi_rearm(struct nvkm_pci *);
 
 void nv46_pci_msi_rearm(struct nvkm_pci *);
+
+void g84_pci_init(struct nvkm_pci *pci);
 #endif

From b4f2bf33bb1d50f1b52fae673dc85dfd750e6712 Mon Sep 17 00:00:00 2001
From: Roy Spliet <rspliet@eclipso.eu>
Date: Wed, 30 Sep 2015 00:23:44 +0100
Subject: [PATCH 30/45] drm/nouveau/bios/rammap: Identify DLLoff for >= GF100

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../nouveau/include/nvkm/subdev/bios/ramcfg.h |  1 -
 .../gpu/drm/nouveau/nvkm/subdev/bios/rammap.c |  2 +-
 .../gpu/drm/nouveau/nvkm/subdev/fb/gddr5.c    |  3 +-
 .../gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c | 41 +++++++++++++++----
 .../gpu/drm/nouveau/nvkm/subdev/fb/sddr3.c    |  4 +-
 5 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h
index 3a9abd38aca8d..dd48db7fee066 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h
@@ -78,7 +78,6 @@ struct nvbios_ramcfg {
 			unsigned ramcfg_11_01_04:1;
 			unsigned ramcfg_11_01_08:1;
 			unsigned ramcfg_11_01_10:1;
-			unsigned ramcfg_11_01_20:1;
 			unsigned ramcfg_11_01_40:1;
 			unsigned ramcfg_11_01_80:1;
 			unsigned ramcfg_11_02_03:2;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
index f0e1fc74a52e3..3bbb1a76c3785 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
@@ -219,7 +219,7 @@ nvbios_rammapSp(struct nvkm_bios *bios, u32 data,
 		p->ramcfg_11_01_04 = (nvbios_rd08(bios, data + 0x01) & 0x04) >> 2;
 		p->ramcfg_11_01_08 = (nvbios_rd08(bios, data + 0x01) & 0x08) >> 3;
 		p->ramcfg_11_01_10 = (nvbios_rd08(bios, data + 0x01) & 0x10) >> 4;
-		p->ramcfg_11_01_20 = (nvbios_rd08(bios, data + 0x01) & 0x20) >> 5;
+		p->ramcfg_DLLoff =   (nvbios_rd08(bios, data + 0x01) & 0x20) >> 5;
 		p->ramcfg_11_01_40 = (nvbios_rd08(bios, data + 0x01) & 0x40) >> 6;
 		p->ramcfg_11_01_80 = (nvbios_rd08(bios, data + 0x01) & 0x80) >> 7;
 		p->ramcfg_11_02_03 = (nvbios_rd08(bios, data + 0x02) & 0x03) >> 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr5.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr5.c
index 24f83b09e6a1c..2cc074d3901af 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr5.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr5.c
@@ -38,11 +38,12 @@ nvkm_gddr5_calc(struct nvkm_ram *ram, bool nuts)
 	int WL, CL, WR, at[2], dt, ds;
 	int rq = ram->freq < 1000000; /* XXX */
 
+	xd = !ram->next->bios.ramcfg_DLLoff;
+
 	switch (ram->next->bios.ramcfg_ver) {
 	case 0x11:
 		pd =  ram->next->bios.ramcfg_11_01_80;
 		lf =  ram->next->bios.ramcfg_11_01_40;
-		xd = !ram->next->bios.ramcfg_11_01_20;
 		vh =  ram->next->bios.ramcfg_11_02_10;
 		vr =  ram->next->bios.ramcfg_11_02_04;
 		vo =  ram->next->bios.ramcfg_11_06;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
index 989355622aac8..0d2056382c78e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
@@ -673,6 +673,25 @@ gk104_ram_calc_gddr5(struct gk104_ram *ram, u32 freq)
  * DDR3
  ******************************************************************************/
 
+static void
+nvkm_sddr3_dll_reset(struct gk104_ramfuc *fuc)
+{
+	ram_nuke(fuc, mr[0]);
+	ram_mask(fuc, mr[0], 0x100, 0x100);
+	ram_mask(fuc, mr[0], 0x100, 0x000);
+}
+
+static void
+nvkm_sddr3_dll_disable(struct gk104_ramfuc *fuc)
+{
+	u32 mr1_old = ram_rd32(fuc, mr[1]);
+
+	if (!(mr1_old & 0x1)) {
+		ram_mask(fuc, mr[1], 0x1, 0x1);
+		ram_nsec(fuc, 1000);
+	}
+}
+
 static int
 gk104_ram_calc_sddr3(struct gk104_ram *ram, u32 freq)
 {
@@ -702,6 +721,10 @@ gk104_ram_calc_sddr3(struct gk104_ram *ram, u32 freq)
 		ram_mask(fuc, 0x10f808, 0x04000000, 0x04000000);
 
 	ram_wr32(fuc, 0x10f314, 0x00000001); /* PRECHARGE */
+
+	if (next->bios.ramcfg_DLLoff)
+		nvkm_sddr3_dll_disable(fuc);
+
 	ram_wr32(fuc, 0x10f210, 0x00000000); /* REFRESH_AUTO = 0 */
 	ram_wr32(fuc, 0x10f310, 0x00000001); /* REFRESH */
 	ram_mask(fuc, 0x10f200, 0x80000000, 0x80000000);
@@ -879,17 +902,20 @@ gk104_ram_calc_sddr3(struct gk104_ram *ram, u32 freq)
 	ram_wr32(fuc, 0x10f210, 0x80000000); /* REFRESH_AUTO = 1 */
 	ram_nsec(fuc, 1000);
 
-	ram_nuke(fuc, mr[0]);
-	ram_mask(fuc, mr[0], 0x100, 0x100);
-	ram_mask(fuc, mr[0], 0x100, 0x000);
+	if (!next->bios.ramcfg_DLLoff) {
+		ram_mask(fuc, mr[1], 0x1, 0x0);
+		nvkm_sddr3_dll_reset(fuc);
+	}
 
-	ram_mask(fuc, mr[2], 0xfff, ram->base.mr[2]);
+	ram_mask(fuc, mr[2], 0x00000fff, ram->base.mr[2]);
+	ram_mask(fuc, mr[1], 0xffffffff, ram->base.mr[1]);
 	ram_wr32(fuc, mr[0], ram->base.mr[0]);
 	ram_nsec(fuc, 1000);
 
-	ram_nuke(fuc, mr[0]);
-	ram_mask(fuc, mr[0], 0x100, 0x100);
-	ram_mask(fuc, mr[0], 0x100, 0x000);
+	if (!next->bios.ramcfg_DLLoff) {
+		nvkm_sddr3_dll_reset(fuc);
+		ram_nsec(fuc, 1000);
+	}
 
 	if (vc == 0 && ram_have(fuc, gpio2E)) {
 		u32 temp  = ram_mask(fuc, gpio2E, 0x3000, fuc->r_func2E[0]);
@@ -1600,6 +1626,7 @@ gk104_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 		break;
 	case NVKM_RAM_TYPE_DDR3:
 		ram->fuc.r_mr[0] = ramfuc_reg(0x10f300);
+		ram->fuc.r_mr[1] = ramfuc_reg(0x10f304);
 		ram->fuc.r_mr[2] = ramfuc_reg(0x10f320);
 		break;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr3.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr3.c
index b4edc97dc8c58..26900333b1d61 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr3.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr3.c
@@ -70,6 +70,8 @@ nvkm_sddr3_calc(struct nvkm_ram *ram)
 {
 	int CWL, CL, WR, DLL = 0, ODT = 0;
 
+	DLL = !ram->next->bios.ramcfg_DLLoff;
+
 	switch (ram->next->bios.timing_ver) {
 	case 0x10:
 		if (ram->next->bios.timing_hdr < 0x17) {
@@ -79,7 +81,6 @@ nvkm_sddr3_calc(struct nvkm_ram *ram)
 		CWL = ram->next->bios.timing_10_CWL;
 		CL  = ram->next->bios.timing_10_CL;
 		WR  = ram->next->bios.timing_10_WR;
-		DLL = !ram->next->bios.ramcfg_DLLoff;
 		ODT = ram->next->bios.timing_10_ODT;
 		break;
 	case 0x20:
@@ -87,7 +88,6 @@ nvkm_sddr3_calc(struct nvkm_ram *ram)
 		CL  = (ram->next->bios.timing[1] & 0x0000001f) >> 0;
 		WR  = (ram->next->bios.timing[2] & 0x007f0000) >> 16;
 		/* XXX: Get these values from the VBIOS instead */
-		DLL = !(ram->mr[1] & 0x1);
 		ODT =   (ram->mr[1] & 0x004) >> 2 |
 			(ram->mr[1] & 0x040) >> 5 |
 		        (ram->mr[1] & 0x200) >> 7;

From e0a37f85fc95e3f2550446316bc4a27d00d75993 Mon Sep 17 00:00:00 2001
From: Roy Spliet <rspliet@eclipso.eu>
Date: Wed, 30 Sep 2015 00:23:45 +0100
Subject: [PATCH 31/45] drm/nouveau/fb/ramgt215: Transform GPIO ramfuc method
 from FBVREF-specific to generic

In preparation of changing FBVDDQ, as observed on at least one GDDR3 card.
While at it, adhere to func.log[1] properly for consistency.

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c | 41 ++++++++-----------
 .../gpu/drm/nouveau/nvkm/subdev/gpio/nv50.c   |  2 +-
 2 files changed, 19 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
index 5c08ae8023fa6..0c28f38cb8b38 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
@@ -34,9 +34,6 @@
 #include <subdev/clk/gt215.h>
 #include <subdev/gpio.h>
 
-/* XXX: Remove when memx gains GPIO support */
-extern int nv50_gpio_location(int line, u32 *reg, u32 *shift);
-
 struct gt215_ramfuc {
 	struct ramfuc base;
 	struct ramfuc_reg r_0x001610;
@@ -75,7 +72,7 @@ struct gt215_ramfuc {
 	struct ramfuc_reg r_0x111400;
 	struct ramfuc_reg r_0x611200;
 	struct ramfuc_reg r_mr[4];
-	struct ramfuc_reg r_gpioFBVREF;
+	struct ramfuc_reg r_gpio[4];
 };
 
 struct gt215_ltrain {
@@ -466,24 +463,27 @@ gt215_ram_lock_pll(struct gt215_ramfuc *fuc, struct gt215_clk_info *mclk)
 }
 
 static void
-gt215_ram_fbvref(struct gt215_ramfuc *fuc, u32 val)
+gt215_ram_gpio(struct gt215_ramfuc *fuc, u8 tag, u32 val)
 {
 	struct nvkm_gpio *gpio = fuc->base.fb->subdev.device->gpio;
 	struct dcb_gpio_func func;
 	u32 reg, sh, gpio_val;
 	int ret;
 
-	if (nvkm_gpio_get(gpio, 0, 0x2e, DCB_GPIO_UNUSED) != val) {
-		ret = nvkm_gpio_find(gpio, 0, 0x2e, DCB_GPIO_UNUSED, &func);
+	if (nvkm_gpio_get(gpio, 0, tag, DCB_GPIO_UNUSED) != val) {
+		ret = nvkm_gpio_find(gpio, 0, tag, DCB_GPIO_UNUSED, &func);
 		if (ret)
 			return;
 
-		nv50_gpio_location(func.line, &reg, &sh);
-		gpio_val = ram_rd32(fuc, gpioFBVREF);
+		reg = func.line >> 3;
+		sh = (func.line & 0x7) << 2;
+		gpio_val = ram_rd32(fuc, gpio[reg]);
 		if (gpio_val & (8 << sh))
 			val = !val;
+		if (!(func.log[1] & 1))
+			val = !val;
 
-		ram_mask(fuc, gpioFBVREF, (0x3 << sh), ((val | 0x2) << sh));
+		ram_mask(fuc, gpio[reg], (0x3 << sh), ((val | 0x2) << sh));
 		ram_nsec(fuc, 20000);
 	}
 }
@@ -642,8 +642,8 @@ gt215_ram_calc(struct nvkm_ram *base, u32 freq)
 		break;
 	}
 
-	if (fuc->r_gpioFBVREF.addr && next->bios.timing_10_ODT)
-		gt215_ram_fbvref(fuc, 0);
+	if (next->bios.timing_10_ODT)
+		gt215_ram_gpio(fuc, 0x2e, 1);
 
 	/* Brace RAM for impact */
 	ram_wr32(fuc, 0x1002d4, 0x00000001);
@@ -809,8 +809,8 @@ gt215_ram_calc(struct nvkm_ram *base, u32 freq)
 	ram_mask(fuc, 0x100718, 0xffffffff, unk718);
 	ram_mask(fuc, 0x111100, 0xffffffff, r111100);
 
-	if (fuc->r_gpioFBVREF.addr && !next->bios.timing_10_ODT)
-		gt215_ram_fbvref(fuc, 1);
+	if (!next->bios.timing_10_ODT)
+		gt215_ram_gpio(fuc, 0x2e, 0);
 
 	/* Reset DLL */
 	if (!next->bios.ramcfg_DLLoff)
@@ -919,10 +919,7 @@ gt215_ram_func = {
 int
 gt215_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 {
-	struct nvkm_gpio *gpio = fb->subdev.device->gpio;
-	struct dcb_gpio_func func;
 	struct gt215_ram *ram;
-	u32 reg, shift;
 	int ret, i;
 
 	if (!(ram = kzalloc(sizeof(*ram), GFP_KERNEL)))
@@ -981,12 +978,10 @@ gt215_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 		ram->fuc.r_mr[2] = ramfuc_reg(0x1002e0);
 		ram->fuc.r_mr[3] = ramfuc_reg(0x1002e4);
 	}
-
-	ret = nvkm_gpio_find(gpio, 0, 0x2e, DCB_GPIO_UNUSED, &func);
-	if (ret == 0) {
-		nv50_gpio_location(func.line, &reg, &shift);
-		ram->fuc.r_gpioFBVREF = ramfuc_reg(reg);
-	}
+	ram->fuc.r_gpio[0] = ramfuc_reg(0x00e104);
+	ram->fuc.r_gpio[1] = ramfuc_reg(0x00e108);
+	ram->fuc.r_gpio[2] = ramfuc_reg(0x00e120);
+	ram->fuc.r_gpio[3] = ramfuc_reg(0x00e124);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/nv50.c
index 8996649209ab1..73923fd5f7f2e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/nv50.c
@@ -54,7 +54,7 @@ nv50_gpio_reset(struct nvkm_gpio *gpio, u8 match)
 	}
 }
 
-int
+static int
 nv50_gpio_location(int line, u32 *reg, u32 *shift)
 {
 	const u32 nv50_gpio_reg[4] = { 0xe104, 0xe108, 0xe280, 0xe284 };

From ef6e8f4c7fd017ee66fc4b0fd3cfeae48c2e26d5 Mon Sep 17 00:00:00 2001
From: Roy Spliet <rspliet@eclipso.eu>
Date: Wed, 30 Sep 2015 00:23:46 +0100
Subject: [PATCH 32/45] drm/nouveau/fb/ramgt215: Change FBVDD/Q when BIOS asks
 for it

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../nouveau/include/nvkm/subdev/bios/ramcfg.h  |  1 +
 .../gpu/drm/nouveau/nvkm/subdev/bios/rammap.c  |  1 +
 .../gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c  | 18 ++++++++++++++++++
 3 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h
index dd48db7fee066..dca6c060a24f4 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/ramcfg.h
@@ -39,6 +39,7 @@ struct nvbios_ramcfg {
 	unsigned ramcfg_timing;
 	unsigned ramcfg_DLLoff;
 	unsigned ramcfg_RON;
+	unsigned ramcfg_FBVDDQ;
 	union {
 		struct {
 			unsigned ramcfg_00_03_01:1;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
index 3bbb1a76c3785..74a4ab5b6ad1b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
@@ -205,6 +205,7 @@ nvbios_rammapSp(struct nvkm_bios *bios, u32 data,
 		p->ramcfg_DLLoff   = (nvbios_rd08(bios, data + 0x02) & 0x40) >> 6;
 		p->ramcfg_10_03_0f = (nvbios_rd08(bios, data + 0x03) & 0x0f) >> 0;
 		p->ramcfg_10_04_01 = (nvbios_rd08(bios, data + 0x04) & 0x01) >> 0;
+		p->ramcfg_FBVDDQ   = (nvbios_rd08(bios, data + 0x04) & 0x08) >> 3;
 		p->ramcfg_10_05    = (nvbios_rd08(bios, data + 0x05) & 0xff) >> 0;
 		p->ramcfg_10_06    = (nvbios_rd08(bios, data + 0x06) & 0xff) >> 0;
 		p->ramcfg_10_07    = (nvbios_rd08(bios, data + 0x07) & 0xff) >> 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
index 0c28f38cb8b38..8d81bf1e9a555 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
@@ -498,6 +498,7 @@ gt215_ram_calc(struct nvkm_ram *base, u32 freq)
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_bios *bios = device->bios;
 	struct gt215_clk_info mclk;
+	struct nvkm_gpio *gpio = device->gpio;
 	struct nvkm_ram_data *next;
 	u8  ver, hdr, cnt, len, strap;
 	u32 data;
@@ -656,6 +657,23 @@ gt215_ram_calc(struct nvkm_ram *base, u32 freq)
 	if (device->chipset == 0xa3 && freq <= 500000)
 		ram_mask(fuc, 0x100700, 0x00000006, 0x00000006);
 
+	/* Alter FBVDD/Q, apparently must be done with PLL disabled, thus
+	 * set it to bypass */
+	if (nvkm_gpio_get(gpio, 0, 0x18, DCB_GPIO_UNUSED) ==
+			next->bios.ramcfg_FBVDDQ) {
+		data = ram_rd32(fuc, 0x004000) & 0x9;
+
+		if (data == 0x1)
+			ram_mask(fuc, 0x004000, 0x8, 0x8);
+		if (data & 0x1)
+			ram_mask(fuc, 0x004000, 0x1, 0x0);
+
+		gt215_ram_gpio(fuc, 0x18, !next->bios.ramcfg_FBVDDQ);
+
+		if (data & 0x1)
+			ram_mask(fuc, 0x004000, 0x1, 0x1);
+	}
+
 	/* Fiddle with clocks */
 	/* There's 4 scenario's
 	 * pll->pll: first switch to a 324MHz clock, set up new PLL, switch

From 0b0b78cd7def0fc001fc8cefa8621823bff6a8e5 Mon Sep 17 00:00:00 2001
From: Roy Spliet <rspliet@eclipso.eu>
Date: Wed, 30 Sep 2015 00:23:47 +0100
Subject: [PATCH 33/45] drm/nouveau/fb/ramgt215: Restructure r111100
 calculation for DDR2

Seems to be mostly equal to DDR3 on < GT218, should improve stability for
DDR2 reclocks.

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c | 64 ++++++++++---------
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
index 8d81bf1e9a555..d15ea886df270 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
@@ -771,39 +771,43 @@ gt215_ram_calc(struct nvkm_ram *base, u32 freq)
 	unk71c  = ram_rd32(fuc, 0x10071c) & ~0x00000100;
 	r111100 = ram_rd32(fuc, 0x111100) & ~0x3a800000;
 
-	if (next->bios.ramcfg_10_02_04) {
-		switch (ram->base.type) {
-		case NVKM_RAM_TYPE_DDR3:
-			if (device->chipset != 0xa8)
-				r111100 |= 0x00000004;
-			/* no break */
-		case NVKM_RAM_TYPE_DDR2:
-			r111100 |= 0x08000000;
-			break;
-		default:
-			break;
-		}
-	} else {
-		switch (ram->base.type) {
-		case NVKM_RAM_TYPE_DDR2:
-			r111100 |= 0x1a800000;
+	/* NVA8 seems to skip various bits related to ramcfg_10_02_04 */
+	if (device->chipset == 0xa8) {
+		r111100 |= 0x08000000;
+		if (!next->bios.ramcfg_10_02_04)
 			unk714  |= 0x00000010;
-			break;
-		case NVKM_RAM_TYPE_DDR3:
-			if (device->chipset == 0xa8) {
-				r111100 |=  0x08000000;
-			} else {
-				r111100 &= ~0x00000004;
+	} else {
+		if (next->bios.ramcfg_10_02_04) {
+			switch (ram->base.type) {
+			case NVKM_RAM_TYPE_DDR2:
+			case NVKM_RAM_TYPE_DDR3:
+				r111100 &= ~0x00000020;
+				if (next->bios.ramcfg_10_02_10)
+					r111100 |= 0x08000004;
+				else
+					r111100 |= 0x00000024;
+				break;
+			default:
+				break;
+			}
+		} else {
+			switch (ram->base.type) {
+			case NVKM_RAM_TYPE_DDR2:
+			case NVKM_RAM_TYPE_DDR3:
+				r111100 &= ~0x00000024;
 				r111100 |=  0x12800000;
+
+				if (next->bios.ramcfg_10_02_10)
+					r111100 |= 0x08000000;
+				unk714  |= 0x00000010;
+				break;
+			case NVKM_RAM_TYPE_GDDR3:
+				r111100 |= 0x30000000;
+				unk714  |= 0x00000020;
+				break;
+			default:
+				break;
 			}
-			unk714  |= 0x00000010;
-			break;
-		case NVKM_RAM_TYPE_GDDR3:
-			r111100 |= 0x30000000;
-			unk714  |= 0x00000020;
-			break;
-		default:
-			break;
 		}
 	}
 

From 1cf688dd1b6f8f3ab998000e0f14b37041c77e30 Mon Sep 17 00:00:00 2001
From: Roy Spliet <rspliet@eclipso.eu>
Date: Wed, 30 Sep 2015 00:23:48 +0100
Subject: [PATCH 34/45] drm/nouveau/fb/ramnv50: Voltage GPIOs

Does not seem to be necessary for NVA0, hence untested by me.

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
Tested-by: Pierre Moreau <pierre.morrow@free.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/nvkm/subdev/bios/rammap.c |  1 +
 .../gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c  | 41 +++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
index 74a4ab5b6ad1b..d0ae7454764ea 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
@@ -171,6 +171,7 @@ nvbios_rammapSp_from_perf(struct nvkm_bios *bios, u32 data, u8 size, int idx,
 	p->ramcfg_DLLoff   = (nvbios_rd08(bios, data + 0x03) & 0x04) >> 2;
 	p->ramcfg_00_03_08 = (nvbios_rd08(bios, data + 0x03) & 0x08) >> 3;
 	p->ramcfg_RON      = (nvbios_rd08(bios, data + 0x03) & 0x10) >> 3;
+	p->ramcfg_FBVDDQ   = (nvbios_rd08(bios, data + 0x03) & 0x80) >> 7;
 	p->ramcfg_00_04_02 = (nvbios_rd08(bios, data + 0x04) & 0x02) >> 1;
 	p->ramcfg_00_04_04 = (nvbios_rd08(bios, data + 0x04) & 0x04) >> 2;
 	p->ramcfg_00_04_20 = (nvbios_rd08(bios, data + 0x04) & 0x20) >> 5;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
index 9197e0ef5cdb9..ae6b0c4043d3c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
@@ -33,6 +33,7 @@
 #include <subdev/bios/rammap.h>
 #include <subdev/bios/timing.h>
 #include <subdev/clk/pll.h>
+#include <subdev/gpio.h>
 
 struct nv50_ramseq {
 	struct hwsq base;
@@ -59,6 +60,7 @@ struct nv50_ramseq {
 	struct hwsq_reg r_0x611200;
 	struct hwsq_reg r_timing[9];
 	struct hwsq_reg r_mr[4];
+	struct hwsq_reg r_gpio[4];
 };
 
 struct nv50_ram {
@@ -154,6 +156,33 @@ nvkm_sddr2_dll_reset(struct nv50_ramseq *hwsq)
 	ram_nsec(hwsq, 24000);
 }
 
+static void
+nv50_ram_gpio(struct nv50_ramseq *hwsq, u8 tag, u32 val)
+{
+	struct nvkm_gpio *gpio = hwsq->base.subdev->device->gpio;
+	struct dcb_gpio_func func;
+	u32 reg, sh, gpio_val;
+	int ret;
+
+	if (nvkm_gpio_get(gpio, 0, tag, DCB_GPIO_UNUSED) != val) {
+		ret = nvkm_gpio_find(gpio, 0, tag, DCB_GPIO_UNUSED, &func);
+		if (ret)
+			return;
+
+		reg = func.line >> 3;
+		sh = (func.line & 0x7) << 2;
+		gpio_val = ram_rd32(hwsq, gpio[reg]);
+
+		if (gpio_val & (8 << sh))
+			val = !val;
+		if (!(func.log[1] & 1))
+			val = !val;
+
+		ram_mask(hwsq, gpio[reg], (0x3 << sh), ((val | 0x2) << sh));
+		ram_nsec(hwsq, 20000);
+	}
+}
+
 static int
 nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 {
@@ -250,6 +279,9 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 	ram_wait(hwsq, 0x00, 0x01); /* wait for fb disabled */
 	ram_nsec(hwsq, 2000);
 
+	if (next->bios.timing_10_ODT)
+		nv50_ram_gpio(hwsq, 0x2e, 1);
+
 	ram_wr32(hwsq, 0x1002d4, 0x00000001); /* precharge */
 	ram_wr32(hwsq, 0x1002d0, 0x00000001); /* refresh */
 	ram_wr32(hwsq, 0x1002d0, 0x00000001); /* refresh */
@@ -288,6 +320,7 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 	ram_mask(hwsq, 0x004008, 0x91ff0000, r004008);
 	if (subdev->device->chipset >= 0x96)
 		ram_wr32(hwsq, 0x100da0, r100da0);
+	nv50_ram_gpio(hwsq, 0x18, !next->bios.ramcfg_FBVDDQ);
 	ram_nsec(hwsq, 64000); /*XXX*/
 	ram_nsec(hwsq, 32000); /*XXX*/
 
@@ -364,6 +397,9 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 	}
 	ram_mask(hwsq, mr[1], 0xffffffff, ram->base.mr[1]);
 
+	if (!next->bios.timing_10_ODT)
+		nv50_ram_gpio(hwsq, 0x2e, 0);
+
 	/* Reset DLL */
 	if (!next->bios.ramcfg_DLLoff)
 		nvkm_sddr2_dll_reset(hwsq);
@@ -634,5 +670,10 @@ nv50_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 		ram->hwsq.r_mr[3] = hwsq_reg(0x1002e4);
 	}
 
+	ram->hwsq.r_gpio[0] = hwsq_reg(0x00e104);
+	ram->hwsq.r_gpio[1] = hwsq_reg(0x00e108);
+	ram->hwsq.r_gpio[2] = hwsq_reg(0x00e120);
+	ram->hwsq.r_gpio[3] = hwsq_reg(0x00e124);
+
 	return 0;
 }

From 797eb6ed8f13bb2f88bd605515990ef795a1eff7 Mon Sep 17 00:00:00 2001
From: Roy Spliet <rspliet@eclipso.eu>
Date: Wed, 30 Sep 2015 00:23:49 +0100
Subject: [PATCH 35/45] drm/nouveau/fb/ramnv50: Deal with cards without timing
 entries

Like Pierre's G94. We might want to structure Kepler similarly in a follow-up.

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
Tested-by: Pierre Moreau <pierre.morrow@free.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c    | 10 +++--
 .../gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c  | 41 +++++++++++++++++--
 .../gpu/drm/nouveau/nvkm/subdev/fb/sddr2.c    |  6 +++
 3 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c
index 79b523aa52aad..60ece0a8a2e1b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c
@@ -63,7 +63,7 @@ ramgddr3_wr_lo[] = {
 	{ 5, 2 }, { 7, 4 }, { 8, 5 }, { 9, 6 }, { 10, 7 },
 	{ 11, 0 }, { 13 , 1 },
 	/* the below are mentioned in some, but not all, gddr3 docs */
-	{ 4, 1 }, { 6, 3 }, { 12, 1 },
+	{ 4, 0 }, { 6, 3 }, { 12, 1 },
 	{ -1 }
 };
 
@@ -87,15 +87,17 @@ nvkm_gddr3_calc(struct nvkm_ram *ram)
 		WR  = (ram->next->bios.timing[2] & 0x007f0000) >> 16;
 		/* XXX: Get these values from the VBIOS instead */
 		DLL = !(ram->mr[1] & 0x1);
-		ODT =  (ram->mr[1] & 0x004) >> 2 |
-		       (ram->mr[1] & 0x040) >> 5 |
-		       (ram->mr[1] & 0x200) >> 7;
 		RON = !(ram->mr[1] & 0x300) >> 8;
 		break;
 	default:
 		return -ENOSYS;
 	}
 
+	if (ram->next->bios.timing_ver == 0x20 ||
+	    ram->next->bios.ramcfg_timing == 0xff) {
+		ODT =  (ram->mr[1] & 0xc) >> 2;
+	}
+
 	hi = ram->mr[2] & 0x1;
 	CL  = ramxlat(hi ? ramgddr3_cl_hi : ramgddr3_cl_lo, CL);
 	WR  = ramxlat(ramgddr3_wr_lo, WR);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
index ae6b0c4043d3c..1c6ae6bcd5736 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
@@ -146,6 +146,38 @@ nv50_ram_timing_calc(struct nv50_ram *ram, u32 *timing)
 	nvkm_debug(subdev, " 240: %08x\n", timing[8]);
 	return 0;
 }
+
+static int
+nv50_ram_timing_read(struct nv50_ram *ram, u32 *timing)
+{
+	unsigned int i;
+	struct nvbios_ramcfg *cfg = &ram->base.target.bios;
+	struct nvkm_subdev *subdev = &ram->base.fb->subdev;
+	struct nvkm_device *device = subdev->device;
+
+	for (i = 0; i <= 8; i++)
+		timing[i] = nvkm_rd32(device, 0x100220 + (i * 4));
+
+	/* Derive the bare minimum for the MR calculation to succeed */
+	cfg->timing_ver = 0x10;
+	T(CL) = (timing[3] & 0xff) + 1;
+
+	switch (ram->base.type) {
+	case NVKM_RAM_TYPE_DDR2:
+		T(CWL) = T(CL) - 1;
+		break;
+	case NVKM_RAM_TYPE_GDDR3:
+		T(CWL) = ((timing[2] & 0xff000000) >> 24) + 1;
+		break;
+	default:
+		return -ENOSYS;
+		break;
+	}
+
+	T(WR) = ((timing[1] >> 24) & 0xff) - 1 - T(CWL);
+
+	return 0;
+}
 #undef T
 
 static void
@@ -242,10 +274,11 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 				 strap, data, ver, hdr);
 			return -EINVAL;
 		}
+		nv50_ram_timing_calc(ram, timing);
+	} else {
+		nv50_ram_timing_read(ram, timing);
 	}
 
-	nv50_ram_timing_calc(ram, timing);
-
 	ret = ram_init(hwsq, subdev);
 	if (ret)
 		return ret;
@@ -264,8 +297,10 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 		break;
 	}
 
-	if (ret)
+	if (ret) {
+		nvkm_error(subdev, "Could not calculate MR\n");
 		return ret;
+	}
 
 	/* Always disable this bit during reclock */
 	ram_mask(hwsq, 0x100200, 0x00000800, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr2.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr2.c
index 86bf67456b143..b9f1ffdfc6026 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr2.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr2.c
@@ -76,6 +76,12 @@ nvkm_sddr2_calc(struct nvkm_ram *ram)
 		return -ENOSYS;
 	}
 
+	if (ram->next->bios.timing_ver == 0x20 ||
+	    ram->next->bios.ramcfg_timing == 0xff) {
+		ODT =  (ram->mr[1] & 0x004) >> 2 |
+		       (ram->mr[1] & 0x040) >> 5;
+	}
+
 	CL  = ramxlat(ramddr2_cl, CL);
 	WR  = ramxlat(ramddr2_wr, WR);
 	if (CL < 0 || WR < 0)

From 4d9faafa0fdda2f4ba04b5cdffc0af1bab2312f4 Mon Sep 17 00:00:00 2001
From: Roy Spliet <rspliet@eclipso.eu>
Date: Wed, 30 Sep 2015 00:23:50 +0100
Subject: [PATCH 36/45] drm/nouveau/fb/ramnv50: Script changes for G94 and up

10053c is not even read on some cards, and I have no idea exactly what the
criteria are. Likely NVIDIA pre-scans the VBIOS and in their driver disables
all features that are never used. The practical effect should be the same
as this implementation though.

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
Tested-by: Pierre Moreau <pierre.morrow@free.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c  | 36 +++++++++++++++----
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
index 1c6ae6bcd5736..d98d30699e66c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
@@ -302,6 +302,9 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 		return ret;
 	}
 
+	if (subdev->device->chipset <= 0x96 && !next->bios.ramcfg_00_03_02)
+		ram_mask(hwsq, 0x100710, 0x00000200, 0x00000000);
+
 	/* Always disable this bit during reclock */
 	ram_mask(hwsq, 0x100200, 0x00000800, 0x00000000);
 
@@ -353,8 +356,11 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 			next->bios.rammap_00_16_40 << 14);
 	ram_mask(hwsq, 0x00400c, 0x0000ffff, (N1 << 8) | M1);
 	ram_mask(hwsq, 0x004008, 0x91ff0000, r004008);
-	if (subdev->device->chipset >= 0x96)
+
+	/* XXX: GDDR3 only? */
+	if (subdev->device->chipset >= 0x92)
 		ram_wr32(hwsq, 0x100da0, r100da0);
+
 	nv50_ram_gpio(hwsq, 0x18, !next->bios.ramcfg_FBVDDQ);
 	ram_nsec(hwsq, 64000); /*XXX*/
 	ram_nsec(hwsq, 32000); /*XXX*/
@@ -397,19 +403,33 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 	ram_mask(hwsq, 0x100200, 0x00001000, !next->bios.ramcfg_00_04_02 << 12);
 
 	/* XXX: A lot of this could be "chipset"/"ram type" specific stuff */
-	unk710  = ram_rd32(hwsq, 0x100710) & ~0x00000101;
+	unk710  = ram_rd32(hwsq, 0x100710) & ~0x00000100;
 	unk714  = ram_rd32(hwsq, 0x100714) & ~0xf0000020;
 	unk718  = ram_rd32(hwsq, 0x100718) & ~0x00000100;
 	unk71c  = ram_rd32(hwsq, 0x10071c) & ~0x00000100;
+	if (subdev->device->chipset <= 0x96) {
+		unk710 &= ~0x0000006e;
+		unk714 &= ~0x00000100;
+
+		if (!next->bios.ramcfg_00_03_08)
+			unk710 |= 0x00000060;
+		if (!next->bios.ramcfg_FBVDDQ)
+			unk714 |= 0x00000100;
+		if ( next->bios.ramcfg_00_04_04)
+			unk710 |= 0x0000000e;
+	} else {
+		unk710 &= ~0x00000001;
+
+		if (!next->bios.ramcfg_00_03_08)
+			unk710 |= 0x00000001;
+	}
 
 	if ( next->bios.ramcfg_00_03_01)
 		unk71c |= 0x00000100;
 	if ( next->bios.ramcfg_00_03_02)
 		unk710 |= 0x00000100;
-	if (!next->bios.ramcfg_00_03_08) {
-		unk710 |= 0x1;
-		unk714 |= 0x20;
-	}
+	if (!next->bios.ramcfg_00_03_08)
+		unk714 |= 0x00000020;
 	if ( next->bios.ramcfg_00_04_04)
 		unk714 |= 0x70000000;
 	if ( next->bios.ramcfg_00_04_20)
@@ -420,6 +440,8 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 	ram_mask(hwsq, 0x100718, 0xffffffff, unk718);
 	ram_mask(hwsq, 0x100710, 0xffffffff, unk710);
 
+	/* XXX: G94 does not even test these regs in trace. Harmless we do it,
+	 * but why is it omitted? */
 	if (next->bios.rammap_00_16_20) {
 		ram_wr32(hwsq, 0x1005a0, next->bios.ramcfg_00_07 << 16 |
 					 next->bios.ramcfg_00_06 << 8 |
@@ -450,6 +472,8 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 		ram_mask(hwsq, 0x004008, 0x00004000, 0x00000000);
 	if (next->bios.ramcfg_00_03_02)
 		ram_mask(hwsq, 0x10021c, 0x00010000, 0x00010000);
+	if (subdev->device->chipset <= 0x96 && next->bios.ramcfg_00_03_02)
+		ram_mask(hwsq, 0x100710, 0x00000200, 0x00000200);
 
 	return 0;
 }

From 271c27665c2d2f719a2fca6a3530a82984f22cca Mon Sep 17 00:00:00 2001
From: Roy Spliet <rspliet@eclipso.eu>
Date: Wed, 30 Sep 2015 00:23:51 +0100
Subject: [PATCH 37/45] drm/nouveau/bus/hwsq: Implement VBLANK waiting
 heuristic

Avoids waiting for VBLANKS that never arrive on headless or otherwise
unconventional set-ups. Strategy taken from MEMX.

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
Tested-by: Pierre Moreau <pierre.morrow@free.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/include/nvkm/subdev/bus.h |  1 +
 .../gpu/drm/nouveau/nvkm/subdev/bus/hwsq.c    | 32 +++++++++++++++++++
 .../gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h    |  6 ++++
 .../gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c  |  3 +-
 .../gpu/drm/nouveau/nvkm/subdev/fb/ramseq.h   |  1 +
 5 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bus.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bus.h
index 6a04d9c079441..33a057c334f29 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bus.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bus.h
@@ -14,6 +14,7 @@ int  nvkm_hwsq_fini(struct nvkm_hwsq **, bool exec);
 void nvkm_hwsq_wr32(struct nvkm_hwsq *, u32 addr, u32 data);
 void nvkm_hwsq_setf(struct nvkm_hwsq *, u8 flag, int data);
 void nvkm_hwsq_wait(struct nvkm_hwsq *, u8 flag, u8 data);
+void nvkm_hwsq_wait_vblank(struct nvkm_hwsq *);
 void nvkm_hwsq_nsec(struct nvkm_hwsq *, u32 nsec);
 
 int nv04_bus_new(struct nvkm_device *, int, struct nvkm_bus **);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.c
index 79f1cf513b36f..2a5668938f2f7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.c
@@ -131,6 +131,38 @@ nvkm_hwsq_wait(struct nvkm_hwsq *hwsq, u8 flag, u8 data)
 	hwsq_cmd(hwsq, 3, (u8[]){ 0x5f, flag, data });
 }
 
+void
+nvkm_hwsq_wait_vblank(struct nvkm_hwsq *hwsq)
+{
+	struct nvkm_subdev *subdev = hwsq->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 heads, x, y, px = 0;
+	int i, head_sync;
+
+	heads = nvkm_rd32(device, 0x610050);
+	for (i = 0; i < 2; i++) {
+		/* Heuristic: sync to head with biggest resolution */
+		if (heads & (2 << (i << 3))) {
+			x = nvkm_rd32(device, 0x610b40 + (0x540 * i));
+			y = (x & 0xffff0000) >> 16;
+			x &= 0x0000ffff;
+			if ((x * y) > px) {
+				px = (x * y);
+				head_sync = i;
+			}
+		}
+	}
+
+	if (px == 0) {
+		nvkm_debug(subdev, "WAIT VBLANK !NO ACTIVE HEAD\n");
+		return;
+	}
+
+	nvkm_debug(subdev, "WAIT VBLANK HEAD%d\n", head_sync);
+	nvkm_hwsq_wait(hwsq, head_sync ? 0x3 : 0x1, 0x0);
+	nvkm_hwsq_wait(hwsq, head_sync ? 0x3 : 0x1, 0x1);
+}
+
 void
 nvkm_hwsq_nsec(struct nvkm_hwsq *hwsq, u32 nsec)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h
index 8117ec5a1468e..54ec3b131dfd3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/hwsq.h
@@ -133,6 +133,12 @@ hwsq_wait(struct hwsq *ram, u8 flag, u8 data)
 	nvkm_hwsq_wait(ram->hwsq, flag, data);
 }
 
+static inline void
+hwsq_wait_vblank(struct hwsq *ram)
+{
+	nvkm_hwsq_wait_vblank(ram->hwsq);
+}
+
 static inline void
 hwsq_nsec(struct hwsq *ram, u32 nsec)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
index d98d30699e66c..87bde8ff2d6bb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
@@ -308,8 +308,7 @@ nv50_ram_calc(struct nvkm_ram *base, u32 freq)
 	/* Always disable this bit during reclock */
 	ram_mask(hwsq, 0x100200, 0x00000800, 0x00000000);
 
-	ram_wait(hwsq, 0x01, 0x00); /* wait for !vblank */
-	ram_wait(hwsq, 0x01, 0x01); /* wait for vblank */
+	ram_wait_vblank(hwsq);
 	ram_wr32(hwsq, 0x611200, 0x00003300);
 	ram_wr32(hwsq, 0x002504, 0x00000001); /* block fifo */
 	ram_nsec(hwsq, 8000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramseq.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramseq.h
index 0f1f97ccd5f69..8df7306d57299 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramseq.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramseq.h
@@ -11,5 +11,6 @@
 #define ram_mask(s,r,m,d)   hwsq_mask(&(s)->base, &(s)->r_##r, (m), (d))
 #define ram_setf(s,f,d)     hwsq_setf(&(s)->base, (f), (d))
 #define ram_wait(s,f,d)     hwsq_wait(&(s)->base, (f), (d))
+#define ram_wait_vblank(s)  hwsq_wait_vblank(&(s)->base)
 #define ram_nsec(s,n)       hwsq_nsec(&(s)->base, (n))
 #endif

From 0d42743dfa908a2ca4e349f883361906ebb4db95 Mon Sep 17 00:00:00 2001
From: Roy Spliet <rspliet@eclipso.eu>
Date: Wed, 30 Sep 2015 00:23:52 +0100
Subject: [PATCH 38/45] drm/nouveau/clk/g84: Enable reclocking for GDDR3
 G94-G200

Your milage may vary, as it's only been tested on a single G94 and one G96.

Signed-off-by: Roy Spliet <rspliet@eclipso.eu>
Tested-by: Pierre Moreau <pierre.morrow@free.fr>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/subdev/clk/g84.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/g84.c
index 347da9ee20f53..f97e3ec196bb0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/g84.c
@@ -44,5 +44,5 @@ int
 g84_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
 {
 	return nv50_clk_new_(&g84_clk, device, index,
-			     (device->chipset == 0xa0), pclk);
+			     (device->chipset >= 0x94), pclk);
 }

From 78eaf335e4c8224e74e5d512f20ec48109db9dac Mon Sep 17 00:00:00 2001
From: Karol Herbst <nouveau@karolherbst.de>
Date: Sun, 16 Aug 2015 10:19:25 +0200
Subject: [PATCH 39/45] drm/nouveau/pll/gk104: fix PLL instability due to bad
 configuration with gddr5

This patch uses an approach closer to the nvidia driver to configure
both PLLs for high gddr5 memory clocks (usually above 2400MHz)

Previously nouveau used the one PLL as it was used for the lower clocks
and just adjusted the second PLL to get as close as possible to the
requested clock.  This means for my card, that I got a 4050 MHz clock
although 4008 MHz was requested.

Now the driver iterates over a list of PLL configuration also used by
the nvidia driver and then adjust the second PLL to get near the
requested clock.  Also it hold to some restriction I found while
analyzing the PLL configurations

This won't fix all gddr5 high clock issues itself, but it should be
fine on hybrid gpu systems as found on many laptops these days.  Also
switching while normal desktop usage should be a lot more stable than
before.

v2: move the pll code into ramgk104

Signed-off-by: Karol Herbst <nouveau@karolherbst.de>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 .../gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c | 100 ++++++++++++++----
 1 file changed, 77 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
index 0d2056382c78e..9df45030ff9fb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
@@ -970,6 +970,67 @@ gk104_ram_calc_data(struct gk104_ram *ram, u32 khz, struct nvkm_ram_data *data)
 	return -EINVAL;
 }
 
+static int
+gk104_calc_pll_output(int fN, int M, int N, int P, int clk)
+{
+	return ((clk * N) + (((u16)(fN + 4096) * clk) >> 13)) / (M * P);
+}
+
+static int
+gk104_pll_calc_hiclk(int target_khz, int crystal,
+		int *N1, int *fN1, int *M1, int *P1,
+		int *N2, int *M2, int *P2)
+{
+	int best_clk = 0, best_err = target_khz, p_ref, n_ref;
+	bool upper = false;
+
+	*M1 = 1;
+	/* M has to be 1, otherwise it gets unstable */
+	*M2 = 1;
+	/* can be 1 or 2, sticking with 1 for simplicity */
+	*P2 = 1;
+
+	for (p_ref = 0x7; p_ref >= 0x5; --p_ref) {
+		for (n_ref = 0x25; n_ref <= 0x2b; ++n_ref) {
+			int cur_N, cur_clk, cur_err;
+
+			cur_clk = gk104_calc_pll_output(0, 1, n_ref, p_ref, crystal);
+			cur_N = target_khz / cur_clk;
+			cur_err = target_khz
+				- gk104_calc_pll_output(0xf000, 1, cur_N, 1, cur_clk);
+
+			/* we found a better combination */
+			if (cur_err < best_err) {
+				best_err = cur_err;
+				best_clk = cur_clk;
+				*N2 = cur_N;
+				*N1 = n_ref;
+				*P1 = p_ref;
+				upper = false;
+			}
+
+			cur_N += 1;
+			cur_err = gk104_calc_pll_output(0xf000, 1, cur_N, 1, cur_clk)
+				- target_khz;
+			if (cur_err < best_err) {
+				best_err = cur_err;
+				best_clk = cur_clk;
+				*N2 = cur_N;
+				*N1 = n_ref;
+				*P1 = p_ref;
+				upper = true;
+			}
+		}
+	}
+
+	/* adjust fN to get closer to the target clock */
+	*fN1 = (u16)((((best_err / *N2 * *P2) * (*P1 * *M1)) << 13) / crystal);
+	if (upper)
+		*fN1 = (u16)(1 - *fN1);
+
+	return gk104_calc_pll_output(*fN1, 1, *N1, *P1, crystal);
+}
+
 static int
 gk104_ram_calc_xits(struct gk104_ram *ram, struct nvkm_ram_data *next)
 {
@@ -994,31 +1055,24 @@ gk104_ram_calc_xits(struct gk104_ram *ram, struct nvkm_ram_data *next)
 	 * kepler boards, no idea how/why they're chosen.
 	 */
 	refclk = next->freq;
-	if (ram->mode == 2)
-		refclk = fuc->mempll.refclk;
-
-	/* calculate refpll coefficients */
-	ret = gt215_pll_calc(subdev, &fuc->refpll, refclk, &ram->N1,
-			     &ram->fN1, &ram->M1, &ram->P1);
-	fuc->mempll.refclk = ret;
-	if (ret <= 0) {
-		nvkm_error(subdev, "unable to calc refpll\n");
-		return -EINVAL;
-	}
-
-	/* calculate mempll coefficients, if we're using it */
 	if (ram->mode == 2) {
-		/* post-divider doesn't work... the reg takes the values but
-		 * appears to completely ignore it.  there *is* a bit at
-		 * bit 28 that appears to divide the clock by 2 if set.
-		 */
-		fuc->mempll.min_p = 1;
-		fuc->mempll.max_p = 2;
-
-		ret = gt215_pll_calc(subdev, &fuc->mempll, next->freq,
-				     &ram->N2, NULL, &ram->M2, &ram->P2);
+		ret = gk104_pll_calc_hiclk(next->freq, subdev->device->crystal,
+				&ram->N1, &ram->fN1, &ram->M1, &ram->P1,
+				&ram->N2, &ram->M2, &ram->P2);
+		fuc->mempll.refclk = ret;
+		if (ret <= 0) {
+			nvkm_error(subdev, "unable to calc plls\n");
+			return -EINVAL;
+		}
+		nvkm_debug(subdev, "sucessfully calced PLLs for clock %i kHz"
+				" (refclock: %i kHz)\n", next->freq, ret);
+	} else {
+		/* calculate refpll coefficients */
+		ret = gt215_pll_calc(subdev, &fuc->refpll, refclk, &ram->N1,
+				     &ram->fN1, &ram->M1, &ram->P1);
+		fuc->mempll.refclk = ret;
 		if (ret <= 0) {
-			nvkm_error(subdev, "unable to calc mempll\n");
+			nvkm_error(subdev, "unable to calc refpll\n");
 			return -EINVAL;
 		}
 	}

From a2736b07df1f17c24c8890eb7b2dac529d9725d3 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Tue, 27 Oct 2015 17:39:49 -0400
Subject: [PATCH 40/45] drm/nouveau/pci: enable c800 magic for Lenovo Y510P

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70354#c75
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index e8eb14e438f4d..c5fc9093ca636 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -258,6 +258,12 @@ nvkm_device_pci_10de_0df4[] = {
 	{}
 };
 
+static const struct nvkm_device_pci_vendor
+nvkm_device_pci_10de_0fcd[] = {
+	{ 0x17aa, 0x3801, NULL, { .War00C800_0 = true } }, /* Lenovo Y510P */
+	{}
+};
+
 static const struct nvkm_device_pci_vendor
 nvkm_device_pci_10de_0fd2[] = {
 	{ 0x1028, 0x0595, "GeForce GT 640M LE" },
@@ -1349,7 +1355,7 @@ nvkm_device_pci_10de[] = {
 	{ 0x0fc6, "GeForce GTX 650" },
 	{ 0x0fc8, "GeForce GT 740" },
 	{ 0x0fc9, "GeForce GT 730" },
-	{ 0x0fcd, "GeForce GT 755M" },
+	{ 0x0fcd, "GeForce GT 755M", nvkm_device_pci_10de_0fcd },
 	{ 0x0fce, "GeForce GT 640M LE" },
 	{ 0x0fd1, "GeForce GT 650M" },
 	{ 0x0fd2, "GeForce GT 640M", nvkm_device_pci_10de_0fd2 },

From b41c48571d504f293a9a6a6cfd6c8e5dcb4ccf83 Mon Sep 17 00:00:00 2001
From: Ilia Mirkin <imirkin@alum.mit.edu>
Date: Sat, 31 Oct 2015 15:06:11 -0400
Subject: [PATCH 41/45] drm/nouveau/pci: enable c800 magic for Medion Erazer
 X7827

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91557
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index c5fc9093ca636..e3c783d0e2ab4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -684,6 +684,7 @@ nvkm_device_pci_10de_1189[] = {
 static const struct nvkm_device_pci_vendor
 nvkm_device_pci_10de_1199[] = {
 	{ 0x1458, 0xd001, "GeForce GTX 760" },
+	{ 0x1462, 0x1106, "GeForce GTX 780M", { .War00C800_0 = true } }, /* Medion Erazer X7827 */
 	{}
 };
 

From 09433f24e6e16e590b289489f1f4e16d92e79080 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 3 Nov 2015 10:17:49 +1000
Subject: [PATCH 42/45] drm/nouveau/abi16: remove unused argument from
 nouveau_abi16_get()

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_abi16.c | 12 ++++++------
 drivers/gpu/drm/nouveau/nouveau_abi16.h |  3 +--
 drivers/gpu/drm/nouveau/nouveau_gem.c   |  2 +-
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 0b3c8abd68431..c3dea3331e127 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -33,7 +33,7 @@
 #include "nouveau_abi16.h"
 
 struct nouveau_abi16 *
-nouveau_abi16_get(struct drm_file *file_priv, struct drm_device *dev)
+nouveau_abi16_get(struct drm_file *file_priv)
 {
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	mutex_lock(&cli->mutex);
@@ -236,7 +236,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	struct drm_nouveau_channel_alloc *init = data;
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev);
+	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
 	struct nouveau_abi16_chan *chan;
 	struct nvif_device *device;
 	int ret;
@@ -342,7 +342,7 @@ int
 nouveau_abi16_ioctl_channel_free(ABI16_IOCTL_ARGS)
 {
 	struct drm_nouveau_channel_free *req = data;
-	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev);
+	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
 	struct nouveau_abi16_chan *chan;
 
 	if (unlikely(!abi16))
@@ -359,7 +359,7 @@ int
 nouveau_abi16_ioctl_grobj_alloc(ABI16_IOCTL_ARGS)
 {
 	struct drm_nouveau_grobj_alloc *init = data;
-	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev);
+	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
 	struct nouveau_abi16_chan *chan;
 	struct nouveau_abi16_ntfy *ntfy;
 	struct nvif_client *client;
@@ -452,7 +452,7 @@ nouveau_abi16_ioctl_notifierobj_alloc(ABI16_IOCTL_ARGS)
 {
 	struct drm_nouveau_notifierobj_alloc *info = data;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev);
+	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
 	struct nouveau_abi16_chan *chan;
 	struct nouveau_abi16_ntfy *ntfy;
 	struct nvif_device *device = &abi16->device;
@@ -524,7 +524,7 @@ int
 nouveau_abi16_ioctl_gpuobj_free(ABI16_IOCTL_ARGS)
 {
 	struct drm_nouveau_gpuobj_free *fini = data;
-	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev);
+	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
 	struct nouveau_abi16_chan *chan;
 	struct nouveau_abi16_ntfy *ntfy;
 	int ret = -ENOENT;
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
index 6584557afa40a..fa1d856d2ac40 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -33,8 +33,7 @@ struct nouveau_abi16 {
 	u64 handles;
 };
 
-struct nouveau_drm;
-struct nouveau_abi16 *nouveau_abi16_get(struct drm_file *, struct drm_device *);
+struct nouveau_abi16 *nouveau_abi16_get(struct drm_file *);
 int  nouveau_abi16_put(struct nouveau_abi16 *, int);
 void nouveau_abi16_fini(struct nouveau_abi16 *);
 s32  nouveau_abi16_swclass(struct nouveau_drm *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index a108cc37512ab..a0865c49ec83e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -668,7 +668,7 @@ int
 nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv)
 {
-	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv, dev);
+	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct nouveau_abi16_chan *temp;
 	struct nouveau_drm *drm = nouveau_drm(dev);

From 786a57ef2cebb2d09d7f152b0ed4f1da1d368073 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 3 Nov 2015 10:55:45 +1000
Subject: [PATCH 43/45] drm/nouveau/abi16: introduce locked variant of
 nouveau_abi16_get()

USIF already takes the client mutex, but will need access to ABI16 data
in order to provide some limited interoperability.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_abi16.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index c3dea3331e127..998f5cb2dfac2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -32,11 +32,10 @@
 #include "nouveau_chan.h"
 #include "nouveau_abi16.h"
 
-struct nouveau_abi16 *
-nouveau_abi16_get(struct drm_file *file_priv)
+static struct nouveau_abi16 *
+nouveau_abi16(struct drm_file *file_priv)
 {
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
-	mutex_lock(&cli->mutex);
 	if (!cli->abi16) {
 		struct nouveau_abi16 *abi16;
 		cli->abi16 = abi16 = kzalloc(sizeof(*abi16), GFP_KERNEL);
@@ -59,12 +58,21 @@ nouveau_abi16_get(struct drm_file *file_priv)
 			kfree(cli->abi16);
 			cli->abi16 = NULL;
 		}
-
-		mutex_unlock(&cli->mutex);
 	}
 	return cli->abi16;
 }
 
+struct nouveau_abi16 *
+nouveau_abi16_get(struct drm_file *file_priv)
+{
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
+	mutex_lock(&cli->mutex);
+	if (nouveau_abi16(file_priv))
+		return cli->abi16;
+	mutex_unlock(&cli->mutex);
+	return NULL;
+}
+
 int
 nouveau_abi16_put(struct nouveau_abi16 *abi16, int ret)
 {

From 2621a41647fe783be809e789faa5d8b6b06c8072 Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 3 Nov 2015 11:21:43 +1000
Subject: [PATCH 44/45] drm/nouveau/abi16: implement limited interoperability
 with usif/nvif

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_abi16.c | 39 +++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_abi16.h |  1 +
 drivers/gpu/drm/nouveau/nouveau_usif.c  | 15 ++++++++--
 3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 998f5cb2dfac2..7f50cf5f929e9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -25,6 +25,7 @@
 #include <nvif/driver.h>
 #include <nvif/ioctl.h>
 #include <nvif/class.h>
+#include <nvif/unpack.h>
 
 #include "nouveau_drm.h"
 #include "nouveau_dma.h"
@@ -346,6 +347,44 @@ nouveau_abi16_chan(struct nouveau_abi16 *abi16, int channel)
 	return NULL;
 }
 
+int
+nouveau_abi16_usif(struct drm_file *file_priv, void *data, u32 size)
+{
+	union {
+		struct nvif_ioctl_v0 v0;
+	} *args = data;
+	struct nouveau_abi16_chan *chan;
+	struct nouveau_abi16 *abi16;
+	int ret;
+
+	if (nvif_unpack(args->v0, 0, 0, true)) {
+		switch (args->v0.type) {
+		case NVIF_IOCTL_V0_NEW:
+		case NVIF_IOCTL_V0_MTHD:
+		case NVIF_IOCTL_V0_SCLASS:
+			break;
+		default:
+			return -EACCES;
+		}
+	} else
+		return ret;
+
+	if (!(abi16 = nouveau_abi16(file_priv)))
+		return -ENOMEM;
+
+	if (args->v0.token != ~0ULL) {
+		if (!(chan = nouveau_abi16_chan(abi16, args->v0.token)))
+			return -EINVAL;
+		args->v0.object = nvif_handle(&chan->chan->user);
+		args->v0.owner  = NVIF_IOCTL_V0_OWNER_ANY;
+		return 0;
+	}
+
+	args->v0.object = nvif_handle(&abi16->device.object);
+	args->v0.owner  = NVIF_IOCTL_V0_OWNER_ANY;
+	return 0;
+}
+
 int
 nouveau_abi16_ioctl_channel_free(ABI16_IOCTL_ARGS)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
index fa1d856d2ac40..841cc556fad83 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -37,6 +37,7 @@ struct nouveau_abi16 *nouveau_abi16_get(struct drm_file *);
 int  nouveau_abi16_put(struct nouveau_abi16 *, int);
 void nouveau_abi16_fini(struct nouveau_abi16 *);
 s32  nouveau_abi16_swclass(struct nouveau_drm *);
+int  nouveau_abi16_usif(struct drm_file *, void *data, u32 size);
 
 #define NOUVEAU_GEM_DOMAIN_VRAM      (1 << 1)
 #define NOUVEAU_GEM_DOMAIN_GART      (1 << 2)
diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c
index cb1182d7e80ea..89dc4ce63490e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_usif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.c
@@ -24,6 +24,7 @@
 
 #include "nouveau_drm.h"
 #include "nouveau_usif.h"
+#include "nouveau_abi16.h"
 
 #include <nvif/notify.h>
 #include <nvif/unpack.h>
@@ -316,11 +317,21 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc)
 	} else
 		goto done;
 
+	/* USIF slightly abuses some return-only ioctl members in order
+	 * to provide interoperability with the older ABI16 objects
+	 */
 	mutex_lock(&cli->mutex);
+	if (argv->v0.route) {
+		if (ret = -EINVAL, argv->v0.route == 0xff)
+			ret = nouveau_abi16_usif(filp, argv, argc);
+		if (ret) {
+			mutex_unlock(&cli->mutex);
+			goto done;
+		}
+	}
+
 	switch (argv->v0.type) {
 	case NVIF_IOCTL_V0_NEW:
-		/* ... except if we're creating children */
-		argv->v0.owner = NVIF_IOCTL_V0_OWNER_ANY;
 		ret = usif_object_new(filp, data, size, argv, argc);
 		break;
 	case NVIF_IOCTL_V0_NTFY_NEW:

From 79ef5dca5e5cd5a33662d64c927c1b9786d4edee Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Tue, 3 Nov 2015 12:40:13 +1000
Subject: [PATCH 45/45] drm/nouveau: bump patchlevel to indicate availability
 of abi16/nvif interop

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_drm.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.h b/drivers/gpu/drm/nouveau/nouveau_drm.h
index 51027a9030f62..3050042e6c6d5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.h
@@ -10,7 +10,7 @@
 
 #define DRIVER_MAJOR		1
 #define DRIVER_MINOR		3
-#define DRIVER_PATCHLEVEL	0
+#define DRIVER_PATCHLEVEL	1
 
 /*
  * 1.1.1:
@@ -33,6 +33,8 @@
  * 1.3.0:
  *      - NVIF ABI modified, safe because only (current) users are test
  *        programs that get directly linked with NVKM.
+ * 1.3.1:
+ *      - implemented limited ABI16/NVIF interop
  */
 
 #include <nvif/client.h>