From 6ee0ad2a7f27f7dc365576b748bc98684f02882d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Thu, 8 Jan 2015 13:27:15 +0900 Subject: [PATCH 01/19] drm/amdkfd: Drop interrupt SW ring buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The work queue couldn't reliably prevent the SW ring buffer from overflowing, so dmesg was spammed by kfd kfd: Interrupt ring overflow, dropping interrupt. messages when running e.g. the Atlantis Substance demo from https://wiki.unrealengine.com/Linux_Demos on Kaveri. Since the SW ring buffer doesn't actually do anything at this point, just remove it for now. When actual interrupt processing code is added to amdkfd, it should try to do things immediately and only defer to work queues when necessary. Signed-off-by: Michel Dänzer Reviewed-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_device.c | 20 +-- drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 176 --------------------- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 15 -- 4 files changed, 2 insertions(+), 212 deletions(-) delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index be6246de5091..307a309110e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -8,7 +8,6 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ kfd_process.o kfd_queue.o kfd_mqd_manager.o \ kfd_kernel_queue.o kfd_packet_manager.o \ - kfd_process_queue_manager.o kfd_device_queue_manager.o \ - kfd_interrupt.o + kfd_process_queue_manager.o kfd_device_queue_manager.o obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 43884ebd4303..633532a2e7ec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -192,13 +192,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, goto kfd_topology_add_device_error; } - if (kfd_interrupt_init(kfd)) { - dev_err(kfd_device, - "Error initializing interrupts for device (%x:%x)\n", - kfd->pdev->vendor, kfd->pdev->device); - goto kfd_interrupt_error; - } - if (!device_iommu_pasid_init(kfd)) { dev_err(kfd_device, "Error initializing iommuv2 for device (%x:%x)\n", @@ -237,8 +230,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, device_queue_manager_error: amd_iommu_free_device(kfd->pdev); device_iommu_pasid_error: - kfd_interrupt_exit(kfd); -kfd_interrupt_error: kfd_topology_remove_device(kfd); kfd_topology_add_device_error: kfd2kgd->fini_sa_manager(kfd->kgd); @@ -254,7 +245,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) if (kfd->init_complete) { device_queue_manager_uninit(kfd->dqm); amd_iommu_free_device(kfd->pdev); - kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); } @@ -296,13 +286,5 @@ int kgd2kfd_resume(struct kfd_dev *kfd) /* This is called directly from KGD at ISR. */ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { - if (kfd->init_complete) { - spin_lock(&kfd->interrupt_lock); - - if (kfd->interrupts_active - && enqueue_ih_ring_entry(kfd, ih_ring_entry)) - schedule_work(&kfd->interrupt_work); - - spin_unlock(&kfd->interrupt_lock); - } + /* Process interrupts / schedule work as necessary */ } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c deleted file mode 100644 index 5b999095a1f7..000000000000 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -/* - * KFD Interrupts. - * - * AMD GPUs deliver interrupts by pushing an interrupt description onto the - * interrupt ring and then sending an interrupt. KGD receives the interrupt - * in ISR and sends us a pointer to each new entry on the interrupt ring. - * - * We generally can't process interrupt-signaled events from ISR, so we call - * out to each interrupt client module (currently only the scheduler) to ask if - * each interrupt is interesting. If they return true, then it requires further - * processing so we copy it to an internal interrupt ring and call each - * interrupt client again from a work-queue. - * - * There's no acknowledgment for the interrupts we use. The hardware simply - * queues a new interrupt each time without waiting. - * - * The fixed-size internal queue means that it's possible for us to lose - * interrupts because we have no back-pressure to the hardware. - */ - -#include -#include -#include "kfd_priv.h" - -#define KFD_INTERRUPT_RING_SIZE 256 - -static void interrupt_wq(struct work_struct *); - -int kfd_interrupt_init(struct kfd_dev *kfd) -{ - void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, - kfd->device_info->ih_ring_entry_size, - GFP_KERNEL); - if (!interrupt_ring) - return -ENOMEM; - - kfd->interrupt_ring = interrupt_ring; - kfd->interrupt_ring_size = - KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; - atomic_set(&kfd->interrupt_ring_wptr, 0); - atomic_set(&kfd->interrupt_ring_rptr, 0); - - spin_lock_init(&kfd->interrupt_lock); - - INIT_WORK(&kfd->interrupt_work, interrupt_wq); - - kfd->interrupts_active = true; - - /* - * After this function returns, the interrupt will be enabled. This - * barrier ensures that the interrupt running on a different processor - * sees all the above writes. - */ - smp_wmb(); - - return 0; -} - -void kfd_interrupt_exit(struct kfd_dev *kfd) -{ - /* - * Stop the interrupt handler from writing to the ring and scheduling - * workqueue items. The spinlock ensures that any interrupt running - * after we have unlocked sees interrupts_active = false. - */ - unsigned long flags; - - spin_lock_irqsave(&kfd->interrupt_lock, flags); - kfd->interrupts_active = false; - spin_unlock_irqrestore(&kfd->interrupt_lock, flags); - - /* - * Flush_scheduled_work ensures that there are no outstanding - * work-queue items that will access interrupt_ring. New work items - * can't be created because we stopped interrupt handling above. - */ - flush_scheduled_work(); - - kfree(kfd->interrupt_ring); -} - -/* - * This assumes that it can't be called concurrently with itself - * but only with dequeue_ih_ring_entry. - */ -bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) -{ - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); - - if ((rptr - wptr) % kfd->interrupt_ring_size == - kfd->device_info->ih_ring_entry_size) { - /* This is very bad, the system is likely to hang. */ - dev_err_ratelimited(kfd_chardev(), - "Interrupt ring overflow, dropping interrupt.\n"); - return false; - } - - memcpy(kfd->interrupt_ring + wptr, ih_ring_entry, - kfd->device_info->ih_ring_entry_size); - - wptr = (wptr + kfd->device_info->ih_ring_entry_size) % - kfd->interrupt_ring_size; - smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */ - atomic_set(&kfd->interrupt_ring_wptr, wptr); - - return true; -} - -/* - * This assumes that it can't be called concurrently with itself - * but only with enqueue_ih_ring_entry. - */ -static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) -{ - /* - * Assume that wait queues have an implicit barrier, i.e. anything that - * happened in the ISR before it queued work is visible. - */ - - unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); - unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); - - if (rptr == wptr) - return false; - - memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, - kfd->device_info->ih_ring_entry_size); - - rptr = (rptr + kfd->device_info->ih_ring_entry_size) % - kfd->interrupt_ring_size; - - /* - * Ensure the rptr write update is not visible until - * memcpy has finished reading. - */ - smp_mb(); - atomic_set(&kfd->interrupt_ring_rptr, rptr); - - return true; -} - -static void interrupt_wq(struct work_struct *work) -{ - struct kfd_dev *dev = container_of(work, struct kfd_dev, - interrupt_work); - - uint32_t ih_ring_entry[DIV_ROUND_UP( - dev->device_info->ih_ring_entry_size, - sizeof(uint32_t))]; - - while (dequeue_ih_ring_entry(dev, ih_ring_entry)) - ; -} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index a5edb29507e3..b3dc13c83169 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -135,22 +135,10 @@ struct kfd_dev { struct kgd2kfd_shared_resources shared_resources; - void *interrupt_ring; - size_t interrupt_ring_size; - atomic_t interrupt_ring_rptr; - atomic_t interrupt_ring_wptr; - struct work_struct interrupt_work; - spinlock_t interrupt_lock; - /* QCM Device instance */ struct device_queue_manager *dqm; bool init_complete; - /* - * Interrupts of interest to KFD are copied - * from the HW ring into a SW ring. - */ - bool interrupts_active; }; /* KGD2KFD callbacks */ @@ -531,10 +519,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); /* Interrupts */ -int kfd_interrupt_init(struct kfd_dev *dev); -void kfd_interrupt_exit(struct kfd_dev *dev); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); -bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); /* Power Management */ void kgd2kfd_suspend(struct kfd_dev *kfd); From cbfc35b90f3b4853d1eb9fcb82e99531d6a1c629 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 5 Jan 2015 19:42:25 -0500 Subject: [PATCH 02/19] drm/radeon: fix VM flush on cayman/aruba (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to wait for the GPUVM flush to complete. There was some confusion as to how this mechanism was supposed to work. The operation is not atomic. For GPU initiated invalidations you need to read back a VM register to introduce enough latency for the update to complete. v2: drop gart changes v3: just read back rather than polling Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/ni.c | 10 ++++++++++ drivers/gpu/drm/radeon/ni_dma.c | 6 ++++++ drivers/gpu/drm/radeon/nid.h | 24 ++++++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 360de9f1f491..aea48c89b241 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2516,6 +2516,16 @@ void cayman_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, PACKET0(VM_INVALIDATE_REQUEST, 0)); radeon_ring_write(ring, 1 << vm_id); + /* wait for the invalidate to complete */ + radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) | /* always */ + WAIT_REG_MEM_ENGINE(0))); /* me */ + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* ref */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0x20); /* poll interval */ + /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 50f88611ff60..4be2bb7cbef3 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -463,5 +463,11 @@ void cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); radeon_ring_write(ring, 1 << vm_id); + + /* wait for invalidate to complete */ + radeon_ring_write(ring, DMA_SRBM_READ_PACKET); + radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2)); + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0); /* value */ } diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index 2e12e4d69253..ad7125486894 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -1133,6 +1133,23 @@ #define PACKET3_MEM_SEMAPHORE 0x39 #define PACKET3_MPEG_INDEX 0x3A #define PACKET3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) + /* 0 - always + * 1 - < + * 2 - <= + * 3 - == + * 4 - != + * 5 - >= + * 6 - > + */ +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) + /* 0 - reg + * 1 - mem + */ +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) + /* 0 - me + * 1 - pfp + */ #define PACKET3_MEM_WRITE 0x3D #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_SURFACE_SYNC 0x43 @@ -1272,6 +1289,13 @@ (1 << 21) | \ (((n) & 0xFFFFF) << 0)) +#define DMA_SRBM_POLL_PACKET ((9 << 28) | \ + (1 << 27) | \ + (1 << 26)) + +#define DMA_SRBM_READ_PACKET ((9 << 28) | \ + (1 << 27)) + /* async DMA Packet types */ #define DMA_PACKET_WRITE 0x2 #define DMA_PACKET_COPY 0x3 From d474ea7e52cbaaae22711d857949ba6018562c29 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 5 Jan 2015 19:54:50 -0500 Subject: [PATCH 03/19] drm/radeon: fix VM flush on SI (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to wait for the GPUVM flush to complete. There was some confusion as to how this mechanism was supposed to work. The operation is not atomic. For GPU initiated invalidations you need to read back a VM register to introduce enough latency for the update to complete. v2: drop gart changes v3: just read back rather than polling Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/si.c | 10 ++++++++++ drivers/gpu/drm/radeon/si_dma.c | 8 ++++++++ drivers/gpu/drm/radeon/sid.h | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 60df444bd075..5d89b874a1a2 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5057,6 +5057,16 @@ void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, 0); radeon_ring_write(ring, 1 << vm_id); + /* wait for the invalidate to complete */ + radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) | /* always */ + WAIT_REG_MEM_ENGINE(0))); /* me */ + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* ref */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0x20); /* poll interval */ + /* sync PFP to ME, otherwise we might get invalid PFP reads */ radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); radeon_ring_write(ring, 0x0); diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c index f5cc777e1c5f..aa7b872b2c43 100644 --- a/drivers/gpu/drm/radeon/si_dma.c +++ b/drivers/gpu/drm/radeon/si_dma.c @@ -206,6 +206,14 @@ void si_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0)); radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); radeon_ring_write(ring, 1 << vm_id); + + /* wait for invalidate to complete */ + radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0)); + radeon_ring_write(ring, VM_INVALIDATE_REQUEST); + radeon_ring_write(ring, 0xff << 16); /* retry */ + radeon_ring_write(ring, 1 << vm_id); /* mask */ + radeon_ring_write(ring, 0); /* value */ + radeon_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */ } /** diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 4069be89e585..84999242c747 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -1632,6 +1632,23 @@ #define PACKET3_MPEG_INDEX 0x3A #define PACKET3_COPY_DW 0x3B #define PACKET3_WAIT_REG_MEM 0x3C +#define WAIT_REG_MEM_FUNCTION(x) ((x) << 0) + /* 0 - always + * 1 - < + * 2 - <= + * 3 - == + * 4 - != + * 5 - >= + * 6 - > + */ +#define WAIT_REG_MEM_MEM_SPACE(x) ((x) << 4) + /* 0 - reg + * 1 - mem + */ +#define WAIT_REG_MEM_ENGINE(x) ((x) << 8) + /* 0 - me + * 1 - pfp + */ #define PACKET3_MEM_WRITE 0x3D #define PACKET3_COPY_DATA 0x40 #define PACKET3_CP_DMA 0x41 @@ -1835,6 +1852,7 @@ #define DMA_PACKET_TRAP 0x7 #define DMA_PACKET_SRBM_WRITE 0x9 #define DMA_PACKET_CONSTANT_FILL 0xd +#define DMA_PACKET_POLL_REG_MEM 0xe #define DMA_PACKET_NOP 0xf #define VCE_STATUS 0x20004 From 3a01fd367e09ebf05d75a000407364e7ebe2b678 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 5 Jan 2015 19:59:47 -0500 Subject: [PATCH 04/19] drm/radeon: fix VM flush on CIK (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to wait for the GPUVM flush to complete. There was some confusion as to how this mechanism was supposed to work. The operation is not atomic. For GPU initiated invalidations you need to read back a VM register to introduce enough latency for the update to complete. v2: drop gart changes v3: just read back rather than polling Reviewed-by: Christian König Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/cik.c | 11 +++++++++++ drivers/gpu/drm/radeon/cik_sdma.c | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 6dcde3798b45..64fdae558d36 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -6033,6 +6033,17 @@ void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, 0); radeon_ring_write(ring, 1 << vm_id); + /* wait for the invalidate to complete */ + radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); + radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */ + WAIT_REG_MEM_FUNCTION(0) | /* always */ + WAIT_REG_MEM_ENGINE(0))); /* me */ + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* ref */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, 0x20); /* poll interval */ + /* compute doesn't have PFP */ if (usepfp) { /* sync PFP to ME, otherwise we might get invalid PFP reads */ diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index dde5c7e29eb2..a0133c74f4cf 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -903,6 +903,9 @@ void cik_sdma_vm_pad_ib(struct radeon_ib *ib) void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, unsigned vm_id, uint64_t pd_addr) { + u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) | + SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */ + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); if (vm_id < 8) { radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2); @@ -943,5 +946,12 @@ void cik_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000)); radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); radeon_ring_write(ring, 1 << vm_id); + + radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits)); + radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2); + radeon_ring_write(ring, 0); + radeon_ring_write(ring, 0); /* reference */ + radeon_ring_write(ring, 0); /* mask */ + radeon_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */ } From 8dfe58b2063811b415626060316672741049d4d4 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Thu, 8 Jan 2015 16:46:16 +0200 Subject: [PATCH 05/19] drm/amdkfd: Fix sparse warning (different address space) Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 9c8961d22360..30c8fda9622e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -280,7 +280,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, q->queue); retval = mqd->load_mqd(mqd, q->mqd, q->pipe, - q->queue, q->properties.write_ptr); + q->queue, (uint32_t __user *) q->properties.write_ptr); if (retval != 0) { deallocate_hqd(dqm, q); mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); From 4369a69ec6ab86821352bd753c68af5880f87956 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 8 Jan 2015 10:46:33 -0500 Subject: [PATCH 06/19] drm/radeon: add a dpm quirk list Disable dpm on certain problematic boards rather than disabling dpm for the entire chip family since most boards work fine. https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1386534 https://bugzilla.kernel.org/show_bug.cgi?id=83731 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_pm.c | 33 ++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 32522cc940a1..f7da8fe96a66 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -1287,8 +1287,39 @@ static int radeon_pm_init_dpm(struct radeon_device *rdev) return ret; } +struct radeon_dpm_quirk { + u32 chip_vendor; + u32 chip_device; + u32 subsys_vendor; + u32 subsys_device; +}; + +/* cards with dpm stability problems */ +static struct radeon_dpm_quirk radeon_dpm_quirk_list[] = { + /* TURKS - https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1386534 */ + { PCI_VENDOR_ID_ATI, 0x6759, 0x1682, 0x3195 }, + /* TURKS - https://bugzilla.kernel.org/show_bug.cgi?id=83731 */ + { PCI_VENDOR_ID_ATI, 0x6840, 0x1179, 0xfb81 }, + { 0, 0, 0, 0 }, +}; + int radeon_pm_init(struct radeon_device *rdev) { + struct radeon_dpm_quirk *p = radeon_dpm_quirk_list; + bool disable_dpm = false; + + /* Apply dpm quirks */ + while (p && p->chip_device != 0) { + if (rdev->pdev->vendor == p->chip_vendor && + rdev->pdev->device == p->chip_device && + rdev->pdev->subsystem_vendor == p->subsys_vendor && + rdev->pdev->subsystem_device == p->subsys_device) { + disable_dpm = true; + break; + } + ++p; + } + /* enable dpm on rv6xx+ */ switch (rdev->family) { case CHIP_RV610: @@ -1344,6 +1375,8 @@ int radeon_pm_init(struct radeon_device *rdev) (!(rdev->flags & RADEON_IS_IGP)) && (!rdev->smc_fw)) rdev->pm.pm_method = PM_METHOD_PROFILE; + else if (disable_dpm && (radeon_dpm == -1)) + rdev->pm.pm_method = PM_METHOD_PROFILE; else if (radeon_dpm == 0) rdev->pm.pm_method = PM_METHOD_PROFILE; else From d40f74f7273b484ca4382e5ae08b59dee85e01e4 Mon Sep 17 00:00:00 2001 From: Hyungwon Hwang Date: Fri, 9 Jan 2015 10:24:41 +0900 Subject: [PATCH 07/19] drm/exynos: remove the redundant machine checking code This code is unnecessary, because same logic is already included. Refer this mail thread[1] for detail. [1] http://lists.freedesktop.org/archives/dri-devel/2015-January/075132.html Signed-off-by: Hyungwon Hwang Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_drm_drv.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 121470a83d1a..1bcbe07cecfc 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -645,18 +645,6 @@ static int exynos_drm_init(void) if (!is_exynos) return -ENODEV; - /* - * Register device object only in case of Exynos SoC. - * - * Below codes resolves temporarily infinite loop issue incurred - * by Exynos drm driver when using multi-platform kernel. - * So these codes will be replaced with more generic way later. - */ - if (!of_machine_is_compatible("samsung,exynos3") && - !of_machine_is_compatible("samsung,exynos4") && - !of_machine_is_compatible("samsung,exynos5")) - return -ENODEV; - exynos_drm_pdev = platform_device_register_simple("exynos-drm", -1, NULL, 0); if (IS_ERR(exynos_drm_pdev)) From 63a3451641ec2e129dfe80044e3c96bc9f0bb690 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 19 Dec 2014 19:33:25 +0200 Subject: [PATCH 08/19] drm/i915: gen9: fix RPS interrupt routing to CPU vs. GT GEN8+ HW has the option to route PM interrupts to either the CPU or to GT. For GEN8 this was already set correctly to routing to CPU, but not for GEN9, so fix this. Note that when disabling RPS interrupts this was set already correctly, though in that case it didn't matter much except for the possibility of spurious interrupts. Signed-off-by: Imre Deak Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 964b28e3c630..78e308bebac9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4369,7 +4369,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev)) mask |= GEN6_PM_RP_UP_EI_EXPIRED; - if (IS_GEN8(dev_priv->dev)) + if (INTEL_INFO(dev_priv)->gen >= 8) mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP; return ~mask; From 59d02a1f45beb1b6f4ef83a47feb264cb3577725 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 19 Dec 2014 19:33:26 +0200 Subject: [PATCH 09/19] drm/i915: fix HW lockup due to missing RPS IRQ workaround on GEN6 In commit dbea3cea69508e9d548ed4a6be13de35492e5d15 Author: Imre Deak Date: Mon Dec 15 18:59:28 2014 +0200 drm/i915: sanitize RPS resetting during GPU reset we disable RPS interrupts during GPU resetting, but don't apply the necessary GEN6 HW workaround. This leads to a HW lockup during a subsequent "looping batchbuffer" workload. This is triggered by the testcase that submits exactly this kind of workload after a simulated GPU reset. I'm not sure how likely the bug would have triggered otherwise, since we would have applied the workaround anyway shortly after the GPU reset, when enabling GT powersaving from the deferred work. This may also fix unrelated issues, since during driver loading / suspending we also disable RPS interrupts and so we also had a short window during the rest of the loading / resuming where a similar workload could run without the workaround applied. v2: - separate the fix to route RPS interrupts to the CPU on GEN9 too to a separate patch (Daniel) Bisected-by: Ander Conselvan de Oliveira Testcase: igt/gem_reset_stats/ban-ctx-render Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87429 Signed-off-by: Imre Deak Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 18 ++++++++++++++++-- drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_pm.c | 11 +---------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d0d3dfbe6d2a..ba86dc330547 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -292,6 +292,21 @@ void gen6_enable_rps_interrupts(struct drm_device *dev) spin_unlock_irq(&dev_priv->irq_lock); } +u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask) +{ + /* + * IVB and SNB hard hangs on looping batchbuffer + * if GEN6_PM_UP_EI_EXPIRED is masked. + */ + if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv)) + mask &= ~GEN6_PM_RP_UP_EI_EXPIRED; + + if (INTEL_INFO(dev_priv)->gen >= 8) + mask &= ~GEN8_PMINTR_REDIRECT_TO_NON_DISP; + + return mask; +} + void gen6_disable_rps_interrupts(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -304,8 +319,7 @@ void gen6_disable_rps_interrupts(struct drm_device *dev) spin_lock_irq(&dev_priv->irq_lock); - I915_WRITE(GEN6_PMINTRMSK, INTEL_INFO(dev_priv)->gen >= 8 ? - ~GEN8_PMINTR_REDIRECT_TO_NON_DISP : ~0); + I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0)); __gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events); I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) & diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 25fdbb16d4e0..3b40a17b8852 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -794,6 +794,7 @@ void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_reset_rps_interrupts(struct drm_device *dev); void gen6_enable_rps_interrupts(struct drm_device *dev); void gen6_disable_rps_interrupts(struct drm_device *dev); +u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask); void intel_runtime_pm_disable_interrupts(struct drm_i915_private *dev_priv); void intel_runtime_pm_enable_interrupts(struct drm_i915_private *dev_priv); static inline bool intel_irqs_enabled(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 78e308bebac9..3801ff59595f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4363,16 +4363,7 @@ static u32 gen6_rps_pm_mask(struct drm_i915_private *dev_priv, u8 val) mask |= dev_priv->pm_rps_events & (GEN6_PM_RP_DOWN_EI_EXPIRED | GEN6_PM_RP_UP_EI_EXPIRED); mask &= dev_priv->pm_rps_events; - /* IVB and SNB hard hangs on looping batchbuffer - * if GEN6_PM_UP_EI_EXPIRED is masked. - */ - if (INTEL_INFO(dev_priv->dev)->gen <= 7 && !IS_HASWELL(dev_priv->dev)) - mask |= GEN6_PM_RP_UP_EI_EXPIRED; - - if (INTEL_INFO(dev_priv)->gen >= 8) - mask |= GEN8_PMINTR_REDIRECT_TO_NON_DISP; - - return ~mask; + return gen6_sanitize_rps_pm_mask(dev_priv, ~mask); } /* gen6_set_rps is called to update the frequency request, but should also be From f24eeb191229b040deb3e813913e06a4316c6d3f Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Fri, 19 Dec 2014 19:33:27 +0200 Subject: [PATCH 10/19] drm/i915: vlv: sanitize RPS interrupt mask during GPU idling We apply the RPS interrupt workaround on VLV everywhere except when writing the mask directly during idling the GPU. For consistency do this also there. While at it also extend the code comment about affected platforms. I couldn't reproduce the issue on VLV fixed by this workaround, by removing the workaround from everywhere, while it's 100% reproducible on SNB using igt/gem_reset_stats/ban-ctx-render. So also add a note that it hasn't been verified if the workaround really applies to VLV/CHV. Suggested-by: Daniel Vetter Signed-off-by: Imre Deak Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_irq.c | 4 +++- drivers/gpu/drm/i915/intel_pm.c | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index ba86dc330547..b051a238baf9 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -295,8 +295,10 @@ void gen6_enable_rps_interrupts(struct drm_device *dev) u32 gen6_sanitize_rps_pm_mask(struct drm_i915_private *dev_priv, u32 mask) { /* - * IVB and SNB hard hangs on looping batchbuffer + * SNB,IVB can while VLV,CHV may hard hang on looping batchbuffer * if GEN6_PM_UP_EI_EXPIRED is masked. + * + * TODO: verify if this can be reproduced on VLV,CHV. */ if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv)) mask &= ~GEN6_PM_RP_UP_EI_EXPIRED; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3801ff59595f..bf814a64582a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4432,7 +4432,8 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv) return; /* Mask turbo interrupt so that they will not come in between */ - I915_WRITE(GEN6_PMINTRMSK, 0xffffffff); + I915_WRITE(GEN6_PMINTRMSK, + gen6_sanitize_rps_pm_mask(dev_priv, ~0)); vlv_force_gfx_clock(dev_priv, true); From 48bf5b2d00bfeb681f6500c626189c7cd2c964d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 27 Dec 2014 09:48:28 +0000 Subject: [PATCH 11/19] drm/i915: Ban Haswell from using RCS flips Like Ivybridge, we have reports that we get random hangs when flipping with multiple pipes. Extend commit 2a92d5bca1999b69c78f3c3e97b5484985b094b9 Author: Chris Wilson Date: Tue Jul 8 10:40:29 2014 +0100 drm/i915: Disable RCS flips on Ivybridge to also apply to Haswell. Reported-and-tested-by: Scott Tsai Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87759 Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter Cc: stable@vger.kernel.org # 2a92d5bca199 drm/i915: Disable RCS flips on Ivybridge Cc: stable@vger.kernel.org Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e2af1383b179..e7a16f119a29 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -9815,7 +9815,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (obj->tiling_mode != work->old_fb_obj->tiling_mode) /* vlv: DISPLAY_FLIP fails to change tiling */ ring = NULL; - } else if (IS_IVYBRIDGE(dev)) { + } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { ring = &dev_priv->ring[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { ring = obj->ring; From 226e5ae9e5f9108beb0bde4ac69f68fe6210fed9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 2 Jan 2015 09:47:10 +0000 Subject: [PATCH 12/19] drm/i915: Fix mutex->owner inspection race under DEBUG_MUTEXES If CONFIG_DEBUG_MUTEXES is set, the mutex->owner field is only cleared if the mutex debugging is enabled which introduces a race in our mutex_is_locked_by() - i.e. we may inspect the old owner value before it is acquired by the new task. This is the root cause of this error: diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 5cf6731..3ef3736 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -80,13 +80,13 @@ void debug_mutex_unlock(struct mutex *lock) DEBUG_LOCKS_WARN_ON(lock->owner != current); DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - mutex_clear_owner(lock); } /* * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug * mutexes so that we can do it here after we've verified state. */ + mutex_clear_owner(lock); atomic_set(&lock->count, 1); } Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=87955 Signed-off-by: Chris Wilson Cc: stable@vger.kernel.org Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c11603b4cf1d..76354d3ba925 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5155,7 +5155,7 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) if (!mutex_is_locked(mutex)) return false; -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES) +#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) return mutex->owner == task; #else /* Since UP may be pre-empted, we cannot assume that we own the lock */ From ad1a62227f2e3d5eb4eb0b61a2d9005369bbef45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 9 Jan 2015 11:07:49 +0100 Subject: [PATCH 13/19] drm/radeon: don't print error on -ERESTARTSYS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index a46f73737994..d0b4f7d1140d 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -576,7 +576,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev, error_free: drm_free_large(vm_bos); - if (r) + if (r && r != -ERESTARTSYS) DRM_ERROR("Couldn't update BO_VA (%d)\n", r); } From 5615f890bc6babdc2998dec62f3552326d06eb7b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 12 Jan 2015 17:15:12 -0500 Subject: [PATCH 14/19] drm/radeon: add si dpm quirk list This adds a quirks list to fix stability problems with certain SI boards. bug: https://bugs.freedesktop.org/show_bug.cgi?id=76490 Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/si_dpm.c | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 32e354b8b0ab..eff8a6444956 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -2908,6 +2908,22 @@ static int si_init_smc_spll_table(struct radeon_device *rdev) return ret; } +struct si_dpm_quirk { + u32 chip_vendor; + u32 chip_device; + u32 subsys_vendor; + u32 subsys_device; + u32 max_sclk; + u32 max_mclk; +}; + +/* cards with dpm stability problems */ +static struct si_dpm_quirk si_dpm_quirk_list[] = { + /* PITCAIRN - https://bugs.freedesktop.org/show_bug.cgi?id=76490 */ + { PCI_VENDOR_ID_ATI, 0x6810, 0x1462, 0x3036, 0, 120000 }, + { 0, 0, 0, 0 }, +}; + static void si_apply_state_adjust_rules(struct radeon_device *rdev, struct radeon_ps *rps) { @@ -2918,7 +2934,22 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, u32 mclk, sclk; u16 vddc, vddci; u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc; + u32 max_sclk = 0, max_mclk = 0; int i; + struct si_dpm_quirk *p = si_dpm_quirk_list; + + /* Apply dpm quirks */ + while (p && p->chip_device != 0) { + if (rdev->pdev->vendor == p->chip_vendor && + rdev->pdev->device == p->chip_device && + rdev->pdev->subsystem_vendor == p->subsys_vendor && + rdev->pdev->subsystem_device == p->subsys_device) { + max_sclk = p->max_sclk; + max_mclk = p->max_mclk; + break; + } + ++p; + } if ((rdev->pm.dpm.new_active_crtc_count > 1) || ni_dpm_vblank_too_short(rdev)) @@ -2972,6 +3003,14 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (ps->performance_levels[i].mclk > max_mclk_vddc) ps->performance_levels[i].mclk = max_mclk_vddc; } + if (max_mclk) { + if (ps->performance_levels[i].mclk > max_mclk) + ps->performance_levels[i].mclk = max_mclk; + } + if (max_sclk) { + if (ps->performance_levels[i].sclk > max_sclk) + ps->performance_levels[i].sclk = max_sclk; + } } /* XXX validate the min clocks required for display */ From d8a74e186949e1a2c2f1309212478b0659bf9225 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 15 Jan 2015 10:52:33 -0500 Subject: [PATCH 15/19] drm/radeon: use rv515_ring_start on r5xx This was accidently lost in 76a0df859def. Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_asic.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c index 850de57069be..121aff6a3b41 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -333,6 +333,20 @@ static struct radeon_asic_ring r300_gfx_ring = { .set_wptr = &r100_gfx_set_wptr, }; +static struct radeon_asic_ring rv515_gfx_ring = { + .ib_execute = &r100_ring_ib_execute, + .emit_fence = &r300_fence_ring_emit, + .emit_semaphore = &r100_semaphore_ring_emit, + .cs_parse = &r300_cs_parse, + .ring_start = &rv515_ring_start, + .ring_test = &r100_ring_test, + .ib_test = &r100_ib_test, + .is_lockup = &r100_gpu_is_lockup, + .get_rptr = &r100_gfx_get_rptr, + .get_wptr = &r100_gfx_get_wptr, + .set_wptr = &r100_gfx_set_wptr, +}; + static struct radeon_asic r300_asic = { .init = &r300_init, .fini = &r300_fini, @@ -748,7 +762,7 @@ static struct radeon_asic rv515_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring + [RADEON_RING_TYPE_GFX_INDEX] = &rv515_gfx_ring }, .irq = { .set = &rs600_irq_set, @@ -814,7 +828,7 @@ static struct radeon_asic r520_asic = { .set_page = &rv370_pcie_gart_set_page, }, .ring = { - [RADEON_RING_TYPE_GFX_INDEX] = &r300_gfx_ring + [RADEON_RING_TYPE_GFX_INDEX] = &rv515_gfx_ring }, .irq = { .set = &rs600_irq_set, From 265134a0009e2c5893c0211563daae178066dd06 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Mon, 12 Jan 2015 14:35:16 +0900 Subject: [PATCH 16/19] drm/exynos: fix reset codes for memory mapped hdmi phy This fixes reset codes to support memory mapped hdmi phy as well as hdmi phy dedicated i2c lines. Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_hdmi.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 5765a161abdd..98051e8e855a 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1669,7 +1669,6 @@ static void hdmi_mode_apply(struct hdmi_context *hdata) static void hdmiphy_conf_reset(struct hdmi_context *hdata) { - u8 buffer[2]; u32 reg; clk_disable_unprepare(hdata->res.sclk_hdmi); @@ -1677,11 +1676,8 @@ static void hdmiphy_conf_reset(struct hdmi_context *hdata) clk_prepare_enable(hdata->res.sclk_hdmi); /* operation mode */ - buffer[0] = 0x1f; - buffer[1] = 0x00; - - if (hdata->hdmiphy_port) - i2c_master_send(hdata->hdmiphy_port, buffer, 2); + hdmiphy_reg_writeb(hdata, HDMIPHY_MODE_SET_DONE, + HDMI_PHY_ENABLE_MODE_SET); if (hdata->type == HDMI_TYPE13) reg = HDMI_V13_PHY_RSTOUT; From bd508666e58ecf1712f8a132ab435cf0ef2d3d3c Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Sun, 18 Jan 2015 17:34:15 +0900 Subject: [PATCH 17/19] drm/exynos: remove unnecessary runtime pm operations In booting, we can see a below message. [ 3.241728] exynos-mixer 14450000.mixer: Unbalanced pm_runtime_enable! Already pm_runtime_enable is called by probe function. Remove pm_runtime_enable/disable from mixer_bind and mixer_unbind. Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 820b76234ef4..71f1688b188c 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1262,8 +1262,6 @@ static int mixer_bind(struct device *dev, struct device *manager, void *data) return ret; } - pm_runtime_enable(dev); - return 0; } @@ -1272,8 +1270,6 @@ static void mixer_unbind(struct device *dev, struct device *master, void *data) struct mixer_context *ctx = dev_get_drvdata(dev); mixer_mgr_remove(&ctx->manager); - - pm_runtime_disable(dev); } static const struct component_ops mixer_component_ops = { From 7c4c55845ca40b56c9486064cc5c6d9b884d4d76 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Sun, 18 Jan 2015 17:48:29 +0900 Subject: [PATCH 18/19] drm/exynos: fix warning of vblank reference count Prevented re-enabling the vblank interrupt by drm_vblank_off and drm_vblank_get from mixer_wait_for_vblank returns error after drm_vblank_off. We get below warnings without this error handling because vblank reference count is mismatched by above sequence. setting mode 1920x1080-60Hz@XR24 on connectors 16, crtc 13 [ 19.900793] ------------[ cut here ]------------ [ 19.903959] WARNING: CPU: 0 PID: 0 at drivers/gpu/drm/drm_irq.c:1072 exynos_drm_crtc_finish_pageflip+0xac/0xdc() [ 19.914076] Modules linked in: [ 19.917116] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.19.0-rc4-00040-g3d729789-dirty #46 [ 19.925342] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 19.931437] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [ 19.939131] [] (show_stack) from [] (dump_stack+0x84/0xc4) [ 19.946329] [] (dump_stack) from [] (warn_slowpath_common+0x80/0xb0) [ 19.954382] [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [ 19.963132] [] (warn_slowpath_null) from [] (exynos_drm_crtc_finish_pageflip+0xac/0xdc) [ 19.972841] [] (exynos_drm_crtc_finish_pageflip) from [] (mixer_irq_handler+0xdc/0x104) [ 19.982546] [] (mixer_irq_handler) from [] (handle_irq_event_percpu+0x78/0x134) [ 19.991555] [] (handle_irq_event_percpu) from [] (handle_irq_event+0x3c/0x5c) [ 20.000395] [] (handle_irq_event) from [] (handle_fasteoi_irq+0xe0/0x1ac) [ 20.008885] [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x2c/0x3c) [ 20.017463] [] (generic_handle_irq) from [] (__handle_domain_irq+0x7c/0xec) [ 20.026128] [] (__handle_domain_irq) from [] (gic_handle_irq+0x30/0x68) [ 20.034449] [] (gic_handle_irq) from [] (__irq_svc+0x40/0x74) [ 20.041893] Exception stack(0xc06fff68 to 0xc06fffb0) [ 20.046923] ff60: 00000000 00000000 000052f6 c001b460 c06fe000 c07064e8 [ 20.055070] ff80: c04d743c c07392a2 c0739440 c06da340 ef7fca80 00000000 01000000 c06fffb0 [ 20.063212] ffa0: c000f24c c000f250 60000013 ffffffff [ 20.068245] [] (__irq_svc) from [] (arch_cpu_idle+0x38/0x3c) [ 20.075611] [] (arch_cpu_idle) from [] (cpu_startup_entry+0x108/0x16c) [ 20.083846] [] (cpu_startup_entry) from [] (start_kernel+0x3a0/0x3ac) [ 20.091980] ---[ end trace 2c76ee0500489d1b ]--- Signed-off-by: Joonyoung Shim Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 71f1688b188c..064ed6597def 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -1026,6 +1026,7 @@ static void mixer_win_disable(struct exynos_drm_manager *mgr, int zpos) static void mixer_wait_for_vblank(struct exynos_drm_manager *mgr) { struct mixer_context *mixer_ctx = mgr_to_mixer(mgr); + int err; mutex_lock(&mixer_ctx->mixer_mutex); if (!mixer_ctx->powered) { @@ -1034,7 +1035,11 @@ static void mixer_wait_for_vblank(struct exynos_drm_manager *mgr) } mutex_unlock(&mixer_ctx->mixer_mutex); - drm_vblank_get(mgr->crtc->dev, mixer_ctx->pipe); + err = drm_vblank_get(mgr->crtc->dev, mixer_ctx->pipe); + if (err < 0) { + DRM_DEBUG_KMS("failed to acquire vblank counter\n"); + return; + } atomic_set(&mixer_ctx->wait_vsync_event, 1); From 9aa609e1a3846d3c17087b62579867bab0f488de Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Mon, 15 Dec 2014 11:28:26 -0800 Subject: [PATCH 19/19] drm: fb helper should avoid sleeping in panic context There are still some places in the fb helper that need to avoid sleeping in panic context. Here's an example: [ 65.615496] bad: scheduling from the idle thread! [ 65.620747] CPU: 92 PID: 0 Comm: swapper/92 Tainted: G M E 3.18.0-rc4-7-default+ #20 [ 65.630364] Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRHSXSD1.86B.0056.R01.1409242327 09/24/2014 [ 65.641923] ffff88087f693d80 ffff88087f689878 ffffffff81566db9 0000000000000000 [ 65.650226] ffff88087f693d80 ffff88087f689898 ffffffff810871ff ffff88046eb3e0d0 [ 65.658527] ffff88087f693d80 ffff88087f6898c8 ffffffff8107c1fa 000000017f6898b8 [ 65.666830] Call Trace: [ 65.669557] <#MC> [] dump_stack+0x46/0x58 [ 65.675994] [] dequeue_task_idle+0x2f/0x40 [ 65.682412] [] dequeue_task+0x5a/0x80 [ 65.688345] [] deactivate_task+0x23/0x30 [ 65.694569] [] __schedule+0x580/0x7f0 [ 65.700502] [] schedule_preempt_disabled+0x29/0x70 [ 65.707696] [] __ww_mutex_lock_slowpath+0xb8/0x162 [ 65.714891] [] __ww_mutex_lock+0x53/0x85 [ 65.721125] [] drm_modeset_lock+0x3d/0x110 [drm] [ 65.728132] [] __drm_modeset_lock_all+0x8a/0x120 [drm] [ 65.735721] [] drm_modeset_lock_all+0x10/0x30 [drm] [ 65.743015] [] drm_fb_helper_pan_display+0x2f/0xf0 [drm_kms_helper] [ 65.751857] [] fb_pan_display+0xd1/0x1a0 [ 65.758081] [] bit_update_start+0x20/0x50 [ 65.764400] [] fbcon_switch+0x3a2/0x550 [ 65.770528] [] redraw_screen+0x189/0x240 [ 65.776750] [] fbcon_blank+0x20a/0x2d0 [ 65.782778] [] ? erst_writer+0x209/0x330 [ 65.789002] [] ? internal_add_timer+0x63/0x80 [ 65.795710] [] ? mod_timer+0x127/0x1e0 [ 65.801740] [] do_unblank_screen+0xa8/0x1d0 [ 65.808255] [] unblank_screen+0x10/0x20 [ 65.814381] [] bust_spinlocks+0x19/0x40 [ 65.820508] [] panic+0x106/0x1f5 [ 65.825955] [] mce_panic+0x2ac/0x2e0 [ 65.831789] [] ? delay_tsc+0x4a/0x80 [ 65.837625] [] do_machine_check+0xbaf/0xbf0 [ 65.844138] [] ? intel_idle+0xc7/0x150 [ 65.850166] [] machine_check+0x1f/0x30 [ 65.856195] [] ? intel_idle+0xc7/0x150 [ 65.862222] <> [] cpuidle_enter_state+0x55/0x170 [ 65.869823] [] cpuidle_enter+0x17/0x20 [ 65.875852] [] cpu_startup_entry+0x2d8/0x370 [ 65.882467] [] start_secondary+0x159/0x180 There's __drm_modeset_lock_all() which Daniel Vetter introduced for this purpose. We can leverage that without reinventing anything. This patch works with the latest kernel. Reviewed-by: Rob Clark Tested-by: Tony Luck Signed-off-by: Rui Wang Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_fb_helper.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 52ce26d6b4fb..cf775a4449c1 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -741,7 +741,9 @@ int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info) int i, j, rc = 0; int start; - drm_modeset_lock_all(dev); + if (__drm_modeset_lock_all(dev, !!oops_in_progress)) { + return -EBUSY; + } if (!drm_fb_helper_is_bound(fb_helper)) { drm_modeset_unlock_all(dev); return -EBUSY; @@ -915,7 +917,9 @@ int drm_fb_helper_pan_display(struct fb_var_screeninfo *var, int ret = 0; int i; - drm_modeset_lock_all(dev); + if (__drm_modeset_lock_all(dev, !!oops_in_progress)) { + return -EBUSY; + } if (!drm_fb_helper_is_bound(fb_helper)) { drm_modeset_unlock_all(dev); return -EBUSY;