Skip to content

Commit

Permalink
Merge branch 'drm-next-4.20' of git://people.freedesktop.org/~agd5f/l…
Browse files Browse the repository at this point in the history
…inux into drm-next

More new features and fixes for 4.20:
- Add dynamic powergating support for VCN on picasso
- Scheduler cleanup
- Vega20 support for KFD
- DC cleanups and bug fixes

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180927184348.2696-1-alexander.deucher@amd.com
  • Loading branch information
Dave Airlie committed Sep 27, 2018
2 parents 2de0b0a + 6a96243 commit 87c2ee7
Show file tree
Hide file tree
Showing 118 changed files with 1,856 additions and 717 deletions.
24 changes: 17 additions & 7 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ extern int amdgpu_cik_support;
#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
#define AMDGPU_FENCE_JIFFIES_TIMEOUT (HZ / 2)
/* AMDGPU_IB_POOL_SIZE must be a power of 2 */
#define AMDGPU_IB_POOL_SIZE 16
#define AMDGPU_DEBUGFS_MAX_COMPONENTS 32
Expand Down Expand Up @@ -408,16 +409,25 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_GFX_RING0 = 0x8b,

/*
* Other graphics doorbells can be allocated here: from 0x8c to 0xef
* Other graphics doorbells can be allocated here: from 0x8c to 0xdf
* Graphics voltage island aperture 1
* default non-graphics QWORD index is 0xF0 - 0xFF inclusive
* default non-graphics QWORD index is 0xe0 - 0xFF inclusive
*/

/* sDMA engines */
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,
/* sDMA engines reserved from 0xe0 -oxef */
AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xE0,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xE1,
AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xE8,
AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xE9,

/* For vega10 sriov, the sdma doorbell must be fixed as follow
* to keep the same setting with host driver, or it will
* happen conflicts
*/
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 = 0xF0,
AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1,
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 = 0xF2,
AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3,

/* Interrupt handler */
AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */
Expand Down
50 changes: 35 additions & 15 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
case CHIP_VEGA10:
case CHIP_VEGA20:
case CHIP_RAVEN:
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
break;
Expand Down Expand Up @@ -123,7 +124,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,

void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i;
int i, n;
int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
Expand Down Expand Up @@ -162,28 +163,47 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
if (adev->asic_type >= CHIP_VEGA10) {

if (adev->asic_type < CHIP_VEGA10) {
kgd2kfd->device_init(adev->kfd, &gpu_resources);
return;
}

n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;

for (i = 0; i < n; i += 2) {
/* On SOC15 the BIF is involved in routing
* doorbells using the low 12 bits of the
* address. Communicate the assignments to
* KFD. KFD uses two doorbell pages per
* process in case of 64-bit doorbells so we
* can use each doorbell assignment twice.
*/
gpu_resources.sdma_doorbell[0][0] =
AMDGPU_DOORBELL64_sDMA_ENGINE0;
gpu_resources.sdma_doorbell[0][1] =
AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
gpu_resources.sdma_doorbell[1][0] =
AMDGPU_DOORBELL64_sDMA_ENGINE1;
gpu_resources.sdma_doorbell[1][1] =
AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
/* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
* SDMA, IH and VCN. So don't use them for the CP.
*/
gpu_resources.reserved_doorbell_mask = 0x1f0;
gpu_resources.reserved_doorbell_val = 0x0f0;
if (adev->asic_type == CHIP_VEGA10) {
gpu_resources.sdma_doorbell[0][i] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
gpu_resources.sdma_doorbell[0][i+1] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
gpu_resources.sdma_doorbell[1][i] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
gpu_resources.sdma_doorbell[1][i+1] =
AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
} else {
gpu_resources.sdma_doorbell[0][i] =
AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
gpu_resources.sdma_doorbell[0][i+1] =
AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
gpu_resources.sdma_doorbell[1][i] =
AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
gpu_resources.sdma_doorbell[1][i+1] =
AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
}
}
/* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
* SDMA, IH and VCN. So don't use them for the CP.
*/
gpu_resources.reserved_doorbell_mask = 0x1e0;
gpu_resources.reserved_doorbell_val = 0x0e0;

kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
void *vm, struct kgd_mem **mem,
Expand Down
7 changes: 4 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
Expand Down Expand Up @@ -874,15 +874,16 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
}

static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base)
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);

if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID\n");
return;
}
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
lower_32_bits(page_table_base));
}

static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
Expand Down
12 changes: 6 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ enum hqd_dequeue_request_type {
RESET_WAVES
};

struct vi_sdma_mqd;

/*
* Register access functions
*/
Expand Down Expand Up @@ -100,7 +98,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);

Expand Down Expand Up @@ -282,7 +280,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)

lock_srbm(kgd, mec, pipe, 0, 0);

WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);

unlock_srbm(kgd);

Expand Down Expand Up @@ -834,15 +833,16 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
}

static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base)
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);

if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID\n");
return;
}
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8,
lower_32_bits(page_table_base));
}

static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
Expand Down
7 changes: 3 additions & 4 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
uint64_t page_table_base);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
Expand Down Expand Up @@ -1013,11 +1013,10 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
}

static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base)
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
AMDGPU_PTE_VALID;
uint64_t base = page_table_base | AMDGPU_PTE_VALID;

if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
Expand Down
8 changes: 6 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1131,11 +1131,15 @@ void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
amdgpu_vm_release_compute(adev, avm);
}

uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
{
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
struct amdgpu_bo *pd = avm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);

return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
if (adev->asic_type < CHIP_VEGA10)
return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
return avm->pd_phys_addr;
}

int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
Expand Down
66 changes: 64 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,19 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
return 0;
}

/**
* amdgpu_fence_schedule_fallback - schedule fallback check
*
* @ring: pointer to struct amdgpu_ring
*
* Start a timer as fallback to our interrupts.
*/
static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
{
mod_timer(&ring->fence_drv.fallback_timer,
jiffies + AMDGPU_FENCE_JIFFIES_TIMEOUT);
}

/**
* amdgpu_fence_process - check for fence activity
*
Expand All @@ -203,8 +216,10 @@ int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
* Checks the current fence value and calculates the last
* signalled fence value. Wakes the fence queue if the
* sequence number has increased.
*
* Returns true if fence was processed
*/
void amdgpu_fence_process(struct amdgpu_ring *ring)
bool amdgpu_fence_process(struct amdgpu_ring *ring)
{
struct amdgpu_fence_driver *drv = &ring->fence_drv;
uint32_t seq, last_seq;
Expand All @@ -216,8 +231,12 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)

} while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);

if (del_timer(&ring->fence_drv.fallback_timer) &&
seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);

if (unlikely(seq == last_seq))
return;
return false;

last_seq &= drv->num_fences_mask;
seq &= drv->num_fences_mask;
Expand All @@ -244,6 +263,24 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)

dma_fence_put(fence);
} while (last_seq != seq);

return true;
}

/**
* amdgpu_fence_fallback - fallback for hardware interrupts
*
* @work: delayed work item
*
* Checks for fence activity.
*/
static void amdgpu_fence_fallback(struct timer_list *t)
{
struct amdgpu_ring *ring = from_timer(ring, t,
fence_drv.fallback_timer);

if (amdgpu_fence_process(ring))
DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name);
}

/**
Expand Down Expand Up @@ -393,6 +430,8 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
atomic_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false;

timer_setup(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback, 0);

ring->fence_drv.num_fences_mask = num_hw_submission * 2 - 1;
spin_lock_init(&ring->fence_drv.lock);
ring->fence_drv.fences = kcalloc(num_hw_submission * 2, sizeof(void *),
Expand Down Expand Up @@ -468,6 +507,7 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
drm_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
dma_fence_put(ring->fence_drv.fences[j]);
kfree(ring->fence_drv.fences);
Expand Down Expand Up @@ -560,6 +600,27 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f)
return (const char *)fence->ring->name;
}

/**
* amdgpu_fence_enable_signaling - enable signalling on fence
* @fence: fence
*
* This function is called with fence_queue lock held, and adds a callback
* to fence_queue that checks if this fence is signaled, and if so it
* signals the fence and removes itself.
*/
static bool amdgpu_fence_enable_signaling(struct dma_fence *f)
{
struct amdgpu_fence *fence = to_amdgpu_fence(f);
struct amdgpu_ring *ring = fence->ring;

if (!timer_pending(&ring->fence_drv.fallback_timer))
amdgpu_fence_schedule_fallback(ring);

DMA_FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);

return true;
}

/**
* amdgpu_fence_free - free up the fence memory
*
Expand Down Expand Up @@ -590,6 +651,7 @@ static void amdgpu_fence_release(struct dma_fence *f)
static const struct dma_fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
.release = amdgpu_fence_release,
};

Expand Down
Loading

0 comments on commit 87c2ee7

Please sign in to comment.