Skip to content

Commit

Permalink
drm/msm/a6xx: Add support for using system cache(LLC)
Browse files Browse the repository at this point in the history
The last level system cache can be partitioned to 32 different
slices of which GPU has two slices preallocated. One slice is
used for caching GPU buffers and the other slice is used for
caching the GPU SMMU pagetables. This patch talks to the core system
cache driver to acquire the slice handles, configures the SCIDs
for those slices, and deactivates and activates the slices upon
GPU power collapse and restore, respectively.

Some support from the IOMMU driver is also needed to make use
of the system cache, namely to set the right TCR attributes. The GPU
then has the ability to override a few cacheability parameters, which
it uses to change write-allocate to write-no-allocate, as the
GPU hardware does not benefit much from write-allocate.

DOMAIN_ATTR_IO_PGTABLE_CFG is another domain-level attribute used
by the IOMMU driver for pagetable configuration. It is used here
to set a quirk up front so that the hardware pagetables get the right
attributes to be cached in the system cache.

Signed-off-by: Sharat Masetty <smasetty@codeaurora.org>
[saiprakash.ranjan: fix to set attr before device attach to iommu and rebase]
Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@chromium.org>
  • Loading branch information
Sharat Masetty authored and Rob Clark committed Nov 29, 2020
1 parent 40a72b0 commit 474dadb
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 0 deletions.
83 changes: 83 additions & 0 deletions drivers/gpu/drm/msm/adreno/a6xx_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13

Expand Down Expand Up @@ -1020,6 +1022,79 @@ static irqreturn_t a6xx_irq(struct msm_gpu *gpu)
return IRQ_HANDLED;
}

/*
 * Read-modify-write one register in the LLC MMIO region (llc_mmio).
 *
 * @a6xx_gpu: GPU instance whose llc_mmio mapping is used
 * @reg:      register index; scaled by 4 (reg << 2) to form the offset
 *            that is added to llc_mmio
 * @mask:     passed through to msm_rmw() unchanged
 * @or:       passed through to msm_rmw() unchanged
 */
static void a6xx_llc_rmw(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 mask, u32 or)
{
	/* A void function must not return an expression, so just call. */
	msm_rmw(a6xx_gpu->llc_mmio + (reg << 2), mask, or);
}

/*
 * Write @value to one register in the LLC MMIO region (llc_mmio).
 *
 * @a6xx_gpu: GPU instance whose llc_mmio mapping is used
 * @reg:      register index; scaled by 4 (reg << 2) to form the offset
 *            that is added to llc_mmio
 * @value:    value handed to msm_writel()
 */
static void a6xx_llc_write(struct a6xx_gpu *a6xx_gpu, u32 reg, u32 value)
{
	/* A void function must not return an expression, so just call. */
	msm_writel(value, a6xx_gpu->llc_mmio + (reg << 2));
}

/*
 * Deactivate both system cache slices held by the GPU: first the slice
 * stored in llc_slice, then the one stored in htw_llc_slice. The results
 * of the deactivate calls are not checked.
 */
static void a6xx_llc_deactivate(struct a6xx_gpu *a6xx_gpu)
{
	void *gpu_slice = a6xx_gpu->llc_slice;
	void *htw_slice = a6xx_gpu->htw_llc_slice;

	llcc_slice_deactivate(gpu_slice);
	llcc_slice_deactivate(htw_slice);
}

/*
 * Activate the GPU's system cache (LLC) slices and program their slice IDs
 * into the CX_MISC system cache control registers.
 *
 * Does nothing when llc_mmio holds an error pointer. A slice whose
 * activation fails contributes nothing to the CNTL_1 value, and neither
 * register is touched when that value ends up zero.
 */
static void a6xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
{
	u32 regval = 0;
	u32 scid;
	unsigned int shift;

	if (IS_ERR(a6xx_gpu->llc_mmio))
		return;

	if (llcc_slice_activate(a6xx_gpu->llc_slice) == 0) {
		scid = llcc_get_slice_id(a6xx_gpu->llc_slice) & 0x1f;

		/* Replicate the 5-bit SCID into the five fields at bits [24:0] */
		for (shift = 0; shift <= 20; shift += 5)
			regval |= scid << shift;
	}

	if (llcc_slice_activate(a6xx_gpu->htw_llc_slice) == 0) {
		scid = llcc_get_slice_id(a6xx_gpu->htw_llc_slice) & 0x1f;

		/* The pagetable slice's SCID goes into bits [29:25] */
		regval |= FIELD_PREP(GENMASK(29, 25), scid);
	}

	/* Nothing was activated (or every SCID is zero): leave the HW alone */
	if (!regval)
		return;

	/*
	 * Program the slice IDs for the various GPU blocks and GPU MMU
	 * pagetables
	 */
	a6xx_llc_write(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_1, regval);

	/*
	 * Program cacheability overrides to not allocate cache lines on
	 * a write miss
	 */
	a6xx_llc_rmw(a6xx_gpu, REG_A6XX_CX_MISC_SYSTEM_CACHE_CNTL_0, 0xF, 0x03);
}

/*
 * Hand both slice handles (llc_slice, then htw_llc_slice) back via
 * llcc_slice_putd(). The handles stored in @a6xx_gpu are not cleared.
 */
static void a6xx_llc_slices_destroy(struct a6xx_gpu *a6xx_gpu)
{
	void *gpu_slice = a6xx_gpu->llc_slice;
	void *htw_slice = a6xx_gpu->htw_llc_slice;

	llcc_slice_putd(gpu_slice);
	llcc_slice_putd(htw_slice);
}

/*
 * Map the "cx_mem" register region and acquire the GPU's two system cache
 * slices (LLCC_GPU and LLCC_GPUHTW).
 *
 * Failure is not reported to the caller. Instead it is recorded in the
 * a6xx_gpu fields:
 *  - llc_mmio holds an error pointer when the mapping failed or when
 *    neither slice could be acquired; a6xx_llc_activate() checks this
 *    before touching the hardware.
 *  - llc_slice / htw_llc_slice hold an error pointer for every slice that
 *    was not acquired, so IS_ERR() on a handle is a reliable test.
 */
static void a6xx_llc_slices_init(struct platform_device *pdev,
		struct a6xx_gpu *a6xx_gpu)
{
	a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
	if (IS_ERR(a6xx_gpu->llc_mmio)) {
		/*
		 * Never leave the slice handles unset: other code tests them
		 * with IS_ERR(), and an unset handle would pass that test as
		 * if the slice had been acquired.
		 */
		a6xx_gpu->llc_slice = ERR_PTR(-EINVAL);
		a6xx_gpu->htw_llc_slice = ERR_PTR(-EINVAL);
		return;
	}

	a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
	a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);

	/* With no usable slice at all, mark the whole LLC path as disabled */
	if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice))
		a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
}

static int a6xx_pm_resume(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
Expand All @@ -1036,6 +1111,8 @@ static int a6xx_pm_resume(struct msm_gpu *gpu)

msm_gpu_resume_devfreq(gpu);

a6xx_llc_activate(a6xx_gpu);

return 0;
}

Expand All @@ -1047,6 +1124,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu)

trace_msm_gpu_suspend(0);

a6xx_llc_deactivate(a6xx_gpu);

devfreq_suspend_device(gpu->devfreq.devfreq);

ret = a6xx_gmu_stop(a6xx_gpu);
Expand Down Expand Up @@ -1098,6 +1177,8 @@ static void a6xx_destroy(struct msm_gpu *gpu)
drm_gem_object_put(a6xx_gpu->shadow_bo);
}

a6xx_llc_slices_destroy(a6xx_gpu);

a6xx_gmu_remove(a6xx_gpu);

adreno_gpu_cleanup(adreno_gpu);
Expand Down Expand Up @@ -1216,6 +1297,8 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
if (info && info->revn == 650)
adreno_gpu->base.hw_apriv = true;

a6xx_llc_slices_init(pdev, a6xx_gpu);

ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
if (ret) {
a6xx_destroy(&(a6xx_gpu->base.base));
Expand Down
4 changes: 4 additions & 0 deletions drivers/gpu/drm/msm/adreno/a6xx_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ struct a6xx_gpu {
uint32_t *shadow;

bool has_whereami;

void __iomem *llc_mmio;
void *llc_slice;
void *htw_llc_slice;
};

#define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
Expand Down
17 changes: 17 additions & 0 deletions drivers/gpu/drm/msm/adreno/adreno_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <linux/soc/qcom/mdt_loader.h>
#include <soc/qcom/ocmem.h>
#include "adreno_gpu.h"
#include "a6xx_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

Expand Down Expand Up @@ -189,6 +190,9 @@ struct msm_gem_address_space *
adreno_iommu_create_address_space(struct msm_gpu *gpu,
struct platform_device *pdev)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
struct io_pgtable_domain_attr pgtbl_cfg;
struct iommu_domain *iommu;
struct msm_mmu *mmu;
struct msm_gem_address_space *aspace;
Expand All @@ -198,7 +202,20 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
if (!iommu)
return NULL;

/*
* This allows GPU to set the bus attributes required to use system
* cache on behalf of the iommu page table walker.
*/
if (!IS_ERR(a6xx_gpu->htw_llc_slice)) {
pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg);
}

mmu = msm_iommu_new(&pdev->dev, iommu);
if (IS_ERR(mmu)) {
iommu_domain_free(iommu);
return ERR_CAST(mmu);
}

/*
* Use the aperture start or SZ_16M, whichever is greater. This will
Expand Down

0 comments on commit 474dadb

Please sign in to comment.