diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml
new file mode 100644
index 0000000000000..78524aaab9d4a
--- /dev/null
+++ b/drivers/gpu/drm/msm/registers/adreno/a6xx.xml
@@ -0,0 +1,4970 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Allow early z-test and early-lrz (if applicable)
+
+ Disable early z-test and early-lrz test (if applicable)
+
+
+ A special mode that allows early-lrz test but disables
+ early-z test. Which might sound a bit funny, since
+ lrz-test happens before z-test. But as long as a couple
+ conditions are maintained this allows using lrz-test in
+ cases where fragment shader has kill/discard:
+
+ 1) Disable lrz-write in cases where it is uncertain during
+ binning pass that a fragment will pass. Ie. if frag
+ shader has-kill, writes-z, or alpha/stencil test is
+ enabled. (For correctness, lrz-write must be disabled
+ when blend is enabled.) This is analogous to how a
+ z-prepass works.
+
+ 2) Disable lrz-write and test if a depth-test direction
+ reversal is detected. Due to condition (1), the contents
+ of the lrz buffer are a conservative estimation of the
+ depth buffer during the draw pass. Meaning that geometry
+ that we know for certain will not be visible will not pass
+ lrz-test. But geometry which may be (or contributes to
+ blend) will pass the lrz-test.
+
+ This allows us to keep early-lrz-test in cases where the frag
+ shader does not write-z (ie. we know the z-value before FS)
+ and does not have side-effects (image/ssbo writes, etc), but
+ does have kill/discard. Which turns out to be a common
+ enough case that it is useful to keep early-lrz test against
+ the conservative lrz buffer to discard fragments that we
+ know will definitely not be visible.
+
+
+ Not a real hw value, used internally by mesa
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ b0..7 identifies where MRB data starts (and RB data ends)
+ b8.15 identifies where VSD data starts (and MRB data ends)
+ b16..23 identifies where IB1 data starts (and RB data ends)
+ b24..31 identifies where IB2 data starts (and IB1 data ends)
+
+
+
+
+
+
+
+
+ low bits identify where CP_SET_DRAW_STATE stateobj
+ processing starts (and IB2 data ends). I'm guessing
+ b8 is part of this since (from downstream kgsl):
+
+ /* ROQ sizes are twice as big on a640/a680 than on a630 */
+ if (adreno_is_a640(adreno_dev) || adreno_is_a680(adreno_dev)) {
+ kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_2, 0x02000140);
+ kgsl_regwrite(device, A6XX_CP_ROQ_THRESHOLDS_1, 0x8040362C);
+ } ...
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ number of remaining dwords incl current dword being consumed?
+
+
+
+ number of remaining dwords incl current dword being consumed?
+
+
+
+ number of remaining dwords incl current dword being consumed?
+
+
+
+ number of remaining dwords incl current dword being consumed?
+
+
+
+ number of dwords that have already been read but haven't been consumed by $addr
+
+
+
+ number of remaining dwords incl current dword being consumed?
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Set to true when binning, isn't changed afterwards
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Configures the mapping between VSC_PIPE buffer and
+ bin, X/Y specify the bin index in the horiz/vert
+ direction (0,0 is upper left, 0,1 is leftmost bin
+ on second row, and so on). W/H specify the number
+ of bins assigned to this VSC_PIPE in the horiz/vert
+ dimension.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Seems to be a bitmap of which tiles mapped to the VSC
+ pipe contain geometry.
+
+ I suppose we can connect a maximum of 32 tiles to a
+ single VSC pipe.
+
+
+
+
+
+
+ Has the size of data written to corresponding VSC_PRIM_STRM
+ buffer.
+
+
+
+
+
+
+ Has the size of data written to corresponding VSC pipe, ie.
+ same thing that is written out to VSC_DRAW_STRM_SIZE_ADDRESS_LO/HI
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In addition to FLUSH_PER_OVERLAP, guarantee that UCHE
+ and CCU don't get out of sync when fetching the previous
+ value for the current pixel. With NO_FLUSH, there's the
+ possibility that the flags for the current pixel are
+ flushed before the data or vice-versa, leading to
+ texture fetches via UCHE getting out of sync values.
+ This mode should eliminate that. It's used in bypass
+ mode for coherent blending
+ (GL_KHR_blend_equation_advanced_coherent) as well as
+ non-coherent blending.
+
+
+
+ Invalidate UCHE and wait for any pending work to finish
+ if there was possibly an overlapping primitive prior to
+ the current one. This is similar to a combination of
+ GRAS_SC_CONTROL::INJECT_L2_INVALIDATE_EVENT and
+ WAIT_RB_IDLE_ALL_TRI on a3xx. It's used in GMEM mode for
+ coherent blending
+ (GL_KHR_blend_equation_advanced_coherent).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ LRZ write also disabled for blend/etc.
+
+ update MAX instead of MIN value, ie. GL_GREATER/GL_GEQUAL
+
+
+ Clears the LRZ block being touched to:
+ - 0.0 if GREATER
+ - 1.0 if LESS
+
+
+
+
+
+
+
+ If DISABLE_ON_WRONG_DIR enabled - write new LRZ direction into
+ buffer, in case of mismatched direction writes 0 (disables LRZ).
+
+
+
+ Disable LRZ based on previous direction and the current one.
+ If DIR_WRITE is not enabled - there is no write to direction buffer.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Z_READ_ENABLE bit is set for zfunc other than GL_ALWAYS or GL_NEVER
+ also set when Z_BOUNDS_ENABLE is set
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ For clearing depth/stencil
+ 1 - depth
+ 2 - stencil
+ 3 - depth+stencil
+ For clearing color buffer:
+ then probably a component mask, I always see 0xf
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Swaps TESS_CW_TRIS/TESS_CCW_TRIS, and also makes
+ triangle fans and triangle strips use the D3D
+ order instead of the OpenGL order.
+
+
+
+
+
+
+
+ geometry shader
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Multi-position output lets the last geometry
+ stage shader write multiple copies of
+ gl_Position. If disabled then the VS is run once
+ for each view, and ViewID is passed as a
+ register to the VS.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Packed array of a6xx_varying_interp_mode
+
+
+
+ Packed array of a6xx_varying_ps_repl_mode
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ num of varyings plus four for gl_Position (plus one if gl_PointSize)
+ plus # of transform-feedback (streamout) varyings if using the
+ hw streamout (rather than stg instructions in shader)
+
+
+
+
+
+
+ The number of extra copies of POSITION, i.e.
+ number of views minus one when multi-position
+ output is enabled, otherwise 0.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This VPC location will be overwritten with
+ ViewID when multiview is enabled. It's used when
+ fragment shaders read ViewID. It's only
+ strictly required for multi-position output,
+ where the same VS invocation is used for all the
+ views at once, but it can be used when multi-pos
+ output is disabled too, to avoid having to pass
+ ViewID through the VS.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ num of varyings plus four for gl_Position (plus one if gl_PointSize)
+ plus # of transform-feedback (streamout) varyings if using the
+ hw streamout (rather than stg instructions in shader)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ size in vec4s of per-primitive storage for gs. TODO: not actually in VPC
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Possibly not really "initiating" the draw but the layout is similar
+ to VGT_DRAW_INITIATOR on older gens
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Written by CP_SET_VISIBILITY_OVERRIDE handler
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is the ID of the current patch within the
+ subdraw, used to calculate the offset of the
+ patch within the HS->DS buffers. When a draw is
+ split into multiple subdraws then this differs
+ from gl_PrimitiveID on the second, third, etc.
+ subdraws.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The size of memory that ldp/stp can address.
+
+
+
+ Seems to be the same as a3xx. The maximum stack
+ size in units of 4 calls, so a call depth of 7
+ would result in a value of 2.
+ TODO: What's the actual size per call, i.e. the
+ size of the PC? a3xx docs say it's 16 bits
+ there, but the length register now takes 28 bits
+ so it's probably been bumped to 32 bits.
+
+
+
+
+
+
+
+
+ There are four indices used to compute the
+ private memory location for an access:
+
+ - stp/ldp offset
+ - fiber id
+ - wavefront id (a swizzled version of what "getwid" returns)
+ - SP ID (the same as what "getspid" returns)
+
+ The stride for the SP ID is always set by
+ TOTALPVTMEMSIZE. In the per-wave layout, the
+ indices are used in this order:
+
+ - offset % 4 (offset within dword)
+ - fiber id
+ - offset / 4
+ - wavefront id
+ - SP ID
+
+ and the stride for the wavefront ID is
+ MEMSIZEPERITEM, multiplied by 128 (fibers per
+ wavefront). In the per-fiber layout, the indices
+ are used in this order:
+
+ - offset
+ - fiber id % 4
+ - wavefront id
+ - fiber id / 4
+ - SP ID
+
+ and the stride for the fiber id/wavefront id
+ combo is MEMSIZEPERITEM.
+
+ Note: Accesses of more than 1 dword do not work
+ with per-fiber layout. The blob will fall back
+ to per-wave instead.
+
+
+
+
+
+
+ This seems to be be the equivalent of HWSTACKOFFSET in
+ a3xx. The ldp/stp offset formula above isn't affected by
+ HWSTACKSIZEPERTHREAD at all, so the HW return address
+ stack seems to be after all the normal per-SP private
+ memory.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Normally the size of the output of the last stage in
+ dwords. It should be programmed as follows:
+
+ size less than 63 - size
+ size of 63 (?) or 64 - 63
+ size greater than 64 - 64
+
+ What to program when the size is 61-63 is a guess, but
+ both the blob and ir3 align the size to 4 dword's so it
+ doesn't matter in practice.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Enable ALL helper invocations in a quad. Necessary for
+ fine derivatives and quad subgroup ops.
+
+
+
+
+
+
+
+ Enable helper invocations. Enables 3 out of 4 fragments,
+ because the coarse derivatives only use half of the quad
+ and so one pixel's value is always unused.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ per MRT
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Similar to "(eq)" flag but disables helper invocations
+ after the texture prefetch.
+
+
+
+ Bypass writing to regs and overwrite output with color from
+ CONSTSLOTID const regs.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Results in color being zero
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If 0 - all 32k of shared storage is enabled, otherwise
+ (SHARED_SIZE + 1) * 1k is enabled.
+ The ldl/stl offset seems to be rewritten to 0 when it is beyond
+ this limit. This is different from ldlw/stlw, which wraps at
+ 64k (and has 36k of storage on A640 - reads between 36k-64k
+ always return 0)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This can alternatively be interpreted as a pitch shift, ie, the
+ descriptor size is 2 << N dwords
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Specify for which components the output color should be read
+ from alias, e.g. for:
+
+ alias.1.b32.0 r3.x, c8.x
+ alias.1.b32.0 r2.x, c4.x
+ alias.1.b32.0 r1.x, c4.x
+ alias.1.b32.0 r0.x, c0.x
+
+ the SP_PS_ALIASED_COMPONENTS would be 0x00001111
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This register clears pending loads queued up by
+ CP_LOAD_STATE6. Each bit resets a particular kind(s) of
+ CP_LOAD_STATE6.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This register clears pending loads queued up by
+ CP_LOAD_STATE6. Each bit resets a particular kind(s) of
+ CP_LOAD_STATE6.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shared constants are intended to be used for Vulkan push
+ constants. When enabled, 8 vec4's are reserved in the FS
+ const pool and 16 in the geometry const pool although
+ only 8 are actually used (why?) and they are mapped to
+ c504-c511 in each stage. Both VS and FS shared consts
+ are written using ST6_CONSTANTS/SB6_IBO, so that both
+ the geometry and FS shared consts can be written at once
+ by using CP_LOAD_STATE6 rather than
+ CP_LOAD_STATE6_FRAG/CP_LOAD_STATE6_GEOM. In addition
+ DST_OFF and NUM_UNIT are in units of dwords instead of
+ vec4's.
+
+ There is also a separate shared constant pool for CS,
+ which is loaded through CP_LOAD_STATE6_FRAG with
+ ST6_UBO/ST6_IBO. However the only real difference for CS
+ is the dword units.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Texture sampler dwords
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ clamp result to [0, 1] if the format is unorm or
+ [-1, 1] if the format is snorm, *after*
+ filtering. Has no effect for other formats.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Texture constant dwords
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ probably for D3D structured UAVs, normally set to 1
+
+
+
+
+
+ Pitch in bytes (so actually stride)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml
new file mode 100644
index 0000000000000..6531749d30f4e
--- /dev/null
+++ b/drivers/gpu/drm/msm/registers/adreno/a6xx_gmu.xml
@@ -0,0 +1,228 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+