Skip to content

Commit

Permalink
drm/radeon/kms: optimize CS state checking for r100->r500
Browse files Browse the repository at this point in the history
The colorbuffer, zbuffer, and texture states are checked only once when
they get changed. This improves performance in the apps which emit
lots of draw packets and few state changes.

This drops performance in glxgears by a 1% or so, but glxgears is not
a benchmark we care about.
The time spent in the kernel when running Torcs dropped from 33% to 23%
and the frame rate is higher, which is a good thing.

r600 might need something like this as well.

Signed-off-by: Marek Olšák <maraeo@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
  • Loading branch information
Marek Olšák authored and Dave Airlie committed Feb 13, 2011
1 parent 01e2f53 commit 40b4a75
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 12 deletions.
40 changes: 36 additions & 4 deletions drivers/gpu/drm/radeon/r100.c
Original file line number Diff line number Diff line change
Expand Up @@ -1427,6 +1427,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
}
track->zb.robj = reloc->robj;
track->zb.offset = idx_value;
track->zb_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
break;
case RADEON_RB3D_COLOROFFSET:
Expand All @@ -1439,6 +1440,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
}
track->cb[0].robj = reloc->robj;
track->cb[0].offset = idx_value;
track->cb_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
break;
case RADEON_PP_TXOFFSET_0:
Expand All @@ -1454,6 +1456,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
}
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
track->textures[i].robj = reloc->robj;
track->tex_dirty = true;
break;
case RADEON_PP_CUBIC_OFFSET_T0_0:
case RADEON_PP_CUBIC_OFFSET_T0_1:
Expand All @@ -1471,6 +1474,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
track->textures[0].cube_info[i].offset = idx_value;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
track->textures[0].cube_info[i].robj = reloc->robj;
track->tex_dirty = true;
break;
case RADEON_PP_CUBIC_OFFSET_T1_0:
case RADEON_PP_CUBIC_OFFSET_T1_1:
Expand All @@ -1488,6 +1492,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
track->textures[1].cube_info[i].offset = idx_value;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
track->textures[1].cube_info[i].robj = reloc->robj;
track->tex_dirty = true;
break;
case RADEON_PP_CUBIC_OFFSET_T2_0:
case RADEON_PP_CUBIC_OFFSET_T2_1:
Expand All @@ -1505,9 +1510,12 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
track->textures[2].cube_info[i].offset = idx_value;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
track->textures[2].cube_info[i].robj = reloc->robj;
track->tex_dirty = true;
break;
case RADEON_RE_WIDTH_HEIGHT:
track->maxy = ((idx_value >> 16) & 0x7FF);
track->cb_dirty = true;
track->zb_dirty = true;
break;
case RADEON_RB3D_COLORPITCH:
r = r100_cs_packet_next_reloc(p, &reloc);
Expand All @@ -1528,9 +1536,11 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
ib[idx] = tmp;

track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
track->cb_dirty = true;
break;
case RADEON_RB3D_DEPTHPITCH:
track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
track->zb_dirty = true;
break;
case RADEON_RB3D_CNTL:
switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
Expand All @@ -1555,6 +1565,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
return -EINVAL;
}
track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
track->cb_dirty = true;
track->zb_dirty = true;
break;
case RADEON_RB3D_ZSTENCILCNTL:
switch (idx_value & 0xf) {
Expand All @@ -1572,6 +1584,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
default:
break;
}
track->zb_dirty = true;
break;
case RADEON_RB3D_ZPASS_ADDR:
r = r100_cs_packet_next_reloc(p, &reloc);
Expand All @@ -1588,6 +1601,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
uint32_t temp = idx_value >> 4;
for (i = 0; i < track->num_texture; i++)
track->textures[i].enabled = !!(temp & (1 << i));
track->tex_dirty = true;
}
break;
case RADEON_SE_VF_CNTL:
Expand All @@ -1602,12 +1616,14 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
i = (reg - RADEON_PP_TEX_SIZE_0) / 8;
track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
track->tex_dirty = true;
break;
case RADEON_PP_TEX_PITCH_0:
case RADEON_PP_TEX_PITCH_1:
case RADEON_PP_TEX_PITCH_2:
i = (reg - RADEON_PP_TEX_PITCH_0) / 8;
track->textures[i].pitch = idx_value + 32;
track->tex_dirty = true;
break;
case RADEON_PP_TXFILTER_0:
case RADEON_PP_TXFILTER_1:
Expand All @@ -1621,6 +1637,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
tmp = (idx_value >> 27) & 0x7;
if (tmp == 2 || tmp == 6)
track->textures[i].roundup_h = false;
track->tex_dirty = true;
break;
case RADEON_PP_TXFORMAT_0:
case RADEON_PP_TXFORMAT_1:
Expand Down Expand Up @@ -1673,6 +1690,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
}
track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
track->tex_dirty = true;
break;
case RADEON_PP_CUBIC_FACES_0:
case RADEON_PP_CUBIC_FACES_1:
Expand All @@ -1683,6 +1701,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
}
track->tex_dirty = true;
break;
default:
printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
Expand Down Expand Up @@ -3318,9 +3337,9 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
unsigned long size;
unsigned prim_walk;
unsigned nverts;
unsigned num_cb = track->num_cb;
unsigned num_cb = track->cb_dirty ? track->num_cb : 0;

if (!track->zb_cb_clear && !track->color_channel_mask &&
if (num_cb && !track->zb_cb_clear && !track->color_channel_mask &&
!track->blend_read_enable)
num_cb = 0;

Expand All @@ -3341,7 +3360,9 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
return -EINVAL;
}
}
if (track->z_enabled) {
track->cb_dirty = false;

if (track->zb_dirty && track->z_enabled) {
if (track->zb.robj == NULL) {
DRM_ERROR("[drm] No buffer for z buffer !\n");
return -EINVAL;
Expand All @@ -3358,6 +3379,8 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
return -EINVAL;
}
}
track->zb_dirty = false;

prim_walk = (track->vap_vf_cntl >> 4) & 0x3;
if (track->vap_vf_cntl & (1 << 14)) {
nverts = track->vap_alt_nverts;
Expand Down Expand Up @@ -3417,13 +3440,22 @@ int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track)
prim_walk);
return -EINVAL;
}
return r100_cs_track_texture_check(rdev, track);

if (track->tex_dirty) {
track->tex_dirty = false;
return r100_cs_track_texture_check(rdev, track);
}
return 0;
}

void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track)
{
unsigned i, face;

track->cb_dirty = true;
track->zb_dirty = true;
track->tex_dirty = true;

if (rdev->family < CHIP_R300) {
track->num_cb = 1;
if (rdev->family <= CHIP_RS200)
Expand Down
11 changes: 4 additions & 7 deletions drivers/gpu/drm/radeon/r100_track.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,7 @@ struct r100_cs_track_texture {
unsigned compress_format;
};

struct r100_cs_track_limits {
unsigned num_cb;
unsigned num_texture;
unsigned max_levels;
};

struct r100_cs_track {
struct radeon_device *rdev;
unsigned num_cb;
unsigned num_texture;
unsigned maxy;
Expand All @@ -78,6 +71,10 @@ struct r100_cs_track {
bool separate_cube;
bool zb_cb_clear;
bool blend_read_enable;

bool cb_dirty;
bool zb_dirty;
bool tex_dirty;
};

int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track);
Expand Down
18 changes: 18 additions & 0 deletions drivers/gpu/drm/radeon/r200.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
}
track->zb.robj = reloc->robj;
track->zb.offset = idx_value;
track->zb_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
break;
case RADEON_RB3D_COLOROFFSET:
Expand All @@ -196,6 +197,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
}
track->cb[0].robj = reloc->robj;
track->cb[0].offset = idx_value;
track->cb_dirty = true;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
break;
case R200_PP_TXOFFSET_0:
Expand All @@ -214,6 +216,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
}
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
track->textures[i].robj = reloc->robj;
track->tex_dirty = true;
break;
case R200_PP_CUBIC_OFFSET_F1_0:
case R200_PP_CUBIC_OFFSET_F2_0:
Expand Down Expand Up @@ -257,9 +260,12 @@ int r200_packet0_check(struct radeon_cs_parser *p,
track->textures[i].cube_info[face - 1].offset = idx_value;
ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
track->textures[i].cube_info[face - 1].robj = reloc->robj;
track->tex_dirty = true;
break;
case RADEON_RE_WIDTH_HEIGHT:
track->maxy = ((idx_value >> 16) & 0x7FF);
track->cb_dirty = true;
track->zb_dirty = true;
break;
case RADEON_RB3D_COLORPITCH:
r = r100_cs_packet_next_reloc(p, &reloc);
Expand All @@ -280,9 +286,11 @@ int r200_packet0_check(struct radeon_cs_parser *p,
ib[idx] = tmp;

track->cb[0].pitch = idx_value & RADEON_COLORPITCH_MASK;
track->cb_dirty = true;
break;
case RADEON_RB3D_DEPTHPITCH:
track->zb.pitch = idx_value & RADEON_DEPTHPITCH_MASK;
track->zb_dirty = true;
break;
case RADEON_RB3D_CNTL:
switch ((idx_value >> RADEON_RB3D_COLOR_FORMAT_SHIFT) & 0x1f) {
Expand Down Expand Up @@ -312,6 +320,8 @@ int r200_packet0_check(struct radeon_cs_parser *p,
}

track->z_enabled = !!(idx_value & RADEON_Z_ENABLE);
track->cb_dirty = true;
track->zb_dirty = true;
break;
case RADEON_RB3D_ZSTENCILCNTL:
switch (idx_value & 0xf) {
Expand All @@ -329,6 +339,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
default:
break;
}
track->zb_dirty = true;
break;
case RADEON_RB3D_ZPASS_ADDR:
r = r100_cs_packet_next_reloc(p, &reloc);
Expand All @@ -345,6 +356,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
uint32_t temp = idx_value >> 4;
for (i = 0; i < track->num_texture; i++)
track->textures[i].enabled = !!(temp & (1 << i));
track->tex_dirty = true;
}
break;
case RADEON_SE_VF_CNTL:
Expand All @@ -369,6 +381,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
i = (reg - R200_PP_TXSIZE_0) / 32;
track->textures[i].width = (idx_value & RADEON_TEX_USIZE_MASK) + 1;
track->textures[i].height = ((idx_value & RADEON_TEX_VSIZE_MASK) >> RADEON_TEX_VSIZE_SHIFT) + 1;
track->tex_dirty = true;
break;
case R200_PP_TXPITCH_0:
case R200_PP_TXPITCH_1:
Expand All @@ -378,6 +391,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
case R200_PP_TXPITCH_5:
i = (reg - R200_PP_TXPITCH_0) / 32;
track->textures[i].pitch = idx_value + 32;
track->tex_dirty = true;
break;
case R200_PP_TXFILTER_0:
case R200_PP_TXFILTER_1:
Expand All @@ -394,6 +408,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
tmp = (idx_value >> 27) & 0x7;
if (tmp == 2 || tmp == 6)
track->textures[i].roundup_h = false;
track->tex_dirty = true;
break;
case R200_PP_TXMULTI_CTL_0:
case R200_PP_TXMULTI_CTL_1:
Expand Down Expand Up @@ -432,6 +447,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
track->textures[i].tex_coord_type = 1;
break;
}
track->tex_dirty = true;
break;
case R200_PP_TXFORMAT_0:
case R200_PP_TXFORMAT_1:
Expand Down Expand Up @@ -488,6 +504,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
}
track->textures[i].cube_info[4].width = 1 << ((idx_value >> 16) & 0xf);
track->textures[i].cube_info[4].height = 1 << ((idx_value >> 20) & 0xf);
track->tex_dirty = true;
break;
case R200_PP_CUBIC_FACES_0:
case R200_PP_CUBIC_FACES_1:
Expand All @@ -501,6 +518,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
track->textures[i].cube_info[face].width = 1 << ((tmp >> (face * 8)) & 0xf);
track->textures[i].cube_info[face].height = 1 << ((tmp >> ((face * 8) + 4)) & 0xf);
}
track->tex_dirty = true;
break;
default:
printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
Expand Down
Loading

0 comments on commit 40b4a75

Please sign in to comment.