Skip to content

Commit

Permalink
drm/i915/vlv: WA for Turbo and RC6 to work together.
Browse files Browse the repository at this point in the history
With RC6 enabled, BYT has a HW issue in determining the right
Gfx busyness.
WA for Turbo + RC6: Use SW-based Gfx busyness detection to decide
on increasing/decreasing the freq. This logic monitors the C0
counters of the render/media power wells over the EI period and takes
the necessary action based on these values.

v2: Refactor duplicate code. (Ville)

v3: Reformat the comments. (Ville)

v4: Enable required counters and remove unwanted code (Ville)

v5: Added frequency change acceleration support and remove kernel-doc
style comments. (Ville)

v6: Updated comment section and Fix w/a comment. (Ville)

Signed-off-by: Deepak S <deepak.s@linux.intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
  • Loading branch information
Deepak S authored and Daniel Vetter committed Jul 8, 2014
1 parent 8e09bf8 commit 31685c2
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 4 deletions.
15 changes: 15 additions & 0 deletions drivers/gpu/drm/i915/i915_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,12 @@ struct vlv_s0ix_state {
u32 clock_gate_dis2;
};

/*
 * Snapshot of the counters sampled at the previous evaluation, kept so that
 * vlv_c0_residency() can compute deltas against the current readings.
 */
struct intel_rps_ei_calc {
/* Last PUNIT_REG_CZ_TIMESTAMP value read; 0 is treated as "no sample yet" */
u32 cz_ts_ei;
/* Last VLV_RENDER_C0_COUNT_REG value read */
u32 render_ei_c0;
/* Last VLV_MEDIA_C0_COUNT_REG value read */
u32 media_ei_c0;
};

struct intel_gen6_power_mgmt {
/* work and pm_iir are protected by dev_priv->irq_lock */
struct work_struct work;
Expand All @@ -926,6 +932,8 @@ struct intel_gen6_power_mgmt {
u8 rp1_freq; /* "less than" RP0 power/freqency */
u8 rp0_freq; /* Non-overclocked max frequency. */

u32 ei_interrupt_count;

int last_adj;
enum { LOW_POWER, BETWEEN, HIGH_POWER } power;

Expand Down Expand Up @@ -1527,6 +1535,13 @@ struct drm_i915_private {
/* gen6+ rps state */
struct intel_gen6_power_mgmt rps;

/* rps wa up ei calculation */
struct intel_rps_ei_calc rps_up_ei;

/* rps wa down ei calculation */
struct intel_rps_ei_calc rps_down_ei;


/* ilk-only ips/rps state. Everything in here is protected by the global
* mchdev_lock in intel_pm.c */
struct intel_ilk_power_mgmt ips;
Expand Down
133 changes: 132 additions & 1 deletion drivers/gpu/drm/i915/i915_irq.c
Original file line number Diff line number Diff line change
Expand Up @@ -1272,6 +1272,131 @@ static void notify_ring(struct drm_device *dev,
i915_queue_hangcheck(dev);
}

/*
 * Sample the CZ timestamp and the render/media C0 counters, update the
 * snapshot in @rps_ei and return the C0 residency (in percent) of the busier
 * of the two power wells since the previous snapshot.
 *
 * The very first call for a given @rps_ei (cz_ts_ei == 0) only records the
 * snapshot and returns the current frequency instead of a percentage.
 * NOTE(review): callers visible here ignore that first return value; confirm
 * no caller interprets it as a residency.
 *
 * Returns 0 when the elapsed time rounds down to zero milliseconds.
 */
static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
			    struct intel_rps_ei_calc *rps_ei)
{
	u32 now_ts, now_render, now_media;
	u32 delta_ts, delta_render, delta_media;
	u32 czclk_khz, busiest;

	/* Keep the hardware accesses in this order: punit first, then MMIO. */
	now_ts = vlv_punit_read(dev_priv, PUNIT_REG_CZ_TIMESTAMP);
	czclk_khz = DIV_ROUND_CLOSEST(dev_priv->mem_freq * 1000, 4);

	now_render = I915_READ(VLV_RENDER_C0_COUNT_REG);
	now_media = I915_READ(VLV_MEDIA_C0_COUNT_REG);

	/* No previous snapshot: just seed it, there is nothing to compare. */
	if (!rps_ei->cz_ts_ei) {
		rps_ei->cz_ts_ei = now_ts;
		rps_ei->render_ei_c0 = now_render;
		rps_ei->media_ei_c0 = now_media;

		return dev_priv->rps.cur_freq;
	}

	/* Unsigned subtraction, so a counter wrap still yields the delta. */
	delta_ts = now_ts - rps_ei->cz_ts_ei;
	delta_render = now_render - rps_ei->render_ei_c0;
	delta_media = now_media - rps_ei->media_ei_c0;

	rps_ei->cz_ts_ei = now_ts;
	rps_ei->render_ei_c0 = now_render;
	rps_ei->media_ei_c0 = now_media;

	/* Bring timestamp and C0 counts to a common unit of milliseconds. */
	delta_ts /= VLV_CZ_CLOCK_TO_MILLI_SEC;
	delta_render /= czclk_khz;
	delta_media /= czclk_khz;

	/* Residency is only meaningful over a non-zero elapsed time. */
	if (!delta_ts)
		return 0;

	busiest = max(delta_render, delta_media);

	return (busiest * 100) / delta_ts;
}

/*
 * vlv_calc_delay_from_C0_counters - choose the next RPS frequency from the
 * software-measured C0 residency of the render and media power wells.
 *
 * Called once per "up EI expired" interrupt with rps.hw_lock held. The first
 * call only seeds the up and down snapshots and returns the current
 * frequency. After that, every VLV_INT_COUNT_FOR_DOWN_EI-th call evaluates
 * the down residency (measured over the whole window since the previous down
 * evaluation); all other calls evaluate the up residency.
 *
 * - up residency >= VLV_RP_UP_EI_THRESHOLD: step up, doubling the previous
 *   positive step, and jump straight to the efficient frequency if the
 *   result is still below it.
 * - down residency < VLV_RP_DOWN_EI_THRESHOLD on a down evaluation: step
 *   down, doubling the previous negative step.
 * - otherwise: keep the current frequency.
 *
 * Returns the requested frequency. Steps are clamped to the soft limits so
 * the value always fits the unsigned return type.
 */
static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
{
	u32 residency_C0_up = 0, residency_C0_down = 0;
	/*
	 * These must be signed: rps.last_adj is an int and negative steps are
	 * used for down-clocking. With u8 the "adj < 0" test could never be
	 * true and a negative last_adj was misread as a large positive step.
	 */
	int new_delay, adj;
	int min_freq, max_freq;

	WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock));

	dev_priv->rps.ei_interrupt_count++;

	/* First interrupt: seed both snapshots, nothing to evaluate yet. */
	if (dev_priv->rps_up_ei.cz_ts_ei == 0) {
		vlv_c0_residency(dev_priv, &dev_priv->rps_up_ei);
		vlv_c0_residency(dev_priv, &dev_priv->rps_down_ei);
		return dev_priv->rps.cur_freq;
	}

	/*
	 * To down throttle, C0 residency should be less than down threshold
	 * for continuous EI intervals. So calculate down EI counters
	 * once in VLV_INT_COUNT_FOR_DOWN_EI
	 */
	if (dev_priv->rps.ei_interrupt_count == VLV_INT_COUNT_FOR_DOWN_EI) {
		dev_priv->rps.ei_interrupt_count = 0;

		residency_C0_down = vlv_c0_residency(dev_priv,
						     &dev_priv->rps_down_ei);
	} else {
		residency_C0_up = vlv_c0_residency(dev_priv,
						   &dev_priv->rps_up_ei);
	}

	min_freq = dev_priv->rps.min_freq_softlimit;
	max_freq = dev_priv->rps.max_freq_softlimit;

	new_delay = dev_priv->rps.cur_freq;
	adj = dev_priv->rps.last_adj;

	if (residency_C0_up >= VLV_RP_UP_EI_THRESHOLD) {
		/* C0 residency is at/above the UP threshold: raise frequency */
		if (adj > 0)
			adj *= 2;
		else
			adj = 1;

		if (new_delay < max_freq) {
			new_delay += adj;
			/* An accelerated step must not overshoot the limit */
			if (new_delay > max_freq)
				new_delay = max_freq;
		}

		/*
		 * For better performance, jump directly
		 * to RPe if we're below it.
		 */
		if (new_delay < dev_priv->rps.efficient_freq)
			new_delay = dev_priv->rps.efficient_freq;

	} else if (!dev_priv->rps.ei_interrupt_count &&
		   (residency_C0_down < VLV_RP_DOWN_EI_THRESHOLD)) {
		/*
		 * ei_interrupt_count was just reset, so this is a down
		 * evaluation: C0 residency stayed below the down threshold
		 * over a period of VLV_INT_COUNT_FOR_DOWN_EI. Reduce the freq.
		 */
		if (adj < 0)
			adj *= 2;
		else
			adj = -1;

		if (new_delay > min_freq) {
			new_delay += adj;
			/* An accelerated step must not undershoot the limit */
			if (new_delay < min_freq)
				new_delay = min_freq;
		}
	}

	return new_delay;
}

static void gen6_pm_rps_work(struct work_struct *work)
{
struct drm_i915_private *dev_priv =
Expand Down Expand Up @@ -1320,6 +1445,8 @@ static void gen6_pm_rps_work(struct work_struct *work)
else
new_delay = dev_priv->rps.min_freq_softlimit;
adj = 0;
} else if (pm_iir & GEN6_PM_RP_UP_EI_EXPIRED) {
new_delay = vlv_calc_delay_from_C0_counters(dev_priv);
} else if (pm_iir & GEN6_PM_RP_DOWN_THRESHOLD) {
if (adj < 0)
adj *= 2;
Expand Down Expand Up @@ -4511,7 +4638,11 @@ void intel_irq_init(struct drm_device *dev)
INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);

/* Let's track the enabled rps events */
dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;
if (IS_VALLEYVIEW(dev))
/* WaGsvRC0ResidenncyMethod:VLV */
dev_priv->pm_rps_events = GEN6_PM_RP_UP_EI_EXPIRED;
else
dev_priv->pm_rps_events = GEN6_PM_RPS_EVENTS;

setup_timer(&dev_priv->gpu_error.hangcheck_timer,
i915_hangcheck_elapsed,
Expand Down
11 changes: 11 additions & 0 deletions drivers/gpu/drm/i915/i915_reg.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,7 @@ enum punit_power_well {
#define PUNIT_REG_GPU_FREQ_STS 0xd8
#define GENFREQSTATUS (1<<0)
#define PUNIT_REG_MEDIA_TURBO_FREQ_REQ 0xdc
#define PUNIT_REG_CZ_TIMESTAMP 0xce

#define PUNIT_FUSE_BUS2 0xf6 /* bits 47:40 */
#define PUNIT_FUSE_BUS1 0xf5 /* bits 55:48 */
Expand All @@ -556,6 +557,11 @@ enum punit_power_well {
#define FB_FMAX_VMIN_FREQ_LO_SHIFT 27
#define FB_FMAX_VMIN_FREQ_LO_MASK 0xf8000000

/* Divisor applied to a PUNIT_REG_CZ_TIMESTAMP delta to express it in milliseconds */
#define VLV_CZ_CLOCK_TO_MILLI_SEC 100000
/* C0 residency (percent) at or above which the GPU frequency is raised */
#define VLV_RP_UP_EI_THRESHOLD 90
/* C0 residency (percent) below which the GPU frequency is lowered */
#define VLV_RP_DOWN_EI_THRESHOLD 70
/* Number of up-EI-expired interrupts per down-residency evaluation */
#define VLV_INT_COUNT_FOR_DOWN_EI 5

/* vlv2 north clock has */
#define CCK_FUSE_REG 0x8
#define CCK_FUSE_HPLL_FREQ_MASK 0x3
Expand Down Expand Up @@ -5394,6 +5400,7 @@ enum punit_power_well {
#define VLV_GTLC_ALLOWWAKEERR (1 << 1)
#define VLV_GTLC_PW_MEDIA_STATUS_MASK (1 << 5)
#define VLV_GTLC_PW_RENDER_STATUS_MASK (1 << 7)
#define VLV_GTLC_SURVIVABILITY_REG 0x130098
#define FORCEWAKE_MT 0xa188 /* multi-threaded */
#define FORCEWAKE_KERNEL 0x1
#define FORCEWAKE_USER 0x2
Expand Down Expand Up @@ -5541,6 +5548,8 @@ enum punit_power_well {
#define GEN6_GT_GFX_RC6_LOCKED 0x138104
#define VLV_COUNTER_CONTROL 0x138104
#define VLV_COUNT_RANGE_HIGH (1<<15)
#define VLV_MEDIA_RC0_COUNT_EN (1<<5)
#define VLV_RENDER_RC0_COUNT_EN (1<<4)
#define VLV_MEDIA_RC6_COUNT_EN (1<<1)
#define VLV_RENDER_RC6_COUNT_EN (1<<0)
#define GEN6_GT_GFX_RC6 0x138108
Expand All @@ -5549,6 +5558,8 @@ enum punit_power_well {

#define GEN6_GT_GFX_RC6p 0x13810C
#define GEN6_GT_GFX_RC6pp 0x138110
#define VLV_RENDER_C0_COUNT_REG 0x138118
#define VLV_MEDIA_C0_COUNT_REG 0x13811C

#define GEN6_PCODE_MAILBOX 0x138124
#define GEN6_PCODE_READY (1<<31)
Expand Down
12 changes: 9 additions & 3 deletions drivers/gpu/drm/i915/intel_pm.c
Original file line number Diff line number Diff line change
Expand Up @@ -3282,8 +3282,11 @@ static void vlv_set_rps_idle(struct drm_i915_private *dev_priv)

vlv_force_gfx_clock(dev_priv, false);

I915_WRITE(GEN6_PMINTRMSK,
gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
if (dev_priv->pm_rps_events & GEN6_PM_RP_UP_EI_EXPIRED)
I915_WRITE(GEN6_PMINTRMSK, ~dev_priv->pm_rps_events);
else
I915_WRITE(GEN6_PMINTRMSK,
gen6_rps_pm_mask(dev_priv, dev_priv->rps.cur_freq));
}

void gen6_rps_idle(struct drm_i915_private *dev_priv)
Expand Down Expand Up @@ -4125,6 +4128,7 @@ static void valleyview_enable_rps(struct drm_device *dev)
I915_WRITE(GEN6_RP_DOWN_EI, 350000);

I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 0xf4240);

I915_WRITE(GEN6_RP_CONTROL,
GEN6_RP_MEDIA_TURBO |
Expand All @@ -4145,9 +4149,11 @@ static void valleyview_enable_rps(struct drm_device *dev)

/* allows RC6 residency counter to work */
I915_WRITE(VLV_COUNTER_CONTROL,
_MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH |
_MASKED_BIT_ENABLE(VLV_MEDIA_RC0_COUNT_EN |
VLV_RENDER_RC0_COUNT_EN |
VLV_MEDIA_RC6_COUNT_EN |
VLV_RENDER_RC6_COUNT_EN));

if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE)
rc6_mode = GEN7_RC_CTL_TO_MODE | VLV_RC_CTL_CTX_RST_PARALLEL;

Expand Down

0 comments on commit 31685c2

Please sign in to comment.