From af2ceed2c73ad53026ab1a9ba5ce73540c58cfcf Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Sun, 24 Jun 2012 16:42:32 +0200
Subject: [PATCH] --- yaml ---

r: 318629
b: refs/heads/master
c: 8090c6b9daa04dda649ac0a2209601042abfb0a4
h: refs/heads/master
i:
  318627: c144e27f278547238488212091fc79c214d7722d
v: v3
---
 [refs]                                        |    2 +-
 trunk/drivers/gpu/drm/drm_drv.c               |    2 +-
 trunk/drivers/gpu/drm/drm_info.c              |    2 +-
 trunk/drivers/gpu/drm/gma500/intel_bios.c     |    2 +-
 trunk/drivers/gpu/drm/gma500/intel_bios.h     |    2 +-
 trunk/drivers/gpu/drm/i915/i915_suspend.c     |    5 +-
 trunk/drivers/gpu/drm/i915/intel_bios.c       |    2 +-
 trunk/drivers/gpu/drm/i915/intel_bios.h       |    2 +-
 trunk/drivers/gpu/drm/i915/intel_display.c    |   21 +-
 trunk/drivers/gpu/drm/i915/intel_drv.h        |    9 +-
 trunk/drivers/gpu/drm/i915/intel_pm.c         |   37 +-
 trunk/drivers/gpu/drm/radeon/evergreen.c      |   81 +-
 .../gpu/drm/radeon/evergreen_blit_kms.c       |    3 +-
 trunk/drivers/gpu/drm/radeon/ni.c             |    2 +-
 trunk/drivers/gpu/drm/radeon/r100.c           | 1344 +++++++++--------
 trunk/drivers/gpu/drm/radeon/r200.c           |    4 +-
 trunk/drivers/gpu/drm/radeon/r600.c           |   86 +-
 trunk/drivers/gpu/drm/radeon/r600_blit_kms.c  |   30 +-
 trunk/drivers/gpu/drm/radeon/r600_hdmi.c      |    6 +-
 trunk/drivers/gpu/drm/radeon/radeon.h         |  127 +-
 trunk/drivers/gpu/drm/radeon/radeon_asic.h    |   13 +-
 .../drivers/gpu/drm/radeon/radeon_benchmark.c |   10 +-
 trunk/drivers/gpu/drm/radeon/radeon_cs.c      |   47 +-
 trunk/drivers/gpu/drm/radeon/radeon_device.c  |    7 +-
 trunk/drivers/gpu/drm/radeon/radeon_fence.c   |  113 +-
 trunk/drivers/gpu/drm/radeon/radeon_gart.c    |   24 +-
 trunk/drivers/gpu/drm/radeon/radeon_gem.c     |    2 +
 trunk/drivers/gpu/drm/radeon/radeon_irq_kms.c |  133 +-
 trunk/drivers/gpu/drm/radeon/radeon_kms.c     |   12 +-
 trunk/drivers/gpu/drm/radeon/radeon_object.c  |    8 +-
 trunk/drivers/gpu/drm/radeon/radeon_pm.c      |   16 +-
 trunk/drivers/gpu/drm/radeon/radeon_ring.c    |   43 +-
 trunk/drivers/gpu/drm/radeon/radeon_sa.c      |    2 +-
 .../drivers/gpu/drm/radeon/radeon_semaphore.c |   71 +-
 trunk/drivers/gpu/drm/radeon/radeon_test.c    |   72 +-
 trunk/drivers/gpu/drm/radeon/radeon_ttm.c     |   44 +-
 trunk/drivers/gpu/drm/radeon/rs600.c          |   23 +-
 trunk/drivers/gpu/drm/radeon/si.c             |   64 +-
 38 files changed, 1249 insertions(+), 1224 deletions(-)

diff --git a/[refs] b/[refs]
index dfa212ed9c54..e608f2ece107 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 74da01dcfbb6300d758490d5d4efa1314c0e4e8b
+refs/heads/master: 8090c6b9daa04dda649ac0a2209601042abfb0a4
diff --git a/trunk/drivers/gpu/drm/drm_drv.c b/trunk/drivers/gpu/drm/drm_drv.c
index 946bd91c57ec..8a9d0792e4ec 100644
--- a/trunk/drivers/gpu/drm/drm_drv.c
+++ b/trunk/drivers/gpu/drm/drm_drv.c
@@ -486,7 +486,7 @@ long drm_ioctl(struct file *filp,
 	kfree(kdata);
 	atomic_dec(&dev->ioctl_count);
 	if (retcode)
-		DRM_DEBUG("ret = %d\n", retcode);
+		DRM_DEBUG("ret = %x\n", retcode);
 	return retcode;
 }
 
diff --git a/trunk/drivers/gpu/drm/drm_info.c b/trunk/drivers/gpu/drm/drm_info.c
index 4076a33e5cad..ab1162da70f8 100644
--- a/trunk/drivers/gpu/drm/drm_info.c
+++ b/trunk/drivers/gpu/drm/drm_info.c
@@ -235,7 +235,7 @@ int drm_clients_info(struct seq_file *m, void *data)
 }
 
-static int drm_gem_one_name_info(int id, void *ptr, void *data)
+int drm_gem_one_name_info(int id, void *ptr, void *data)
 {
 	struct drm_gem_object *obj = ptr;
 	struct seq_file *m = data;
diff --git a/trunk/drivers/gpu/drm/gma500/intel_bios.c b/trunk/drivers/gpu/drm/gma500/intel_bios.c
index 8d7caf0f363e..973d7f6d66b7 100644
--- a/trunk/drivers/gpu/drm/gma500/intel_bios.c
+++ b/trunk/drivers/gpu/drm/gma500/intel_bios.c
@@ -427,7 +427,7 @@ parse_device_mapping(struct drm_psb_private *dev_priv,
  *
  * Returns 0 on success, nonzero on failure.
  */
-int psb_intel_init_bios(struct drm_device *dev)
+bool psb_intel_init_bios(struct drm_device *dev)
 {
 	struct drm_psb_private *dev_priv = dev->dev_private;
 	struct pci_dev *pdev = dev->pdev;
diff --git a/trunk/drivers/gpu/drm/gma500/intel_bios.h b/trunk/drivers/gpu/drm/gma500/intel_bios.h
index 2e95523b84b1..0a738663eb5a 100644
--- a/trunk/drivers/gpu/drm/gma500/intel_bios.h
+++ b/trunk/drivers/gpu/drm/gma500/intel_bios.h
@@ -431,7 +431,7 @@ struct bdb_driver_features {
 	u8 custom_vbt_version;
 } __attribute__((packed));
 
-extern int psb_intel_init_bios(struct drm_device *dev);
+extern bool psb_intel_init_bios(struct drm_device *dev);
 extern void psb_intel_destroy_bios(struct drm_device *dev);
 
 /*
diff --git a/trunk/drivers/gpu/drm/i915/i915_suspend.c b/trunk/drivers/gpu/drm/i915/i915_suspend.c
index 0ede02a99d91..740c076ea330 100644
--- a/trunk/drivers/gpu/drm/i915/i915_suspend.c
+++ b/trunk/drivers/gpu/drm/i915/i915_suspend.c
@@ -825,10 +825,7 @@ int i915_save_state(struct drm_device *dev)
 		dev_priv->saveIMR = I915_READ(IMR);
 	}
 
-	if (IS_IRONLAKE_M(dev))
-		ironlake_disable_drps(dev);
-	if (INTEL_INFO(dev)->gen >= 6)
-		gen6_disable_rps(dev);
+	intel_disable_gt_powersave(dev);
 
 	/* Cache mode state */
 	dev_priv->saveCACHE_MODE_0 = I915_READ(CACHE_MODE_0);
diff --git a/trunk/drivers/gpu/drm/i915/intel_bios.c b/trunk/drivers/gpu/drm/i915/intel_bios.c
index 8c6074154bf6..353459362f6f 100644
--- a/trunk/drivers/gpu/drm/i915/intel_bios.c
+++ b/trunk/drivers/gpu/drm/i915/intel_bios.c
@@ -692,7 +692,7 @@ static const struct dmi_system_id intel_no_opregion_vbt[] = {
  *
 * Returns 0 on success, nonzero on failure.
  */
-int
+bool
 intel_parse_bios(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/trunk/drivers/gpu/drm/i915/intel_bios.h b/trunk/drivers/gpu/drm/i915/intel_bios.h
index 31c2107e7825..dbda6e3bdf07 100644
--- a/trunk/drivers/gpu/drm/i915/intel_bios.h
+++ b/trunk/drivers/gpu/drm/i915/intel_bios.h
@@ -476,7 +476,7 @@ struct bdb_edp {
 } __attribute__ ((packed));
 
 void intel_setup_bios(struct drm_device *dev);
-int intel_parse_bios(struct drm_device *dev);
+bool intel_parse_bios(struct drm_device *dev);
 
 /*
  * Driver<->VBIOS interaction occurs through scratch bits in
diff --git a/trunk/drivers/gpu/drm/i915/intel_display.c b/trunk/drivers/gpu/drm/i915/intel_display.c
index b3052ef70d16..fdca5b925c6c 100644
--- a/trunk/drivers/gpu/drm/i915/intel_display.c
+++ b/trunk/drivers/gpu/drm/i915/intel_display.c
@@ -7172,20 +7172,9 @@ static void ivb_pch_pwm_override(struct drm_device *dev)
 
 void intel_modeset_init_hw(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
 	intel_init_clock_gating(dev);
 
-	if (IS_IRONLAKE_M(dev)) {
-		ironlake_enable_drps(dev);
-		ironlake_enable_rc6(dev);
-		intel_init_emon(dev);
-	}
-
-	if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) {
-		gen6_enable_rps(dev_priv);
-		gen6_update_ring_freq(dev_priv);
-	}
+	intel_enable_gt_powersave(dev);
 
 	if (IS_IVYBRIDGE(dev))
 		ivb_pch_pwm_override(dev);
@@ -7277,13 +7266,7 @@ void intel_modeset_cleanup(struct drm_device *dev)
 
 	intel_disable_fbc(dev);
 
-	if (IS_IRONLAKE_M(dev))
-		ironlake_disable_drps(dev);
-	if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev))
-		gen6_disable_rps(dev);
-
-	if (IS_IRONLAKE_M(dev))
-		ironlake_disable_rc6(dev);
+	intel_disable_gt_powersave(dev);
 
 	if (IS_VALLEYVIEW(dev))
 		vlv_init_dpio(dev);
diff --git a/trunk/drivers/gpu/drm/i915/intel_drv.h b/trunk/drivers/gpu/drm/i915/intel_drv.h
index 5290e9df327b..cc1573b2b60b 100644
--- a/trunk/drivers/gpu/drm/i915/intel_drv.h
+++ b/trunk/drivers/gpu/drm/i915/intel_drv.h
@@ -425,9 +425,6 @@ extern void intel_crtc_fb_gamma_set(struct drm_crtc *crtc, u16 red, u16 green,
 extern void intel_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green,
 				    u16 *blue, int regno);
 extern void intel_enable_clock_gating(struct drm_device *dev);
-extern void ironlake_disable_rc6(struct drm_device *dev);
-extern void ironlake_enable_drps(struct drm_device *dev);
-extern void ironlake_disable_drps(struct drm_device *dev);
 
 extern int intel_pin_and_fence_fb_obj(struct drm_device *dev,
 				      struct drm_i915_gem_object *obj,
@@ -494,10 +491,8 @@ extern void intel_update_fbc(struct drm_device *dev);
 extern void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
 extern void intel_gpu_ips_teardown(void);
 
-extern void gen6_enable_rps(struct drm_i915_private *dev_priv);
-extern void gen6_update_ring_freq(struct drm_i915_private *dev_priv);
-extern void gen6_disable_rps(struct drm_device *dev);
-extern void intel_init_emon(struct drm_device *dev);
+extern void intel_enable_gt_powersave(struct drm_device *dev);
+extern void intel_disable_gt_powersave(struct drm_device *dev);
 
 extern void intel_ddi_dpms(struct drm_encoder *encoder, int mode);
 extern void intel_ddi_mode_set(struct drm_encoder *encoder,
diff --git a/trunk/drivers/gpu/drm/i915/intel_pm.c b/trunk/drivers/gpu/drm/i915/intel_pm.c
index 7504fbce05cc..2baba10723f5 100644
--- a/trunk/drivers/gpu/drm/i915/intel_pm.c
+++ b/trunk/drivers/gpu/drm/i915/intel_pm.c
@@ -2184,7 +2184,7 @@ bool ironlake_set_drps(struct drm_device *dev, u8 val)
 	return true;
 }
 
-void ironlake_enable_drps(struct drm_device *dev)
+static void ironlake_enable_drps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 rgvmodectl = I915_READ(MEMMODECTL);
@@ -2248,7 +2248,7 @@ void ironlake_enable_drps(struct drm_device *dev)
 	getrawmonotonic(&dev_priv->last_time2);
 }
 
-void ironlake_disable_drps(struct drm_device *dev)
+static void ironlake_disable_drps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u16 rgvswctl = I915_READ16(MEMSWCTL);
@@ -2301,7 +2301,7 @@ void gen6_set_rps(struct drm_device *dev, u8 val)
 	dev_priv->cur_delay = val;
 }
 
-void gen6_disable_rps(struct drm_device *dev)
+static void gen6_disable_rps(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -2349,7 +2349,7 @@ int intel_enable_rc6(const struct drm_device *dev)
 	return (INTEL_RC6_ENABLE | INTEL_RC6p_ENABLE);
 }
 
-void gen6_enable_rps(struct drm_i915_private *dev_priv)
+static void gen6_enable_rps(struct drm_i915_private *dev_priv)
 {
 	struct intel_ring_buffer *ring;
 	u32 rp_state_cap;
@@ -2494,7 +2494,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	mutex_unlock(&dev_priv->dev->struct_mutex);
 }
 
-void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
+static void gen6_update_ring_freq(struct drm_i915_private *dev_priv)
 {
 	int min_freq = 15;
 	int gpu_freq, ia_freq, max_ia_freq;
@@ -3156,8 +3156,7 @@ void intel_gpu_ips_teardown(void)
 	i915_mch_dev = NULL;
 	spin_unlock(&mchdev_lock);
 }
-
-void intel_init_emon(struct drm_device *dev)
+static void intel_init_emon(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 lcfuse;
@@ -3228,6 +3227,30 @@ void intel_init_emon(struct drm_device *dev)
 	dev_priv->corr = (lcfuse & LCFUSE_HIV_MASK);
 }
 
+void intel_disable_gt_powersave(struct drm_device *dev)
+{
+	if (IS_IRONLAKE_M(dev))
+		ironlake_disable_drps(dev);
+	if (INTEL_INFO(dev)->gen >= 6 && !IS_VALLEYVIEW(dev))
+		gen6_disable_rps(dev);
+}
+
+void intel_enable_gt_powersave(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (IS_IRONLAKE_M(dev)) {
+		ironlake_enable_drps(dev);
+		ironlake_enable_rc6(dev);
+		intel_init_emon(dev);
+	}
+
+	if ((IS_GEN6(dev) || IS_GEN7(dev)) && !IS_VALLEYVIEW(dev)) {
+		gen6_enable_rps(dev_priv);
+		gen6_update_ring_freq(dev_priv);
+	}
+}
+
 static void ironlake_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
diff --git a/trunk/drivers/gpu/drm/radeon/evergreen.c b/trunk/drivers/gpu/drm/radeon/evergreen.c
index f716e081c813..7fb3d2e0434c 100644
--- a/trunk/drivers/gpu/drm/radeon/evergreen.c
+++ b/trunk/drivers/gpu/drm/radeon/evergreen.c
@@ -428,7 +428,6 @@ void evergreen_hpd_init(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
 	struct drm_connector *connector;
-	unsigned enabled = 0;
 	u32 tmp = DC_HPDx_CONNECTION_TIMER(0x9c4) |
 		DC_HPDx_RX_INT_TIMER(0xfa) |
 		DC_HPDx_EN;
@@ -437,64 +436,73 @@ void evergreen_hpd_init(struct radeon_device *rdev)
 		switch (radeon_connector->hpd.hpd) {
 		case RADEON_HPD_1:
 			WREG32(DC_HPD1_CONTROL, tmp);
+			rdev->irq.hpd[0] = true;
 			break;
 		case RADEON_HPD_2:
 			WREG32(DC_HPD2_CONTROL, tmp);
+			rdev->irq.hpd[1] = true;
 			break;
 		case RADEON_HPD_3:
 			WREG32(DC_HPD3_CONTROL, tmp);
+			rdev->irq.hpd[2] = true;
 			break;
 		case RADEON_HPD_4:
 			WREG32(DC_HPD4_CONTROL, tmp);
+			rdev->irq.hpd[3] = true;
 			break;
 		case RADEON_HPD_5:
 			WREG32(DC_HPD5_CONTROL, tmp);
+			rdev->irq.hpd[4] = true;
 			break;
 		case RADEON_HPD_6:
 			WREG32(DC_HPD6_CONTROL, tmp);
+			rdev->irq.hpd[5] = true;
 			break;
 		default:
 			break;
 		}
 		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
-		enabled |= 1 << radeon_connector->hpd.hpd;
 	}
-	radeon_irq_kms_enable_hpd(rdev, enabled);
+	if (rdev->irq.installed)
+		evergreen_irq_set(rdev);
 }
 
 void evergreen_hpd_fini(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
 	struct drm_connector *connector;
-	unsigned disabled = 0;
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 		switch (radeon_connector->hpd.hpd) {
 		case RADEON_HPD_1:
 			WREG32(DC_HPD1_CONTROL, 0);
+			rdev->irq.hpd[0] = false;
 			break;
 		case RADEON_HPD_2:
 			WREG32(DC_HPD2_CONTROL, 0);
+			rdev->irq.hpd[1] = false;
 			break;
 		case RADEON_HPD_3:
 			WREG32(DC_HPD3_CONTROL, 0);
+			rdev->irq.hpd[2] = false;
 			break;
 		case RADEON_HPD_4:
 			WREG32(DC_HPD4_CONTROL, 0);
+			rdev->irq.hpd[3] = false;
 			break;
 		case RADEON_HPD_5:
 			WREG32(DC_HPD5_CONTROL, 0);
+			rdev->irq.hpd[4] = false;
 			break;
 		case RADEON_HPD_6:
 			WREG32(DC_HPD6_CONTROL, 0);
+			rdev->irq.hpd[5] = false;
 			break;
 		default:
 			break;
 		}
-		disabled |= 1 << radeon_connector->hpd.hpd;
 	}
-	radeon_irq_kms_disable_hpd(rdev, disabled);
 }
 
 /* watermark setup */
@@ -1363,7 +1371,7 @@ void evergreen_mc_program(struct radeon_device *rdev)
  */
 void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
-	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
 
 	/* set to DX10/11 mode */
 	radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
@@ -2340,20 +2348,20 @@ int evergreen_irq_set(struct radeon_device *rdev)
 
 	if (rdev->family >= CHIP_CAYMAN) {
 		/* enable CP interrupts on all rings */
-		if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
+		if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); cp_int_cntl |= TIME_STAMP_INT_ENABLE; } - if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { + if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) { DRM_DEBUG("evergreen_irq_set: sw int cp1\n"); cp_int_cntl1 |= TIME_STAMP_INT_ENABLE; } - if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { + if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) { DRM_DEBUG("evergreen_irq_set: sw int cp2\n"); cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; } } else { - if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { + if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; @@ -2361,32 +2369,32 @@ int evergreen_irq_set(struct radeon_device *rdev) } if (rdev->irq.crtc_vblank_int[0] || - atomic_read(&rdev->irq.pflip[0])) { + rdev->irq.pflip[0]) { DRM_DEBUG("evergreen_irq_set: vblank 0\n"); crtc1 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - atomic_read(&rdev->irq.pflip[1])) { + rdev->irq.pflip[1]) { DRM_DEBUG("evergreen_irq_set: vblank 1\n"); crtc2 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[2] || - atomic_read(&rdev->irq.pflip[2])) { + rdev->irq.pflip[2]) { DRM_DEBUG("evergreen_irq_set: vblank 2\n"); crtc3 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[3] || - atomic_read(&rdev->irq.pflip[3])) { + rdev->irq.pflip[3]) { DRM_DEBUG("evergreen_irq_set: vblank 3\n"); crtc4 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[4] || - atomic_read(&rdev->irq.pflip[4])) { + rdev->irq.pflip[4]) { DRM_DEBUG("evergreen_irq_set: vblank 4\n"); crtc5 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[5] || - atomic_read(&rdev->irq.pflip[5])) { + rdev->irq.pflip[5]) { DRM_DEBUG("evergreen_irq_set: vblank 5\n"); crtc6 |= VBLANK_INT_MASK; } @@ -2668,6 +2676,7 @@ int evergreen_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data; u32 ring_index; + unsigned long flags; bool queue_hotplug = false; bool queue_hdmi = false; @@ -2675,21 +2684,22 @@ int evergreen_irq_process(struct radeon_device *rdev) return IRQ_NONE; wptr = evergreen_get_ih_wptr(rdev); - -restart_ih: - /* is somebody else already processing irqs? */ - if (atomic_xchg(&rdev->ih.lock, 1)) - return IRQ_NONE; - rptr = rdev->ih.rptr; DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + spin_lock_irqsave(&rdev->ih.lock, flags); + if (rptr == wptr) { + spin_unlock_irqrestore(&rdev->ih.lock, flags); + return IRQ_NONE; + } +restart_ih: /* Order reading of wptr vs. reading of IH ring data */ rmb(); /* display interrupts */ evergreen_irq_ack(rdev); + rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! 
*/ ring_index = rptr / 4; @@ -2706,7 +2716,7 @@ int evergreen_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (atomic_read(&rdev->irq.pflip[0])) + if (rdev->irq.pflip[0]) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -2732,7 +2742,7 @@ int evergreen_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (atomic_read(&rdev->irq.pflip[1])) + if (rdev->irq.pflip[1]) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); @@ -2758,7 +2768,7 @@ int evergreen_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (atomic_read(&rdev->irq.pflip[2])) + if (rdev->irq.pflip[2]) radeon_crtc_handle_flip(rdev, 2); rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; DRM_DEBUG("IH: D3 vblank\n"); @@ -2784,7 +2794,7 @@ int evergreen_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (atomic_read(&rdev->irq.pflip[3])) + if (rdev->irq.pflip[3]) radeon_crtc_handle_flip(rdev, 3); rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; DRM_DEBUG("IH: D4 vblank\n"); @@ -2810,7 +2820,7 @@ int evergreen_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (atomic_read(&rdev->irq.pflip[4])) + if (rdev->irq.pflip[4]) radeon_crtc_handle_flip(rdev, 4); rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; DRM_DEBUG("IH: D5 vblank\n"); @@ -2836,7 +2846,7 @@ int evergreen_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (atomic_read(&rdev->irq.pflip[5])) + if (rdev->irq.pflip[5]) radeon_crtc_handle_flip(rdev, 5); rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; DRM_DEBUG("IH: D6 vblank\n"); @@ -2976,6 +2986,7 @@ int evergreen_irq_process(struct radeon_device *rdev) break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); + rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default: @@ -2987,19 +2998,17 @@ int evergreen_irq_process(struct radeon_device *rdev) rptr += 16; rptr &= rdev->ih.ptr_mask; } + /* make sure wptr hasn't changed while processing */ + wptr = evergreen_get_ih_wptr(rdev); + if (wptr != rdev->ih.wptr) + goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); if (queue_hdmi) schedule_work(&rdev->audio_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); - atomic_set(&rdev->ih.lock, 0); - - /* make sure wptr hasn't changed while processing */ - wptr = evergreen_get_ih_wptr(rdev); - if (wptr != rptr) - goto restart_ih; - + spin_unlock_irqrestore(&rdev->ih.lock, flags); return IRQ_HANDLED; } diff --git a/trunk/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/trunk/drivers/gpu/drm/radeon/evergreen_blit_kms.c index e512560ffc6f..1e96bd458cfd 100644 --- a/trunk/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/trunk/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -622,8 +622,7 @@ int evergreen_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state; - rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ - rdev->r600_blit.ring_size_common += 55; /* shaders + def state */ + 
rdev->r600_blit.ring_size_common = 55; /* shaders + def state */ rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ diff --git a/trunk/drivers/gpu/drm/radeon/ni.c b/trunk/drivers/gpu/drm/radeon/ni.c index 2366be3df074..b7bf18e40215 100644 --- a/trunk/drivers/gpu/drm/radeon/ni.c +++ b/trunk/drivers/gpu/drm/radeon/ni.c @@ -850,7 +850,7 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->ring]; + struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; /* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); diff --git a/trunk/drivers/gpu/drm/radeon/r100.c b/trunk/drivers/gpu/drm/radeon/r100.c index d06c8dd8ddfa..fb44e7e49083 100644 --- a/trunk/drivers/gpu/drm/radeon/r100.c +++ b/trunk/drivers/gpu/drm/radeon/r100.c @@ -103,6 +103,112 @@ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc) * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 */ +int r100_reloc_pitch_offset(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, + unsigned reg) +{ + int r; + u32 tile_flags = 0; + u32 tmp; + struct radeon_cs_reloc *reloc; + u32 value; + + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + value = radeon_get_ib_value(p, idx); + tmp = value & 0x003fffff; + tmp += (((u32)reloc->lobj.gpu_offset) >> 10); + + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + + tmp |= tile_flags; + p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; + } else + p->ib.ptr[idx] = (value & 0xffc00000) | tmp; + return 0; +} + +int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + int idx) +{ + unsigned c, i; + struct radeon_cs_reloc *reloc; + struct r100_cs_track *track; + int r = 0; + volatile uint32_t *ib; + u32 idx_value; + + ib = p->ib.ptr; + track = (struct r100_cs_track *)p->track; + c = radeon_get_ib_value(p, idx++) & 0x1F; + if (c > 16) { + DRM_ERROR("Only 16 vertex buffers are allowed %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + track->num_arrays = c; + for (i = 0; i < (c - 1); i+=2, idx+=3) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize &= 0x7F; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 1].robj = reloc->robj; + track->arrays[i + 1].esize = idx_value >> 24; + track->arrays[i + 1].esize &= 0x7F; + } + if (c & 1) { + 
r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].esize &= 0x7F; + } + return r; +} + void r100_pre_page_flip(struct radeon_device *rdev, int crtc) { /* enable the pflip int */ @@ -461,27 +567,43 @@ void r100_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; - unsigned enable = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); - enable |= 1 << radeon_connector->hpd.hpd; + switch (radeon_connector->hpd.hpd) { + case RADEON_HPD_1: + rdev->irq.hpd[0] = true; + break; + case RADEON_HPD_2: + rdev->irq.hpd[1] = true; + break; + default: + break; + } radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); } - radeon_irq_kms_enable_hpd(rdev, enable); + if (rdev->irq.installed) + r100_irq_set(rdev); } void r100_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; - unsigned disable = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); - disable |= 1 << radeon_connector->hpd.hpd; + switch (radeon_connector->hpd.hpd) { + case RADEON_HPD_1: + rdev->irq.hpd[0] = false; + break; + case RADEON_HPD_2: + rdev->irq.hpd[1] = false; + break; + default: + break; + } } - radeon_irq_kms_disable_hpd(rdev, disable); } /* @@ -513,6 +635,15 @@ int r100_pci_gart_init(struct radeon_device *rdev) return radeon_gart_table_ram_alloc(rdev); } +/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */ +void r100_enable_bm(struct radeon_device *rdev) +{ + uint32_t tmp; + /* Enable bus mastering */ + tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; + WREG32(RADEON_BUS_CNTL, tmp); +} + int r100_pci_gart_enable(struct radeon_device *rdev) { uint32_t tmp; @@ -574,18 +705,18 @@ int r100_irq_set(struct radeon_device *rdev) WREG32(R_000040_GEN_INT_CNTL, 0); return -EINVAL; } - if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { + if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { tmp |= RADEON_SW_INT_ENABLE; } if (rdev->irq.gui_idle) { tmp |= RADEON_GUI_IDLE_MASK; } if (rdev->irq.crtc_vblank_int[0] || - atomic_read(&rdev->irq.pflip[0])) { + rdev->irq.pflip[0]) { tmp |= RADEON_CRTC_VBLANK_MASK; } if (rdev->irq.crtc_vblank_int[1] || - atomic_read(&rdev->irq.pflip[1])) { + rdev->irq.pflip[1]) { tmp |= RADEON_CRTC2_VBLANK_MASK; } if (rdev->irq.hpd[0]) { @@ -651,6 +782,7 @@ int r100_irq_process(struct radeon_device *rdev) /* gui idle interrupt */ if (status & RADEON_GUI_IDLE_STAT) { rdev->irq.gui_idle_acked = true; + rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ @@ -660,7 +792,7 @@ int r100_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (atomic_read(&rdev->irq.pflip[0])) + if (rdev->irq.pflip[0]) radeon_crtc_handle_flip(rdev, 0); } if (status & RADEON_CRTC2_VBLANK_STAT) { @@ -669,7 +801,7 @@ int r100_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (atomic_read(&rdev->irq.pflip[1])) + if 
(rdev->irq.pflip[1]) radeon_crtc_handle_flip(rdev, 1); } if (status & RADEON_FP_DETECT_STAT) { @@ -751,7 +883,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence **fence) + struct radeon_fence *fence) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t cur_pages; @@ -815,7 +947,7 @@ int r100_copy_blit(struct radeon_device *rdev, RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); if (fence) { - r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); + r = radeon_fence_emit(rdev, fence); } radeon_ring_unlock_commit(rdev, ring); return r; @@ -1091,112 +1223,6 @@ void r100_cp_disable(struct radeon_device *rdev) /* * CS functions */ -int r100_reloc_pitch_offset(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - unsigned idx, - unsigned reg) -{ - int r; - u32 tile_flags = 0; - u32 tmp; - struct radeon_cs_reloc *reloc; - u32 value; - - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); - return r; - } - - value = radeon_get_ib_value(p, idx); - tmp = value & 0x003fffff; - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - - if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) - tile_flags |= RADEON_DST_TILE_MACRO; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { - if (reg == RADEON_SRC_PITCH_OFFSET) { - DRM_ERROR("Cannot src blit from microtiled surface\n"); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - tile_flags |= RADEON_DST_TILE_MICRO; - } - - tmp |= tile_flags; - p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; - } else - p->ib.ptr[idx] = (value & 0xffc00000) | tmp; - return 0; -} - -int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - int idx) -{ - unsigned c, i; - struct radeon_cs_reloc *reloc; - struct r100_cs_track *track; - int r = 0; - volatile uint32_t *ib; - u32 idx_value; - - ib = p->ib.ptr; - track = (struct r100_cs_track *)p->track; - c = radeon_get_ib_value(p, idx++) & 0x1F; - if (c > 16) { - DRM_ERROR("Only 16 vertex buffers are allowed %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - track->num_arrays = c; - for (i = 0; i < (c - 1); i+=2, idx+=3) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize &= 0x7F; - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 1].robj = reloc->robj; - track->arrays[i + 1].esize = idx_value >> 24; - track->arrays[i + 1].esize &= 0x7F; - } - if (c & 1) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].esize &= 
0x7F; - } - return r; -} - int r100_cs_parse_packet0(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, const unsigned *auth, unsigned n, @@ -2022,514 +2048,132 @@ int r100_cs_parse(struct radeon_cs_parser *p) return 0; } -static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) -{ - DRM_ERROR("pitch %d\n", t->pitch); - DRM_ERROR("use_pitch %d\n", t->use_pitch); - DRM_ERROR("width %d\n", t->width); - DRM_ERROR("width_11 %d\n", t->width_11); - DRM_ERROR("height %d\n", t->height); - DRM_ERROR("height_11 %d\n", t->height_11); - DRM_ERROR("num levels %d\n", t->num_levels); - DRM_ERROR("depth %d\n", t->txdepth); - DRM_ERROR("bpp %d\n", t->cpp); - DRM_ERROR("coordinate type %d\n", t->tex_coord_type); - DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); - DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); - DRM_ERROR("compress format %d\n", t->compress_format); -} -static int r100_track_compress_size(int compress_format, int w, int h) +/* + * Global GPU functions + */ +void r100_errata(struct radeon_device *rdev) { - int block_width, block_height, block_bytes; - int wblocks, hblocks; - int min_wblocks; - int sz; - - block_width = 4; - block_height = 4; + rdev->pll_errata = 0; - switch (compress_format) { - case R100_TRACK_COMP_DXT1: - block_bytes = 8; - min_wblocks = 4; - break; - default: - case R100_TRACK_COMP_DXT35: - block_bytes = 16; - min_wblocks = 2; - break; + if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) { + rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS; } - hblocks = (h + block_height - 1) / block_height; - wblocks = (w + block_width - 1) / block_width; - if (wblocks < min_wblocks) - wblocks = min_wblocks; - sz = wblocks * hblocks * block_bytes; - return sz; + if (rdev->family == CHIP_RV100 || + rdev->family == CHIP_RS100 || + rdev->family == CHIP_RS200) { + rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY; + } } -static int r100_cs_track_cube(struct radeon_device *rdev, - struct r100_cs_track *track, unsigned idx) +/* Wait for vertical sync on primary CRTC */ +void r100_gpu_wait_for_vsync(struct radeon_device *rdev) { - unsigned face, w, h; - struct radeon_bo *cube_robj; - unsigned long size; - unsigned compress_format = track->textures[idx].compress_format; + uint32_t crtc_gen_cntl, tmp; + int i; - for (face = 0; face < 5; face++) { - cube_robj = track->textures[idx].cube_info[face].robj; - w = track->textures[idx].cube_info[face].width; - h = track->textures[idx].cube_info[face].height; + crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL); + if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) || + !(crtc_gen_cntl & RADEON_CRTC_EN)) { + return; + } + /* Clear the CRTC_VBLANK_SAVE bit */ + WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(RADEON_CRTC_STATUS); + if (tmp & RADEON_CRTC_VBLANK_SAVE) { + return; + } + DRM_UDELAY(1); + } +} - if (compress_format) { - size = r100_track_compress_size(compress_format, w, h); - } else - size = w * h; - size *= track->textures[idx].cpp; +/* Wait for vertical sync on secondary CRTC */ +void r100_gpu_wait_for_vsync2(struct radeon_device *rdev) +{ + uint32_t crtc2_gen_cntl, tmp; + int i; - size += track->textures[idx].cube_info[face].offset; + crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL); + if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) || + !(crtc2_gen_cntl & RADEON_CRTC2_EN)) + return; - if (size > radeon_bo_size(cube_robj)) { - DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", - size, radeon_bo_size(cube_robj)); - 
r100_cs_track_texture_print(&track->textures[idx]); - return -1; + /* Clear the CRTC_VBLANK_SAVE bit */ + WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR); + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(RADEON_CRTC2_STATUS); + if (tmp & RADEON_CRTC2_VBLANK_SAVE) { + return; } + DRM_UDELAY(1); } - return 0; } -static int r100_cs_track_texture_check(struct radeon_device *rdev, - struct r100_cs_track *track) +int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n) { - struct radeon_bo *robj; - unsigned long size; - unsigned u, i, w, h, d; - int ret; + unsigned i; + uint32_t tmp; - for (u = 0; u < track->num_texture; u++) { - if (!track->textures[u].enabled) - continue; - if (track->textures[u].lookup_disable) - continue; - robj = track->textures[u].robj; - if (robj == NULL) { - DRM_ERROR("No texture bound to unit %u\n", u); - return -EINVAL; + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; + if (tmp >= n) { + return 0; } - size = 0; - for (i = 0; i <= track->textures[u].num_levels; i++) { - if (track->textures[u].use_pitch) { - if (rdev->family < CHIP_R300) - w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); - else - w = track->textures[u].pitch / (1 << i); - } else { - w = track->textures[u].width; - if (rdev->family >= CHIP_RV515) - w |= track->textures[u].width_11; - w = w / (1 << i); - if (track->textures[u].roundup_w) - w = roundup_pow_of_two(w); - } - h = track->textures[u].height; - if (rdev->family >= CHIP_RV515) - h |= track->textures[u].height_11; - h = h / (1 << i); - if (track->textures[u].roundup_h) - h = roundup_pow_of_two(h); - if (track->textures[u].tex_coord_type == 1) { - d = (1 << track->textures[u].txdepth) / (1 << i); - if (!d) - d = 1; - } else { - d = 1; - } - if (track->textures[u].compress_format) { + DRM_UDELAY(1); + } + return -1; +} - size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; - /* compressed textures are block based */ - } else - size += w * h * d; - } - size *= track->textures[u].cpp; +int r100_gui_wait_for_idle(struct radeon_device *rdev) +{ + unsigned i; + uint32_t tmp; - switch (track->textures[u].tex_coord_type) { - case 0: - case 1: - break; - case 2: - if (track->separate_cube) { - ret = r100_cs_track_cube(rdev, track, u); - if (ret) - return ret; - } else - size *= 6; - break; - default: - DRM_ERROR("Invalid texture coordinate type %u for unit " - "%u\n", track->textures[u].tex_coord_type, u); - return -EINVAL; - } - if (size > radeon_bo_size(robj)) { - DRM_ERROR("Texture of unit %u needs %lu bytes but is " - "%lu\n", u, size, radeon_bo_size(robj)); - r100_cs_track_texture_print(&track->textures[u]); - return -EINVAL; + if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) { + printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !" + " Bad things might happen.\n"); + } + for (i = 0; i < rdev->usec_timeout; i++) { + tmp = RREG32(RADEON_RBBM_STATUS); + if (!(tmp & RADEON_RBBM_ACTIVE)) { + return 0; } + DRM_UDELAY(1); } - return 0; + return -1; } -int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) +int r100_mc_wait_for_idle(struct radeon_device *rdev) { unsigned i; - unsigned long size; - unsigned prim_walk; - unsigned nverts; - unsigned num_cb = track->cb_dirty ? 
track->num_cb : 0; - - if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && - !track->blend_read_enable) - num_cb = 0; + uint32_t tmp; - for (i = 0; i < num_cb; i++) { - if (track->cb[i].robj == NULL) { - DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); - return -EINVAL; - } - size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; - size += track->cb[i].offset; - if (size > radeon_bo_size(track->cb[i].robj)) { - DRM_ERROR("[drm] Buffer too small for color buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->cb[i].robj)); - DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", - i, track->cb[i].pitch, track->cb[i].cpp, - track->cb[i].offset, track->maxy); - return -EINVAL; + for (i = 0; i < rdev->usec_timeout; i++) { + /* read MC_STATUS */ + tmp = RREG32(RADEON_MC_STATUS); + if (tmp & RADEON_MC_IDLE) { + return 0; } + DRM_UDELAY(1); } - track->cb_dirty = false; + return -1; +} - if (track->zb_dirty && track->z_enabled) { - if (track->zb.robj == NULL) { - DRM_ERROR("[drm] No buffer for z buffer !\n"); - return -EINVAL; - } - size = track->zb.pitch * track->zb.cpp * track->maxy; - size += track->zb.offset; - if (size > radeon_bo_size(track->zb.robj)) { - DRM_ERROR("[drm] Buffer too small for z buffer " - "(need %lu have %lu) !\n", size, - radeon_bo_size(track->zb.robj)); - DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", - track->zb.pitch, track->zb.cpp, - track->zb.offset, track->maxy); - return -EINVAL; - } +bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) +{ + u32 rbbm_status; + + rbbm_status = RREG32(R_000E40_RBBM_STATUS); + if (!G_000E40_GUI_ACTIVE(rbbm_status)) { + radeon_ring_lockup_update(ring); + return false; } - track->zb_dirty = false; - - if (track->aa_dirty && track->aaresolve) { - if (track->aa.robj == NULL) { - DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); - return -EINVAL; - } - /* I believe the format comes from colorbuffer0. 
*/ - size = track->aa.pitch * track->cb[0].cpp * track->maxy; - size += track->aa.offset; - if (size > radeon_bo_size(track->aa.robj)) { - DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->aa.robj)); - DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", - i, track->aa.pitch, track->cb[0].cpp, - track->aa.offset, track->maxy); - return -EINVAL; - } - } - track->aa_dirty = false; - - prim_walk = (track->vap_vf_cntl >> 4) & 0x3; - if (track->vap_vf_cntl & (1 << 14)) { - nverts = track->vap_alt_nverts; - } else { - nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; - } - switch (prim_walk) { - case 1: - for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * track->max_indx * 4; - if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); - return -EINVAL; - } - if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); - DRM_ERROR("Max indices %u\n", track->max_indx); - return -EINVAL; - } - } - break; - case 2: - for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * (nverts - 1) * 4; - if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); - return -EINVAL; - } - if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); - return -EINVAL; - } - } - break; - case 3: - size = track->vtx_size * nverts; - if (size != track->immd_dwords) { - DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", - track->immd_dwords, size); - DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", - nverts, track->vtx_size); - return -EINVAL; - } - break; - default: - DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", - prim_walk); - return -EINVAL; - } - - if (track->tex_dirty) { - track->tex_dirty = false; - return r100_cs_track_texture_check(rdev, track); - } - return 0; -} - -void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) -{ - unsigned i, face; - - track->cb_dirty = true; - track->zb_dirty = true; - track->tex_dirty = true; - track->aa_dirty = true; - - if (rdev->family < CHIP_R300) { - track->num_cb = 1; - if (rdev->family <= CHIP_RS200) - track->num_texture = 3; - else - track->num_texture = 6; - track->maxy = 2048; - track->separate_cube = 1; - } else { - track->num_cb = 4; - track->num_texture = 16; - track->maxy = 4096; - track->separate_cube = 0; - track->aaresolve = false; - track->aa.robj = NULL; - } - - for (i = 0; i < track->num_cb; i++) { - track->cb[i].robj = NULL; - track->cb[i].pitch = 8192; - track->cb[i].cpp = 16; - track->cb[i].offset = 0; - } - track->z_enabled = true; - track->zb.robj = NULL; - track->zb.pitch = 8192; - track->zb.cpp = 4; - track->zb.offset = 0; - track->vtx_size = 0x7F; - track->immd_dwords = 0xFFFFFFFFUL; - track->num_arrays = 11; - track->max_indx = 0x00FFFFFFUL; - for (i = 0; i < track->num_arrays; i++) { - track->arrays[i].robj = NULL; - track->arrays[i].esize = 0x7F; - } - for (i = 0; i < track->num_texture; i++) { - track->textures[i].compress_format = R100_TRACK_COMP_NONE; - track->textures[i].pitch = 16536; - track->textures[i].width = 16536; - track->textures[i].height = 16536; - 
track->textures[i].width_11 = 1 << 11; - track->textures[i].height_11 = 1 << 11; - track->textures[i].num_levels = 12; - if (rdev->family <= CHIP_RS200) { - track->textures[i].tex_coord_type = 0; - track->textures[i].txdepth = 0; - } else { - track->textures[i].txdepth = 16; - track->textures[i].tex_coord_type = 1; - } - track->textures[i].cpp = 64; - track->textures[i].robj = NULL; - /* CS IB emission code makes sure texture unit are disabled */ - track->textures[i].enabled = false; - track->textures[i].lookup_disable = false; - track->textures[i].roundup_w = true; - track->textures[i].roundup_h = true; - if (track->separate_cube) - for (face = 0; face < 5; face++) { - track->textures[i].cube_info[face].robj = NULL; - track->textures[i].cube_info[face].width = 16536; - track->textures[i].cube_info[face].height = 16536; - track->textures[i].cube_info[face].offset = 0; - } - } -} - -/* - * Global GPU functions - */ -void r100_errata(struct radeon_device *rdev) -{ - rdev->pll_errata = 0; - - if (rdev->family == CHIP_RV200 || rdev->family == CHIP_RS200) { - rdev->pll_errata |= CHIP_ERRATA_PLL_DUMMYREADS; - } - - if (rdev->family == CHIP_RV100 || - rdev->family == CHIP_RS100 || - rdev->family == CHIP_RS200) { - rdev->pll_errata |= CHIP_ERRATA_PLL_DELAY; - } -} - -/* Wait for vertical sync on primary CRTC */ -void r100_gpu_wait_for_vsync(struct radeon_device *rdev) -{ - uint32_t crtc_gen_cntl, tmp; - int i; - - crtc_gen_cntl = RREG32(RADEON_CRTC_GEN_CNTL); - if ((crtc_gen_cntl & RADEON_CRTC_DISP_REQ_EN_B) || - !(crtc_gen_cntl & RADEON_CRTC_EN)) { - return; - } - /* Clear the CRTC_VBLANK_SAVE bit */ - WREG32(RADEON_CRTC_STATUS, RADEON_CRTC_VBLANK_SAVE_CLEAR); - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(RADEON_CRTC_STATUS); - if (tmp & RADEON_CRTC_VBLANK_SAVE) { - return; - } - DRM_UDELAY(1); - } -} - -/* Wait for vertical sync on secondary CRTC */ -void r100_gpu_wait_for_vsync2(struct radeon_device *rdev) -{ - uint32_t crtc2_gen_cntl, tmp; - int i; - - crtc2_gen_cntl = RREG32(RADEON_CRTC2_GEN_CNTL); - if ((crtc2_gen_cntl & RADEON_CRTC2_DISP_REQ_EN_B) || - !(crtc2_gen_cntl & RADEON_CRTC2_EN)) - return; - - /* Clear the CRTC_VBLANK_SAVE bit */ - WREG32(RADEON_CRTC2_STATUS, RADEON_CRTC2_VBLANK_SAVE_CLEAR); - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(RADEON_CRTC2_STATUS); - if (tmp & RADEON_CRTC2_VBLANK_SAVE) { - return; - } - DRM_UDELAY(1); - } -} - -int r100_rbbm_fifo_wait_for_entry(struct radeon_device *rdev, unsigned n) -{ - unsigned i; - uint32_t tmp; - - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK; - if (tmp >= n) { - return 0; - } - DRM_UDELAY(1); - } - return -1; -} - -int r100_gui_wait_for_idle(struct radeon_device *rdev) -{ - unsigned i; - uint32_t tmp; - - if (r100_rbbm_fifo_wait_for_entry(rdev, 64)) { - printk(KERN_WARNING "radeon: wait for empty RBBM fifo failed !" 
- " Bad things might happen.\n"); - } - for (i = 0; i < rdev->usec_timeout; i++) { - tmp = RREG32(RADEON_RBBM_STATUS); - if (!(tmp & RADEON_RBBM_ACTIVE)) { - return 0; - } - DRM_UDELAY(1); - } - return -1; -} - -int r100_mc_wait_for_idle(struct radeon_device *rdev) -{ - unsigned i; - uint32_t tmp; - - for (i = 0; i < rdev->usec_timeout; i++) { - /* read MC_STATUS */ - tmp = RREG32(RADEON_MC_STATUS); - if (tmp & RADEON_MC_IDLE) { - return 0; - } - DRM_UDELAY(1); - } - return -1; -} - -bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) -{ - u32 rbbm_status; - - rbbm_status = RREG32(R_000E40_RBBM_STATUS); - if (!G_000E40_GUI_ACTIVE(rbbm_status)) { - radeon_ring_lockup_update(ring); - return false; - } - /* force CP activities */ - radeon_ring_force_activity(rdev, ring); - return radeon_ring_test_lockup(rdev, ring); -} - -/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */ -void r100_enable_bm(struct radeon_device *rdev) -{ - uint32_t tmp; - /* Enable bus mastering */ - tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; - WREG32(RADEON_BUS_CNTL, tmp); -} + /* force CP activities */ + radeon_ring_force_activity(rdev, ring); + return radeon_ring_test_lockup(rdev, ring); +} void r100_bm_disable(struct radeon_device *rdev) { @@ -3534,86 +3178,460 @@ void r100_bandwidth_update(struct radeon_device *rdev) temp_ff.full = dfixed_const((16/pixel_bytes2)); disp_drain_rate2.full = dfixed_div(pix_clk2, temp_ff); - grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL); - grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK); - grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); - grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK); - if ((rdev->family == CHIP_R350) && - (stop_req > 0x15)) { - stop_req -= 0x10; + grph2_cntl = RREG32(RADEON_GRPH2_BUFFER_CNTL); + grph2_cntl &= ~(RADEON_GRPH_STOP_REQ_MASK); + grph2_cntl |= (stop_req << RADEON_GRPH_STOP_REQ_SHIFT); + grph2_cntl &= ~(RADEON_GRPH_START_REQ_MASK); + if ((rdev->family == CHIP_R350) && + (stop_req > 0x15)) { + stop_req -= 0x10; + } + grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); + grph2_cntl |= RADEON_GRPH_BUFFER_SIZE; + grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL | + RADEON_GRPH_CRITICAL_AT_SOF | + RADEON_GRPH_STOP_CNTL); + + if ((rdev->family == CHIP_RS100) || + (rdev->family == CHIP_RS200)) + critical_point2 = 0; + else { + temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128; + temp_ff.full = dfixed_const(temp); + temp_ff.full = dfixed_mul(mclk_ff, temp_ff); + if (sclk_ff.full < temp_ff.full) + temp_ff.full = sclk_ff.full; + + read_return_rate.full = temp_ff.full; + + if (mode1) { + temp_ff.full = read_return_rate.full - disp_drain_rate.full; + time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff); + } else { + time_disp1_drop_priority.full = 0; + } + crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full; + crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2); + crit_point_ff.full += dfixed_const_half(0); + + critical_point2 = dfixed_trunc(crit_point_ff); + + if (rdev->disp_priority == 2) { + critical_point2 = 0; + } + + if (max_stop_req - critical_point2 < 4) + critical_point2 = 0; + + } + + if (critical_point2 == 0 && rdev->family == CHIP_R300) { + /* some R300 cards have problem with this set to 0 */ + critical_point2 = 0x10; + } + + WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) | + (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT))); + + if ((rdev->family == CHIP_RS400) || + (rdev->family == 
CHIP_RS480)) { +#if 0 + /* attempt to program RS400 disp2 regs correctly ??? */ + temp = RREG32(RS400_DISP2_REQ_CNTL1); + temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK | + RS400_DISP2_STOP_REQ_LEVEL_MASK); + WREG32(RS400_DISP2_REQ_CNTL1, (temp | + (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) | + (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); + temp = RREG32(RS400_DISP2_REQ_CNTL2); + temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK | + RS400_DISP2_CRITICAL_POINT_STOP_MASK); + WREG32(RS400_DISP2_REQ_CNTL2, (temp | + (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) | + (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT))); +#endif + WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC); + WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000); + WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC); + WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC); + } + + DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n", + (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL)); + } +} + +static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) +{ + DRM_ERROR("pitch %d\n", t->pitch); + DRM_ERROR("use_pitch %d\n", t->use_pitch); + DRM_ERROR("width %d\n", t->width); + DRM_ERROR("width_11 %d\n", t->width_11); + DRM_ERROR("height %d\n", t->height); + DRM_ERROR("height_11 %d\n", t->height_11); + DRM_ERROR("num levels %d\n", t->num_levels); + DRM_ERROR("depth %d\n", t->txdepth); + DRM_ERROR("bpp %d\n", t->cpp); + DRM_ERROR("coordinate type %d\n", t->tex_coord_type); + DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); + DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); + DRM_ERROR("compress format %d\n", t->compress_format); +} + +static int r100_track_compress_size(int compress_format, int w, int h) +{ + int block_width, block_height, block_bytes; + int wblocks, hblocks; + int min_wblocks; + int sz; + + block_width = 4; + block_height = 4; + + switch (compress_format) { + case R100_TRACK_COMP_DXT1: + block_bytes = 8; + min_wblocks = 4; + break; + default: + case R100_TRACK_COMP_DXT35: + block_bytes = 16; + min_wblocks = 2; + break; + } + + hblocks = (h + block_height - 1) / block_height; + wblocks = (w + block_width - 1) / block_width; + if (wblocks < min_wblocks) + wblocks = min_wblocks; + sz = wblocks * hblocks * block_bytes; + return sz; +} + +static int r100_cs_track_cube(struct radeon_device *rdev, + struct r100_cs_track *track, unsigned idx) +{ + unsigned face, w, h; + struct radeon_bo *cube_robj; + unsigned long size; + unsigned compress_format = track->textures[idx].compress_format; + + for (face = 0; face < 5; face++) { + cube_robj = track->textures[idx].cube_info[face].robj; + w = track->textures[idx].cube_info[face].width; + h = track->textures[idx].cube_info[face].height; + + if (compress_format) { + size = r100_track_compress_size(compress_format, w, h); + } else + size = w * h; + size *= track->textures[idx].cpp; + + size += track->textures[idx].cube_info[face].offset; + + if (size > radeon_bo_size(cube_robj)) { + DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", + size, radeon_bo_size(cube_robj)); + r100_cs_track_texture_print(&track->textures[idx]); + return -1; + } + } + return 0; +} + +static int r100_cs_track_texture_check(struct radeon_device *rdev, + struct r100_cs_track *track) +{ + struct radeon_bo *robj; + unsigned long size; + unsigned u, i, w, h, d; + int ret; + + for (u = 0; u < track->num_texture; u++) { + if (!track->textures[u].enabled) + continue; + if (track->textures[u].lookup_disable) + continue; + robj = track->textures[u].robj; + if (robj == 
NULL) { + DRM_ERROR("No texture bound to unit %u\n", u); + return -EINVAL; + } + size = 0; + for (i = 0; i <= track->textures[u].num_levels; i++) { + if (track->textures[u].use_pitch) { + if (rdev->family < CHIP_R300) + w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); + else + w = track->textures[u].pitch / (1 << i); + } else { + w = track->textures[u].width; + if (rdev->family >= CHIP_RV515) + w |= track->textures[u].width_11; + w = w / (1 << i); + if (track->textures[u].roundup_w) + w = roundup_pow_of_two(w); + } + h = track->textures[u].height; + if (rdev->family >= CHIP_RV515) + h |= track->textures[u].height_11; + h = h / (1 << i); + if (track->textures[u].roundup_h) + h = roundup_pow_of_two(h); + if (track->textures[u].tex_coord_type == 1) { + d = (1 << track->textures[u].txdepth) / (1 << i); + if (!d) + d = 1; + } else { + d = 1; + } + if (track->textures[u].compress_format) { + + size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; + /* compressed textures are block based */ + } else + size += w * h * d; + } + size *= track->textures[u].cpp; + + switch (track->textures[u].tex_coord_type) { + case 0: + case 1: + break; + case 2: + if (track->separate_cube) { + ret = r100_cs_track_cube(rdev, track, u); + if (ret) + return ret; + } else + size *= 6; + break; + default: + DRM_ERROR("Invalid texture coordinate type %u for unit " + "%u\n", track->textures[u].tex_coord_type, u); + return -EINVAL; + } + if (size > radeon_bo_size(robj)) { + DRM_ERROR("Texture of unit %u needs %lu bytes but is " + "%lu\n", u, size, radeon_bo_size(robj)); + r100_cs_track_texture_print(&track->textures[u]); + return -EINVAL; + } + } + return 0; +} + +int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) +{ + unsigned i; + unsigned long size; + unsigned prim_walk; + unsigned nverts; + unsigned num_cb = track->cb_dirty ? 
track->num_cb : 0; + + if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && + !track->blend_read_enable) + num_cb = 0; + + for (i = 0; i < num_cb; i++) { + if (track->cb[i].robj == NULL) { + DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); + return -EINVAL; } - grph2_cntl |= (stop_req << RADEON_GRPH_START_REQ_SHIFT); - grph2_cntl |= RADEON_GRPH_BUFFER_SIZE; - grph2_cntl &= ~(RADEON_GRPH_CRITICAL_CNTL | - RADEON_GRPH_CRITICAL_AT_SOF | - RADEON_GRPH_STOP_CNTL); + size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; + size += track->cb[i].offset; + if (size > radeon_bo_size(track->cb[i].robj)) { + DRM_ERROR("[drm] Buffer too small for color buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->cb[i].robj)); + DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", + i, track->cb[i].pitch, track->cb[i].cpp, + track->cb[i].offset, track->maxy); + return -EINVAL; + } + } + track->cb_dirty = false; - if ((rdev->family == CHIP_RS100) || - (rdev->family == CHIP_RS200)) - critical_point2 = 0; - else { - temp = (rdev->mc.vram_width * rdev->mc.vram_is_ddr + 1)/128; - temp_ff.full = dfixed_const(temp); - temp_ff.full = dfixed_mul(mclk_ff, temp_ff); - if (sclk_ff.full < temp_ff.full) - temp_ff.full = sclk_ff.full; + if (track->zb_dirty && track->z_enabled) { + if (track->zb.robj == NULL) { + DRM_ERROR("[drm] No buffer for z buffer !\n"); + return -EINVAL; + } + size = track->zb.pitch * track->zb.cpp * track->maxy; + size += track->zb.offset; + if (size > radeon_bo_size(track->zb.robj)) { + DRM_ERROR("[drm] Buffer too small for z buffer " + "(need %lu have %lu) !\n", size, + radeon_bo_size(track->zb.robj)); + DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", + track->zb.pitch, track->zb.cpp, + track->zb.offset, track->maxy); + return -EINVAL; + } + } + track->zb_dirty = false; - read_return_rate.full = temp_ff.full; + if (track->aa_dirty && track->aaresolve) { + if (track->aa.robj == NULL) { + DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); + return -EINVAL; + } + /* I believe the format comes from colorbuffer0. 
*/ + size = track->aa.pitch * track->cb[0].cpp * track->maxy; + size += track->aa.offset; + if (size > radeon_bo_size(track->aa.robj)) { + DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->aa.robj)); + DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", + i, track->aa.pitch, track->cb[0].cpp, + track->aa.offset, track->maxy); + return -EINVAL; + } + } + track->aa_dirty = false; - if (mode1) { - temp_ff.full = read_return_rate.full - disp_drain_rate.full; - time_disp1_drop_priority.full = dfixed_div(crit_point_ff, temp_ff); - } else { - time_disp1_drop_priority.full = 0; + prim_walk = (track->vap_vf_cntl >> 4) & 0x3; + if (track->vap_vf_cntl & (1 << 14)) { + nverts = track->vap_alt_nverts; + } else { + nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; + } + switch (prim_walk) { + case 1: + for (i = 0; i < track->num_arrays; i++) { + size = track->arrays[i].esize * track->max_indx * 4; + if (track->arrays[i].robj == NULL) { + DRM_ERROR("(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); + return -EINVAL; } - crit_point_ff.full = disp_latency.full + time_disp1_drop_priority.full + disp_latency.full; - crit_point_ff.full = dfixed_mul(crit_point_ff, disp_drain_rate2); - crit_point_ff.full += dfixed_const_half(0); - - critical_point2 = dfixed_trunc(crit_point_ff); - - if (rdev->disp_priority == 2) { - critical_point2 = 0; + if (size > radeon_bo_size(track->arrays[i].robj)) { + dev_err(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); + DRM_ERROR("Max indices %u\n", track->max_indx); + return -EINVAL; + } + } + break; + case 2: + for (i = 0; i < track->num_arrays; i++) { + size = track->arrays[i].esize * (nverts - 1) * 4; + if (track->arrays[i].robj == NULL) { + DRM_ERROR("(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); + return -EINVAL; + } + if (size > radeon_bo_size(track->arrays[i].robj)) { + dev_err(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); + return -EINVAL; } + } + break; + case 3: + size = track->vtx_size * nverts; + if (size != track->immd_dwords) { + DRM_ERROR("IMMD draw %u dwords but needs %lu dwords\n", + track->immd_dwords, size); + DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", + nverts, track->vtx_size); + return -EINVAL; + } + break; + default: + DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", + prim_walk); + return -EINVAL; + } - if (max_stop_req - critical_point2 < 4) - critical_point2 = 0; + if (track->tex_dirty) { + track->tex_dirty = false; + return r100_cs_track_texture_check(rdev, track); + } + return 0; +} - } +void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) +{ + unsigned i, face; - if (critical_point2 == 0 && rdev->family == CHIP_R300) { - /* some R300 cards have problem with this set to 0 */ - critical_point2 = 0x10; - } + track->cb_dirty = true; + track->zb_dirty = true; + track->tex_dirty = true; + track->aa_dirty = true; - WREG32(RADEON_GRPH2_BUFFER_CNTL, ((grph2_cntl & ~RADEON_GRPH_CRITICAL_POINT_MASK) | - (critical_point2 << RADEON_GRPH_CRITICAL_POINT_SHIFT))); + if (rdev->family < CHIP_R300) { + track->num_cb = 1; + if (rdev->family <= CHIP_RS200) + track->num_texture = 3; + else + track->num_texture = 6; + track->maxy = 2048; + track->separate_cube = 1; + } else { + track->num_cb = 4; +
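For reference, the per-unit bound check that the restored r100_cs_track_texture_check() performs above reduces to summing a mip chain and comparing the total against the backing BO size. A minimal standalone sketch of that summation, assuming plain power-of-two minification and ignoring the pitch, rounding and compressed-format special cases the real code handles (all names here are illustrative, not the driver's own):

/* Hedged sketch only: sums w*h*d texels over every mip level,
 * clamping each dimension at 1, then scales by bytes per texel. */
static unsigned long mip_chain_bytes(unsigned w, unsigned h, unsigned d,
				     unsigned cpp, unsigned num_levels,
				     int is_cube)
{
	unsigned long size = 0;
	unsigned i;

	for (i = 0; i <= num_levels; i++) {
		unsigned lw = (w >> i) ? (w >> i) : 1;
		unsigned lh = (h >> i) ? (h >> i) : 1;
		unsigned ld = (d >> i) ? (d >> i) : 1;

		size += (unsigned long)lw * lh * ld;
	}
	size *= cpp;		/* bytes per texel */
	if (is_cube)
		size *= 6;	/* one image per cube face */
	return size;		/* caller compares against radeon_bo_size() */
}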
track->num_texture = 16; + track->maxy = 4096; + track->separate_cube = 0; + track->aaresolve = false; + track->aa.robj = NULL; + } - if ((rdev->family == CHIP_RS400) || - (rdev->family == CHIP_RS480)) { -#if 0 - /* attempt to program RS400 disp2 regs correctly ??? */ - temp = RREG32(RS400_DISP2_REQ_CNTL1); - temp &= ~(RS400_DISP2_START_REQ_LEVEL_MASK | - RS400_DISP2_STOP_REQ_LEVEL_MASK); - WREG32(RS400_DISP2_REQ_CNTL1, (temp | - (critical_point2 << RS400_DISP1_START_REQ_LEVEL_SHIFT) | - (critical_point2 << RS400_DISP1_STOP_REQ_LEVEL_SHIFT))); - temp = RREG32(RS400_DISP2_REQ_CNTL2); - temp &= ~(RS400_DISP2_CRITICAL_POINT_START_MASK | - RS400_DISP2_CRITICAL_POINT_STOP_MASK); - WREG32(RS400_DISP2_REQ_CNTL2, (temp | - (critical_point2 << RS400_DISP2_CRITICAL_POINT_START_SHIFT) | - (critical_point2 << RS400_DISP2_CRITICAL_POINT_STOP_SHIFT))); -#endif - WREG32(RS400_DISP2_REQ_CNTL1, 0x105DC1CC); - WREG32(RS400_DISP2_REQ_CNTL2, 0x2749D000); - WREG32(RS400_DMIF_MEM_CNTL1, 0x29CA71DC); - WREG32(RS400_DISP1_REQ_CNTL1, 0x28FBC3AC); + for (i = 0; i < track->num_cb; i++) { + track->cb[i].robj = NULL; + track->cb[i].pitch = 8192; + track->cb[i].cpp = 16; + track->cb[i].offset = 0; + } + track->z_enabled = true; + track->zb.robj = NULL; + track->zb.pitch = 8192; + track->zb.cpp = 4; + track->zb.offset = 0; + track->vtx_size = 0x7F; + track->immd_dwords = 0xFFFFFFFFUL; + track->num_arrays = 11; + track->max_indx = 0x00FFFFFFUL; + for (i = 0; i < track->num_arrays; i++) { + track->arrays[i].robj = NULL; + track->arrays[i].esize = 0x7F; + } + for (i = 0; i < track->num_texture; i++) { + track->textures[i].compress_format = R100_TRACK_COMP_NONE; + track->textures[i].pitch = 16536; + track->textures[i].width = 16536; + track->textures[i].height = 16536; + track->textures[i].width_11 = 1 << 11; + track->textures[i].height_11 = 1 << 11; + track->textures[i].num_levels = 12; + if (rdev->family <= CHIP_RS200) { + track->textures[i].tex_coord_type = 0; + track->textures[i].txdepth = 0; + } else { + track->textures[i].txdepth = 16; + track->textures[i].tex_coord_type = 1; } - - DRM_DEBUG_KMS("GRPH2_BUFFER_CNTL from to %x\n", - (unsigned int)RREG32(RADEON_GRPH2_BUFFER_CNTL)); + track->textures[i].cpp = 64; + track->textures[i].robj = NULL; + /* CS IB emission code makes sure texture unit are disabled */ + track->textures[i].enabled = false; + track->textures[i].lookup_disable = false; + track->textures[i].roundup_w = true; + track->textures[i].roundup_h = true; + if (track->separate_cube) + for (face = 0; face < 5; face++) { + track->textures[i].cube_info[face].robj = NULL; + track->textures[i].cube_info[face].width = 16536; + track->textures[i].cube_info[face].height = 16536; + track->textures[i].cube_info[face].offset = 0; + } } } diff --git a/trunk/drivers/gpu/drm/radeon/r200.c b/trunk/drivers/gpu/drm/radeon/r200.c index f0889259eb08..a26144d01207 100644 --- a/trunk/drivers/gpu/drm/radeon/r200.c +++ b/trunk/drivers/gpu/drm/radeon/r200.c @@ -85,7 +85,7 @@ int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence **fence) + struct radeon_fence *fence) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t size; @@ -120,7 +120,7 @@ int r200_copy_dma(struct radeon_device *rdev, radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE); if (fence) { - r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); + r = radeon_fence_emit(rdev, fence); } 
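The r200_copy_dma() hunk above shows the calling convention this patch restores: the caller allocates the fence with radeon_fence_create() and the copy routine merely emits it on the GFX ring. A sketch of the resulting caller-side pattern, where src_offset, dst_offset and num_pages are placeholders rather than real driver variables:

/* Illustrative usage of the create-then-emit fence API. */
struct radeon_fence *fence = NULL;
int r;

r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
if (r)
	return r;
r = r200_copy_dma(rdev, src_offset, dst_offset, num_pages, fence);
if (!r)
	r = radeon_fence_wait(fence, false);	/* block until the copy retires */
radeon_fence_unref(&fence);			/* drop the caller's reference */
return r;

The benchmark and test hunks later in this patch drive the same pattern through the radeon_copy_dma()/radeon_copy_blit() wrappers.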
radeon_ring_unlock_commit(rdev, ring); return r; diff --git a/trunk/drivers/gpu/drm/radeon/r600.c b/trunk/drivers/gpu/drm/radeon/r600.c index 43d0c41922a5..bff627293812 100644 --- a/trunk/drivers/gpu/drm/radeon/r600.c +++ b/trunk/drivers/gpu/drm/radeon/r600.c @@ -709,7 +709,6 @@ void r600_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; - unsigned enable = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); @@ -730,22 +729,28 @@ void r600_hpd_init(struct radeon_device *rdev) switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, tmp); + rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, tmp); + rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, tmp); + rdev->irq.hpd[2] = true; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, tmp); + rdev->irq.hpd[3] = true; break; /* DCE 3.2 */ case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, tmp); + rdev->irq.hpd[4] = true; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, tmp); + rdev->irq.hpd[5] = true; break; default: break; @@ -754,73 +759,85 @@ void r600_hpd_init(struct radeon_device *rdev) switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HOT_PLUG_DETECT1_CONTROL, DC_HOT_PLUG_DETECTx_EN); + rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HOT_PLUG_DETECT2_CONTROL, DC_HOT_PLUG_DETECTx_EN); + rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HOT_PLUG_DETECT3_CONTROL, DC_HOT_PLUG_DETECTx_EN); + rdev->irq.hpd[2] = true; break; default: break; } } - enable |= 1 << radeon_connector->hpd.hpd; radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); } - radeon_irq_kms_enable_hpd(rdev, enable); + if (rdev->irq.installed) + r600_irq_set(rdev); } void r600_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; - unsigned disable = 0; - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - struct radeon_connector *radeon_connector = to_radeon_connector(connector); - if (ASIC_IS_DCE3(rdev)) { + if (ASIC_IS_DCE3(rdev)) { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, 0); + rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, 0); + rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, 0); + rdev->irq.hpd[2] = false; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, 0); + rdev->irq.hpd[3] = false; break; /* DCE 3.2 */ case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, 0); + rdev->irq.hpd[4] = false; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, 0); + rdev->irq.hpd[5] = false; break; default: break; } - } else { + } + } else { + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HOT_PLUG_DETECT1_CONTROL, 0); + rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HOT_PLUG_DETECT2_CONTROL, 0); + rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HOT_PLUG_DETECT3_CONTROL, 0); + rdev->irq.hpd[2] = false; break; default: break; } } - disable |= 1 << radeon_connector->hpd.hpd; } - radeon_irq_kms_disable_hpd(rdev, disable); } /* @@ 
-2292,18 +2309,17 @@ int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence **fence) + struct radeon_fence *fence) { - struct radeon_semaphore *sem = NULL; struct radeon_sa_bo *vb = NULL; int r; - r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem); + r = r600_blit_prepare_copy(rdev, num_gpu_pages, &vb); if (r) { return r; } r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb); - r600_blit_done_copy(rdev, fence, vb, sem); + r600_blit_done_copy(rdev, fence, vb); return 0; } @@ -2591,7 +2607,7 @@ void r600_fini(struct radeon_device *rdev) */ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->ring]; + struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; /* FIXME: implement */ radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); @@ -2841,6 +2857,7 @@ void r600_disable_interrupts(struct radeon_device *rdev) WREG32(IH_RB_RPTR, 0); WREG32(IH_RB_WPTR, 0); rdev->ih.enabled = false; + rdev->ih.wptr = 0; rdev->ih.rptr = 0; } @@ -3025,18 +3042,18 @@ int r600_irq_set(struct radeon_device *rdev) hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; } - if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { + if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { DRM_DEBUG("r600_irq_set: sw int\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; } if (rdev->irq.crtc_vblank_int[0] || - atomic_read(&rdev->irq.pflip[0])) { + rdev->irq.pflip[0]) { DRM_DEBUG("r600_irq_set: vblank 0\n"); mode_int |= D1MODE_VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - atomic_read(&rdev->irq.pflip[1])) { + rdev->irq.pflip[1]) { DRM_DEBUG("r600_irq_set: vblank 1\n"); mode_int |= D2MODE_VBLANK_INT_MASK; } @@ -3292,6 +3309,7 @@ int r600_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data; u32 ring_index; + unsigned long flags; bool queue_hotplug = false; bool queue_hdmi = false; @@ -3303,21 +3321,24 @@ int r600_irq_process(struct radeon_device *rdev) RREG32(IH_RB_WPTR); wptr = r600_get_ih_wptr(rdev); - -restart_ih: - /* is somebody else already processing irqs? */ - if (atomic_xchg(&rdev->ih.lock, 1)) - return IRQ_NONE; - rptr = rdev->ih.rptr; DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + spin_lock_irqsave(&rdev->ih.lock, flags); + + if (rptr == wptr) { + spin_unlock_irqrestore(&rdev->ih.lock, flags); + return IRQ_NONE; + } + +restart_ih: /* Order reading of wptr vs. reading of IH ring data */ rmb(); /* display interrupts */ r600_irq_ack(rdev); + rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! 
*/ ring_index = rptr / 4; @@ -3334,7 +3355,7 @@ int r600_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (atomic_read(&rdev->irq.pflip[0])) + if (rdev->irq.pflip[0]) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -3360,7 +3381,7 @@ int r600_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (atomic_read(&rdev->irq.pflip[1])) + if (rdev->irq.pflip[1]) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); @@ -3459,6 +3480,7 @@ int r600_irq_process(struct radeon_device *rdev) break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); + rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default: @@ -3470,19 +3492,17 @@ int r600_irq_process(struct radeon_device *rdev) rptr += 16; rptr &= rdev->ih.ptr_mask; } + /* make sure wptr hasn't changed while processing */ + wptr = r600_get_ih_wptr(rdev); + if (wptr != rdev->ih.wptr) + goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); if (queue_hdmi) schedule_work(&rdev->audio_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); - atomic_set(&rdev->ih.lock, 0); - - /* make sure wptr hasn't changed while processing */ - wptr = r600_get_ih_wptr(rdev); - if (wptr != rptr) - goto restart_ih; - + spin_unlock_irqrestore(&rdev->ih.lock, flags); return IRQ_HANDLED; } diff --git a/trunk/drivers/gpu/drm/radeon/r600_blit_kms.c b/trunk/drivers/gpu/drm/radeon/r600_blit_kms.c index 2b8d6418a30c..03b6e0d3d503 100644 --- a/trunk/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/trunk/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -512,8 +512,7 @@ int r600_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state; - rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ - rdev->r600_blit.ring_size_common += 40; /* shaders + def state */ + rdev->r600_blit.ring_size_common = 40; /* shaders + def state */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ @@ -667,8 +666,7 @@ static unsigned r600_blit_create_rect(unsigned num_gpu_pages, int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_fence **fence, struct radeon_sa_bo **vb, - struct radeon_semaphore **sem) + struct radeon_sa_bo **vb) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; @@ -691,50 +689,34 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, return r; } - r = radeon_semaphore_create(rdev, sem); - if (r) { - radeon_sa_bo_free(rdev, vb, NULL); - return r; - } - /* calculate number of loops correctly */ ring_size = num_loops * dwords_per_loop; ring_size += rdev->r600_blit.ring_size_common; r = radeon_ring_lock(rdev, ring, ring_size); if (r) { radeon_sa_bo_free(rdev, vb, NULL); - radeon_semaphore_free(rdev, sem, NULL); return r; } - if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) { - radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring, - RADEON_RING_TYPE_GFX_INDEX); - radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX); - } else { - radeon_semaphore_free(rdev, sem, NULL); - } - rdev->r600_blit.primitives.set_default_state(rdev); rdev->r600_blit.primitives.set_shaders(rdev); return 0; } -void 
r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, - struct radeon_sa_bo *vb, struct radeon_semaphore *sem) +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, + struct radeon_sa_bo *vb) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; - r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); + r = radeon_fence_emit(rdev, fence); if (r) { radeon_ring_unlock_undo(rdev, ring); return; } radeon_ring_unlock_commit(rdev, ring); - radeon_sa_bo_free(rdev, &vb, *fence); - radeon_semaphore_free(rdev, &sem, *fence); + radeon_sa_bo_free(rdev, &vb, fence); } void r600_kms_blit_copy(struct radeon_device *rdev, diff --git a/trunk/drivers/gpu/drm/radeon/r600_hdmi.c b/trunk/drivers/gpu/drm/radeon/r600_hdmi.c index e3558c3ef24a..82a0a4c919c0 100644 --- a/trunk/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/trunk/drivers/gpu/drm/radeon/r600_hdmi.c @@ -519,7 +519,8 @@ void r600_hdmi_enable(struct drm_encoder *encoder) if (rdev->irq.installed) { /* if irq is available use it */ - radeon_irq_kms_enable_afmt(rdev, dig->afmt->id); + rdev->irq.afmt[dig->afmt->id] = true; + radeon_irq_set(rdev); } dig->afmt->enabled = true; @@ -555,7 +556,8 @@ void r600_hdmi_disable(struct drm_encoder *encoder) offset, radeon_encoder->encoder_id); /* disable irq */ - radeon_irq_kms_disable_afmt(rdev, dig->afmt->id); + rdev->irq.afmt[dig->afmt->id] = false; + radeon_irq_set(rdev); /* Older chipsets not handled by AtomBIOS */ if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { diff --git a/trunk/drivers/gpu/drm/radeon/radeon.h b/trunk/drivers/gpu/drm/radeon/radeon.h index 77b4519b19b8..fefcca55c1eb 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon.h +++ b/trunk/drivers/gpu/drm/radeon/radeon.h @@ -113,6 +113,7 @@ extern int radeon_lockup_timeout; /* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL +#define RADEON_FENCE_NOTEMITED_SEQ (~0LL) /* internal ring indices */ /* r1xx+ has gfx CP ring */ @@ -159,6 +160,48 @@ static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len) #endif bool radeon_get_bios(struct radeon_device *rdev); + +/* + * Mutex which allows recursive locking from the same process. 
+ */ +struct radeon_mutex { + struct mutex mutex; + struct task_struct *owner; + int level; +}; + +static inline void radeon_mutex_init(struct radeon_mutex *mutex) +{ + mutex_init(&mutex->mutex); + mutex->owner = NULL; + mutex->level = 0; +} + +static inline void radeon_mutex_lock(struct radeon_mutex *mutex) +{ + if (mutex_trylock(&mutex->mutex)) { + /* The mutex was unlocked before, so it's ours now */ + mutex->owner = current; + } else if (mutex->owner != current) { + /* Another process locked the mutex, take it */ + mutex_lock(&mutex->mutex); + mutex->owner = current; + } + /* Otherwise the mutex was already locked by this process */ + + mutex->level++; +} + +static inline void radeon_mutex_unlock(struct radeon_mutex *mutex) +{ + if (--mutex->level > 0) + return; + + mutex->owner = NULL; + mutex_unlock(&mutex->mutex); +} + + /* * Dummy page */ @@ -215,8 +258,8 @@ struct radeon_fence_driver { uint32_t scratch_reg; uint64_t gpu_addr; volatile uint32_t *cpu_addr; - /* sync_seq is protected by ring emission lock */ - uint64_t sync_seq[RADEON_NUM_RINGS]; + /* seq is protected by ring emission lock */ + uint64_t seq; atomic64_t last_seq; unsigned long last_activity; bool initialized; @@ -234,7 +277,8 @@ struct radeon_fence { int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); -int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); +int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence, int ring); +int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); @@ -246,27 +290,6 @@ int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring); -bool radeon_fence_need_sync(struct radeon_fence *fence, int ring); -void radeon_fence_note_sync(struct radeon_fence *fence, int ring); -static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a, - struct radeon_fence *b) -{ - if (!a) { - return b; - } - - if (!b) { - return a; - } - - BUG_ON(a->ring != b->ring); - - if (a->seq > b->seq) { - return a; - } else { - return b; - } -} /* * Tiling registers @@ -428,9 +451,10 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - int signaler, int waiter); + bool sync_to[RADEON_NUM_RINGS], + int dst_ring); void radeon_semaphore_free(struct radeon_device *rdev, - struct radeon_semaphore **semaphore, + struct radeon_semaphore *semaphore, struct radeon_fence *fence); /* @@ -573,18 +597,21 @@ union radeon_irq_stat_regs { #define RADEON_MAX_AFMT_BLOCKS 6 struct radeon_irq { - bool installed; - spinlock_t lock; - atomic_t ring_int[RADEON_NUM_RINGS]; - bool crtc_vblank_int[RADEON_MAX_CRTCS]; - atomic_t pflip[RADEON_MAX_CRTCS]; - wait_queue_head_t vblank_queue; - bool hpd[RADEON_MAX_HPD_PINS]; - bool gui_idle; - bool gui_idle_acked; - wait_queue_head_t idle_queue; - bool afmt[RADEON_MAX_AFMT_BLOCKS]; - union radeon_irq_stat_regs stat_regs; + bool installed; + bool 
sw_int[RADEON_NUM_RINGS]; + bool crtc_vblank_int[RADEON_MAX_CRTCS]; + bool pflip[RADEON_MAX_CRTCS]; + wait_queue_head_t vblank_queue; + bool hpd[RADEON_MAX_HPD_PINS]; + bool gui_idle; + bool gui_idle_acked; + wait_queue_head_t idle_queue; + bool afmt[RADEON_MAX_AFMT_BLOCKS]; + spinlock_t sw_lock; + int sw_refcount[RADEON_NUM_RINGS]; + union radeon_irq_stat_regs stat_regs; + spinlock_t pflip_lock[RADEON_MAX_CRTCS]; + int pflip_refcount[RADEON_MAX_CRTCS]; }; int radeon_irq_kms_init(struct radeon_device *rdev); @@ -593,11 +620,6 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); -void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block); -void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block); -void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask); -void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask); -int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev); /* * CP & rings. @@ -608,11 +630,9 @@ struct radeon_ib { uint32_t length_dw; uint64_t gpu_addr; uint32_t *ptr; - int ring; struct radeon_fence *fence; unsigned vm_id; bool is_const_ib; - struct radeon_fence *sync_to[RADEON_NUM_RINGS]; struct radeon_semaphore *semaphore; }; @@ -670,7 +690,6 @@ struct radeon_vm_funcs { }; struct radeon_vm_manager { - struct mutex lock; struct list_head lru_vm; uint32_t use_bitmap; struct radeon_sa_manager sa_manager; @@ -699,10 +718,13 @@ struct r600_ih { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr; + unsigned rptr_offs; + unsigned wptr; + unsigned wptr_old; unsigned ring_size; uint64_t gpu_addr; uint32_t ptr_mask; - atomic_t lock; + spinlock_t lock; bool enabled; }; @@ -1017,12 +1039,11 @@ struct radeon_power_state { struct radeon_pm { struct mutex mutex; - /* write locked while reprogramming mclk */ - struct rw_semaphore mclk_lock; u32 active_crtcs; int active_crtc_count; int req_vblank; bool vblank_sync; + bool gui_idle; fixed20_12 max_bandwidth; fixed20_12 igp_sideport_mclk; fixed20_12 igp_system_mclk; @@ -1171,20 +1192,20 @@ struct radeon_asic { uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence **fence); + struct radeon_fence *fence); u32 blit_ring_index; int (*dma)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence **fence); + struct radeon_fence *fence); u32 dma_ring_index; /* method used for bo copy */ int (*copy)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence **fence); + struct radeon_fence *fence); /* ring used for bo copies */ u32 copy_ring_index; } copy; @@ -1491,6 +1512,7 @@ struct radeon_device { struct radeon_gem gem; struct radeon_pm pm; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; + struct radeon_mutex cs_mutex; struct radeon_wb wb; struct radeon_dummy_page dummy_page; bool shutdown; @@ -1512,6 +1534,7 @@ struct radeon_device { struct work_struct audio_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */ + struct mutex vram_mutex; bool audio_enabled; struct r600_audio audio_status; /* audio stuff */ struct notifier_block acpi_nb; diff --git a/trunk/drivers/gpu/drm/radeon/radeon_asic.h 
b/trunk/drivers/gpu/drm/radeon/radeon_asic.h index 94c427ab0f5c..e76a941ef14e 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_asic.h +++ b/trunk/drivers/gpu/drm/radeon/radeon_asic.h @@ -79,7 +79,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence **fence); + struct radeon_fence *fence); int r100_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size); @@ -144,7 +144,7 @@ extern int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence **fence); + struct radeon_fence *fence); void r200_set_safe_registers(struct radeon_device *rdev); /* @@ -318,7 +318,7 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, struct radeon_fence **fence); + unsigned num_gpu_pages, struct radeon_fence *fence); void r600_hpd_init(struct radeon_device *rdev); void r600_hpd_fini(struct radeon_device *rdev); bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); @@ -363,10 +363,9 @@ int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_fence **fence, struct radeon_sa_bo **vb, - struct radeon_semaphore **sem); -void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, - struct radeon_sa_bo *vb, struct radeon_semaphore *sem); + struct radeon_sa_bo **vb); +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, + struct radeon_sa_bo *vb); void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, unsigned num_gpu_pages, diff --git a/trunk/drivers/gpu/drm/radeon/radeon_benchmark.c b/trunk/drivers/gpu/drm/radeon/radeon_benchmark.c index bedda9caadd9..364f5b1a04b9 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -45,14 +45,20 @@ static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, for (i = 0; i < n; i++) { switch (flag) { case RADEON_BENCHMARK_COPY_DMA: + r = radeon_fence_create(rdev, &fence, radeon_copy_dma_ring_index(rdev)); + if (r) + return r; r = radeon_copy_dma(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, - &fence); + fence); break; case RADEON_BENCHMARK_COPY_BLIT: + r = radeon_fence_create(rdev, &fence, radeon_copy_blit_ring_index(rdev)); + if (r) + return r; r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, - &fence); + fence); break; default: DRM_ERROR("Unknown copy method\n"); diff --git a/trunk/drivers/gpu/drm/radeon/radeon_cs.c b/trunk/drivers/gpu/drm/radeon/radeon_cs.c index f1b75275f594..142f89462aa4 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_cs.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_cs.c @@ -115,20 +115,36 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority return 0; } -static void radeon_cs_sync_rings(struct radeon_cs_parser *p) +static int radeon_cs_sync_rings(struct radeon_cs_parser *p) { - int i; + bool sync_to_ring[RADEON_NUM_RINGS] = { }; + bool need_sync = false; + int i, r; for (i = 0; i < p->nrelocs; i++) { - struct 
radeon_fence *a, *b; + struct radeon_fence *fence; if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj) continue; - a = p->relocs[i].robj->tbo.sync_obj; - b = p->ib.sync_to[a->ring]; - p->ib.sync_to[a->ring] = radeon_fence_later(a, b); + fence = p->relocs[i].robj->tbo.sync_obj; + if (fence->ring != p->ring && !radeon_fence_signaled(fence)) { + sync_to_ring[fence->ring] = true; + need_sync = true; + } + } + + if (!need_sync) { + return 0; + } + + r = radeon_semaphore_create(p->rdev, &p->ib.semaphore); + if (r) { + return r; } + + return radeon_semaphore_sync_rings(p->rdev, p->ib.semaphore, + sync_to_ring, p->ring); } /* XXX: note that this is called from the legacy UMS CS ioctl as well */ @@ -352,7 +368,10 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, DRM_ERROR("Invalid command stream !\n"); return r; } - radeon_cs_sync_rings(parser); + r = radeon_cs_sync_rings(parser); + if (r) { + DRM_ERROR("Failed to synchronize rings !\n"); + } parser->ib.vm_id = 0; r = radeon_ib_schedule(rdev, &parser->ib); if (r) { @@ -440,7 +459,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, return r; } - mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); r = radeon_vm_bind(rdev, vm); if (r) { @@ -450,7 +468,10 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (r) { goto out; } - radeon_cs_sync_rings(parser); + r = radeon_cs_sync_rings(parser); + if (r) { + DRM_ERROR("Failed to synchronize rings !\n"); + } if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { @@ -478,8 +499,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, } vm->fence = radeon_fence_ref(parser->ib.fence); } - mutex_unlock(&vm->mutex); - mutex_unlock(&rdev->vm_manager.lock); + mutex_unlock(&fpriv->vm.mutex); return r; } @@ -499,7 +519,9 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_cs_parser parser; int r; + radeon_mutex_lock(&rdev->cs_mutex); if (!rdev->accel_working) { + radeon_mutex_unlock(&rdev->cs_mutex); return -EBUSY; } /* initialize parser */ @@ -513,6 +535,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Failed to initialize parser !\n"); radeon_cs_parser_fini(&parser, r); r = radeon_cs_handle_lockup(rdev, r); + radeon_mutex_unlock(&rdev->cs_mutex); return r; } r = radeon_cs_parser_relocs(&parser); @@ -521,6 +544,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) DRM_ERROR("Failed to parse relocation %d!\n", r); radeon_cs_parser_fini(&parser, r); r = radeon_cs_handle_lockup(rdev, r); + radeon_mutex_unlock(&rdev->cs_mutex); return r; } r = radeon_cs_ib_chunk(rdev, &parser); @@ -534,6 +558,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) out: radeon_cs_parser_fini(&parser, r); r = radeon_cs_handle_lockup(rdev, r); + radeon_mutex_unlock(&rdev->cs_mutex); return r; } diff --git a/trunk/drivers/gpu/drm/radeon/radeon_device.c b/trunk/drivers/gpu/drm/radeon/radeon_device.c index f654ba80e96e..066c98b888a5 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_device.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_device.c @@ -728,19 +728,20 @@ int radeon_device_init(struct radeon_device *rdev, /* mutex initialization are all done here so we * can recall function without having locking issues */ + radeon_mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ring_lock); mutex_init(&rdev->dc_hw_i2c_mutex); - atomic_set(&rdev->ih.lock, 0); + if (rdev->family >= CHIP_R600) + spin_lock_init(&rdev->ih.lock); 
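The cs_mutex initialised just above is the recursive radeon_mutex added back in the radeon.h hunk earlier: owner records the task holding it and level counts the nesting depth, so the same process may take it again without deadlocking, e.g. so a GPU-reset path can acquire it even when the submitting process already holds it. A minimal sketch of the nesting semantics; the wrapper function is hypothetical:

/* Hypothetical caller showing that re-entry by the same task nests
 * instead of deadlocking; only the final unlock releases the mutex. */
static void example_nested_lock(struct radeon_device *rdev)
{
	radeon_mutex_lock(&rdev->cs_mutex);	/* owner = current, level = 1 */
	radeon_mutex_lock(&rdev->cs_mutex);	/* same task: level = 2 */
	/* ... e.g. reset work done while the CS path holds the lock ... */
	radeon_mutex_unlock(&rdev->cs_mutex);	/* level = 1, still held */
	radeon_mutex_unlock(&rdev->cs_mutex);	/* level = 0, released */
}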
mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); - init_rwsem(&rdev->pm.mclk_lock); + mutex_init(&rdev->vram_mutex); init_waitqueue_head(&rdev->irq.vblank_queue); init_waitqueue_head(&rdev->irq.idle_queue); r = radeon_gem_init(rdev); if (r) return r; /* initialize vm here */ - mutex_init(&rdev->vm_manager.lock); rdev->vm_manager.use_bitmap = 1; rdev->vm_manager.max_pfn = 1 << 20; INIT_LIST_HEAD(&rdev->vm_manager.lru_vm); diff --git a/trunk/drivers/gpu/drm/radeon/radeon_fence.c b/trunk/drivers/gpu/drm/radeon/radeon_fence.c index 7b55625a5e18..11f5f402d22c 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_fence.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_fence.c @@ -61,21 +61,15 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring) return seq; } -int radeon_fence_emit(struct radeon_device *rdev, - struct radeon_fence **fence, - int ring) +int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) { /* we are protected by the ring emission mutex */ - *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); - if ((*fence) == NULL) { - return -ENOMEM; + if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { + return 0; } - kref_init(&((*fence)->kref)); - (*fence)->rdev = rdev; - (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; - (*fence)->ring = ring; - radeon_fence_ring_emit(rdev, ring, *fence); - trace_radeon_fence_emit(rdev->ddev, (*fence)->seq); + fence->seq = ++rdev->fence_drv[fence->ring].seq; + radeon_fence_ring_emit(rdev, fence->ring, fence); + trace_radeon_fence_emit(rdev->ddev, fence->seq); return 0; } @@ -144,9 +138,25 @@ static void radeon_fence_destroy(struct kref *kref) struct radeon_fence *fence; fence = container_of(kref, struct radeon_fence, kref); + fence->seq = RADEON_FENCE_NOTEMITED_SEQ; kfree(fence); } +int radeon_fence_create(struct radeon_device *rdev, + struct radeon_fence **fence, + int ring) +{ + *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); + if ((*fence) == NULL) { + return -ENOMEM; + } + kref_init(&((*fence)->kref)); + (*fence)->rdev = rdev; + (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ; + (*fence)->ring = ring; + return 0; +} + static bool radeon_fence_seq_signaled(struct radeon_device *rdev, u64 seq, unsigned ring) { @@ -166,6 +176,10 @@ bool radeon_fence_signaled(struct radeon_fence *fence) if (!fence) { return true; } + if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) { + WARN(1, "Querying an unemitted fence : %p !\n", fence); + return true; + } if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) { return true; } @@ -430,7 +444,9 @@ int radeon_fence_wait_any(struct radeon_device *rdev, return 0; } - seq[i] = fences[i]->seq; + if (fences[i]->seq < RADEON_FENCE_NOTEMITED_SEQ) { + seq[i] = fences[i]->seq; + } } r = radeon_fence_wait_any_seq(rdev, seq, intr); @@ -449,7 +465,7 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) * wait. */ seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; - if (seq >= rdev->fence_drv[ring].sync_seq[ring]) { + if (seq >= rdev->fence_drv[ring].seq) { /* nothing to wait for, last_seq is already the last emited fence */ return -ENOENT; @@ -464,7 +480,7 @@ int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) * activity can be scheduled so there won't be concurrent access * to seq value. 
*/ - return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].sync_seq[ring], + return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq, ring, false, false); } @@ -492,8 +508,7 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) * but it's ok to report slightly wrong fence count here. */ radeon_fence_process(rdev, ring); - emitted = rdev->fence_drv[ring].sync_seq[ring] - - atomic64_read(&rdev->fence_drv[ring].last_seq); + emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq); /* to avoid 32bits warp around */ if (emitted > 0x10000000) { emitted = 0x10000000; @@ -501,51 +516,6 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) return (unsigned)emitted; } -bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring) -{ - struct radeon_fence_driver *fdrv; - - if (!fence) { - return false; - } - - if (fence->ring == dst_ring) { - return false; - } - - /* we are protected by the ring mutex */ - fdrv = &fence->rdev->fence_drv[dst_ring]; - if (fence->seq <= fdrv->sync_seq[fence->ring]) { - return false; - } - - return true; -} - -void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring) -{ - struct radeon_fence_driver *dst, *src; - unsigned i; - - if (!fence) { - return; - } - - if (fence->ring == dst_ring) { - return; - } - - /* we are protected by the ring mutex */ - src = &fence->rdev->fence_drv[fence->ring]; - dst = &fence->rdev->fence_drv[dst_ring]; - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - if (i == dst_ring) { - continue; - } - dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]); - } -} - int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) { uint64_t index; @@ -567,7 +537,7 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; - radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); + radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring); rdev->fence_drv[ring].initialized = true; dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); @@ -576,13 +546,10 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) { - int i; - rdev->fence_drv[ring].scratch_reg = -1; rdev->fence_drv[ring].cpu_addr = NULL; rdev->fence_drv[ring].gpu_addr = 0; - for (i = 0; i < RADEON_NUM_RINGS; ++i) - rdev->fence_drv[ring].sync_seq[i] = 0; + rdev->fence_drv[ring].seq = 0; atomic64_set(&rdev->fence_drv[ring].last_seq, 0); rdev->fence_drv[ring].last_activity = jiffies; rdev->fence_drv[ring].initialized = false; @@ -628,7 +595,7 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct radeon_device *rdev = dev->dev_private; - int i, j; + int i; for (i = 0; i < RADEON_NUM_RINGS; ++i) { if (!rdev->fence_drv[i].initialized) @@ -637,14 +604,8 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) seq_printf(m, "--- ring %d ---\n", i); seq_printf(m, "Last signaled fence 0x%016llx\n", (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq)); - seq_printf(m, "Last emitted 0x%016llx\n", - rdev->fence_drv[i].sync_seq[i]); - - for (j = 0; j < RADEON_NUM_RINGS; ++j) { - if (i != j && 
rdev->fence_drv[j].initialized) - seq_printf(m, "Last sync to ring %d 0x%016llx\n", - j, rdev->fence_drv[i].sync_seq[j]); - } + seq_printf(m, "Last emitted 0x%016llx\n", + rdev->fence_drv[i].seq); } return 0; } diff --git a/trunk/drivers/gpu/drm/radeon/radeon_gart.c b/trunk/drivers/gpu/drm/radeon/radeon_gart.c index 2b34c1a91421..59d44937dd9f 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_gart.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_gart.c @@ -305,7 +305,7 @@ int radeon_vm_manager_init(struct radeon_device *rdev) return r; } -/* global mutex must be lock */ +/* cs mutex must be lock */ static void radeon_vm_unbind_locked(struct radeon_device *rdev, struct radeon_vm *vm) { @@ -356,17 +356,17 @@ int radeon_vm_manager_suspend(struct radeon_device *rdev) { struct radeon_vm *vm, *tmp; - mutex_lock(&rdev->vm_manager.lock); + radeon_mutex_lock(&rdev->cs_mutex); /* unbind all active vm */ list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { radeon_vm_unbind_locked(rdev, vm); } rdev->vm_manager.funcs->fini(rdev); - mutex_unlock(&rdev->vm_manager.lock); + radeon_mutex_unlock(&rdev->cs_mutex); return radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); } -/* global mutex must be locked */ +/* cs mutex must be lock */ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) { mutex_lock(&vm->mutex); @@ -374,7 +374,7 @@ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) mutex_unlock(&vm->mutex); } -/* global and local mutex must be locked */ +/* cs mutex must be lock & vm mutex must be lock */ int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_vm *vm_evict; @@ -478,7 +478,7 @@ int radeon_vm_bo_add(struct radeon_device *rdev, if (last_pfn > vm->last_pfn) { /* release mutex and lock in right order */ mutex_unlock(&vm->mutex); - mutex_lock(&rdev->vm_manager.lock); + radeon_mutex_lock(&rdev->cs_mutex); mutex_lock(&vm->mutex); /* and check again */ if (last_pfn > vm->last_pfn) { @@ -487,7 +487,7 @@ int radeon_vm_bo_add(struct radeon_device *rdev, radeon_vm_unbind_locked(rdev, vm); vm->last_pfn = (last_pfn + align) & ~align; } - mutex_unlock(&rdev->vm_manager.lock); + radeon_mutex_unlock(&rdev->cs_mutex); } head = &vm->va; last_offset = 0; @@ -542,7 +542,7 @@ static u64 radeon_vm_get_addr(struct radeon_device *rdev, return addr; } -/* object have to be reserved & global and local mutex must be locked */ +/* object have to be reserved & cs mutex took & vm mutex took */ int radeon_vm_bo_update_pte(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, @@ -601,10 +601,10 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, if (bo_va == NULL) return 0; - mutex_lock(&rdev->vm_manager.lock); + radeon_mutex_lock(&rdev->cs_mutex); mutex_lock(&vm->mutex); radeon_vm_bo_update_pte(rdev, vm, bo, NULL); - mutex_unlock(&rdev->vm_manager.lock); + radeon_mutex_unlock(&rdev->cs_mutex); list_del(&bo_va->vm_list); mutex_unlock(&vm->mutex); list_del(&bo_va->bo_list); @@ -647,10 +647,10 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) struct radeon_bo_va *bo_va, *tmp; int r; - mutex_lock(&rdev->vm_manager.lock); + radeon_mutex_lock(&rdev->cs_mutex); mutex_lock(&vm->mutex); radeon_vm_unbind_locked(rdev, vm); - mutex_unlock(&rdev->vm_manager.lock); + radeon_mutex_unlock(&rdev->cs_mutex); /* remove all bo */ r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); diff --git a/trunk/drivers/gpu/drm/radeon/radeon_gem.c b/trunk/drivers/gpu/drm/radeon/radeon_gem.c index d9b0809be4e4..f28bd4b7ef98 
100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_gem.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_gem.c @@ -159,9 +159,11 @@ void radeon_gem_object_close(struct drm_gem_object *obj, static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r) { if (r == -EDEADLK) { + radeon_mutex_lock(&rdev->cs_mutex); r = radeon_gpu_reset(rdev); if (!r) r = -EAGAIN; + radeon_mutex_unlock(&rdev->cs_mutex); } return r; } diff --git a/trunk/drivers/gpu/drm/radeon/radeon_irq_kms.c b/trunk/drivers/gpu/drm/radeon/radeon_irq_kms.c index 6664514bbdca..5df58d1aba06 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -32,8 +32,6 @@ #include "radeon.h" #include "atom.h" -#define RADEON_WAIT_IDLE_TIMEOUT 200 - irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -64,56 +62,56 @@ static void radeon_hotplug_work_func(struct work_struct *work) void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; - unsigned long irqflags; unsigned i; - spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) - atomic_set(&rdev->irq.ring_int[i], 0); + rdev->irq.sw_int[i] = false; rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; for (i = 0; i < RADEON_MAX_CRTCS; i++) { rdev->irq.crtc_vblank_int[i] = false; - atomic_set(&rdev->irq.pflip[i], 0); + rdev->irq.pflip[i] = false; rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); /* Clear bits */ radeon_irq_process(rdev); } int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { + struct radeon_device *rdev = dev->dev_private; + unsigned i; + dev->max_vblank_count = 0x001fffff; + for (i = 0; i < RADEON_NUM_RINGS; i++) + rdev->irq.sw_int[i] = true; + radeon_irq_set(rdev); return 0; } void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; - unsigned long irqflags; unsigned i; if (rdev == NULL) { return; } - spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) - atomic_set(&rdev->irq.ring_int[i], 0); + rdev->irq.sw_int[i] = false; rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; for (i = 0; i < RADEON_MAX_CRTCS; i++) { rdev->irq.crtc_vblank_int[i] = false; - atomic_set(&rdev->irq.pflip[i], 0); + rdev->irq.pflip[i] = false; rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } static bool radeon_msi_ok(struct radeon_device *rdev) @@ -170,12 +168,15 @@ static bool radeon_msi_ok(struct radeon_device *rdev) int radeon_irq_kms_init(struct radeon_device *rdev) { + int i; int r = 0; INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi); - spin_lock_init(&rdev->irq.lock); + spin_lock_init(&rdev->irq.sw_lock); + for (i = 0; i < rdev->num_crtc; i++) + spin_lock_init(&rdev->irq.pflip_lock[i]); r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { return r; @@ -216,28 +217,25 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags; - if (!rdev->ddev->irq_enabled) - return; - - if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) { - spin_lock_irqsave(&rdev->irq.lock, irqflags); + spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); + if 
(rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { + rdev->irq.sw_int[ring] = true; radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } + spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); } void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags; - if (!rdev->ddev->irq_enabled) - return; - - if (atomic_dec_and_test(&rdev->irq.ring_int[ring])) { - spin_lock_irqsave(&rdev->irq.lock, irqflags); + spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); + BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); + if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { + rdev->irq.sw_int[ring] = false; radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } + spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); } void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) @@ -247,14 +245,12 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return; - if (!rdev->ddev->irq_enabled) - return; - - if (atomic_inc_return(&rdev->irq.pflip[crtc]) == 1) { - spin_lock_irqsave(&rdev->irq.lock, irqflags); + spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); + if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { + rdev->irq.pflip[crtc] = true; radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } + spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); } void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) @@ -264,77 +260,12 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return; - if (!rdev->ddev->irq_enabled) - return; - - if (atomic_dec_and_test(&rdev->irq.pflip[crtc])) { - spin_lock_irqsave(&rdev->irq.lock, irqflags); + spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); + BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); + if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { + rdev->irq.pflip[crtc] = false; radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } + spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); } -void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) -{ - unsigned long irqflags; - - spin_lock_irqsave(&rdev->irq.lock, irqflags); - rdev->irq.afmt[block] = true; - radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); - -} - -void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) -{ - unsigned long irqflags; - - spin_lock_irqsave(&rdev->irq.lock, irqflags); - rdev->irq.afmt[block] = false; - radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); -} - -void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask) -{ - unsigned long irqflags; - int i; - - spin_lock_irqsave(&rdev->irq.lock, irqflags); - for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) - rdev->irq.hpd[i] |= !!(hpd_mask & (1 << i)); - radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); -} - -void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask) -{ - unsigned long irqflags; - int i; - - spin_lock_irqsave(&rdev->irq.lock, irqflags); - for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) - rdev->irq.hpd[i] &= !(hpd_mask & (1 << i)); - radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); -} - -int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev) -{ - unsigned 
long irqflags; - int r; - - spin_lock_irqsave(&rdev->irq.lock, irqflags); - rdev->irq.gui_idle = true; - radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); - - r = wait_event_timeout(rdev->irq.idle_queue, radeon_gui_idle(rdev), - msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT)); - - spin_lock_irqsave(&rdev->irq.lock, irqflags); - rdev->irq.gui_idle = false; - radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); - return r; -} diff --git a/trunk/drivers/gpu/drm/radeon/radeon_kms.c b/trunk/drivers/gpu/drm/radeon/radeon_kms.c index 18b81d63ee7b..5c58d7d90cb2 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_kms.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_kms.c @@ -382,35 +382,29 @@ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc) int radeon_enable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; - unsigned long irqflags; - int r; if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return -EINVAL; } - spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.crtc_vblank_int[crtc] = true; - r = radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); - return r; + + return radeon_irq_set(rdev); } void radeon_disable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; - unsigned long irqflags; if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return; } - spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.crtc_vblank_int[crtc] = false; + radeon_irq_set(rdev); - spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, diff --git a/trunk/drivers/gpu/drm/radeon/radeon_object.c b/trunk/drivers/gpu/drm/radeon/radeon_object.c index 6ecb2006e27e..830f1a7b486f 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_object.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_object.c @@ -154,11 +154,11 @@ int radeon_bo_create(struct radeon_device *rdev, INIT_LIST_HEAD(&bo->va); radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are uninterruptible */ - down_read(&rdev->pm.mclk_lock); + mutex_lock(&rdev->vram_mutex); r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, 0, !kernel, NULL, acc_size, sg, &radeon_ttm_bo_destroy); - up_read(&rdev->pm.mclk_lock); + mutex_unlock(&rdev->vram_mutex); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) { if (domain == RADEON_GEM_DOMAIN_VRAM) { @@ -219,9 +219,9 @@ void radeon_bo_unref(struct radeon_bo **bo) return; rdev = (*bo)->rdev; tbo = &((*bo)->tbo); - down_read(&rdev->pm.mclk_lock); + mutex_lock(&rdev->vram_mutex); ttm_bo_unref(&tbo); - up_read(&rdev->pm.mclk_lock); + mutex_unlock(&rdev->vram_mutex); if (tbo == NULL) *bo = NULL; } diff --git a/trunk/drivers/gpu/drm/radeon/radeon_pm.c b/trunk/drivers/gpu/drm/radeon/radeon_pm.c index 7ae606600107..5b37e283ec38 100644 --- a/trunk/drivers/gpu/drm/radeon/radeon_pm.c +++ b/trunk/drivers/gpu/drm/radeon/radeon_pm.c @@ -34,6 +34,7 @@ #define RADEON_IDLE_LOOP_MS 100 #define RADEON_RECLOCK_DELAY_MS 200 #define RADEON_WAIT_VBLANK_TIMEOUT 200 +#define RADEON_WAIT_IDLE_TIMEOUT 200 static const char *radeon_pm_state_type_name[5] = { "Default", @@ -250,14 +251,21 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) return; mutex_lock(&rdev->ddev->struct_mutex); - down_write(&rdev->pm.mclk_lock); + mutex_lock(&rdev->vram_mutex); mutex_lock(&rdev->ring_lock); /* gui idle int has issues on older chips it 
seems */
 	if (rdev->family >= CHIP_R600) {
 		if (rdev->irq.installed) {
-			/* wait for GPU to become idle */
-			radeon_irq_kms_wait_gui_idle(rdev);
+			/* wait for GPU idle */
+			rdev->pm.gui_idle = false;
+			rdev->irq.gui_idle = true;
+			radeon_irq_set(rdev);
+			wait_event_interruptible_timeout(
+				rdev->irq.idle_queue, rdev->pm.gui_idle,
+				msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT));
+			rdev->irq.gui_idle = false;
+			radeon_irq_set(rdev);
 		}
 	} else {
 		struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
@@ -295,7 +303,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
 		rdev->pm.dynpm_planned_action = DYNPM_ACTION_NONE;
 	mutex_unlock(&rdev->ring_lock);
-	up_write(&rdev->pm.mclk_lock);
+	mutex_unlock(&rdev->vram_mutex);
 	mutex_unlock(&rdev->ddev->struct_mutex);
 }
diff --git a/trunk/drivers/gpu/drm/radeon/radeon_ring.c b/trunk/drivers/gpu/drm/radeon/radeon_ring.c
index 0826e77f99ae..983658c91358 100644
--- a/trunk/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/trunk/drivers/gpu/drm/radeon/radeon_ring.c
@@ -42,43 +42,40 @@ int radeon_debugfs_sa_init(struct radeon_device *rdev);
 int radeon_ib_get(struct radeon_device *rdev, int ring,
 		  struct radeon_ib *ib, unsigned size)
 {
-	int i, r;
+	int r;
 
 	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true);
 	if (r) {
 		dev_err(rdev->dev, "failed to get a new IB (%d)\n", r);
 		return r;
 	}
-
-	r = radeon_semaphore_create(rdev, &ib->semaphore);
+	r = radeon_fence_create(rdev, &ib->fence, ring);
 	if (r) {
+		dev_err(rdev->dev, "failed to create fence for new IB (%d)\n", r);
+		radeon_sa_bo_free(rdev, &ib->sa_bo, NULL);
 		return r;
 	}
-
-	ib->ring = ring;
-	ib->fence = NULL;
 	ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo);
 	ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo);
 	ib->vm_id = 0;
 	ib->is_const_ib = false;
-	for (i = 0; i < RADEON_NUM_RINGS; ++i)
-		ib->sync_to[i] = NULL;
+	ib->semaphore = NULL;
 
 	return 0;
 }
 
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib)
 {
-	radeon_semaphore_free(rdev, &ib->semaphore, ib->fence);
+	radeon_semaphore_free(rdev, ib->semaphore, ib->fence);
 	radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence);
 	radeon_fence_unref(&ib->fence);
 }
 
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 {
-	struct radeon_ring *ring = &rdev->ring[ib->ring];
-	bool need_sync = false;
-	int i, r = 0;
+	struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
+	int r = 0;
 
 	if (!ib->length_dw || !ring->ready) {
 		/* TODO: Nothings in the ib we should report.
 		 */
@@ -87,31 +84,13 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
 	}
 
 	/* 64 dwords should be enough for fence too */
-	r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8);
+	r = radeon_ring_lock(rdev, ring, 64);
 	if (r) {
 		dev_err(rdev->dev, "scheduling IB failed (%d).\n", r);
 		return r;
 	}
-	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-		struct radeon_fence *fence = ib->sync_to[i];
-		if (radeon_fence_need_sync(fence, ib->ring)) {
-			need_sync = true;
-			radeon_semaphore_sync_rings(rdev, ib->semaphore,
-						    fence->ring, ib->ring);
-			radeon_fence_note_sync(fence, ib->ring);
-		}
-	}
-	/* immediately free semaphore when we don't need to sync */
-	if (!need_sync) {
-		radeon_semaphore_free(rdev, &ib->semaphore, NULL);
-	}
-	radeon_ring_ib_execute(rdev, ib->ring, ib);
-	r = radeon_fence_emit(rdev, &ib->fence, ib->ring);
-	if (r) {
-		dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r);
-		radeon_ring_unlock_undo(rdev, ring);
-		return r;
-	}
+	radeon_ring_ib_execute(rdev, ib->fence->ring, ib);
+	radeon_fence_emit(rdev, ib->fence);
 	radeon_ring_unlock_commit(rdev, ring);
 	return 0;
 }
diff --git a/trunk/drivers/gpu/drm/radeon/radeon_sa.c b/trunk/drivers/gpu/drm/radeon/radeon_sa.c
index 81dbb5b946ef..32059b745728 100644
--- a/trunk/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/trunk/drivers/gpu/drm/radeon/radeon_sa.c
@@ -349,7 +349,7 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo,
 	sa_manager = (*sa_bo)->manager;
 	spin_lock(&sa_manager->lock);
-	if (fence && !radeon_fence_signaled(fence)) {
+	if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
 		(*sa_bo)->fence = radeon_fence_ref(fence);
 		list_add_tail(&(*sa_bo)->flist,
 			      &sa_manager->flist[fence->ring]);
diff --git a/trunk/drivers/gpu/drm/radeon/radeon_semaphore.c b/trunk/drivers/gpu/drm/radeon/radeon_semaphore.c
index 7cc78de6ddc3..e2ace5dce117 100644
--- a/trunk/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/trunk/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -68,49 +68,70 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
 	radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true);
 }
 
-/* caller must hold ring lock */
 int radeon_semaphore_sync_rings(struct radeon_device *rdev,
 				struct radeon_semaphore *semaphore,
-				int signaler, int waiter)
+				bool sync_to[RADEON_NUM_RINGS],
+				int dst_ring)
 {
-	int r;
+	int i = 0, r;
 
-	/* no need to signal and wait on the same ring */
-	if (signaler == waiter) {
-		return 0;
+	mutex_lock(&rdev->ring_lock);
+	r = radeon_ring_alloc(rdev, &rdev->ring[dst_ring], RADEON_NUM_RINGS * 8);
+	if (r) {
+		goto error;
 	}
 
-	/* prevent GPU deadlocks */
-	if (!rdev->ring[signaler].ready) {
-		dev_err(rdev->dev, "Trying to sync to a disabled ring!");
-		return -EINVAL;
-	}
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		/* no need to sync to our own or unused rings */
+		if (!sync_to[i] || i == dst_ring)
+			continue;
 
-	r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8);
-	if (r) {
-		return r;
+		/* prevent GPU deadlocks */
+		if (!rdev->ring[i].ready) {
+			dev_err(rdev->dev, "Trying to sync to a disabled ring!");
+			r = -EINVAL;
+			goto error;
+		}
+
+		r = radeon_ring_alloc(rdev, &rdev->ring[i], 8);
+		if (r) {
+			goto error;
+		}
+
+		radeon_semaphore_emit_signal(rdev, i, semaphore);
+		radeon_semaphore_emit_wait(rdev, dst_ring, semaphore);
+
+		radeon_ring_commit(rdev, &rdev->ring[i]);
 	}
 
-	radeon_semaphore_emit_signal(rdev, signaler, semaphore);
-	radeon_ring_commit(rdev, &rdev->ring[signaler]);
-
-	/* we assume caller has already allocated space on waiters ring */
-	radeon_semaphore_emit_wait(rdev, waiter, semaphore);
+	radeon_ring_commit(rdev, &rdev->ring[dst_ring]);
+	mutex_unlock(&rdev->ring_lock);
 
 	return 0;
+
+error:
+	/* unlock all locks taken so far */
+	for (--i; i >= 0; --i) {
+		if (sync_to[i] || i == dst_ring) {
+			radeon_ring_undo(&rdev->ring[i]);
+		}
+	}
+	radeon_ring_undo(&rdev->ring[dst_ring]);
+	mutex_unlock(&rdev->ring_lock);
+	return r;
 }
 
 void radeon_semaphore_free(struct radeon_device *rdev,
-			   struct radeon_semaphore **semaphore,
+			   struct radeon_semaphore *semaphore,
 			   struct radeon_fence *fence)
 {
-	if (semaphore == NULL || *semaphore == NULL) {
+	if (semaphore == NULL) {
 		return;
 	}
-	if ((*semaphore)->waiters > 0) {
+	if (semaphore->waiters > 0) {
 		dev_err(rdev->dev, "semaphore %p has more waiters than signalers,"
-			" hardware lockup imminent!\n", *semaphore);
+			" hardware lockup imminent!\n", semaphore);
 	}
-	radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence);
-	kfree(*semaphore);
-	*semaphore = NULL;
+	radeon_sa_bo_free(rdev, &semaphore->sa_bo, fence);
+	kfree(semaphore);
 }
diff --git a/trunk/drivers/gpu/drm/radeon/radeon_test.c b/trunk/drivers/gpu/drm/radeon/radeon_test.c
index a94f66fb3b13..efff929ea49d 100644
--- a/trunk/drivers/gpu/drm/radeon/radeon_test.c
+++ b/trunk/drivers/gpu/drm/radeon/radeon_test.c
@@ -106,7 +106,13 @@ void radeon_test_moves(struct radeon_device *rdev)
 
 		radeon_bo_kunmap(gtt_obj[i]);
 
-		r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+		r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
+		if (r) {
+			DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i);
+			goto out_cleanup;
+		}
+
+		r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, fence);
 		if (r) {
 			DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
 			goto out_cleanup;
@@ -149,7 +155,13 @@ void radeon_test_moves(struct radeon_device *rdev)
 
 		radeon_bo_kunmap(vram_obj);
 
-		r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+		r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
+		if (r) {
+			DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i);
+			goto out_cleanup;
+		}
+
+		r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, fence);
 		if (r) {
 			DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
 			goto out_cleanup;
@@ -233,6 +245,17 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 	int ridxB = radeon_ring_index(rdev, ringB);
 	int r;
 
+	r = radeon_fence_create(rdev, &fence1, ridxA);
+	if (r) {
+		DRM_ERROR("Failed to create sync fence 1\n");
+		goto out_cleanup;
+	}
+	r = radeon_fence_create(rdev, &fence2, ridxA);
+	if (r) {
+		DRM_ERROR("Failed to create sync fence 2\n");
+		goto out_cleanup;
+	}
+
 	r = radeon_semaphore_create(rdev, &semaphore);
 	if (r) {
 		DRM_ERROR("Failed to create semaphore\n");
@@ -245,19 +268,9 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
-	r = radeon_fence_emit(rdev, &fence1, ridxA);
-	if (r) {
-		DRM_ERROR("Failed to emit fence 1\n");
-		radeon_ring_unlock_undo(rdev, ringA);
-		goto out_cleanup;
-	}
+	radeon_fence_emit(rdev, fence1);
 	radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
-	r = radeon_fence_emit(rdev, &fence2, ridxA);
-	if (r) {
-		DRM_ERROR("Failed to emit fence 2\n");
-		radeon_ring_unlock_undo(rdev, ringA);
-		goto out_cleanup;
-	}
+	radeon_fence_emit(rdev, fence2);
 	radeon_ring_unlock_commit(rdev, ringA);
 
 	mdelay(1000);
@@ -303,7 +316,8 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 	}
 
 out_cleanup:
-	radeon_semaphore_free(rdev, &semaphore, NULL);
+	if (semaphore)
+		radeon_semaphore_free(rdev, semaphore, NULL);
 
 	if (fence1)
 		radeon_fence_unref(&fence1);
@@ -328,6 +342,17 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,
 	bool sigA, sigB;
 	int i, r;
 
+	r = radeon_fence_create(rdev, &fenceA, ridxA);
+	if (r) {
+		DRM_ERROR("Failed to create sync fence 1\n");
+		goto out_cleanup;
+	}
+	r = radeon_fence_create(rdev, &fenceB, ridxB);
+	if (r) {
+		DRM_ERROR("Failed to create sync fence 2\n");
+		goto out_cleanup;
+	}
+
 	r = radeon_semaphore_create(rdev, &semaphore);
 	if (r) {
 		DRM_ERROR("Failed to create semaphore\n");
@@ -340,12 +365,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ridxA, semaphore);
-	r = radeon_fence_emit(rdev, &fenceA, ridxA);
-	if (r) {
-		DRM_ERROR("Failed to emit sync fence 1\n");
-		radeon_ring_unlock_undo(rdev, ringA);
-		goto out_cleanup;
-	}
+	radeon_fence_emit(rdev, fenceA);
 	radeon_ring_unlock_commit(rdev, ringA);
 
 	r = radeon_ring_lock(rdev, ringB, 64);
@@ -354,12 +374,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ridxB, semaphore);
-	r = radeon_fence_emit(rdev, &fenceB, ridxB);
-	if (r) {
-		DRM_ERROR("Failed to create sync fence 2\n");
-		radeon_ring_unlock_undo(rdev, ringB);
-		goto out_cleanup;
-	}
+	radeon_fence_emit(rdev, fenceB);
 	radeon_ring_unlock_commit(rdev, ringB);
 
 	mdelay(1000);
@@ -421,7 +436,8 @@ void radeon_test_ring_sync2(struct radeon_device *rdev,
 	}
 
 out_cleanup:
-	radeon_semaphore_free(rdev, &semaphore, NULL);
+	if (semaphore)
+		radeon_semaphore_free(rdev, semaphore, NULL);
 
 	if (fenceA)
 		radeon_fence_unref(&fenceA);
diff --git a/trunk/drivers/gpu/drm/radeon/radeon_ttm.c b/trunk/drivers/gpu/drm/radeon/radeon_ttm.c
index 0881131a0388..c94a2257761f 100644
--- a/trunk/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/trunk/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -222,11 +222,15 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 {
 	struct radeon_device *rdev;
 	uint64_t old_start, new_start;
-	struct radeon_fence *fence;
-	int r, ridx;
+	struct radeon_fence *fence, *old_fence;
+	struct radeon_semaphore *sem = NULL;
+	int r;
 
 	rdev = radeon_get_rdev(bo->bdev);
-	ridx = radeon_copy_ring_index(rdev);
+	r = radeon_fence_create(rdev, &fence, radeon_copy_ring_index(rdev));
+	if (unlikely(r)) {
+		return r;
+	}
 
 	old_start = old_mem->start << PAGE_SHIFT;
 	new_start = new_mem->start << PAGE_SHIFT;
@@ -239,6 +243,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 		break;
 	default:
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
+		radeon_fence_unref(&fence);
 		return -EINVAL;
 	}
 	switch (new_mem->mem_type) {
@@ -250,23 +255,46 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 		break;
 	default:
 		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
+		radeon_fence_unref(&fence);
 		return -EINVAL;
 	}
-	if (!rdev->ring[ridx].ready) {
+	if (!rdev->ring[radeon_copy_ring_index(rdev)].ready) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
+		radeon_fence_unref(&fence);
 		return -EINVAL;
 	}
 
 	BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
 
 	/* sync other rings */
-	fence = bo->sync_obj;
+	old_fence = bo->sync_obj;
+	if (old_fence && old_fence->ring != fence->ring
+	    && !radeon_fence_signaled(old_fence)) {
+		bool sync_to_ring[RADEON_NUM_RINGS] = { };
+		sync_to_ring[old_fence->ring] = true;
+
+		r = radeon_semaphore_create(rdev, &sem);
+		if (r) {
+			radeon_fence_unref(&fence);
+			return r;
+		}
+
+		r = radeon_semaphore_sync_rings(rdev, sem,
+						sync_to_ring, fence->ring);
+		if (r) {
+			radeon_semaphore_free(rdev, sem, NULL);
+			radeon_fence_unref(&fence);
+			return r;
+		}
+	}
+
 	r = radeon_copy(rdev, old_start, new_start,
 			new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
-			&fence);
+			fence);
 	/* FIXME: handle copy error */
 	r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL,
 				      evict, no_wait_reserve, no_wait_gpu, new_mem);
+	radeon_semaphore_free(rdev, sem, fence);
 	radeon_fence_unref(&fence);
 	return r;
 }
@@ -797,9 +825,9 @@ static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_NOPAGE;
 	}
 	rdev = radeon_get_rdev(bo->bdev);
-	down_read(&rdev->pm.mclk_lock);
+	mutex_lock(&rdev->vram_mutex);
 	r = ttm_vm_ops->fault(vma, vmf);
-	up_read(&rdev->pm.mclk_lock);
+	mutex_unlock(&rdev->vram_mutex);
 	return r;
 }
diff --git a/trunk/drivers/gpu/drm/radeon/rs600.c b/trunk/drivers/gpu/drm/radeon/rs600.c
index e11bc4651784..e95c5e61d4e2 100644
--- a/trunk/drivers/gpu/drm/radeon/rs600.c
+++ b/trunk/drivers/gpu/drm/radeon/rs600.c
@@ -294,7 +294,6 @@ void rs600_hpd_init(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
 	struct drm_connector *connector;
-	unsigned enable = 0;
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -302,25 +301,26 @@ void rs600_hpd_init(struct radeon_device *rdev)
 		case RADEON_HPD_1:
 			WREG32(R_007D00_DC_HOT_PLUG_DETECT1_CONTROL,
 			       S_007D00_DC_HOT_PLUG_DETECT1_EN(1));
+			rdev->irq.hpd[0] = true;
 			break;
 		case RADEON_HPD_2:
 			WREG32(R_007D10_DC_HOT_PLUG_DETECT2_CONTROL,
 			       S_007D10_DC_HOT_PLUG_DETECT2_EN(1));
+			rdev->irq.hpd[1] = true;
 			break;
 		default:
 			break;
 		}
-		enable |= 1 << radeon_connector->hpd.hpd;
 		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 	}
-	radeon_irq_kms_enable_hpd(rdev, enable);
+	if (rdev->irq.installed)
+		rs600_irq_set(rdev);
 }
 
 void rs600_hpd_fini(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
 	struct drm_connector *connector;
-	unsigned disable = 0;
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -328,17 +328,17 @@ void rs600_hpd_fini(struct radeon_device *rdev)
 		case RADEON_HPD_1:
 			WREG32(R_007D00_DC_HOT_PLUG_DETECT1_CONTROL,
 			       S_007D00_DC_HOT_PLUG_DETECT1_EN(0));
+			rdev->irq.hpd[0] = false;
 			break;
 		case RADEON_HPD_2:
 			WREG32(R_007D10_DC_HOT_PLUG_DETECT2_CONTROL,
 			       S_007D10_DC_HOT_PLUG_DETECT2_EN(0));
+			rdev->irq.hpd[1] = false;
 			break;
 		default:
 			break;
 		}
-		disable |= 1 << radeon_connector->hpd.hpd;
 	}
-	radeon_irq_kms_disable_hpd(rdev, disable);
 }
 
 int rs600_asic_reset(struct radeon_device *rdev)
@@ -564,18 +564,18 @@ int rs600_irq_set(struct radeon_device *rdev)
 		WREG32(R_000040_GEN_INT_CNTL, 0);
 		return -EINVAL;
 	}
-	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
+	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
 		tmp |= S_000040_SW_INT_EN(1);
 	}
 	if (rdev->irq.gui_idle) {
 		tmp |= S_000040_GUI_IDLE(1);
 	}
 	if (rdev->irq.crtc_vblank_int[0] ||
-	    atomic_read(&rdev->irq.pflip[0])) {
+	    rdev->irq.pflip[0]) {
 		mode_int |= S_006540_D1MODE_VBLANK_INT_MASK(1);
 	}
 	if (rdev->irq.crtc_vblank_int[1] ||
-	    atomic_read(&rdev->irq.pflip[1])) {
+	    rdev->irq.pflip[1]) {
 		mode_int |= S_006540_D2MODE_VBLANK_INT_MASK(1);
 	}
 	if (rdev->irq.hpd[0]) {
@@ -686,6 +686,7 @@ int rs600_irq_process(struct radeon_device *rdev)
 		/* GUI idle */
 		if (G_000040_GUI_IDLE(status)) {
 			rdev->irq.gui_idle_acked = true;
+			rdev->pm.gui_idle = true;
 			wake_up(&rdev->irq.idle_queue);
 		}
 		/* Vertical blank interrupts */
@@ -695,7 +696,7 @@ int rs600_irq_process(struct radeon_device *rdev)
 				rdev->pm.vblank_sync = true;
 				wake_up(&rdev->irq.vblank_queue);
 			}
-			if (atomic_read(&rdev->irq.pflip[0]))
+			if (rdev->irq.pflip[0])
 				radeon_crtc_handle_flip(rdev, 0);
 		}
 		if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) {
@@ -704,7 +705,7 @@ int rs600_irq_process(struct radeon_device *rdev)
 				rdev->pm.vblank_sync = true;
 				wake_up(&rdev->irq.vblank_queue);
 			}
-			if (atomic_read(&rdev->irq.pflip[1]))
+			if (rdev->irq.pflip[1])
 				radeon_crtc_handle_flip(rdev, 1);
 		}
 		if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) {
diff --git a/trunk/drivers/gpu/drm/radeon/si.c b/trunk/drivers/gpu/drm/radeon/si.c
index 34603b3c80ab..c7b61f16ecfd 100644
--- a/trunk/drivers/gpu/drm/radeon/si.c
+++ b/trunk/drivers/gpu/drm/radeon/si.c
@@ -1762,7 +1762,7 @@ void si_fence_ring_emit(struct radeon_device *rdev,
  */
 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
-	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
 	u32 header;
 
 	if (ib->is_const_ib)
@@ -2702,7 +2702,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 		if (ib->is_const_ib)
 			ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
 		else {
-			switch (ib->ring) {
+			switch (ib->fence->ring) {
 			case RADEON_RING_TYPE_GFX_INDEX:
 				ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
 				break;
@@ -2711,7 +2711,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 				ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
 				break;
 			default:
-				dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
+				dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring);
 				ret = -EINVAL;
 				break;
 			}
@@ -2942,6 +2942,7 @@ static void si_disable_interrupts(struct radeon_device *rdev)
 	WREG32(IH_RB_RPTR, 0);
 	WREG32(IH_RB_WPTR, 0);
 	rdev->ih.enabled = false;
+	rdev->ih.wptr = 0;
 	rdev->ih.rptr = 0;
 }
 
@@ -3092,45 +3093,45 @@ int si_irq_set(struct radeon_device *rdev)
 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
 
 	/* enable CP interrupts on all rings */
-	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
+	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
 		DRM_DEBUG("si_irq_set: sw int gfx\n");
 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
 	}
-	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
+	if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) {
 		DRM_DEBUG("si_irq_set: sw int cp1\n");
 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
 	}
-	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
+	if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) {
 		DRM_DEBUG("si_irq_set: sw int cp2\n");
 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
 	}
 	if (rdev->irq.crtc_vblank_int[0] ||
-	    atomic_read(&rdev->irq.pflip[0])) {
+	    rdev->irq.pflip[0]) {
 		DRM_DEBUG("si_irq_set: vblank 0\n");
 		crtc1 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[1] ||
-	    atomic_read(&rdev->irq.pflip[1])) {
+	    rdev->irq.pflip[1]) {
 		DRM_DEBUG("si_irq_set: vblank 1\n");
 		crtc2 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[2] ||
-	    atomic_read(&rdev->irq.pflip[2])) {
+	    rdev->irq.pflip[2]) {
 		DRM_DEBUG("si_irq_set: vblank 2\n");
 		crtc3 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[3] ||
-	    atomic_read(&rdev->irq.pflip[3])) {
+	    rdev->irq.pflip[3]) {
 		DRM_DEBUG("si_irq_set: vblank 3\n");
 		crtc4 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[4] ||
-	    atomic_read(&rdev->irq.pflip[4])) {
+	    rdev->irq.pflip[4]) {
 		DRM_DEBUG("si_irq_set: vblank 4\n");
 		crtc5 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[5] ||
-	    atomic_read(&rdev->irq.pflip[5])) {
+	    rdev->irq.pflip[5]) {
 		DRM_DEBUG("si_irq_set: vblank 5\n");
 		crtc6 |= VBLANK_INT_MASK;
 	}
@@ -3358,27 +3359,29 @@ int si_irq_process(struct radeon_device *rdev)
 	u32 rptr;
 	u32 src_id, src_data, ring_id;
 	u32 ring_index;
+	unsigned long flags;
 	bool queue_hotplug = false;
 
 	if (!rdev->ih.enabled || rdev->shutdown)
 		return IRQ_NONE;
 
 	wptr = si_get_ih_wptr(rdev);
-
-restart_ih:
-	/* is somebody else already processing irqs? */
-	if (atomic_xchg(&rdev->ih.lock, 1))
-		return IRQ_NONE;
-
 	rptr = rdev->ih.rptr;
 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
 
+	spin_lock_irqsave(&rdev->ih.lock, flags);
+	if (rptr == wptr) {
+		spin_unlock_irqrestore(&rdev->ih.lock, flags);
+		return IRQ_NONE;
+	}
+restart_ih:
 	/* Order reading of wptr vs. reading of IH ring data */
 	rmb();
 
 	/* display interrupts */
 	si_irq_ack(rdev);
 
+	rdev->ih.wptr = wptr;
 	while (rptr != wptr) {
 		/* wptr/rptr are in bytes! */
 		ring_index = rptr / 4;
@@ -3396,7 +3399,7 @@ int si_irq_process(struct radeon_device *rdev)
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (atomic_read(&rdev->irq.pflip[0]))
+				if (rdev->irq.pflip[0])
 					radeon_crtc_handle_flip(rdev, 0);
 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D1 vblank\n");
@@ -3422,7 +3425,7 @@ int si_irq_process(struct radeon_device *rdev)
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (atomic_read(&rdev->irq.pflip[1]))
+				if (rdev->irq.pflip[1])
 					radeon_crtc_handle_flip(rdev, 1);
 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D2 vblank\n");
@@ -3448,7 +3451,7 @@ int si_irq_process(struct radeon_device *rdev)
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (atomic_read(&rdev->irq.pflip[2]))
+				if (rdev->irq.pflip[2])
 					radeon_crtc_handle_flip(rdev, 2);
 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D3 vblank\n");
@@ -3474,7 +3477,7 @@ int si_irq_process(struct radeon_device *rdev)
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (atomic_read(&rdev->irq.pflip[3]))
+				if (rdev->irq.pflip[3])
 					radeon_crtc_handle_flip(rdev, 3);
 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D4 vblank\n");
@@ -3500,7 +3503,7 @@ int si_irq_process(struct radeon_device *rdev)
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (atomic_read(&rdev->irq.pflip[4]))
+				if (rdev->irq.pflip[4])
 					radeon_crtc_handle_flip(rdev, 4);
 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D5 vblank\n");
@@ -3526,7 +3529,7 @@ int si_irq_process(struct radeon_device *rdev)
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (atomic_read(&rdev->irq.pflip[5]))
+				if (rdev->irq.pflip[5])
 					radeon_crtc_handle_flip(rdev, 5);
 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D6 vblank\n");
@@ -3617,6 +3620,7 @@ int si_irq_process(struct radeon_device *rdev)
 			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
+			rdev->pm.gui_idle = true;
 			wake_up(&rdev->irq.idle_queue);
 			break;
 		default:
@@ -3628,17 +3632,15 @@ int si_irq_process(struct radeon_device *rdev)
 		rptr += 16;
 		rptr &= rdev->ih.ptr_mask;
 	}
+	/* make sure wptr hasn't changed while processing */
+	wptr = si_get_ih_wptr(rdev);
+	if (wptr != rdev->ih.wptr)
+		goto restart_ih;
 	if (queue_hotplug)
 		schedule_work(&rdev->hotplug_work);
 	rdev->ih.rptr = rptr;
 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
-	atomic_set(&rdev->ih.lock, 0);
-
-	/* make sure wptr hasn't changed while processing */
-	wptr = si_get_ih_wptr(rdev);
-	if (wptr != rptr)
-		goto restart_ih;
-
+	spin_unlock_irqrestore(&rdev->ih.lock, flags);
	return IRQ_HANDLED;
 }
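
Note on the reworked semaphore interface: radeon_semaphore_sync_rings() now takes a bool array of
source rings plus a destination ring index, and radeon_semaphore_free() takes the semaphore by value
together with the fence that keeps its sub-allocation alive. The following is a minimal caller-side
sketch of that pattern, modelled on the radeon_move_blit() hunk above; the helper name
example_sync_before_copy is hypothetical and error handling is abbreviated, it is not part of the patch.

/* Illustrative sketch only: make dst work on fence->ring wait for
 * outstanding work on old_fence->ring before emitting a copy there.
 * Assumes rdev, old_fence and fence are set up as in radeon_move_blit().
 */
static int example_sync_before_copy(struct radeon_device *rdev,
				    struct radeon_fence *old_fence,
				    struct radeon_fence *fence)
{
	struct radeon_semaphore *sem = NULL;
	bool sync_to_ring[RADEON_NUM_RINGS] = { };
	int r;

	/* only sync when the previous work lives on a different ring
	 * and has not signaled yet */
	if (!old_fence || old_fence->ring == fence->ring ||
	    radeon_fence_signaled(old_fence))
		return 0;

	sync_to_ring[old_fence->ring] = true;

	r = radeon_semaphore_create(rdev, &sem);
	if (r)
		return r;

	/* emits a signal on old_fence->ring and a wait on fence->ring */
	r = radeon_semaphore_sync_rings(rdev, sem, sync_to_ring, fence->ring);
	if (r) {
		radeon_semaphore_free(rdev, sem, NULL);
		return r;
	}

	/* the semaphore memory must outlive the GPU's use of it, so free
	 * it against the fence rather than immediately */
	radeon_semaphore_free(rdev, sem, fence);
	return 0;
}

Passing fence instead of NULL to radeon_semaphore_free() defers reclaiming the semaphore's
sub-allocation until that fence signals, which is why the sketch frees with the fence only on the
success path.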